一个过cf turnstile跟五秒盾的方法

使用DrissionPage采集某个cloudflare保护的网页,当ip质量好时会有5秒盾,ip质量不好时会出现turnstile。

之前我写过一篇突破cf五秒盾的文章,那个方法是手工获取cookie后填入来实现的,当时好像确实也没有很好的自动过盾方案。使用DrissionPage过5秒盾很简单:tab.get(),然后再sleep(5)就行了。但是要过turnstile比较麻烦。我在网上找到一个脚本,可以很顺畅地过turnstile,在此记录一下。

turnstile_bypass.py 代码如下

import os
import tempfile
import json
import shutil
import subprocess
from sys import platform

try:
    from DrissionPage import Chromium, ChromiumOptions
except ImportError:
    # DrissionPage is missing: install it on the fly, then retry the import.
    # Without the retry, Chromium/ChromiumOptions stay undefined and the
    # function definitions below fail with NameError.
    import importlib
    import sys

    # Run pip through the current interpreter so the package is installed
    # into the environment this module is actually running in.
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'DrissionPage'])
    # Make the import system notice the freshly installed package.
    importlib.invalidate_caches()
    from DrissionPage import Chromium, ChromiumOptions

# Manifest (version 3) of the temporary browser extension. It registers a
# single content script, script.js, for all URLs and all frames, requested
# at "document_start" in the "MAIN" world.
MANIFEST_CONTENT = {
    "manifest_version": 3,
    "name": "Turnstile Patcher",
    "version": "0.1",
    "content_scripts": [
        {
            "js": ["./script.js"],
            "matches": ["<all_urls>"],
            "run_at": "document_start",
            "all_frames": True,
            "world": "MAIN",
        },
    ],
}

# JavaScript source of the extension's content script. Each time it runs it
# draws one random screenX (800-1200) and screenY (400-600) and defines them
# as fixed values on MouseEvent.prototype. _create_extension() writes this
# text, stripped of surrounding whitespace, to script.js.
SCRIPT_CONTENT = """
function getRandomInt(min, max) {
return Math.floor(Math.random() * (max - min + 1)) + min;
}
let screenX = getRandomInt(800, 1200);
let screenY = getRandomInt(400, 600);
Object.defineProperty(MouseEvent.prototype, 'screenX', { value: screenX });
Object.defineProperty(MouseEvent.prototype, 'screenY', { value: screenY });
"""

def _create_extension() -> str:
    """Write the patcher extension into a fresh temporary directory.

    The directory (prefix ``turnstile_extension_``) receives two files:
    ``manifest.json`` with MANIFEST_CONTENT serialized as JSON (indent 4),
    and ``script.js`` with SCRIPT_CONTENT stripped of surrounding whitespace.

    Returns:
        Path of the created directory. The caller must delete it.

    Raises:
        Exception: If writing either file fails. The temporary directory is
            removed via _cleanup_extension() before the error is raised.
    """
    ext_dir = tempfile.mkdtemp(prefix='turnstile_extension_')

    try:
        manifest_file_path = os.path.join(ext_dir, 'manifest.json')
        with open(manifest_file_path, 'w', encoding='utf-8') as manifest_file:
            json.dump(MANIFEST_CONTENT, manifest_file, indent=4)

        script_file_path = os.path.join(ext_dir, 'script.js')
        with open(script_file_path, 'w', encoding='utf-8') as script_file:
            script_file.write(SCRIPT_CONTENT.strip())
    except Exception as e:
        # Do not leave a half-written extension directory behind.
        _cleanup_extension(ext_dir)
        raise Exception(f"创建扩展失败: {e}")

    return ext_dir

def _cleanup_extension(path: str):
    """Remove the temporary extension directory on a best-effort basis.

    Does nothing when ``path`` does not exist. Any error raised while
    checking or deleting is printed and swallowed, never propagated.

    Args:
        path: Directory to delete recursively.
    """
    try:
        if not os.path.exists(path):
            return
        shutil.rmtree(path)
    except Exception as e:
        print(f"清理临时文件失败: {e}")

def get_patched_browser(options: ChromiumOptions = None, headless=True) -> Chromium:
    """Launch a Chromium instance with the Turnstile patch extension loaded.

    Args:
        options: ChromiumOptions to start from. When None, a default
            configuration with an automatically chosen port is created.
            The object is modified in place (arguments and the extension
            are added to it).
        headless: Only the exact value True enables headless mode. In that
            case a regular desktop Chrome user agent matching the current
            platform is also set.

    Returns:
        Chromium: The launched browser instance.

    Raises:
        RuntimeError: If ``options`` disables images or enables incognito
            mode; the checks below reject both configurations.
        Exception: If the temporary extension cannot be created or the
            browser fails to start.
    """
    if platform in ("linux", "linux2"):
        platform_id = "X11; Linux x86_64"
    elif platform == "darwin":
        platform_id = "Macintosh; Intel Mac OS X 10_15_7"
    else:
        platform_id = "Windows NT 10.0; Win64; x64"
    user_agent = (
        f"Mozilla/5.0 ({platform_id}) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/133.0.6943.99 Safari/537.36"
    )

    if options is None:
        options = ChromiumOptions().auto_port()

    if headless is True:
        options.headless(True)
        options.set_user_agent(user_agent)

    options.set_argument("--no-sandbox")

    # NOTE(review): reads the private ``_arguments`` attribute of
    # ChromiumOptions; confirm it is still available when upgrading DrissionPage.
    if "--blink-settings=imagesEnabled=false" in options._arguments:
        raise RuntimeError("To bypass Turnstile, imagesEnabled must be True")
    if "--incognito" in options._arguments:
        raise RuntimeError("Cannot bypass Turnstile in incognito mode. Please run in normal browser mode.")

    extension_path = _create_extension()
    try:
        options.add_extension(extension_path)
        return Chromium(options)
    finally:
        # Remove the temporary extension directory on success and on failure.
        # Cleanup is best-effort, so a failed delete can neither discard an
        # already-launched browser nor mask the original launch error.
        _cleanup_extension(extension_path)

def click_turnstile_checkbox(tab) -> bool:
    """Wait for the Turnstile widget, click its checkbox and report the result.

    The widget is located through the ``cf-turnstile-response`` element:
    its parent's shadow root holds an iframe, and the shadow root of that
    iframe's body holds the checkbox input and the ``success`` element.

    Args:
        tab: Tab object of the Chromium returned by get_patched_browser().

    Returns:
        bool: True if the ``success`` element is displayed within 1 second
        after the click. False otherwise, including when the widget is not
        found or any exception occurs (the error is printed, not raised).
    """
    response_locator = "@name=cf-turnstile-response"
    try:
        if not tab.wait.eles_loaded(response_locator):
            raise RuntimeError("未检测到 cloudflare turnstile 组件")

        widget_host = tab.ele(response_locator).parent()
        challenge_frame = widget_host.shadow_root.ele("tag:iframe")
        frame_root = challenge_frame.ele("tag:body").shadow_root

        # The checkbox gets a longer timeout than the other lookups.
        tick_box = frame_root.ele("tag:input", timeout=20)
        success_marker = frame_root.ele("@id=success")

        tick_box.click()
        return bool(tab.wait.ele_displayed(success_marker, timeout=1))
    except Exception as e:
        print(f"Turnstile 处理失败: {e}")
        return False

引用

import time

import turnstile_bypass

# Launch the browser through get_patched_browser() so the patch extension is
# loaded; click_turnstile_checkbox() expects a tab from such a browser.
# headless=False keeps a visible window, like a plain Chromium() would.
browser = turnstile_bypass.get_patched_browser(headless=False)
tab = browser.latest_tab
tab.get('https://test.com/forum.php')
# Give the 5-second challenge time to finish or the Turnstile widget time to appear.
time.sleep(5)
print(turnstile_bypass.click_turnstile_checkbox(tab))

经过测试,可以100%通过,终于可以愉快的采集了

站内相关文章:

Comment ()
评论是一种美德,说点什么吧,否则我会恨你的。。。