# playwright
from playwright.sync_api import sync_playwright
# Target URL handed to fetch_with_playwright; left as an empty placeholder here.
url=''
def fetch_with_playwright(url):
    """Load ``url`` in Chromium via Playwright, print the page HTML and return it.

    The browser context is configured with a desktop Chrome user agent, a
    1920x1080 viewport, the ``zh-CN`` locale and the ``Asia/Shanghai`` timezone.

    Args:
        url: Address of the page to open.

    Returns:
        The markup returned by ``page.content()`` after navigation.
    """
    with sync_playwright() as p:
        # Headless by default; pass headless=False while debugging to watch the browser.
        browser = p.chromium.launch(headless=True)
        try:
            context = browser.new_context(
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
                viewport={"width": 1920, "height": 1080},  # realistic desktop resolution
                locale="zh-CN",  # Chinese locale
                timezone_id="Asia/Shanghai",  # China time zone
            )
            page = context.new_page()
            # Navigate and wait until the "load" state is reached.
            page.goto(url, wait_until="load")
            # Optional: scroll to the bottom before reading the DOM.
            # page.evaluate("window.scrollBy(0, document.body.scrollHeight)")
            content = page.content()
            print(content)
            return content
        finally:
            # Close the browser even when navigation or extraction fails.
            browser.close()
# Run the fetch against the module-level url.
fetch_with_playwright(url)
# requests (tls_client替代)
import tls_client
# Target URL for the request; left as an empty placeholder here.
url = ''

# Fingerprint values kept together so the TLS identity and the HTTP headers
# are easy to compare.
_JA3_FINGERPRINT = "771,4866-4867-4865-49196-49195-49188-49187-49162-49161-159-158-107-103-57-56-52393-52392-255,0-11-10-35-13-5-23-65281-43-45-51-21,29-23-24,0"
_CHROME_110_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"
_ACCEPT = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7"

# Session configured to imitate a browser at the TLS layer.
# NOTE(review): both a preset client_identifier and a custom ja3_string are
# supplied; confirm in the tls_client docs which one takes effect.
session = tls_client.Session(
    client_identifier="chrome_110",
    ja3_string=_JA3_FINGERPRINT,
)

# Common browser request headers, merged into the session defaults.
headers = {
    "User-Agent": _CHROME_110_USER_AGENT,
    "Accept": _ACCEPT,
    "Accept-Language": "zh-CN,zh;q=0.9",
}
session.headers.update(headers)

# Send the GET request and report the outcome.
response = session.get(url)
print(f"状态码: {response.status_code}")
# Only the first 500 characters are printed to keep the output short.
print(f"页面内容: {response.text[:500]}")
# httpx
import httpx
import ssl
# Target URL for the request; left as an empty placeholder here.
url = ''

# Custom TLS configuration: start from the default context and restrict the
# cipher list to ECDHE key exchange with AES-GCM.
# NOTE: per the ssl module docs, set_ciphers() does not change TLS 1.3 suites.
ssl_context = ssl.create_default_context()
ssl_context.set_ciphers('ECDHE+AESGCM')

# Create the client with the custom context and issue the request; the
# with-block closes the client when it exits.
with httpx.Client(verify=ssl_context) as client:
    response = client.get(url)
    print(f"状态码: {response.status_code}")
    print(f"页面内容: {response.text[:500]}")  # first 500 characters only
# 正文完