-
Notifications
You must be signed in to change notification settings - Fork 0
/
collect_captcha.py
99 lines (78 loc) · 3.49 KB
/
collect_captcha.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import os
import time
import base64
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# 修改保存图像路径的设置
save_image_path = os.path.join(os.getcwd(), 'saved_captcha')
# 确保文件夹存在
os.makedirs(save_image_path, exist_ok=True)
def process_and_read_captcha(driver, save_image_path):
# 显式等待确保 captchaImg 元素加载完成
wait = WebDriverWait(driver, 10)
box_certification_element = wait.until(EC.presence_of_element_located((By.CLASS_NAME, "box_certification")))
captcha_element = box_certification_element.find_element(By.CLASS_NAME, "captcha")
api_element = captcha_element.find_element(By.CLASS_NAME, "api")
captcha_img_element = api_element.find_element(By.ID, "captchaImg")
# 获取图片的 base64 数据
captcha_img_src = captcha_img_element.get_attribute("src")
# 确保获取的是base64编码的数据
if captcha_img_src.startswith("data:image"):
base64_img_data = captcha_img_src.split(",")[1]
img_data = base64.b64decode(base64_img_data)
# 使用 os.path.join 构建完整的文件路径
image_file_path = os.path.join(save_image_path, f'{10 * x + i + 1}.png')
# 保存图片
with open(image_file_path, 'wb') as file:
file.write(img_data)
print(f"验证码图片已保存: {image_file_path}")
else:
print("验证码图片源格式不正确")
# 浏览器配置对象
chrome_options = Options()
print("浏览器配置对象")
# 连接到手动启动的 Chrome 实例
chrome_options.add_experimental_option("debuggerAddress", "127.0.0.1:9222")
print("连接到手动启动的 Chrome 实例")
# 隐藏 WebDriver 的痕迹
chrome_options.add_argument('--disable-blink-features=AutomationControlled')
print("隐藏 WebDriver 的痕迹")
# 启动 WebDriver 并连接到已启动的 Chrome 实例
driver = webdriver.Chrome(options=chrome_options)
print("启动 WebDriver 并连接到已启动的 Chrome 实例")
# 设置隐式等待
driver.implicitly_wait(5)
print("设置隐式等待")
tabs = driver.window_handles
print("打开多个标签页后所有窗口句柄:", tabs)
input("等待用户按回车键以开始检测预订按钮")
try:
# 打印当前Chrome的所有句柄
print("当前Chrome的所有句柄:", driver.window_handles)
# 检查是否有窗口句柄可用
if driver.window_handles:
# 指定切换窗口默认为最新
driver.switch_to.window(driver.window_handles[0])
print("切换到的窗口句柄:", driver.current_window_handle)
else:
print("没有可用的窗口句柄。")
# 循环语句进行200次
for x in range(20):
for i in range(10):
# 使用正确的文件路径格式
process_and_read_captcha(driver, save_image_path)
# 等待时间
n = 0.5
time.sleep(n)
# 点击刷新按钮
reload_button = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.ID, 'btnReload'))
)
reload_button.click()
# 由于连续点击12次会弹窗警告 只能暂停30s
time.sleep(30)
except Exception as e:
print(f"发生错误: {str(e)}")