forked from eastee/rebreakcaptcha
-
Notifications
You must be signed in to change notification settings - Fork 0
/
rebreakcaptcha.py
195 lines (151 loc) · 8.21 KB
/
rebreakcaptcha.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
import requests
import io
import random
import time
import os
# Speech Recognition Imports
from pydub import AudioSegment
import speech_recognition as sr
# Selenium
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
from selenium.common.exceptions import NoSuchElementException
from selenium import webdriver
# Firefox / Gecko Driver Related
# Path of the Firefox executable; handed to FirefoxBinary when the driver is created.
FIREFOX_BIN_PATH = r"C:\Program Files (x86)\Mozilla Firefox\firefox.exe"
# Path of the geckodriver executable; rebreakcaptcha.__init__ uses it to extend PATH.
GECKODRIVER_BIN = r"C:\geckodriver.exe"
# Randomization Related
# Bounds passed to random.uniform() for the short pauses (time.sleep, i.e. seconds).
MIN_RAND = 0.64
MAX_RAND = 1.27
# Bounds passed to random.uniform() for the long pauses (time.sleep, i.e. seconds).
LONG_MIN_RAND = 4.78
LONG_MAX_RAND = 11.1
# Number of solve attempts main() performs.
NUMBER_OF_ITERATIONS = 100
# Page that get_recaptcha_challenge() loads at the start of every attempt.
RECAPTCHA_PAGE_URL = "https://www.google.com/recaptcha/api2/demo"
class rebreakcaptcha(object):
    """Drive Firefox through a ReCaptcha page and answer its audio challenge.

    The audio clip offered by the challenge is downloaded, converted to WAV
    in memory and transcribed with Google Speech Recognition; the transcript
    is then typed back into the challenge form.
    """

    # Selectors that are looked up in more than one place.
    _CHECKBOX_XPATH = '//div[@class="recaptcha-checkbox-checkmark" and @role="presentation"]'
    _SOLVED_XPATH = '//span[@aria-checked="true"]'
    _AUDIO_BUTTON_XPATH = '//button[@title="Get an audio challenge"]'
    _DOWNLOAD_LINK_XPATH = '//a[@class="rc-audiochallenge-download-link"]'
    _TEXT_CHALLENGE_XPATH = '//div[@class="rc-text-challenge"]'

    # Seconds to wait for the challenge audio download before giving up.
    _DOWNLOAD_TIMEOUT = 30

    def __init__(self):
        """Start a Firefox session driven through geckodriver."""
        # Label used in every log line; solve() overwrites it on each call.
        # Set here so the helper methods can also be used on their own.
        self.current_iteration = 0
        # PATH entries are directories, so register the folder that holds
        # geckodriver rather than the executable itself.
        os.environ["PATH"] = (os.environ.get("PATH", "") + os.pathsep +
                              self._driver_directory(GECKODRIVER_BIN))
        self.driver = webdriver.Firefox(firefox_binary=FirefoxBinary(FIREFOX_BIN_PATH))

    @staticmethod
    def _driver_directory(driver_path):
        """Return the directory to put on PATH for *driver_path*.

        A path that already is a directory is returned unchanged; otherwise
        the containing directory is returned (the current directory for a
        bare file name).
        """
        if os.path.isdir(driver_path):
            return driver_path
        return os.path.dirname(driver_path) or os.curdir

    def is_exists_by_xpath(self, xpath):
        """Return True if an element matching *xpath* exists in the current frame."""
        try:
            self.driver.find_element_by_xpath(xpath)
        except NoSuchElementException:
            return False
        return True

    def get_recaptcha_challenge(self):
        """Reload the ReCaptcha page until ticking the checkbox yields a challenge.

        Returns with the driver focused on the first iframe of the page.
        The loop has no upper bound: it retries for as long as the checkbox
        is missing or is accepted without a challenge.
        """
        while True:
            # Navigate to a ReCaptcha page
            self.driver.get(RECAPTCHA_PAGE_URL)
            time.sleep(random.uniform(MIN_RAND, MAX_RAND))

            # Get all the iframes on the page
            iframes = self.driver.find_elements_by_tag_name("iframe")
            if not iframes:
                # Page did not render the widget; indexing would raise IndexError
                print("[{0}] No iframe found on the page!!".format(self.current_iteration))
                continue

            # Switch focus to ReCaptcha iframe
            self.driver.switch_to.frame(iframes[0])
            time.sleep(random.uniform(MIN_RAND, MAX_RAND))

            # Verify ReCaptcha checkbox is present
            if not self.is_exists_by_xpath(self._CHECKBOX_XPATH):
                print("[{0}] No element in the frame!!".format(self.current_iteration))
                continue

            # Click on ReCaptcha checkbox
            self.driver.find_element_by_xpath(self._CHECKBOX_XPATH).click()
            time.sleep(random.uniform(LONG_MIN_RAND, LONG_MAX_RAND))

            # Check if the ReCaptcha has no challenge
            if self.is_exists_by_xpath(self._SOLVED_XPATH):
                print("[{0}] ReCaptcha has no challenge. Trying again!".format(self.current_iteration))
            else:
                return

    def get_audio_challenge(self, iframes):
        """Switch to the last iframe in *iframes* and request the audio challenge.

        Returns True when the audio-challenge button was found and clicked,
        False when *iframes* is empty or the button is not present.
        """
        if not iframes:
            print("[{0}] No element of audio challenge!!".format(self.current_iteration))
            return False

        # Switch to the last iframe (the new one)
        self.driver.switch_to.frame(iframes[-1])

        # Check if the audio challenge button is present
        if not self.is_exists_by_xpath(self._AUDIO_BUTTON_XPATH):
            print("[{0}] No element of audio challenge!!".format(self.current_iteration))
            return False

        print("[{0}] Clicking on audio challenge".format(self.current_iteration))
        # Click on the audio challenge button
        self.driver.find_element_by_xpath(self._AUDIO_BUTTON_XPATH).click()
        time.sleep(random.uniform(LONG_MIN_RAND, LONG_MAX_RAND))
        return True

    def get_challenge_audio(self, url):
        """Download the MP3 at *url* and return it as an in-memory WAV stream.

        Returns an io.BytesIO positioned at offset 0. Raises a requests
        exception if the download times out or returns an HTTP error status.
        """
        # Download the challenge audio and store in memory
        response = requests.get(url, timeout=self._DOWNLOAD_TIMEOUT)
        # Fail here rather than feeding an HTML error page to the MP3 decoder
        response.raise_for_status()
        audio_file = io.BytesIO(response.content)

        # Convert the audio to a compatible format in memory
        converted_audio = io.BytesIO()
        sound = AudioSegment.from_mp3(audio_file)
        sound.export(converted_audio, format="wav")
        converted_audio.seek(0)
        return converted_audio

    def speech_to_text(self, audio_source):
        """Transcribe *audio_source* with Google Speech Recognition.

        Returns the recognized text, or an empty string when the audio could
        not be understood or the recognition service could not be reached.
        """
        # Initialize a new recognizer with the audio in memory as source
        recognizer = sr.Recognizer()
        with sr.AudioFile(audio_source) as source:
            audio = recognizer.record(source)  # read the entire audio file

        audio_output = ""
        # recognize speech using Google Speech Recognition
        try:
            # for testing purposes, we're just using the default API key
            # to use another API key, use
            # `recognizer.recognize_google(audio, key="GOOGLE_SPEECH_RECOGNITION_API_KEY")`
            audio_output = recognizer.recognize_google(audio)
            print("[{0}] Google Speech Recognition: ".format(self.current_iteration) + audio_output)
        except sr.UnknownValueError:
            print("[{0}] Google Speech Recognition could not understand audio".format(self.current_iteration))
        except sr.RequestError as e:
            # Both placeholders must be filled by a single format() call;
            # chaining two calls raises IndexError on the unfilled {1}.
            print("[{0}] Could not request results from Google Speech Recognition service; {1}".format(self.current_iteration, e))
        return audio_output

    def solve_audio_challenge(self):
        """Answer the audio challenge shown in the current frame.

        Returns True once an answer has been typed and the verify button
        clicked, False when no audio download link is available.
        """
        # Verify audio challenge download button is present
        if not self.is_exists_by_xpath(self._DOWNLOAD_LINK_XPATH) and \
                not self.is_exists_by_xpath(self._TEXT_CHALLENGE_XPATH):
            print("[{0}] No element in audio challenge download link!!".format(self.current_iteration))
            return False

        # If text challenge - reload the challenge
        while self.is_exists_by_xpath(self._TEXT_CHALLENGE_XPATH):
            print("[{0}] Got a text challenge! Reloading!".format(self.current_iteration))
            self.driver.find_element_by_id('recaptcha-reload-button').click()
            time.sleep(random.uniform(MIN_RAND, MAX_RAND))

        # The reload may leave the frame without a download link; report that
        # the same way as above instead of raising NoSuchElementException.
        if not self.is_exists_by_xpath(self._DOWNLOAD_LINK_XPATH):
            print("[{0}] No element in audio challenge download link!!".format(self.current_iteration))
            return False

        # Get the audio challenge URI from the download link
        download_object = self.driver.find_element_by_xpath(self._DOWNLOAD_LINK_XPATH)
        download_link = download_object.get_attribute('href')

        # Get the challenge audio to send to Google
        converted_audio = self.get_challenge_audio(download_link)

        # Send the audio to Google Speech Recognition API and get the output
        audio_output = self.speech_to_text(converted_audio)

        # Enter the audio challenge solution
        self.driver.find_element_by_id('audio-response').send_keys(audio_output)
        time.sleep(random.uniform(LONG_MIN_RAND, LONG_MAX_RAND))

        # Click on verify
        self.driver.find_element_by_id('recaptcha-verify-button').click()
        time.sleep(random.uniform(LONG_MIN_RAND, LONG_MAX_RAND))
        return True

    def solve(self, current_iteration):
        """Run one full attempt; *current_iteration* is the zero-based attempt index.

        Returns True when the ReCaptcha checkbox reports itself checked after
        the audio challenge was answered, False otherwise.
        """
        self.current_iteration = current_iteration + 1

        # Get a ReCaptcha Challenge
        self.get_recaptcha_challenge()

        # Switch to page's main frame
        self.driver.switch_to.default_content()

        # Get all the iframes on the page again- there is a new one with a challenge
        iframes = self.driver.find_elements_by_tag_name("iframe")
        if not iframes:
            return False

        # Get audio challenge. The result is deliberately not checked here:
        # solve_audio_challenge() performs its own check for the download link.
        self.get_audio_challenge(iframes)

        # Solve the audio challenge
        if not self.solve_audio_challenge():
            return False

        # Check if there is another audio challenge and solve it too
        while self.is_exists_by_xpath('//div[@class="rc-audiochallenge-error-message"]') and \
                self.is_exists_by_xpath('//div[contains(text(), "Multiple correct solutions required")]'):
            print("[{0}] Need to solve more. Let's do this!".format(self.current_iteration))
            # Stop when no further answer can be submitted; otherwise the
            # unchanged error message would keep this loop running forever.
            if not self.solve_audio_challenge():
                return False

        # Switch to the ReCaptcha iframe to verify it is solved
        self.driver.switch_to.default_content()
        self.driver.switch_to.frame(iframes[0])
        return self.is_exists_by_xpath(self._SOLVED_XPATH)
def main():
    """Run NUMBER_OF_ITERATIONS solve attempts and print the success count."""
    rebreakcaptcha_obj = rebreakcaptcha()

    counter = 0
    try:
        # range() works on both Python 2 and 3; xrange() exists only on Python 2
        for i in range(NUMBER_OF_ITERATIONS):
            if rebreakcaptcha_obj.solve(i):
                counter += 1

            time.sleep(random.uniform(LONG_MIN_RAND, LONG_MAX_RAND))
            print("Successful breaks: {0}".format(counter))
    finally:
        # Always end the browser session, even if an attempt raised, so no
        # Firefox / geckodriver process is left behind.
        rebreakcaptcha_obj.driver.quit()

    # "\{" is not a valid escape sequence; the summary is a successes/attempts ratio
    print("Total successful breaks: {0}/{1}".format(counter, NUMBER_OF_ITERATIONS))


if __name__ == '__main__':
    main()