Skip to content

Commit 19caf9e

Browse files
Meatplaycodders
Meatplay
authored and committed
WIP solve amazon captcha with 2captcha
Use coordinate method. Only search for relevant info on page.
1 parent 44a8037 commit 19caf9e

9 files changed

+104
-14
lines changed

Pipfile

+1
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ gunicorn = "23.0.0"
3535
flask-api = {editable = true, ref = "develop", git = "git+https://github.com/flask-api/flask-api.git"}
3636
setuptools = "==75.6.0"
3737
certifi = "==2024.12.14"
38+
2captcha-python = "*"
3839

3940
[dev-packages]
4041
exceptiongroup = "*"

Pipfile.lock

+14-5
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

flathunter/abstract_crawler.py

+62-1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55
from typing import Optional, Any
66
import json
77

8+
from io import BytesIO
9+
import base64
10+
811
import backoff
912
import requests
1013
# pylint: disable=unused-import
@@ -13,10 +16,11 @@
1316
from bs4 import BeautifulSoup
1417

1518
from selenium.common.exceptions import NoSuchElementException, TimeoutException
16-
from selenium.webdriver import Chrome
19+
from selenium.webdriver import Chrome, Keys
1720
from selenium.webdriver.common.by import By
1821
from selenium.webdriver.support import expected_conditions as EC
1922
from selenium.webdriver.support.wait import WebDriverWait
23+
from selenium.webdriver.common.action_chains import ActionChains
2024

2125
from flathunter import proxies
2226
from flathunter.captcha.captcha_solver import CaptchaUnsolvableError
@@ -196,6 +200,7 @@ def resolve_geetest(self, driver):
196200
driver.refresh()
197201
raise
198202

203+
# pylint: disable=too-many-locals
199204
@backoff.on_exception(wait_gen=backoff.constant,
200205
exception=CaptchaUnsolvableError,
201206
max_tries=3)
@@ -268,6 +273,62 @@ def log_filter(log_):
268273
driver.refresh()
269274
raise
270275

276+
@backoff.on_exception(wait_gen=backoff.constant,
                      exception=CaptchaUnsolvableError,
                      max_tries=3)
def resolve_amazon(self, driver):
    """Resolve the Amazon (AWS WAF) image captcha displayed in `driver`.

    Steps: scroll to the bottom of the page, move the captcha widget's
    <select> one option down, screenshot the widget, pass the base64-encoded
    PNG to `self.captcha_solver.solve_amazon`, click every coordinate found in
    the returned token, then click the verify button if it exists.

    Raises:
        CaptchaUnsolvableError: when any step fails, or when the
            `awswaf-captcha` element is still on the page afterwards. The
            page is refreshed before raising; the `backoff` decorator above
            retries the whole method (max_tries=3).
    """
    shadow_root_script = "return document.querySelector('awswaf-captcha').shadowRoot"
    # Subtracted from every click position before it is used as an offset.
    # NOTE(review): presumably half of the widget's width/height, since
    # Selenium 4 measures element offsets from the centre - confirm.
    offset_x, offset_y = 160, 211
    try:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        sleep(3)
        captcha_root = driver.execute_script(shadow_root_script).find_element(By.ID, "root")
        size = captcha_root.size
        puzzle_select = captcha_root.find_element(By.TAG_NAME, "select")
        puzzle_select.click()
        sleep(1)
        puzzle_select.send_keys(Keys.DOWN)
        sleep(3)
        # Look the widget up again after the selection changed before using it further.
        captcha_root = driver.execute_script(shadow_root_script).find_element(By.ID, "root")
        # The PNG bytes can be encoded directly; no intermediate buffer is needed.
        base64_screenshot = base64.b64encode(captcha_root.screenshot_as_png).decode('utf-8')
        # Send image in 2captcha service
        result = self.captcha_solver.solve_amazon(base64_screenshot)
        logger.info(result.token)
        # The token is parsed as "<prefix>:<key>=<int>,<key>=<int>;<key>=<int>,..."
        # into one [first, second] integer pair per ';'-separated entry.
        click_points = [
            [int(pair.split('=')[1]) for pair in point.split(',')]
            for point in result.token.split(':')[1].split(';')
        ]
        # Extra final click 30px inside the bottom-right corner of the widget,
        # using the size measured before the selection was changed
        # (presumably where the confirm button sits - confirm).
        click_points.append([size['width'] - 30, size['height'] - 30])
        actions = ActionChains(driver)
        for point in click_points:
            # Each click is performed and cleared before the next one is queued.
            actions.move_to_element_with_offset(
                captcha_root, point[0] - offset_x, point[1] - offset_y
            ).click()
            actions.perform()
            sleep(0.5)
            actions.reset_actions()
        sleep(1)
        try:
            confirm_button = captcha_root.find_element(By.ID, "amzn-btn-verify-internal")
            actions.move_to_element_with_offset(confirm_button, 40, 15).click()
            actions.perform()
            sleep(4)
        except NoSuchElementException:
            # No verify button found: go straight to the "is it gone?" check.
            pass
        try:
            driver.find_element(By.TAG_NAME, "awswaf-captcha")
        except NoSuchElementException:
            logger.info("Captcha solved")
        else:
            # The captcha element is still on the page, so the attempt failed.
            raise CaptchaUnsolvableError()
    except Exception as ex:
        # Any failure (including the raise above) reloads the page and is
        # reported as CaptchaUnsolvableError so the backoff decorator retries.
        driver.refresh()
        raise CaptchaUnsolvableError() from ex
331+
271332
@backoff.on_exception(wait_gen=backoff.constant,
272333
exception=CaptchaUnsolvableError,
273334
max_tries=3)

flathunter/captcha/capmonster_solver.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
"""Captcha solver for CapMonster Captcha Solving Service (https://capmonster.cloud)"""
2-
import json
32
from typing import Dict
43
from time import sleep
54
import backoff
@@ -8,8 +7,6 @@
87
from flathunter.logging import logger
98
from flathunter.captcha.captcha_solver import (
109
CaptchaSolver,
11-
CaptchaBalanceEmpty,
12-
CaptchaUnsolvableError,
1310
GeetestResponse,
1411
AwsAwfResponse,
1512
RecaptchaResponse,
@@ -26,6 +23,8 @@ def solve_recaptcha(self, google_site_key: str, page_url: str) -> RecaptchaRespo
2623
"""Should be implemented in subclass"""
2724
raise NotImplementedError("Recaptcha captcha solving is not implemented for Capmonster")
2825

26+
# pylint: disable=too-many-arguments
27+
# pylint: disable=too-many-positional-arguments
2928
def solve_awswaf(
3029
self,
3130
sitekey: str,

flathunter/captcha/captcha_solver.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ class AwsAwfResponse:
2222
"""Response from AWS WAF"""
2323
token: str
2424

25-
2625
class CaptchaSolver:
2726
"""Interface for Captcha solvers"""
2827

@@ -39,6 +38,8 @@ def solve_geetest(self, geetest: str, challenge: str, page_url: str) -> GeetestR
3938
"""Should be implemented in subclass"""
4039
raise NotImplementedError()
4140

41+
# pylint: disable=too-many-arguments
42+
# pylint: disable=too-many-positional-arguments
4243
def solve_awswaf(
4344
self,
4445
sitekey: str,

flathunter/captcha/imagetyperz_solver.py

+2
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ def solve_recaptcha(self, google_site_key: str, page_url: str) -> RecaptchaRespo
5959
)
6060
return RecaptchaResponse(self.__retrieve_imagetyperz_result(captcha_id))
6161

62+
# pylint: disable=too-many-arguments
63+
# pylint: disable=too-many-positional-arguments
6264
def solve_awswaf(
6365
self,
6466
sitekey: str,

flathunter/captcha/twocaptcha_solver.py

+16-2
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from time import sleep
55
import backoff
66
import requests
7+
from twocaptcha import TwoCaptcha
78

89
from flathunter.logging import logger
910
from flathunter.captcha.captcha_solver import (
@@ -47,6 +48,8 @@ def solve_recaptcha(self, google_site_key: str, page_url: str) -> RecaptchaRespo
4748
captcha_id = self.__submit_2captcha_request(params)
4849
return RecaptchaResponse(self.__retrieve_2captcha_result(captcha_id))
4950

51+
# pylint: disable=too-many-arguments
52+
# pylint: disable=too-many-positional-arguments
5053
def solve_awswaf(
5154
self,
5255
sitekey: str,
@@ -56,8 +59,19 @@ def solve_awswaf(
5659
captcha_script: str,
5760
page_url: str
5861
) -> AwsAwfResponse:
59-
"""Should be implemented at some point"""
60-
raise NotImplementedError("AWS WAF captchas not supported for 2Captcha")
62+
"""Using the `solve_amazon` method instead"""
63+
raise NotImplementedError()
64+
65+
def solve_amazon(self, image_b64: str) -> AwsAwfResponse:
    """Solve an AWS WAF image captcha through 2Captcha's coordinates method.

    Args:
        image_b64: the captcha image, passed unchanged to
            `TwoCaptcha.coordinates` (the parameter name suggests a
            base64-encoded image).

    Returns:
        An `AwsAwfResponse` wrapping the `"code"` entry of the solver result.

    Raises:
        CaptchaUnsolvableError: if the solver returns `None`.
    """
    client = TwoCaptcha(self.api_key, defaultTimeout=60, pollingInterval=5)
    answer = client.coordinates(image_b64, lang='en')
    if answer is not None:
        return AwsAwfResponse(answer["code"])
    raise CaptchaUnsolvableError("Got None from 2captcha solve")
6175

6276
@backoff.on_exception(**CaptchaSolver.backoff_options)
6377
def __submit_2captcha_request(self, params: Dict[str, str]) -> str:

flathunter/crawler/immobilienscout.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
from flathunter.abstract_crawler import Crawler
1212
from flathunter.logging import logger
1313
from flathunter.chrome_wrapper import get_chrome_driver
14-
from flathunter.captcha.twocaptcha_solver import TwoCaptchaSolver
1514
from flathunter.exceptions import DriverLoadException
1615

1716
STATIC_URL_PATTERN = re.compile(r'https://www\.immobilienscout24\.de')
@@ -35,7 +34,7 @@ class Immobilienscout(Crawler):
3534

3635
URL_PATTERN = STATIC_URL_PATTERN
3736

38-
JSON_PATH_PARSER_ENTRIES = parse("$..['resultlist.realEstate']")
37+
JSON_PATH_PARSER_ENTRIES = parse("$..['resultlistEntries']..['resultlist.realEstate']")
3938
JSON_PATH_PARSER_IMAGES = parse("$..galleryAttachments"
4039
"..attachment[?'@xsi.type'=='common:Picture']"
4140
"..['@href'].`sub(/(.*\\\\.jpe?g).*/, \\\\1)`")
@@ -117,6 +116,8 @@ def get_results(self, search_url, max_pages=None):
117116

118117
def get_entries_from_javascript(self):
119118
"""Get entries from JavaScript"""
119+
if "Warum haben wir deine Anfrage blockiert?" in self.get_driver_force().page_source:
120+
self.resolve_amazon(self.get_driver_force())
120121
try:
121122
result_json = self.get_driver_force().execute_script('return window.IS24.resultList;')
122123
except JavascriptException:

flathunter/gmaps_duration_processor.py

+2
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ def process_expose(self, expose):
2424

2525
def get_formatted_durations(self, address):
2626
"""Return a formatted list of GoogleMaps durations"""
27+
if address is None:
28+
return ""
2729
out = ""
2830
for duration in self.config.get('durations', []):
2931
if 'destination' in duration and 'name' in duration:

0 commit comments

Comments
 (0)