diff --git a/requirements.txt b/requirements.txt index a4e8fe74..c4aa2d33 100644 --- a/requirements.txt +++ b/requirements.txt @@ -38,4 +38,5 @@ SQLAlchemy-Utils~=0.38.2 ujson~=5.2.0 web3~=5.13.1 Werkzeug~=1.0.1 -markupsafe==2.0.1 \ No newline at end of file +markupsafe==2.0.1 +wget==3.2 diff --git a/say/crawler/__init__.py b/say/crawler/__init__.py index b758e940..ede6f44d 100644 --- a/say/crawler/__init__.py +++ b/say/crawler/__init__.py @@ -2,7 +2,8 @@ import html import re from typing import NamedTuple - +import subprocess +import json import requests from cachetools import TTLCache from cachetools import cached @@ -115,53 +116,66 @@ def __init__(self, url): except IndexError: self.dkp = None - def get_data(self, force=False): + + def call_api(self, url): + try: + # Run the wget command and capture the output + result = subprocess.run(['wget', '-qO-', url], capture_output=True, text=True, check=True) + return result.stdout + except subprocess.CalledProcessError as e: + return f"An error occurred: {e}" + + def parse_result(self, api_response): + try: + # Parse the JSON response + json_response = json.loads(api_response) + return json_response + except json.JSONDecodeError as e: + return f"An error occurred while parsing JSON: {e}" + + def get_data(self): result = None + parsed_result = None if self.dkp is None: return url = self.API_URL_NOT_FRESH % self.dkp - if force: - r = requests.get(url) - else: - r = request_with_cache(url) + api_response = self.call_api(url) + parsed_result = self.parse_result(api_response) + + print(parsed_result) + if int(parsed_result["status"]) == 200: + parsed_result = self.parse_result(api_response) + elif parsed_result["status"] == 302 and "fresh" in parsed_result["redirect_url"]["uri"]: + url = self.API_URL_FRESH % self.dkp + api_response = self.call_api(url) + parsed_result = self.parse_result(api_response) + if parsed_result["status"] != 200: + print("Cold not update!") + return + else: + parsed_result = self.parse_result(api_response) - if r.status_code == 200: - result = r.json() else: - print(r) - print(url) print("Cold not update!") + print(url) return + result = parsed_result["data"] - # fresh products have different api / Digikala redirect to new link for fresh product - # Typical response: {'status': 302, 'redirect_url': {'base': None, 'uri': '/fresh/product/dkp-10269403/'}} - if result["status"] == 302 and "fresh" in result["redirect_url"]["uri"]: - url = self.API_URL_FRESH % self.dkp - if force: - r = requests.get(url) - else: - r = request_with_cache(url) - result = r.json() - - print("url and result:") - print(url) - print(result) - if r.status_code != 200: - return - data = result['data'] - - if data['product'].get('is_inactive'): + if result['product'].get('is_inactive'): return dict(cost='unavailable', img=None, title=None) - title = data['product']['title_fa'] - if data['product']['status'] == 'marketable': - cost = int(data['product']['default_variant']['price']['rrp_price']) // 10 + title = result['product']['title_fa'] + if result['product']['status'] == 'marketable': + cost = int(result['product']['default_variant']['price']['rrp_price']) // 10 else: cost = 'unavailable' - img = data['product']['images']['main']['url'][0] + img = result['product']['images']['main']['url'][0] return dict(cost=cost, img=img, title=title) + + +