Skip to content

Commit

Permalink
retailer 403 fix
Browse files Browse the repository at this point in the history
  • Loading branch information
ehsan-g committed Dec 7, 2024
1 parent e54d53c commit f10c7e9
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 33 deletions.
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,5 @@ SQLAlchemy-Utils~=0.38.2
ujson~=5.2.0
web3~=5.13.1
Werkzeug~=1.0.1
markupsafe==2.0.1
markupsafe==2.0.1
wget==3.2
78 changes: 46 additions & 32 deletions say/crawler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
import html
import re
from typing import NamedTuple

import subprocess
import json
import requests
from cachetools import TTLCache
from cachetools import cached
Expand Down Expand Up @@ -115,53 +116,66 @@ def __init__(self, url):
except IndexError:
self.dkp = None

def get_data(self, force=False):

def call_api(self, url, timeout=30):
    """Fetch *url* with the system ``wget`` binary and return the body text.

    Args:
        url: Address to download.
        timeout: Maximum number of seconds to wait for ``wget`` to finish
            before giving up. Defaults to 30.

    Returns:
        The text ``wget`` wrote to stdout on success. On any failure
        (non-zero exit status, timeout, or ``wget`` not being runnable) a
        string starting with ``"An error occurred: "`` is returned instead
        of raising, so callers must not assume the result is valid JSON.
    """
    try:
        # -q suppresses wget's own output; -O- writes the download to stdout.
        # The argument list form (no shell) keeps the URL from being
        # interpreted by a shell.
        result = subprocess.run(
            ['wget', '-qO-', url],
            capture_output=True,
            text=True,
            check=True,
            timeout=timeout,
        )
    except (subprocess.SubprocessError, OSError) as e:
        # SubprocessError covers CalledProcessError (non-zero exit) and
        # TimeoutExpired; OSError covers a missing or non-executable wget.
        return f"An error occurred: {e}"
    return result.stdout

def parse_result(self, api_response):
    """Decode a JSON document received from the API.

    Args:
        api_response: The raw response body, expected to be JSON text.

    Returns:
        The decoded Python object on success. If the input is not valid
        JSON, or is not a string/bytes value at all (for example ``None``),
        a string starting with ``"An error occurred while parsing JSON: "``
        is returned instead of raising. Callers should check the type of
        the result before subscripting it.
    """
    try:
        return json.loads(api_response)
    except (json.JSONDecodeError, TypeError) as e:
        # TypeError is raised by json.loads for non str/bytes input; report
        # it the same way as malformed JSON.
        return f"An error occurred while parsing JSON: {e}"

# Fetch the product JSON for self.dkp via call_api/parse_result and return a
# dict with the keys cost, img and title (or None when nothing can be fetched).
# NOTE(review): this listing appears to interleave removed and added diff lines
# without +/- markers (e.g. `force` is used below but is not a parameter of this
# signature, and several statements appear in both a `data[...]` and a
# `result[...]` form) — confirm against the repository before relying on it.
def get_data(self):
result = None
parsed_result = None

# __init__ sets self.dkp to None on IndexError; there is nothing to look up then.
if self.dkp is None:
return

url = self.API_URL_NOT_FRESH % self.dkp
if force:
r = requests.get(url)
else:
r = request_with_cache(url)
api_response = self.call_api(url)
parsed_result = self.parse_result(api_response)

print(parsed_result)
# NOTE(review): call_api and parse_result both return an error *string* on
# failure; subscripting that string with "status" raises TypeError here.
# NOTE(review): the 200 check coerces status with int(), the 302 check below
# compares the raw value — presumably status is always an int; confirm.
if int(parsed_result["status"]) == 200:
parsed_result = self.parse_result(api_response)
elif parsed_result["status"] == 302 and "fresh" in parsed_result["redirect_url"]["uri"]:
# Redirect to a "fresh" product: retry against the fresh-product endpoint.
url = self.API_URL_FRESH % self.dkp
api_response = self.call_api(url)
parsed_result = self.parse_result(api_response)
if parsed_result["status"] != 200:
print("Cold not update!")
return
else:
parsed_result = self.parse_result(api_response)

if r.status_code == 200:
result = r.json()
else:
print(r)
print(url)
print("Cold not update!")
print(url)
return

# The product payload lives under the "data" key of the parsed response.
result = parsed_result["data"]

# fresh products have different api / Digikala redirect to new link for fresh product
# Typical response: {'status': 302, 'redirect_url': {'base': None, 'uri': '/fresh/product/dkp-10269403/'}}
if result["status"] == 302 and "fresh" in result["redirect_url"]["uri"]:
url = self.API_URL_FRESH % self.dkp
if force:
r = requests.get(url)
else:
r = request_with_cache(url)
result = r.json()

print("url and result:")
print(url)
print(result)

if r.status_code != 200:
return
data = result['data']

# Inactive products are reported as unavailable with no image or title.
if data['product'].get('is_inactive'):
if result['product'].get('is_inactive'):
return dict(cost='unavailable', img=None, title=None)

title = data['product']['title_fa']
if data['product']['status'] == 'marketable':
cost = int(data['product']['default_variant']['price']['rrp_price']) // 10
title = result['product']['title_fa']
# Only 'marketable' products get a numeric cost; anything else is 'unavailable'.
# NOTE(review): the // 10 presumably converts the price unit (rial -> toman?) — confirm.
if result['product']['status'] == 'marketable':
cost = int(result['product']['default_variant']['price']['rrp_price']) // 10
else:
cost = 'unavailable'

# The first entry of the main image URL list is used as the product image.
img = data['product']['images']['main']['url'][0]
img = result['product']['images']['main']['url'][0]
return dict(cost=cost, img=img, title=title)



0 comments on commit f10c7e9

Please sign in to comment.