diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..4dd1491 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,16 @@ +beautifulsoup4==4.8.1 +blessings==1.7 +bs4==0.0.1 +certifi==2019.9.11 +chardet==3.0.4 +Click==7.0 +colorama==0.4.1 +idna==2.8 +inquirer==2.6.3 +python-editor==1.0.4 +readchar==2.0.1 +requests==2.22.0 +six==1.12.0 +soupsieve==1.9.4 +stronghold==1.5 +urllib3==1.25.6 diff --git a/src/sites/kabum.py b/src/sites/kabum.py index f786ad5..2a45be1 100644 --- a/src/sites/kabum.py +++ b/src/sites/kabum.py @@ -1,5 +1,7 @@ import re import requests +from bs4 import BeautifulSoup as bSoup +import pdb class Kabum(): def fetch(self, link): @@ -10,50 +12,31 @@ def fetch(self, link): - discount -> Boolean ''' - patterns = { - 'title': re.compile(r'''class="titulo_det">(.+)'''), - 'discount_price': re.compile(r'''preco_desconto_avista-cm">R\$\s(\d*\.?\d*\.?\d*\,\d*)<'''), - } - # Open the link try: response = requests.get(link) page = response.text except Exception as e: print("Link Error: %s" % link) - - # will return a "null" infos object - infos = {'title': link.split('/')[-1], 'price': 0.00, 'discount': False} - return infos + return {'title': link.split('/')[-1], 'price': 0.00, 'discount': False} infos = dict() - # Title fetch - title_re = patterns['title'].search(page) + with open('test.html', 'w') as file: + file.write(page) + soup = bSoup(page, 'html.parser') + + # title fetch try: - infos['title'] = title_re.group(1) + infos['title'] = soup.article.h1.string except Exception as e: raise Exception('No title found on the link: %s' % link) - # Price fetch - regular_price_re = patterns['regular_price'].search(page) + # price fetch try: - infos['price'] = regular_price_re.group(1) + infos['price'] = float(soup.article.find(attrs={'itemprop': 'price'})['content']) infos['discount'] = False except Exception as e: - discount_price_re = patterns['discount_price'].search(page) - try: - infos['price'] = discount_price_re.group(1) - infos['discount'] = True - except Exception as ej: - raise Exception('No price found on link: %s' % link) - - # Convert price to a float - if infos['discount'] == False: - infos['price'] = float(infos['price']) - else: - infos['price'] = float(infos['price'].replace('.','').replace(',', '.')) + raise Exception('No price found on link: %s' % link) - # Everything went OK by this point self.fetched = True return infos