-
Notifications
You must be signed in to change notification settings - Fork 0
/
scrapeinlist.py
92 lines (67 loc) · 3.05 KB
/
scrapeinlist.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import json
from bs4 import BeautifulSoup
import requests
from selenium import webdriver
import time
from selenium.webdriver.chrome.options import Options as ChromeOptions
import os
from selenium.webdriver.chrome.service import Service
def _text_of_first(soup, *selectors):
    """Return the text of the first element matched by any selector, else None.

    Selectors are tried in order; the first one that matches wins.
    """
    for selector in selectors:
        node = soup.select_one(selector)
        if node is not None:
            return node.get_text()
    return None


def _find_item(nodes, keyword):
    """Return the first node whose lower-cased text contains *keyword*, else None."""
    for node in nodes:
        if keyword in node.get_text().lower():
            return node
    return None


def _value_after_colon(node):
    """Return the stripped text between the first and second ':' of *node*.

    Returns None when the node's text contains no ':'.
    """
    text = node.get_text(strip=True)
    if ':' not in text:
        return None
    return text.split(':')[1].strip()


def insidelist(url):
    """Load a listing page in headless Chrome and scrape its details.

    The page at *url* is opened with Selenium, its rendered HTML is parsed
    with BeautifulSoup, and selected fields are collected into a dict.

    Args:
        url: Address of the listing page to scrape.

    Returns:
        A dict with the keys 'Nazwa', 'Cena', 'Negocjacja', 'Wysylka',
        'Miasto', 'Opis', 'Pamiec', 'Stan', 'image' and 'Url'. Text fields
        are None when the element is missing; 'Negocjacja' defaults to
        'loh' and 'Pamiec' / 'Stan' default to '' when no matching list
        item exists; 'image' is a (possibly empty) list of image URLs.
    """
    options = ChromeOptions()
    # The hyphenated name is what this code has always read; the underscore
    # spelling is accepted as well. Only assign when a value exists, because
    # recent Selenium versions reject a non-string binary_location.
    chrome_binary = (
        os.environ.get("GOOGLE-CHROME-BIN")
        or os.environ.get("GOOGLE_CHROME_BIN")
    )
    if chrome_binary:
        options.binary_location = chrome_binary
    for flag in (
        "--headless=new",
        "--disable-gpu",
        "--no-sandbox",
        "--disable-dev-shm-usage",
    ):
        options.add_argument(flag)

    service = Service(os.environ.get('CHROMEDRIVER_PATH'))
    driver = webdriver.Chrome(service=service, options=options)
    try:
        driver.get(url)
        # Fixed pause before reading the page source -- presumably to let
        # client-side rendering finish; TODO confirm / replace with a wait.
        time.sleep(5)
        html = driver.page_source
    finally:
        # Always shut the browser down, even if loading the page failed.
        driver.quit()

    soup = BeautifulSoup(html, 'html.parser')
    detail_items = soup.select('li.css-1r0si1e')

    # select_one() takes a CSS selector and ignores a class_ keyword, so the
    # class is put into the selector itself. The bare tag is kept as a
    # fallback because that is what the previous code effectively matched.
    name = _text_of_first(soup, 'h4.css-77x51t', 'h4')
    cena = _text_of_first(soup, 'h3.css-93ez2t', 'h3')
    wysylka = _text_of_first(soup, 'span.css-tj1qbd', 'span')
    miasto = _text_of_first(soup, 'p.css-1cju8pu.er34gjf0')
    opis = _text_of_first(soup, 'div.css-1t507yq.er34gjf0')

    # "do negocjacji" is Polish for "negotiable".
    negotiable_item = _find_item(detail_items, "do negocjacji")
    negocjacja = negotiable_item.get_text() if negotiable_item is not None else 'loh'

    # 'wbudowana' ("built-in") -- presumably the built-in memory row.
    memory_item = _find_item(detail_items, 'wbudowana')
    pamiec = _value_after_colon(memory_item) if memory_item is not None else ''

    # 'stan' ("condition"); note this is a substring match on the row text.
    condition_item = _find_item(detail_items, 'stan')
    stan = _value_after_colon(condition_item) if condition_item is not None else ''

    gallery = soup.select_one(
        'div.swiper.swiper-initialized.swiper-horizontal.swiper-backface-hidden'
    )
    image_list = []
    if gallery is not None:
        for image in gallery.select('img'):
            src = image.get('src')
            if src:
                # Drop every ":443" occurrence from the image URL.
                image_list.append(src.replace(":443", ""))

    return {
        'Nazwa': name,
        'Cena': cena,
        'Negocjacja': negocjacja,
        'Wysylka': wysylka,
        'Miasto': miasto,
        'Opis': opis,
        'Pamiec': pamiec,
        'Stan': stan,
        'image': image_list,
        'Url': url,
    }