Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

рефакторинг #1

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added .vs/Avito/v15/.suo
Binary file not shown.
Binary file added .vs/slnx.sqlite
Binary file not shown.
6 changes: 3 additions & 3 deletions Avito/Category.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from Avito.categories import categories


def getListCategories(html):
def get_list_categories(html):
""" Функция возвращает список категорий. """
soup = BeautifulSoup(html, 'lxml')
all_li = soup.find('nav', class_='category-map').find_all('li', class_='category-map-item')
Expand All @@ -18,10 +18,10 @@ def getListCategories(html):
return links


def getDictCategories(ip_list):
def get_dict_categories(ip_list):
""" Функция возвращает словарь с категориями. """
dict_categories = {}
links = getListCategories(Request('https://www.avito.ru/', ip_list).getHtml())
links = get_list_categories(Request('https://www.avito.ru/', ip_list).getHtml())

# Добавляем в словарь категории. Ключом является название категории на русском языке,
# значением является название категории на английском языке.
Expand Down
6 changes: 3 additions & 3 deletions Avito/District.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from Avito.Request import Request


def getData(html):
def get_data(html):
data = {}
soup = BeautifulSoup(html, "lxml")
tds = soup.find("optgroup", label="район").find_all("option")
Expand All @@ -14,12 +14,12 @@ def getData(html):
return data


def getDistrict(city, ip):
def get_district(city, ip):
""" Функция возвращает словарь с районами city.
Ключом является название района на русском,
значением является название района на английском. """

url = "https://avito.ru/{}".format(city)
district = getData(Request(url, ip).getHtml())
district = get_data(Request(url, ip).getHtml())
print(district)
return district
6 changes: 3 additions & 3 deletions Avito/Goods.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
links = []


def getLinksGoods(html):
def get_links_goods(html):
soup = BeautifulSoup(html, "lxml")
tds = soup.find_all("div", class_="item item_table clearfix js-catalog-item-enum js-item-extended item_table_extended snippet-experiment item_hide-elements")
for td in tds:
Expand All @@ -23,11 +23,11 @@ def getLinksGoods(html):
return links


def getGoods(urls, ip):
def get_goods(urls, ip):
""" Функция возвращает список ссылок товаров. """
for url in urls:
html = Request(url, ip).getHtml()
getLinksGoods(html)
get_links_goods(html)
return links


Expand Down
12 changes: 6 additions & 6 deletions Avito/Links.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
urls = []


def getLastPage(html):
def get_last_page(html):
try:
soup = BeautifulSoup(html, "lxml")
pages = soup.find("div", class_="pagination-pages clearfix").find_all("a", class_="pagination-page")
Expand All @@ -15,10 +15,10 @@ def getLastPage(html):
return 1


def getLinks(city, categories, subcategories, district, ip, qq):
def get_links(city, categories, subcategories, district, ip, qq):
for subcategory in subcategories:
url = "https://avito.ru/{}/{}/{}?p={}&{}={}".format(city, categories, subcategory, 1, qq, district)
n = int(getLastPage(Request(url, ip).getHtml()))
n = int(get_last_page(Request(url, ip).getHtml()))
print(n)
for page in range(1, n + 1):
url = "https://avito.ru/{}/{}/{}?p={}&{}={}".format(city, categories, subcategory, page, qq, district)
Expand All @@ -27,11 +27,11 @@ def getLinks(city, categories, subcategories, district, ip, qq):
print(url)


def linksToProductPages(city, categories, subcategories, districts, ip, qq):
def links_to_product_pages(city, categories, subcategories, districts, ip, qq):
""" Функция возвращает список ссылок страниц с товарами. """
if type(districts) == dict:
for district in districts:
getLinks(city, categories, subcategories, districts[district], ip, qq)
get_links(city, categories, subcategories, districts[district], ip, qq)
else:
getLinks(city, categories, subcategories, districts, ip, qq)
get_links(city, categories, subcategories, districts, ip, qq)
return urls
8 changes: 4 additions & 4 deletions Avito/Metro.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"Санкт-перербург": "spb"}


def getData(html, label, tag, attribute):
def get_data(html, label, tag, attribute):
data = {}
soup = BeautifulSoup(html, "lxml")
if label:
Expand All @@ -21,14 +21,14 @@ def getData(html, label, tag, attribute):
return data


def getMetro(city, ip):
def get_metro(city, ip):
if city in city_with_metro:
url = "https://www.avito.ru/s/avito/components/metro-map/svg-maps/metro-map-{}.svg".format(
city_with_metro[city])
html = Request(url, ip).forMetro()
name = getData(html, False, "text", "data-st-id")
name = get_data(html, False, "text", "data-st-id")
return name
else:
html = Request("https://www.avito.ru/{}".format(city), ip).getHtml()
name = getData(html, True, "option", "value")
name = get_data(html, True, "option", "value")
return name
16 changes: 8 additions & 8 deletions Avito/Number.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,21 @@ def main(self):
break
except:
continue
name = self.getName()
self.buttonClick()
image = self.getImage()
name = self.get_name()
self.button_click()
image = self.get_image()
self.write(image)
number = self.getNamber()
number = self.get_namber()
data = "{} - {}".format(name, str(number))
print(data)
all_number.append(data)

def getName(self):
def get_name(self):
""" Метод возвращает название товара. """
name = self.driver.find_element_by_xpath('//span[@class="title-info-title-text"]').text
return name

def buttonClick(self):
def button_click(self):
""" Метод кликает на кнопку для получения номера телефона. """
while True:
try:
Expand All @@ -46,7 +46,7 @@ def buttonClick(self):
except:
continue

def getImage(self):
def get_image(self):
""" Метод возвращает изображение номера телефона в байтах. """
while True:
try:
Expand All @@ -63,7 +63,7 @@ def write(self, img):
with open(self.name, "wb") as f:
f.write(img)

def getNamber(self):
def get_namber(self):
""" Метод возвращает номер телефона. """
tessdata_dir_config = '--tessdata-dir "C:\Tesseract-OCR"'
image = Image.open(self.name)
Expand Down
6 changes: 3 additions & 3 deletions Avito/NumberTelephone.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from Avito.Number import NumberTelephone, all_number


def getNumberTelephone(urls, n):
def get_number_telephone(urls, n):
""" Функция возвращает список номеров телефона продавцов. """
try:
rmtree("AvitoIMG")
Expand All @@ -23,11 +23,11 @@ def getNumberTelephone(urls, n):
break

for list in lists:
r = threading.Thread(target=Threading, args=(lists[lists.index(list)], lists.index(list)))
r = threading.Thread(target=threading, args=(lists[lists.index(list)], lists.index(list)))
r.start()


def Threading(urls, index):
def threading(urls, index):
for url in urls:
number = NumberTelephone(url, urls.index(url), index)
number.main()
Expand Down
8 changes: 4 additions & 4 deletions Avito/Proxies.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
proxy_list = []


def getHtml(url):
def get_html(url):
r = requests.get(url)
return r.text


def listIP(html):
def list_iP(html):
soup = BeautifulSoup(html, "lxml")
trs = soup.find("tbody").find_all("tr")
for tr in trs:
Expand All @@ -21,10 +21,10 @@ def listIP(html):
return proxy_list


def getProxy():
def get_proxy():
""" Функция возвращает список https прокси. """
url = "https://www.sslproxies.org/"
proxy_list = listIP(getHtml(url))
proxy_list = list_iP(get_html(url))
return proxy_list


Expand Down
4 changes: 2 additions & 2 deletions Avito/Request.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def __init__(self, url, list_ip):
self.headers = {
'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36'}

def getHtml(self, returned=None):
def get_html(self, returned=None):
for ip in self.list_ip:
try:
html = self.response(ip)
Expand All @@ -26,7 +26,7 @@ def response(self, ip):
if len(r.text) > 80000:
return r.text

def forMetro(self):
def for_metro(self):
for ip in self.list_ip:
try:
r = get(self.url, proxies={"https": ip}, headers=self.headers, timeout=5)
Expand Down
6 changes: 3 additions & 3 deletions Avito/Subcategories.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from Avito.Request import Request


def getData(html):
def get_data(html):
links = []
soup = BeautifulSoup(html, "lxml")
tds = soup.find("div", class_="catalog-counts__section").find_all("li")
Expand All @@ -13,12 +13,12 @@ def getData(html):
return links


def getSubcategories(category, ip):
def get_subcategories(category, ip):
""" Функция возвращает список подкатегорий category. """
url = "https://avito.ru/rossiya/{}".format(category)
while True:
try:
subcategories = getData(Request(url, ip).getHtml())
subcategories = get_data(Request(url, ip).getHtml())
print(subcategories)
return subcategories
except:
Expand Down
28 changes: 14 additions & 14 deletions Avito/__main__.py
Original file line number Diff line number Diff line change
@@ -1,40 +1,40 @@
# coding=utf-8
from Avito.District import getDistrict
from Avito.District import get_district
from Avito.City import cities
from Avito.Links import linksToProductPages
from Avito.Goods import getGoods
from Avito.Category import getDictCategories
from Avito.NumberTelephone import getNumberTelephone
from Avito.Subcategories import getSubcategories
from Avito.Links import links_to_product_pages
from Avito.Goods import get_goods
from Avito.Category import get_dict_categories
from Avito.NumberTelephone import get_number_telephone
from Avito.Subcategories import get_subcategories
from Avito.Metro import getMetro
from Avito.Proxies import getProxy
from Avito.Proxies import get_proxy


def main():
proxy_list = getProxy()
proxy_list = get_proxy()

categories = getDictCategories(proxy_list)
categories = get_dict_categories(proxy_list)

city = cities["Ижевск"]

district = getDistrict(city, proxy_list)
district = get_district(city, proxy_list)

# Если в city метро, а не районы, то
# metro = getMetro(city, proxy_list)
# Получаем словарь с метро city.

category = categories["Велосипеды"]

subcategories = getSubcategories(category, proxy_list)
subcategories = get_subcategories(category, proxy_list)

links_to_product_pages = linksToProductPages(city, category, subcategories, district['Октябрьский'], proxy_list, "district")
links_to_product_pages = links_to_product_pages(city, category, subcategories, district['Октябрьский'], proxy_list, "district")

goods = getGoods(links_to_product_pages, proxy_list)
goods = get_goods(links_to_product_pages, proxy_list)

# Количество потоков
n = 5

numbers = getNumberTelephone(goods, n)
numbers = get_number_telephone(goods, n)


if __name__ == '__main__':
Expand Down
12 changes: 6 additions & 6 deletions Test/NumberTest.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
# coding=utf-8
from Avito.Goods import getGoods
from Avito.NumberTelephone import getNumberTelephone
from Avito.Proxies import getProxy
from Avito.Goods import get_goods
from Avito.NumberTelephone import get_number_telephone
from Avito.Proxies import get_proxy

proxy_list = getProxy()
goods = getGoods(["https://avito.ru/izhevsk/velosipedy/zapchasti_i_aksessuary?p=4&district=164",
proxy_list = get_proxy()
goods = get_goods(["https://avito.ru/izhevsk/velosipedy/zapchasti_i_aksessuary?p=4&district=164",
"https://avito.ru/izhevsk/velosipedy/zapchasti_i_aksessuary?p=3&district=164",
"https://avito.ru/izhevsk/velosipedy/zapchasti_i_aksessuary?p=2&district=164",
"https://avito.ru/izhevsk/velosipedy/zapchasti_i_aksessuary?p=1&district=164"], proxy_list)

# Количество потоков
n = 5

numbers = getNumberTelephone(goods, n)
numbers = get_number_telephone(goods, n)