From b6a51f15f877b9dfa6bf5f970b2baa88707d1247 Mon Sep 17 00:00:00 2001 From: Ganzidaeyong Date: Wed, 16 Oct 2024 23:19:38 +0900 Subject: [PATCH 1/2] fix: uncomment deletion logic --- handler.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/handler.py b/handler.py index 3600f1b..ef8eb0a 100644 --- a/handler.py +++ b/handler.py @@ -113,15 +113,14 @@ def menus_transaction(crawled_meals, cursor): db_menus = cursor.fetchall() new_menus, deleted_menus, edited_menus = compare_menus(db_menus, crawled_meals, restaurants) - # 20240924 ~ 20240926 축제 기간 동안 임시 조치입니다. - # if deleted_menus: - # deleted_menus_id = [str(menu.get("id")) for menu in deleted_menus] - # delete_menus_query = f""" - # DELETE FROM menu - # WHERE id in ({','.join(deleted_menus_id)}); - # """ - # cursor.execute(delete_menus_query) - # send_deleted_menus_message(deleted_menus) + if deleted_menus: + deleted_menus_id = [str(menu.get("id")) for menu in deleted_menus] + delete_menus_query = f""" + DELETE FROM menu + WHERE id in ({','.join(deleted_menus_id)}); + """ + cursor.execute(delete_menus_query) + send_deleted_menus_message(deleted_menus) insert_menus_query = """ INSERT INTO menu(restaurant_id, code, date, type, name_kr, price, etc) From b9eef6e3ebc1ea3cb9107103477722496e1f74c7 Mon Sep 17 00:00:00 2001 From: Hyungwook Choi <69342392+GoGiants1@users.noreply.github.com> Date: Mon, 11 Nov 2024 15:44:20 +0900 Subject: [PATCH 2/2] Fix: SNU dorm crawl (#98) * fix: Snudorm Crawler Breaking Changes and Minor Bug fix * lint --- crawlers/base_crawler.py | 59 ++++++- crawlers/misc/snudorm_crawler.py | 108 ++++++++++++ crawlers/snuco_crawler.py | 67 +++----- crawlers/snudorm_crawler.py | 273 ++++++++++++++++++++----------- crawlers/vet_crawler.py | 5 +- handler.py | 24 ++- 6 files changed, 395 insertions(+), 141 deletions(-) create mode 100644 crawlers/misc/snudorm_crawler.py diff --git a/crawlers/base_crawler.py b/crawlers/base_crawler.py index 9e64f8a..2583745 100644 --- a/crawlers/base_crawler.py +++ b/crawlers/base_crawler.py @@ -138,6 +138,63 @@ def normalize(self, meal, **kwargs): return meal +class RemoveMealNumber(MealNormalizer): + def normalize(self, meal, **kwargs): + if "①" in meal.name or "②" in meal.name: + meal.set_name(meal.name.replace("①", "")) + meal.set_name(meal.name.replace("②", "")) + return meal + + +class RemoveMealIdentifierFromMealName(MealNormalizer): + identifiers = ["(잇템)"] + + def normalize(self, meal, **kwargs): + for identifier in self.identifiers: + meal.set_name(meal.name.replace(identifier, "")) + return meal + + +class RemoveInfoFromMealName(MealNormalizer): + info_sign = ["※", "►", "※", "브레이크 타임"] + + def normalize(self, meal, **kwargs): + meal.set_name(re.sub("(" + "|".join(self.info_sign) + ").*", "", meal.name)) + return meal + + +class FindRestaurantDetail(MealNormalizer): + restaurant_regex = [ + r"(.*)\( ?(\d층.*)\)(.*)", + r"(.*)\((.*식당) ?\)(.*)", + r"(.*)< ?(\d층.*)>(.*)", + r"(.*)<(.*식당) ?>(.*)", + r"(.*)<(테이크아웃)>(.*)", + ] + + def normalize(self, meal, **kwargs): + for regex in self.restaurant_regex: + m = re.match(regex, meal.name) + if m: + meal.set_restaurant(meal.restaurant + ">" + m.group(2).strip()) + meal.set_name(m.group(1).strip() + m.group(3).strip()) + return meal + + +class AddRestaurantDetail(MealNormalizer): + def normalize(self, meal, **kwargs): + details = kwargs.get("restaurant_detail", []) + final_restaurants = kwargs.get("final_restaurants", []) + restaurant = meal.restaurant + for detail in details: + restaurant = restaurant + ">" + detail 
+ if text_normalizer(detail, True) in final_restaurants: + break + meal.set_restaurant(restaurant) + + return meal + + class RestaurantCrawler(metaclass=ABCMeta): headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0"} url = "" @@ -188,7 +245,7 @@ def __init__(self): self.meals = [] @abstractmethod - async def run_30days(self): + async def run_7days(self): pass async def run(self, url=None, **kwargs): diff --git a/crawlers/misc/snudorm_crawler.py b/crawlers/misc/snudorm_crawler.py new file mode 100644 index 0000000..005f039 --- /dev/null +++ b/crawlers/misc/snudorm_crawler.py @@ -0,0 +1,108 @@ +import asyncio +import urllib3 +import aiohttp +from bs4 import BeautifulSoup +import datetime +from pytz import timezone + +from crawlers.base_crawler import ( + MealNormalizer, + RestaurantCrawler, + Meal, + text_normalizer, + FindPrice, + FindParenthesisHash, +) + + +class AddRestaurantDetail(MealNormalizer): + def normalize(self, meal, **kwargs): + details = kwargs.get("restaurant_detail", []) + final_restaurants = kwargs.get("final_restaurants", []) + restaurant = meal.restaurant + for detail in details: + restaurant = restaurant + ">" + detail + if text_normalizer(detail, True) in final_restaurants: + break + meal.set_restaurant(restaurant) + return meal + + +class SnudormRestaurantCrawler(RestaurantCrawler): + url = "https://snudorm.snu.ac.kr/wp-admin/admin-ajax.php" + menucost_url = "https://snudorm.snu.ac.kr/food-schedule/" + restaurant = "기숙사식당" + normalizer_classes = [FindPrice, FindParenthesisHash, AddRestaurantDetail] + + async def get_menucosts(self): + urllib3.disable_warnings() + async with aiohttp.ClientSession(headers=self.headers, connector=aiohttp.TCPConnector(ssl=False)) as session: + async with session.get(self.menucost_url) as response: + html = await response.read() + soup = BeautifulSoup(html, "html.parser") + lis = soup.select("div.board > ul > li") + prices = {} + for li in lis: + spans = li.find_all("span") + prices[spans[0].text] = spans[1].text + return prices + + async def run_30days(self): + date = datetime.datetime.now(timezone("Asia/Seoul")).date() + menucosts = await self.get_menucosts() + tasks = [self.run(date=date + datetime.timedelta(weeks=i), menucosts=menucosts) for i in range(4)] + return await asyncio.gather(*tasks, return_exceptions=True) + + async def run(self, date=None, menucosts=None, **kwargs): + if not date: + date = datetime.datetime.now(timezone("Asia/Seoul")).date() + if not menucosts: + menucosts = await self.get_menucosts() + urllib3.disable_warnings() + async with aiohttp.ClientSession(headers=self.headers, connector=aiohttp.TCPConnector(ssl=False)) as session: + data = { + "action": "metapresso_dorm_food_week_list", + "start_week_date": date.isoformat(), + "target_blog": "39", + } + async with session.post(self.url, data=data) as response: + html = await response.read() + soup = BeautifulSoup(html, "html.parser") + self.crawl(soup, menucosts=menucosts, **kwargs) + + def crawl(self, soup, menucosts=None, **kwargs): + if not menucosts: + menucosts = {} + + trs = soup.select("table > tbody > tr") + ths = soup.select("table > thead > tr > th") + dates = [th.text for th in ths[-7:]] + type = "" + restaurant_detail = [[] for _ in range(len(trs))] + + for row_idx, tr in enumerate(trs): + tds = tr.select("td") + + for td in tds[:-7]: + rowspan = td.attrs.get("rowspan") + rowspan = int(rowspan[0]) if rowspan else 1 + type_tmp = text_normalizer(td.text) + if type_tmp in Meal.type_handler: + type = 
type_tmp + else: + for i in range(rowspan): + restaurant_detail[row_idx + i].append(td.text) + + for col_idx, td in enumerate(tds[-7:]): + ul = td.find("ul") + if ul: + for li in ul.find_all("li", recursive=False): + spans = li.find_all("span") + name = spans[-1].text + price = menucosts.get(spans[0].text) + restaurant = self.restaurant + meal = Meal(restaurant, name, dates[col_idx], type, price) + meal = self.normalize( + meal, restaurant_detail=restaurant_detail[row_idx], final_restaurants=["아워홈"] + ) + self.found_meal(meal) diff --git a/crawlers/snuco_crawler.py b/crawlers/snuco_crawler.py index a5d2ad3..893662e 100644 --- a/crawlers/snuco_crawler.py +++ b/crawlers/snuco_crawler.py @@ -7,47 +7,15 @@ from crawlers.base_crawler import ( FindParenthesisHash, FindPrice, + FindRestaurantDetail, Meal, - MealNormalizer, + RemoveInfoFromMealName, + RemoveMealNumber, RestaurantCrawler, text_normalizer, ) -class RemoveMealNumber(MealNormalizer): - def normalize(self, meal, **kwargs): - if "①" in meal.name or "②" in meal.name: - meal.set_name(meal.name.replace("①", "")) - meal.set_name(meal.name.replace("②", "")) - return meal - - -class RemoveInfoFromMealName(MealNormalizer): - info_sign = ["※", "►", "※", "브레이크 타임"] - - def normalize(self, meal, **kwargs): - meal.set_name(re.sub("(" + "|".join(self.info_sign) + ").*", "", meal.name)) - return meal - - -class FindRestaurantDetail(MealNormalizer): - restaurant_regex = [ - r"(.*)\( ?(\d층.*)\)(.*)", - r"(.*)\((.*식당) ?\)(.*)", - r"(.*)< ?(\d층.*)>(.*)", - r"(.*)<(.*식당) ?>(.*)", - r"(.*)<(테이크아웃)>(.*)", - ] - - def normalize(self, meal, **kwargs): - for regex in self.restaurant_regex: - m = re.match(regex, meal.name) - if m: - meal.set_restaurant(meal.restaurant + ">" + m.group(2).strip()) - meal.set_name(m.group(1).strip() + m.group(3).strip()) - return meal - - class SnucoRestaurantCrawler(RestaurantCrawler): url = "https://snuco.snu.ac.kr/foodmenu/" normalizer_classes = [ @@ -57,9 +25,21 @@ class SnucoRestaurantCrawler(RestaurantCrawler): RemoveInfoFromMealName, RemoveMealNumber, ] - next_line_str = ["봄", "소반", "콤비메뉴", "셀프코너", "채식뷔페", "추가코너", "돈까스비빔면셋트", "탄탄비빔면셋트"] + next_line_str = [ + "봄", + "소반", + "콤비메뉴", + "셀프코너", + "채식뷔페", + "추가코너", + "돈까스비빔면셋트", + "탄탄비빔면셋트", + ] next_line_keyword = ["지역맛집따라잡기", "호구셋트"] # 다음 한 줄 있는 것들 - multi_line_keywords = {"+": ["셀프코너", "채식뷔페", "뷔페"], " / ": ["추가코너"]} # 다음에 여러줄 있는 것들 + multi_line_keywords = { + "+": ["셀프코너", "채식뷔페", "뷔페"], + " / ": ["추가코너"], + } # 다음에 여러줄 있는 것들 multi_line_finisher = { "셀프코너": "주문식메뉴" } # multiline이 끝나는 지표. ex. 로직상 주문식 메뉴까지 append된 뒤에 확인한다. 
따라서 마지막에 주문식 메뉴 따로 빼줘야함 @@ -115,7 +95,10 @@ def get_multi_line_delimiter(self, meal): if not meal: return None code = text_normalizer(meal.name, True) - for keyword, finisher in self.multi_line_finisher.items(): # finisher 발견되면 delimiter가 없는 것 취급 + for ( + keyword, + finisher, + ) in self.multi_line_finisher.items(): # finisher 발견되면 delimiter가 없는 것 취급 if keyword in code and finisher in code: return None for delimiter, keywords in self.multi_line_keywords.items(): @@ -133,9 +116,9 @@ def combine(self, last_meal, meal, delimiter=": "): last_meal.set_price(meal.price) return last_meal - async def run_30days(self): + async def run_7days(self): date = datetime.datetime.now(timezone("Asia/Seoul")).date() - tasks = [self.run(date=date + datetime.timedelta(days=i)) for i in range(30)] + tasks = [self.run(date=date + datetime.timedelta(days=i)) for i in range(7)] return await asyncio.gather(*tasks, return_exceptions=True) async def run(self, date=None, **kwargs): @@ -148,8 +131,8 @@ def found_meal(self, meal): if meal and self.is_meal_name_when_normalized(meal.name) and "교직" not in meal.name: self.meals.append(meal) - def get_name_from_raw_restaurant(self, row_restaurant): - normalized = text_normalizer(row_restaurant) + def get_name_from_raw_restaurant(self, raw_restaurant): + normalized = text_normalizer(raw_restaurant) phone_match = re.match(r".*\((\d+-\d+)\)", normalized) if phone_match is None: diff --git a/crawlers/snudorm_crawler.py b/crawlers/snudorm_crawler.py index 005f039..896c269 100644 --- a/crawlers/snudorm_crawler.py +++ b/crawlers/snudorm_crawler.py @@ -1,108 +1,199 @@ import asyncio -import urllib3 -import aiohttp -from bs4 import BeautifulSoup import datetime +import re + +from bs4 import BeautifulSoup from pytz import timezone from crawlers.base_crawler import ( - MealNormalizer, - RestaurantCrawler, + AddRestaurantDetail, + FindParenthesisHash, + FindPrice, Meal, + RemoveInfoFromMealName, + RemoveMealIdentifierFromMealName, + RestaurantCrawler, text_normalizer, - FindPrice, - FindParenthesisHash, ) -class AddRestaurantDetail(MealNormalizer): - def normalize(self, meal, **kwargs): - details = kwargs.get("restaurant_detail", []) - final_restaurants = kwargs.get("final_restaurants", []) - restaurant = meal.restaurant - for detail in details: - restaurant = restaurant + ">" + detail - if text_normalizer(detail, True) in final_restaurants: - break - meal.set_restaurant(restaurant) - return meal - - class SnudormRestaurantCrawler(RestaurantCrawler): - url = "https://snudorm.snu.ac.kr/wp-admin/admin-ajax.php" - menucost_url = "https://snudorm.snu.ac.kr/food-schedule/" + url = "https://snudorm.snu.ac.kr/foodmenu/" restaurant = "기숙사식당" - normalizer_classes = [FindPrice, FindParenthesisHash, AddRestaurantDetail] - - async def get_menucosts(self): - urllib3.disable_warnings() - async with aiohttp.ClientSession(headers=self.headers, connector=aiohttp.TCPConnector(ssl=False)) as session: - async with session.get(self.menucost_url) as response: - html = await response.read() - soup = BeautifulSoup(html, "html.parser") - lis = soup.select("div.board > ul > li") - prices = {} - for li in lis: - spans = li.find_all("span") - prices[spans[0].text] = spans[1].text - return prices - - async def run_30days(self): + normalizer_classes = [ + FindPrice, + FindParenthesisHash, + AddRestaurantDetail, + RemoveInfoFromMealName, + RemoveMealIdentifierFromMealName, + ] + + next_line_str = [ + "봄", + "소반", + "콤비메뉴", + "셀프코너", + "채식뷔페", + "추가코너", + "돈까스비빔면셋트", + "탄탄비빔면셋트", + ] + next_line_keyword = [] # 다음 한 줄 
있는 것들 + multi_line_keywords = {} # 다음에 여러줄 있는 것들 + multi_line_finisher = {} # multiline이 끝나는 지표. ex. 로직상 주문식 메뉴까지 append된 뒤에 확인한다. 따라서 마지막에 주문식 메뉴 따로 빼줘야함 + multi_line_finisher_pair = {} + + restaurant_phone_dict = {} + restaurant_adapter = {"생협기숙사(919동)": "919동", "아워홈(901동)": "아워홈"} + + except_restaurant_list = [] # snudorm에서 처리 + + def __init__(self): + super().__init__() + + def is_next_line_keyword(self, meal): + if not meal: + return False + code = text_normalizer(meal.name, True) + return any((str == code) for str in self.next_line_str) or any((str in code) for str in self.next_line_keyword) + + def filter_menu_names(self, meal_names: list): + return [name for name in meal_names if self.is_meal_name_when_normalized(name)] + + def filter_and_split_menu_names(self, meal_name: list): + names = [] + for name in meal_name: + if name == "" or name == "\xa0": + continue + splitted = re.split(r"(3층 교직원|\d+\s*원)", name) + if len(splitted) == 1: + names.append(name) + else: + for i, v in enumerate(splitted): + if re.match(r"\d+\s*원", v): + if i - 1 >= 0: + splitted[i - 1] += v + splitted[i] = "" + names += [v for v in splitted if v != ""] + return names + + def get_multi_line_delimiter(self, meal): + if not meal: + return None + code = text_normalizer(meal.name, True) + for ( + keyword, + finisher, + ) in self.multi_line_finisher.items(): # finisher 발견되면 delimiter가 없는 것 취급 + if keyword in code and finisher in code: + return None + for delimiter, keywords in self.multi_line_keywords.items(): + if any((str in code) for str in keywords): + return delimiter + return None + + def combine(self, last_meal, meal, delimiter=": "): + if not last_meal: + return meal + if not meal: + return last_meal + last_meal.set_name(last_meal.name + delimiter + meal.name) + if not last_meal.price: + last_meal.set_price(meal.price) + return last_meal + + async def run_7days(self): date = datetime.datetime.now(timezone("Asia/Seoul")).date() - menucosts = await self.get_menucosts() - tasks = [self.run(date=date + datetime.timedelta(weeks=i), menucosts=menucosts) for i in range(4)] + tasks = [self.run(date=date + datetime.timedelta(days=i)) for i in range(7)] return await asyncio.gather(*tasks, return_exceptions=True) - async def run(self, date=None, menucosts=None, **kwargs): + async def run(self, date=None, **kwargs): if not date: date = datetime.datetime.now(timezone("Asia/Seoul")).date() - if not menucosts: - menucosts = await self.get_menucosts() - urllib3.disable_warnings() - async with aiohttp.ClientSession(headers=self.headers, connector=aiohttp.TCPConnector(ssl=False)) as session: - data = { - "action": "metapresso_dorm_food_week_list", - "start_week_date": date.isoformat(), - "target_blog": "39", - } - async with session.post(self.url, data=data) as response: - html = await response.read() - soup = BeautifulSoup(html, "html.parser") - self.crawl(soup, menucosts=menucosts, **kwargs) - - def crawl(self, soup, menucosts=None, **kwargs): - if not menucosts: - menucosts = {} - - trs = soup.select("table > tbody > tr") - ths = soup.select("table > thead > tr > th") - dates = [th.text for th in ths[-7:]] - type = "" - restaurant_detail = [[] for _ in range(len(trs))] - - for row_idx, tr in enumerate(trs): - tds = tr.select("td") - - for td in tds[:-7]: - rowspan = td.attrs.get("rowspan") - rowspan = int(rowspan[0]) if rowspan else 1 - type_tmp = text_normalizer(td.text) - if type_tmp in Meal.type_handler: - type = type_tmp - else: - for i in range(rowspan): - restaurant_detail[row_idx + i].append(td.text) - - for 
col_idx, td in enumerate(tds[-7:]):
-                ul = td.find("ul")
-                if ul:
-                    for li in ul.find_all("li", recursive=False):
-                        spans = li.find_all("span")
-                        name = spans[-1].text
-                        price = menucosts.get(spans[0].text)
-                        restaurant = self.restaurant
-                        meal = Meal(restaurant, name, dates[col_idx], type, price)
-                        meal = self.normalize(
-                            meal, restaurant_detail=restaurant_detail[row_idx], final_restaurants=["아워홈"]
-                        )
-                        self.found_meal(meal)
+        url = self.url + f"?date={date.year}-{date.month:02d}-{date.day:02d}"
+        await super().run(url, date=date, **kwargs)
+
+    def found_meal(self, meal):
+        if meal and self.is_meal_name_when_normalized(meal.name):
+            self.meals.append(meal)
+
+    def get_name_from_raw_restaurant(self, raw_restaurant):
+        normalized = text_normalizer(raw_restaurant)
+
+        # 기존 기숙사 식당 이름과 매칭되도록 함
+        # 24.11.11 기준 생협기숙사(919동), 아워홈(901동)만 존재
+        # 24.11.10 기준 기숙사식당>919동, 기숙사식당>아워홈으로 되어있음
+        full_restaurant_name = self.restaurant + ">" + self.restaurant_adapter.get(normalized)
+        return full_restaurant_name
+
+    def crawl(self, soup: BeautifulSoup, **kwargs):
+        date = kwargs.get("date", datetime.datetime.now(timezone("Asia/Seoul")).date())
+        table = soup.find("table", {"class": "menu-table"})
+        if not table:
+            return
+        trs = table.tbody.find_all("tr", recursive=False)
+
+        for tr in trs:
+            tds = tr.find_all("td", recursive=False)
+
+            raw_restaurant = tds[0].text
+            restaurant = self.get_name_from_raw_restaurant(raw_restaurant)
+            if restaurant in self.except_restaurant_list:
+                continue
+
+            for col_idx, td in enumerate(tds[1:]):
+                # meal type이 더 이상 ths에 포함되지 않고 tds 내부로 이동.
+                meal_type = td["class"][0]
+
+                # td.text에서 식단을 한번에 가져오는 것으로 변경
+                names = td.text.split("\n")
+
+                last_meal = None
+                next_line_merged = False
+                filtered_names = []
+                filtered_names = self.filter_menu_names(names)
+
+                for name in filtered_names:
+                    meal = Meal(restaurant, name, date, meal_type)
+                    meal = self.normalize(meal)
+
+                    if self.is_meal_name_when_normalized(meal.name):
+                        # ISSUE#54 220동 이름 오류 수정
+                        # ex) ㅁ 바비든든( ~ ): 덮밥류 -> 바비든든: 덮밥류
+
+                        name_cleaned = meal.name
+                        for to_clean in ["ㅁ ", "( ~ )", "(~)"]:
+                            name_cleaned = name_cleaned.replace(to_clean, "")
+                        meal.set_name(name_cleaned)
+
+                        # 다음 한줄만 추가하는 경우
+                        if not next_line_merged and self.is_next_line_keyword(last_meal):
+                            last_meal = self.combine(last_meal, meal)
+                            next_line_merged = True
+
+                        else:
+                            delimiter = self.get_multi_line_delimiter(last_meal)
+                            # delimiter에 해당하는 경우에는 여기 걸림
+                            if delimiter is not None:
+                                last_meal = self.combine(last_meal, meal, delimiter)
+                            # 그래서 여기서 combine 된다.
+                            else:  # delimit 하지 않는 경우는
+                                for finisher_to_remove in self.multi_line_finisher_pair.values():
+                                    if finisher_to_remove in str(last_meal):
+                                        finisher_removed_name = last_meal.name.replace(finisher_to_remove, "")
+                                        if finisher_removed_name.endswith("+"):
+                                            finisher_removed_name = finisher_removed_name[:-1]
+                                        last_meal.set_name(finisher_removed_name)
+                                self.found_meal(last_meal)
+                                last_meal = meal  # 그거 자체로 메뉴다.
+ next_line_merged = False + elif self.get_multi_line_delimiter(last_meal) is None: + if meal.restaurant != restaurant: + meal = Meal(raw_restaurant, name, date, meal_type) + meal = self.normalize(meal) + restaurant = meal.restaurant + self.found_meal(last_meal) + last_meal = None + next_line_merged = False + if last_meal: + self.found_meal(last_meal) diff --git a/crawlers/vet_crawler.py b/crawlers/vet_crawler.py index 7d6b60b..9413162 100644 --- a/crawlers/vet_crawler.py +++ b/crawlers/vet_crawler.py @@ -1,12 +1,13 @@ import asyncio -from crawlers.base_crawler import RestaurantCrawler, Meal + +from crawlers.base_crawler import Meal, RestaurantCrawler class VetRestaurantCrawler(RestaurantCrawler): url = "https://vet.snu.ac.kr/금주의-식단/" restaurant = "수의대식당" - async def run_30days(self): + async def run_7days(self): return await asyncio.gather(self.run(), return_exceptions=True) def crawl(self, soup, **kwargs): diff --git a/handler.py b/handler.py index ef8eb0a..f48b9fe 100644 --- a/handler.py +++ b/handler.py @@ -141,7 +141,7 @@ def menus_transaction(crawled_meals, cursor): async def run_crawlers(crawlers): - tasks = [asyncio.create_task(crawler.run_30days()) for crawler in crawlers] + tasks = [asyncio.create_task(crawler.run_7days()) for crawler in crawlers] return await asyncio.gather(*tasks, return_exceptions=True) @@ -149,7 +149,11 @@ def crawl_debug(**kwargs): arg_date = kwargs.get("date") arg_restaurant = kwargs.get("restaurant") - crawlers = [VetRestaurantCrawler(), SnudormRestaurantCrawler(), SnucoRestaurantCrawler()] + crawlers = [ + VetRestaurantCrawler(), + SnudormRestaurantCrawler(), + SnucoRestaurantCrawler(), + ] results = asyncio.run(run_crawlers(crawlers)) for result in results: for err in result: @@ -165,12 +169,18 @@ def crawl_debug(**kwargs): ndate = datetime.datetime(int(arg_date[:4]), int(arg_date[4:6]), int(arg_date[6:])).date() crawled_meals = list( - filter(lambda meal: (meal.date == ndate and arg_restaurant in meal.restaurant), crawled_meals) + filter( + lambda meal: (meal.date == ndate and arg_restaurant in meal.restaurant), + crawled_meals, + ) ) else: crawled_meals = list( - filter(lambda meal: (meal.date >= today and arg_restaurant in meal.restaurant), crawled_meals) + filter( + lambda meal: (meal.date >= today and arg_restaurant in meal.restaurant), + crawled_meals, + ) ) for meal in crawled_meals: @@ -189,7 +199,11 @@ def crawl(event, context): cursor = siksha_db.cursor(pymysql.cursors.DictCursor) try: print("Start crawling") - crawlers = [VetRestaurantCrawler(), SnudormRestaurantCrawler(), SnucoRestaurantCrawler()] + crawlers = [ + VetRestaurantCrawler(), + SnudormRestaurantCrawler(), + SnucoRestaurantCrawler(), + ] results = asyncio.run(run_crawlers(crawlers)) for result in results: for err in result:
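
For reference, a minimal driver sketch for the renamed run_7days() entry points, mirroring how handler.run_crawlers() gathers the crawler tasks above. This is illustrative only (not part of either patch); the crawlers.* import paths are assumed from the file layout shown in the diffs.

import asyncio

from crawlers.snuco_crawler import SnucoRestaurantCrawler
from crawlers.snudorm_crawler import SnudormRestaurantCrawler
from crawlers.vet_crawler import VetRestaurantCrawler


async def main():
    # One task per crawler; each run_7days() fans out one request per day
    # (the vet crawler fetches its single weekly page) and collects results
    # into crawler.meals rather than returning them.
    crawlers = [VetRestaurantCrawler(), SnudormRestaurantCrawler(), SnucoRestaurantCrawler()]
    await asyncio.gather(*(c.run_7days() for c in crawlers), return_exceptions=True)
    for crawler in crawlers:
        print(type(crawler).__name__, "->", len(crawler.meals), "meals")


if __name__ == "__main__":
    asyncio.run(main())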