
Commit

fix: type no longer exists inside the ths (#72)
GanziDaeyong authored Feb 26, 2024
1 parent 33934a3 commit aef44f8
Showing 2 changed files with 20 additions and 7 deletions.
14 changes: 13 additions & 1 deletion crawlers/base_crawler.py
@@ -42,7 +42,19 @@ class Meal:
     BR = "BR"
     LU = "LU"
     DN = "DN"
-    type_handler = {BR: BR, LU: LU, DN: DN, "아침": BR, "점심": LU, "저녁": DN, "중식": LU, "석식": DN}
+    type_handler = {
+        BR: BR,
+        LU: LU,
+        DN: DN,
+        "아침": BR,
+        "점심": LU,
+        "저녁": DN,
+        "중식": LU,
+        "석식": DN,
+        "breakfast": BR,
+        "lunch": LU,
+        "dinner": DN,
+    }
 
     def __init__(self, restaurant="", name="", date=None, type="", price=None, etc=None):
         self.set_restaurant(restaurant)
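
For context, the expanded type_handler now accepts the English labels ("breakfast", "lunch", "dinner") alongside the Korean ones, presumably because those are the labels the crawler now reads off each td. A minimal usage sketch; the resolve_type helper is hypothetical and not part of this diff:

```python
# Sketch only: resolve_type is a hypothetical helper illustrating how the
# expanded type_handler maps raw labels onto the canonical BR/LU/DN codes.
BR, LU, DN = "BR", "LU", "DN"

type_handler = {
    BR: BR, LU: LU, DN: DN,
    "아침": BR, "점심": LU, "저녁": DN,
    "중식": LU, "석식": DN,
    "breakfast": BR, "lunch": LU, "dinner": DN,
}

def resolve_type(raw: str) -> str:
    # Unknown labels are returned unchanged so the caller can decide what to do.
    return type_handler.get(raw.strip(), raw)

print(resolve_type("lunch"))  # LU
print(resolve_type("석식"))   # DN
```
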
13 changes: 7 additions & 6 deletions crawlers/snuco_crawler.py
@@ -159,11 +159,7 @@ def crawl(self, soup, **kwargs):
         table = soup.find("table", {"class": "menu-table"})
         if not table:
             return
-        ths = table.select("thead > tr > th")
         trs = table.tbody.find_all("tr", recursive=False)
-        types = []
-        for th in ths[1:]:
-            types.append(th.text)
 
         for tr in trs:
             tds = tr.find_all("td", recursive=False)
@@ -173,7 +169,11 @@
                 for except_restaurant_name in self.except_restaurant_name_list
             ):
                 continue
+
             for col_idx, td in enumerate(tds[1:]):
+                # The meal type is no longer listed in the ths; it now lives inside each td.
+                meal_type = td["class"][0]
+
                 # Changed to read the whole menu at once from td.text
                 names = td.text.split("\n")
                 restaurant = text_normalizer(row_restaurant)
@@ -186,7 +186,7 @@
                 filtered_names = self.filter_menu_names(names)
 
                 for name in filtered_names:
-                    meal = Meal(restaurant, name, date, types[col_idx])
+                    meal = Meal(restaurant, name, date, meal_type)
                     meal = self.normalize(meal)
 
                     if self.is_meal_name_when_normalized(meal.name):
@@ -210,6 +210,7 @@
                         if not next_line_merged and self.is_next_line_keyword(last_meal):
                             last_meal = self.combine(last_meal, meal)
                             next_line_merged = True
+
                         else:
                             delimiter = self.get_multi_line_delimiter(last_meal)
                             # When the meal matches a delimiter, it falls through to here
@@ -228,7 +229,7 @@
                             next_line_merged = False
                         elif self.get_multi_line_delimiter(last_meal) is None:
                             if meal.restaurant != restaurant:
-                                meal = Meal(row_restaurant, name, date, types[col_idx])
+                                meal = Meal(row_restaurant, name, date, meal_type)
                                 meal = self.normalize(meal)
                                 restaurant = meal.restaurant
                             self.found_meal(last_meal)
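
The core of the change in crawl(): the meal type is no longer collected from the thead th cells but read from the class attribute of each td. A minimal sketch against a made-up HTML fragment; the real menu-table markup and its class names are assumptions here, not taken from the diff:

```python
from bs4 import BeautifulSoup

# Hypothetical fragment; the real snuco page layout may differ. The point is
# that each meal cell carries its type as a class on the td itself.
html = """
<table class="menu-table">
  <tbody>
    <tr>
      <td>학생회관식당</td>
      <td class="breakfast">미역국</td>
      <td class="lunch">제육볶음</td>
      <td class="dinner">김치찌개</td>
    </tr>
  </tbody>
</table>
"""

soup = BeautifulSoup(html, "html.parser")
table = soup.find("table", {"class": "menu-table"})
for tr in table.tbody.find_all("tr", recursive=False):
    tds = tr.find_all("td", recursive=False)
    row_restaurant = tds[0].text
    for td in tds[1:]:
        meal_type = td["class"][0]  # e.g. "breakfast" / "lunch" / "dinner"
        names = [n for n in td.text.split("\n") if n]
        print(row_restaurant, meal_type, names)
```

If the class names really are these English labels, the new "breakfast"/"lunch"/"dinner" keys added to type_handler in base_crawler.py are what map them onto BR/LU/DN.
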
