Prod ver.2024.07.29
GoGiants1 authored Jul 29, 2024
Merge commit a5d6de1 (2 parents: ca974d2 + 2f1eb5b)
Showing 5 changed files with 50 additions and 30 deletions.
2 changes: 1 addition & 1 deletion Makefile
@@ -5,5 +5,5 @@ default:

.PHONY: lint
lint:
black --check .
black .
pylint --recursive=yes .
28 changes: 18 additions & 10 deletions crawlers/base_crawler.py
@@ -1,11 +1,12 @@
from abc import ABCMeta, abstractmethod
import re
import datetime
from bs4 import BeautifulSoup
from pytz import timezone
import urllib3
import json
import re
from abc import ABCMeta, abstractmethod

import aiohttp
import urllib3
from bs4 import BeautifulSoup
from pytz import timezone


def text_normalizer(text, only_letters=False):
@@ -189,15 +190,22 @@ async def run(self, url=None, **kwargs):
urllib3.disable_warnings()
if url is None:
url = self.url
async with aiohttp.ClientSession(headers=self.headers, connector=aiohttp.TCPConnector(ssl=False)) as session:
async with session.get(url) as response:
try:
try:
async with aiohttp.ClientSession(
headers=self.headers,
connector=aiohttp.TCPConnector(ssl=False),
) as session:
async with session.get(url) as response:
if response.status != 200:
print(f"Failed to fetch {url}: Status code {response.status}")
return
html = await response.read()
# html = await response.text()
soup = BeautifulSoup(html, "html.parser")
self.crawl(soup, **kwargs)
except Exception as e:
print(f"Error in Run: {str(e)}")
except Exception as e:
print(f"Error in Run: {str(e)}")
print(f"URL: {url}")

def normalize(self, meal, **kwargs):
for normalizer_cls in self.normalizer_classes:
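The reworked run() above wraps the whole aiohttp session in the try block, so connection failures are reported together with the offending URL instead of escaping the coroutine. A minimal sketch of how a subclass might exercise it, assuming RestaurantCrawler can be instantiated without arguments and already supplies default headers; the ExampleCrawler class and its URL are hypothetical:

import asyncio

from crawlers.base_crawler import RestaurantCrawler


class ExampleCrawler(RestaurantCrawler):
    # Hypothetical target page; the real crawlers define their own url.
    url = "https://example.com/menu"

    def crawl(self, soup, **kwargs):
        # Illustrative only: print the page title instead of building Meal objects.
        print(soup.title.get_text(strip=True) if soup.title else "no title found")


async def main():
    # run() traps fetch errors itself and prints the failing URL, so this call does not raise.
    await ExampleCrawler().run()


if __name__ == "__main__":
    asyncio.run(main())
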
10 changes: 6 additions & 4 deletions crawlers/snuco_crawler.py
@@ -1,15 +1,16 @@
import asyncio
import datetime
import re

from pytz import timezone

from crawlers.base_crawler import (
FindParenthesisHash,
FindPrice,
Meal,
MealNormalizer,
RestaurantCrawler,
Meal,
text_normalizer,
FindPrice,
FindParenthesisHash,
)


@@ -48,7 +49,7 @@ def normalize(self, meal, **kwargs):


class SnucoRestaurantCrawler(RestaurantCrawler):
url = "https://snuco.snu.ac.kr/ko/foodmenu"
url = "https://snuco.snu.ac.kr/foodmenu/"
normalizer_classes = [
FindPrice,
FindParenthesisHash,
@@ -77,6 +78,7 @@ class SnucoRestaurantCrawler(RestaurantCrawler):
"8805545": "3식당",
"8801939": "302동식당",
"8898955": "301동식당",
"8871123": "220동식당",
}
except_restaurant_list = ["기숙사식당"] # handled by the snudorm crawler

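The new "8871123": "220동식당" entry extends the code-to-cafeteria map the crawler uses to label menus. A self-contained sketch of how such a lookup behaves; the RESTAURANT_CODES dict and resolve_restaurant helper are hypothetical stand-ins for the crawler's own attribute:

# Hypothetical stand-in for the mapping extended in this commit.
RESTAURANT_CODES = {
    "8805545": "3식당",
    "8801939": "302동식당",
    "8898955": "301동식당",
    "8871123": "220동식당",  # entry added in this commit
}


def resolve_restaurant(code: str) -> str:
    # Unknown codes fall back to the raw value instead of raising KeyError.
    return RESTAURANT_CODES.get(code, code)


if __name__ == "__main__":
    print(resolve_restaurant("8871123"))  # 220동식당
    print(resolve_restaurant("0000000"))  # 0000000
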
24 changes: 13 additions & 11 deletions handler.py
@@ -1,20 +1,22 @@
import pymysql
import os
import argparse
import asyncio
import datetime
from pytz import timezone
import os
from itertools import compress
import asyncio
import argparse

import pymysql
from pytz import timezone

from crawlers.base_crawler import text_normalizer
from crawlers.vet_crawler import VetRestaurantCrawler
from crawlers.snudorm_crawler import SnudormRestaurantCrawler
from crawlers.snuco_crawler import SnucoRestaurantCrawler
from crawlers.snudorm_crawler import SnudormRestaurantCrawler
from crawlers.vet_crawler import VetRestaurantCrawler
from slack import (
send_new_restaurants_message,
_send_slack_message,
send_deleted_menus_message,
send_new_menus_message,
send_edited_menus_message,
_send_slack_message,
send_new_menus_message,
send_new_restaurants_message,
)


@@ -209,7 +211,7 @@ def crawl(event, context):
except Exception as e:
siksha_db.rollback()
print(e)
_send_slack_message("Crawling has been failed")
_send_slack_message(f"Crawling has been failed: {str(e)}")
return "Crawling has been failed"
finally:
cursor.close()
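Forwarding str(e) makes the Slack alert say what actually failed rather than only that crawling failed. A compact sketch of the rollback-and-notify flow this hunk follows, assuming a pymysql connection and the _send_slack_message helper from slack.py; run_crawl and its do_crawl callback are hypothetical wrappers, not the real handler.crawl signature:

from typing import Callable

import pymysql

from slack import _send_slack_message


def run_crawl(siksha_db: pymysql.connections.Connection, do_crawl: Callable) -> str:
    # Hypothetical wrapper mirroring the rollback-and-notify flow of handler.crawl().
    cursor = siksha_db.cursor()
    try:
        do_crawl(cursor)  # the real handler runs the crawlers and writes menus here
        siksha_db.commit()
        return "Crawling has been done"  # illustrative success message
    except Exception as e:
        siksha_db.rollback()
        print(e)
        # The exception text is forwarded so the Slack alert says what went wrong.
        _send_slack_message(f"Crawling has been failed: {str(e)}")
        return "Crawling has been failed"
    finally:
        cursor.close()
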
16 changes: 12 additions & 4 deletions slack.py
@@ -1,14 +1,22 @@
import requests
import os

import requests


def _send_slack_message(message: str):
slack_token = os.environ.get("SLACK_TOKEN")
slack_channel = os.environ["SLACK_CHANNEL"]
if not slack_token:
print("No Slack token provided. Skipping sending message.")
return
body = {"channel": slack_token, "text": message}
headers = {"Authorization": f'Bearer {os.environ["SLACK_TOKEN"]}'}
requests.post("https://slack.com/api/chat.postMessage", headers=headers, data=body, timeout=100)
body = {"channel": slack_channel, "text": message}
headers = {"Authorization": f"Bearer {slack_token}"}
try:
res = requests.post("https://slack.com/api/chat.postMessage", headers=headers, data=body, timeout=100)
res.raise_for_status()
except Exception as e:
print(f"Failed to send Slack message: {str(e)}")
print(f"Response: {e.response.text if e.response else 'No response'}")


def send_deleted_menus_message(menus: list):
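With the token check in place, the helper now degrades gracefully when Slack credentials are missing and surfaces HTTP failures instead of ignoring them. A small usage sketch, assuming the SLACK_TOKEN and SLACK_CHANNEL environment variables named in the diff; the channel ID below is a placeholder:

import os

from slack import _send_slack_message

# Placeholder credentials; real deployments set these in the environment.
os.environ["SLACK_CHANNEL"] = "C0123456789"  # hypothetical channel ID
os.environ.setdefault("SLACK_TOKEN", "")  # left empty, the helper just logs and returns

_send_slack_message("crawler test message")
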
