Skip to content

Commit

Permalink
Merge pull request #13 from gist-helper/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
kimtks456 authored Feb 20, 2023
2 parents 447bc48 + 32c79a3 commit e7f4b9d
Show file tree
Hide file tree
Showing 6 changed files with 254 additions and 96 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,6 @@
*.pyc
.venv
.DS_Store
.idea

!config.json
11 changes: 8 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
1. To test the parsing code 
0. git checkout release

1. To test the parsing code

python parsing.py

2. To apply the parsing result to the server
You need to specify the ip and port of backend server. 
python parsing.py http://ip:port/meals/create

(You need to specify the IP and port of the backend server)

python parsing.py http://ip:port/meals/create
7 changes: 7 additions & 0 deletions config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"filepath_bldg1_1_kor" : "./1_1_kor.xlsx",
"filepath_bldg1_1_eng" : "./1_1_eng.xlsx",
"filepath_bldg1_2_kor" : "./1_2_kor.xlsx",
"filepath_bldg1_2_eng" : "./1_2_eng.xlsx",
"filepath_bldg2" : "./2.xlsx"
}
103 changes: 61 additions & 42 deletions meal.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ class Meal:
kind: str
menu: str
special: str
def __init__(self, bldgType, langType, dateType, kindType,

def __init__(self, bldgType, langType, dateType, kindType,
bldg, date, kind, menu, special) -> None:
self.bldgType = bldgType
self.langType = langType
Expand All @@ -21,12 +21,14 @@ def __init__(self, bldgType, langType, dateType, kindType,
self.menu = menu
self.special = special


class MealWrapper:
    """Holder object exposing a single ``meal`` attribute."""

    # The wrapped Meal instance.
    meal: Meal

    def __init__(self) -> None:
        # NOTE(review): Meal.__init__ as shown in this diff declares nine
        # required positional parameters (bldgType ... special), so this
        # zero-argument call would raise TypeError when MealWrapper is
        # instantiated -- confirm how the wrapper is meant to be built.
        self.meal = Meal()


# constant
KOR = 0
ENG = 1
Expand All @@ -36,62 +38,79 @@ def __init__(self) -> None:
BLDG2_1ST = 2

BREAKFAST = 0
LUNCH = 1
DINNER = 2
SPECIAL = 3
LUNCH = 1
DINNER = 2
SPECIAL = 3

DATE_LEN = 10
DATE = ["Mon", "Tue", "Wed", "Thr", "Fri", "Sat", "Sun"]
DATE_1st = ["Mon", "Tue", "Wed", "Thr", "Fri"] # 1학 요일
DATE_2nd = ["Mon", "Tue", "Wed", "Thr", "Fri", "Sat", "Sun"] # 2학 요일

# kind of meal
MEAL_KIND_KOR = [ "조식", #0
"중식", #1
"석식" ] #2
MEAL_KIND_ENG = [ "Breakfast", #0
"Lunch", #1
"Dinner" ] #2
MEAL_KIND_KOR = ["조식", # 0
"중식", # 1
"석식"] # 2
MEAL_KIND_ENG = ["Breakfast", # 0
"Lunch", # 1
"Dinner"] # 2

# kind of building according to building type
BLDG_KIND_KOR = [ "1학생회관 1층", #0
"1학생회관 2층", #1
"2학생회관 1층" ] #2
BLDG_KIND_ENG = [ "Student Union Bldg.1 1st floor", #0
"Student Union Bldg.1 2nd floor", #1
"Student Union Bldg.2 1st floor" ] #2
BLDG_KIND_KOR = ["1학생회관 1층", # 0
"1학생회관 2층", # 1
"2학생회관 1층"] # 2
BLDG_KIND_ENG = ["Student Union Bldg.1 1st floor", # 0
"Student Union Bldg.1 2nd floor", # 1
"Student Union Bldg.2 1st floor"] # 2

# excel column according to building type
EXCEL_COL_BLDG0 = [] #Bldg.1 1st
EXCEL_COL_BLDG1 = [] #Bldg.2 2nd
EXCEL_COL_BLDG2 = ["A", "B", "C", "Mon", "Tue", "Wed", "Thr", "Fri", "Sat", "Sun"] #Bldg.2 1st
# EXCEL_COL_BLDG0 = ["A", "B", "Mon", "Mon", "Tue", "Tue", "Wed", "Wed", "Thr", "Thr", "Fri", "Fri"] # Bldg.1 1st
# EXCEL_COL_BLDG1 = ["A", "Mon", "Tue", "Wed", "Thr", "Fri"] # Bldg.1 2nd
# EXCEL_COL_BLDG2 = ["A", "B", "C", "Mon", "Tue", "Wed", "Thr", "Fri", "Sat", "Sun"] # Bldg.2 1st
'''
column 예외 존재 (2023.2.6의 1학1층한글 엑셀보면 쓸모없는 col이 있을 수 있음)
이를 위해 요일 있는 행의 index로 접근
'''
EXCEL_COL_BLDG0 = [2, 11]
EXCEL_COL_BLDG0_EXCEPT = []
EXCEL_COL_BLDG1 = [1, 6]
EXCEL_COL_BLDG2 = [3, 10]

DATE_INDEX_BLDG0 = 0 #Bldg.1 1st
DATE_INDEX_BLDG1 = 0 #Bldg.2 2nd
DATE_INDEX_BLDG2 = 1 #Bldg.2 1st
DATE_INDEX_BLDG0 = 4 # Bldg.1 1st
DATE_INDEX_BLDG1 = 1 # Bldg.1 2nd
DATE_INDEX_BLDG2 = 1 # Bldg.2 1st

INDEX_ENDPOINTS_BLDG0 = [] #Bldg.1 1st
INDEX_ENDPOINTS_BLDG1 = [] #Bldg.2 2nd
INDEX_ENDPOINTS_BLDG2 = [[2, 12], [12, 20], [22, 29], [20, 22]] #Bldg.2 1st
INDEX_ENDPOINTS_BLDG0 = [[5, 15], [15, 22], [23, 30], [22, 23]] # Bldg.1 1st
INDEX_ENDPOINTS_BLDG1 = [[0, 0], [2, 12], [0, 0], [0, 0]] # Bldg.1 2nd
INDEX_ENDPOINTS_BLDG1_EXCEPT = [4, 6] # 1학2층 엑셀은 숨겨진 행(5,7번) 존재하고 dummy 들어가있어 제외해야함.
INDEX_ENDPOINTS_BLDG2 = [[2, 12], [12, 20], [22, 29], [20, 22]] # Bldg.2 1st

# allergy type according to allergy code
ALGY_ING_KOR = [ "계란류", "우유", "메밀", "땅콩", "대두",
"밀", "고등어", "게", "새우", "돼지고기",
"복숭아", "토마토", "아황산류", "호두", "닭고기",
"쇠고기", "오징어", "조개류", "잣" ]
ALGY_ING_ENG = [ "egg", "milk", "buckwheat", "peanut", "soybean",
"wheat", "mackerel", "crab", "shrimp", "pork",
"peach", "tomato", "sulgite", "walnut", "chicken",
"beef", "squid", "shellfish", "pine nut" ]
# Allergen names, one table per numbering scheme and language.
# Within a scheme the Korean and English lists are index-aligned, so the same
# position names the same allergen in both languages.
# NOTE(review): presumably scheme 1 is used by the Student Union Bldg.1
# spreadsheets and scheme 2 by Bldg.2, and the list position corresponds to
# the allergy code printed in the menu -- confirm against util.py.

# Scheme 1: 15 allergens.
ALGY_1_ING_KOR = [
    "난류", "우유", "메밀", "대두", "땅콩", "밀",
    "새우", "돼지고기", "닭고기", "쇠고기", "오징어",
    "고등어", "조개류", "토마토", "아황산염",
]
# Fixed: "milk" previously carried a stray leading comma inside the literal
# (",milk"), and "sulfite" was misspelled as "sulgite".
ALGY_1_ING_ENG = [
    "egg", "milk", "buckwheat", "soybean", "peanut", "wheat",
    "shrimp", "pork", "chicken", "beef", "squid",
    "mackerel", "shellfish", "tomato", "sulfite",
]

# Scheme 2: 19 allergens.
ALGY_2_ING_KOR = [
    "계란류", "우유", "메밀", "땅콩", "대두",
    "밀", "고등어", "게", "새우", "돼지고기",
    "복숭아", "토마토", "아황산류", "호두", "닭고기",
    "쇠고기", "오징어", "조개류", "잣",
]
# Fixed: "sulfite" was misspelled as "sulgite".
ALGY_2_ING_ENG = [
    "egg", "milk", "buckwheat", "peanut", "soybean",
    "wheat", "mackerel", "crab", "shrimp", "pork",
    "peach", "tomato", "sulfite", "walnut", "chicken",
    "beef", "squid", "shellfish", "pine nut",
]

# [langType]
ALGY_ING = [ALGY_ING_KOR, ALGY_ING_ENG]
ALGY_ING_1 = [ALGY_1_ING_KOR, ALGY_1_ING_ENG]
ALGY_ING_2 = [ALGY_2_ING_KOR, ALGY_2_ING_ENG]

# [langType][kindType]
MEAL_KIND = [MEAL_KIND_KOR, MEAL_KIND_ENG]
MEAL_KIND = [MEAL_KIND_KOR, MEAL_KIND_ENG]

# [langType][bldgType]
BLDG_KIND = [BLDG_KIND_KOR, BLDG_KIND_ENG]
BLDG_KIND = [BLDG_KIND_KOR, BLDG_KIND_ENG]

# [bldgType]
EXCEL_COL_BLDG = [EXCEL_COL_BLDG0, EXCEL_COL_BLDG1, EXCEL_COL_BLDG2]
DATE_INDEX = [DATE_INDEX_BLDG0, DATE_INDEX_BLDG1, DATE_INDEX_BLDG2]
INDEX_ENDPOINTS = [INDEX_ENDPOINTS_BLDG0, INDEX_ENDPOINTS_BLDG1, INDEX_ENDPOINTS_BLDG2]
EXCEL_COL_BLDG = [EXCEL_COL_BLDG0, EXCEL_COL_BLDG1, EXCEL_COL_BLDG2]
DATE_INDEX = [DATE_INDEX_BLDG0, DATE_INDEX_BLDG1, DATE_INDEX_BLDG2]
INDEX_ENDPOINTS = [INDEX_ENDPOINTS_BLDG0, INDEX_ENDPOINTS_BLDG1, INDEX_ENDPOINTS_BLDG2]
161 changes: 132 additions & 29 deletions parsing.py
Original file line number Diff line number Diff line change
@@ -1,40 +1,119 @@
import sys
import os
from os.path import exists
import requests
import json
import pandas as pd
from meal import Meal
from meal import Meal, INDEX_ENDPOINTS_BLDG1_EXCEPT
from meal import KOR, ENG
from meal import BREAKFAST, LUNCH, DINNER
from meal import BLDG1_1ST, BLDG1_2ND, BLDG2_1ST
from meal import DATE_LEN, DATE
from meal import BLDG1_1ST, BLDG1_2ND, BLDG2_1ST
from meal import DATE_LEN, DATE_2nd,DATE_1st
from meal import MEAL_KIND
from meal import BLDG_KIND
from meal import EXCEL_COL_BLDG, INDEX_ENDPOINTS, DATE_INDEX
from util import ComplexEncoder, sanitize_menu
from util import parsing_date, parsing_meal
from util import parsing_date, parsing_meal


def parsing(excel_path: str, bldgType: int, langType: int) -> list:
col = EXCEL_COL_BLDG [bldgType]
endpoint = INDEX_ENDPOINTS[bldgType]
bldg = BLDG_KIND [langType][bldgType]
time_index = DATE_INDEX [bldgType]

sheet = pd.read_excel(excel_path, sheet_name=langType,
index_col=None, header=None, names=col, engine='openpyxl')

col = EXCEL_COL_BLDG [bldgType]
endpoint = INDEX_ENDPOINTS[bldgType]
bldg = BLDG_KIND [langType][bldgType]
time_index = DATE_INDEX [bldgType]
start_col_idx = col[0]
end_col_idx = col[1]

parsing_result = []
for (dateType, date_str) in enumerate(DATE):
date_sheet = sheet[date_str]
date = parsing_date(time_index, date_sheet)
for kindType in [BREAKFAST, LUNCH, DINNER]:
kind = MEAL_KIND[langType][kindType]
menu, speical = parsing_meal[kindType](endpoint, date_sheet, langType, dateType)
meal = Meal(bldgType, langType, dateType, kindType,
bldg, date, kind, menu, speical)
parsing_result.append(meal.__dict__)

# 1학 1층 한글
if bldgType == BLDG1_1ST and langType == 0:
xl = pd.read_excel(excel_path, sheet_name=None,
index_col=None, header=None, names=None, engine='openpyxl')

# 맨 뒤에서 2번째 sheet가 이번주 sheet
last_sheet = list(xl.keys())[-2]
sheet = xl[last_sheet]

# 짝수 column만 사용
sheet_keys = sheet.keys()[start_col_idx : end_col_idx]
sheet_keys = sheet_keys[0::2]

for (dateType, date_str) in enumerate(sheet_keys):
date_sheet = sheet[date_str]
date = parsing_date(time_index, date_sheet)
for kindType in [BREAKFAST, LUNCH, DINNER]:
kind = MEAL_KIND[langType][kindType]
menu, special = parsing_meal[kindType](endpoint, date_sheet, langType, dateType, bldgType)
meal = Meal(bldgType, langType, dateType, kindType,
bldg, date, kind, menu, special)
parsing_result.append(meal.__dict__)
return parsing_result

# 1학 1층 영어
elif bldgType == BLDG1_1ST and langType == 1:
sheet = pd.read_excel(excel_path, sheet_name=0,
index_col=None, header=None, names=None, engine='openpyxl')

# 짝수 column만 사용
sheet_keys = sheet.keys()[start_col_idx: end_col_idx]
sheet_keys = sheet_keys[0::2]

for (dateType, date_str) in enumerate(sheet_keys):
date_sheet = sheet[date_str]
date = parsing_date(time_index, date_sheet)
for kindType in [BREAKFAST, LUNCH, DINNER]:
kind = MEAL_KIND[langType][kindType]
menu, special = parsing_meal[kindType](endpoint, date_sheet, langType, dateType, bldgType)
meal = Meal(bldgType, langType, dateType, kindType,
bldg, date, kind, menu, special)
parsing_result.append(meal.__dict__)
return parsing_result

# 1학 2층
elif bldgType == BLDG1_2ND:
sheet = pd.read_excel(excel_path, sheet_name=0,
index_col=None, header=None, names=None, engine='openpyxl')

for (dateType, date_str) in enumerate(sheet.keys()[start_col_idx : end_col_idx]):
date_sheet = sheet[date_str]
date = parsing_date(time_index, date_sheet)
for kindType in [LUNCH]:
kind = MEAL_KIND[langType][kindType]
menu, special = parsing_meal[kindType](endpoint, date_sheet, langType, dateType, bldgType, INDEX_ENDPOINTS_BLDG1_EXCEPT)
meal = Meal(bldgType, langType, dateType, kindType,
bldg, date, kind, menu, special)
parsing_result.append(meal.__dict__)
return parsing_result

return parsing_result

if bldgType == 0 & langType == 1: #1학 영어 엑셀
for (dateType, date_str) in enumerate(DATE_1st):
date_sheet = sheet[date_str]
date = parsing_date(time_index, date_sheet)
for kindType in [BREAKFAST, LUNCH, DINNER]:
kind = MEAL_KIND[langType][kindType]
menu, special = parsing_meal[kindType](endpoint, date_sheet, langType, dateType, bldgType)
meal = Meal(bldgType, langType, dateType, kindType,
bldg, date, kind, menu, special)
parsing_result.append(meal.__dict__)

# 2학
elif bldgType == BLDG2_1ST:
sheet = pd.read_excel(excel_path, sheet_name=langType,
index_col=None, header=None, names=None, engine='openpyxl')

for (dateType, date_str) in enumerate(sheet.keys()[start_col_idx : end_col_idx]):
date_sheet = sheet[date_str]
date = parsing_date(time_index, date_sheet)
for kindType in [BREAKFAST, LUNCH, DINNER]:
kind = MEAL_KIND[langType][kindType]
menu, special = parsing_meal[kindType](endpoint, date_sheet, langType, dateType, bldgType)
meal = Meal(bldgType, langType, dateType, kindType,
bldg, date, kind, menu, special)
parsing_result.append(meal.__dict__)
return parsing_result


def parsingTest():
excel_dir_path = "./excel"
Expand All @@ -50,26 +129,49 @@ def parsingTest():
jsonFile = open("./2nd1floor_meal.json", "w", encoding="utf-8")
json.dump(parsing_result, jsonFile, indent=4,
ensure_ascii=False, cls=ComplexEncoder)
print("-------------------------------------------------")

print("-------------------------------------------------")


if __name__ == "__main__":
Mode = 0
with open('config.json') as f:
config = json.load(f)
if Mode == 0:
excel_path = "./2학생회관.xlsx"
filepath_bldg1_1_kor = config['filepath_bldg1_1_kor']
filepath_bldg1_1_eng = config['filepath_bldg1_1_eng']
filepath_bldg1_2_kor = config['filepath_bldg1_2_kor']
filepath_bldg1_2_eng = config['filepath_bldg1_2_eng']
filepath_bldg2 = config['filepath_bldg2']

parsing_result = []
print("-------------------------------------------------")
print("parsing xlsx to json...")
parsing_result.extend(parsing(excel_path, BLDG2_1ST, KOR)) #2학 1층, 한글
parsing_result.extend(parsing(excel_path, BLDG2_1ST, ENG)) #2학 1층, 영어

if exists(filepath_bldg1_1_kor) and exists(filepath_bldg1_1_eng):
parsing_result.extend(parsing(filepath_bldg1_1_kor, BLDG1_1ST, KOR)) #1학 1층, 한글
parsing_result.extend(parsing(filepath_bldg1_1_eng, BLDG1_1ST, ENG)) #1학 1층, 영어
None
if exists(filepath_bldg1_2_kor) and exists(filepath_bldg1_2_eng):
parsing_result.extend(parsing(filepath_bldg1_2_kor, BLDG1_2ND, KOR)) #1학 2층, 한글
parsing_result.extend(parsing(filepath_bldg1_2_eng, BLDG1_2ND, ENG)) #1학 2층, 영어
None
if exists(filepath_bldg2):
parsing_result.extend(parsing(filepath_bldg2, BLDG2_1ST, KOR)) #2학 1층, 한글
parsing_result.extend(parsing(filepath_bldg2, BLDG2_1ST, ENG)) #2학 1층, 영어
None

print("-------------------------------------------------")

if len(sys.argv) == 1:
# no url, save to local as json
print("-------------------------------------------------")
print("saving json...")
jsonFile = open("./2nd1floor_meal.json", "w", encoding="utf-8")

jsonFile = open("./meal.json", "w", encoding="utf-8")
json.dump(parsing_result, jsonFile, indent=4,
ensure_ascii=False, cls=ComplexEncoder)
ensure_ascii=False, cls=ComplexEncoder)
print("-------------------------------------------------")

elif len(sys.argv) == 2:
# post to server
url = sys.argv[1]
Expand All @@ -85,5 +187,6 @@ def parsingTest():
print(response)
print()
print("-------------------------------------------------")

elif Mode == 1:
parsingTest()
Loading

0 comments on commit e7f4b9d

Please sign in to comment.