-
Notifications
You must be signed in to change notification settings - Fork 1
/
01-recipe_tags.py
98 lines (82 loc) · 3.09 KB
/
01-recipe_tags.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import ast
from pathlib import Path
import pandas as pd
import streamlit as st
import yaml
# Input dataset and the YAML file the tag categories are persisted to. Both
# paths are relative, so they resolve against the app's working directory.
RECIPE_FILE = Path("food-com-recipes/RAW_recipes.csv")
TAGS_FILE = Path("food-com-recipes/all_tags.yaml")

# wide display
st.set_page_config(layout="wide")

prog_bar = st.progress(20, "Loading Recipe Data...")
# Only the "tags" column of the recipes is ever read by this script, so skip
# parsing the remaining columns of the CSV.
recipes = pd.read_csv(RECIPE_FILE, usecols=["tags"])
prog_bar.progress(40, "Extracting Tags...")
# Collect every distinct, non-empty tag. Each cell of the "tags" column holds
# the text of a Python list literal, which ast.literal_eval parses without
# executing code. Missing cells are dropped first because literal_eval cannot
# parse NaN, and the result is sorted so the editor rows come out in the same
# order on every run (set iteration order is not stable across launches).
tags = sorted(
    {
        tag
        for tag_list in recipes["tags"].dropna()
        for tag in ast.literal_eval(tag_list)
        if tag
    }
)
prog_bar.progress(50, "Creating DataFrame...")
# One row per tag; the category columns are added later.
data = pd.DataFrame(tags, columns=["tags"])
# Names of the category columns, in the order they appear in the editor.
_CATEGORY_NAMES = (
    "season",
    "event",
    "time",
    "difficulty",
    "equipment",
    "cooking_method",
    "cuisine",
    "course",
    "dietary_restrictions",
    "key_ingredient",
    "dish",
)

# Editor column configuration: a required, regex-validated text column for the
# tag name followed by one checkbox column per category.
# NOTE(review): the pattern only admits lowercase letters and hyphens - confirm
# that tags containing digits never need to be entered by hand.
columns = {
    "tags": st.column_config.TextColumn(required=True, validate="^[a-z-]+$"),
    **{name: st.column_config.CheckboxColumn() for name in _CATEGORY_NAMES},
}
# Load tags.yaml if it exists; otherwise start with no saved categories so the
# app still works before anything has been saved.
prog_bar.progress(60, "Loading Tag Categories File...")
saved_tags: dict = {}
if TAGS_FILE.exists():
    with TAGS_FILE.open("r", encoding="utf-8") as f:
        # safe_load returns None for an empty document.
        loaded = yaml.safe_load(f) or {}
    # A category written as a bare "name:" loads as None; treat it as an
    # empty list so later membership tests and iteration do not fail.
    saved_tags = {category: members or [] for category, members in loaded.items()}
prog_bar.progress(70, "Adding New Columns...")
# Add one column per category with every value set to 0 (unchecked)
category_columns = [col for col in columns if col != "tags"]
for col in category_columns:
    data[col] = 0
prog_bar.progress(80, "Assigning Saved Tags...")
# Set saved values to 1 (checked)
for col in category_columns:
    # Skip categories that are absent from the saved file or have no tags.
    if not saved_tags.get(col):
        continue
    is_saved = data["tags"].isin(saved_tags[col])
    # Assign through .loc: the chained form data[col][row] = 1 writes to a
    # temporary object under pandas Copy-on-Write and leaves `data` unchanged.
    data.loc[is_saved, col] = 1
    # Drop every matched tag from the saved list (all occurrences) so that
    # what is left in saved_tags are only tags missing from the recipe data.
    matched = set(data.loc[is_saved, "tags"])
    saved_tags[col] = [tag for tag in saved_tags[col] if tag not in matched]
prog_bar.progress(90, "Appending Missing Tags...")
# Add remaining saved values to the end of the dataframe
checkbox_columns = [col for col in columns if col != "tags"]
already_listed = set(data["tags"])
# dict.fromkeys de-duplicates while keeping first-seen order, so a tag saved
# under several categories becomes a single row instead of one row per
# category. Tags that already have a row are skipped for the same reason.
remaining_saved_tags = list(
    dict.fromkeys(
        itm
        for lst in saved_tags.values()
        for itm in (lst or ())
        if itm not in already_listed
    )
)
new_data = [
    {
        "tags": tag,
        # .get(): a category may be absent from the saved file entirely.
        **{
            col: 1 if tag in (saved_tags.get(col) or ()) else 0
            for col in checkbox_columns
        },
    }
    for tag in remaining_saved_tags
]
# Concatenate only when there is something to add; an empty frame contributes
# no rows and is best kept out of the result's dtype inference.
if new_data:
    data = pd.concat([data, pd.DataFrame(new_data)], ignore_index=True)
prog_bar.progress(100, "Done!")
# Render the tag table as an editable grid. Rows can be added or removed
# ("dynamic"), and `edited` receives the table as it stands after the user's
# changes.
editor_options = {
    "column_config": columns,
    "num_rows": "dynamic",
    "height": 1300,
    "use_container_width": True,
    "hide_index": False,
}
edited = st.data_editor(data, **editor_options)
if st.button("Save", type="primary"):
    # Rows added in the editor can have an empty tag cell; such rows have no
    # name to save.
    tag_names = edited["tags"]
    has_name = tag_names.notna() & (tag_names != "")
    # Map each category to the names of the tags ticked for it. Comparing with
    # 1 matches both 1 and True and is False for missing cells, so NaN/None
    # from newly added rows never reaches the file. Sorting keeps the saved
    # YAML stable from one save to the next.
    data_dict = {
        col: sorted(set(tag_names[has_name & (edited[col] == 1)]))
        for col in columns
        if col != "tags"
    }
    with TAGS_FILE.open("w", encoding="utf-8") as f:
        yaml.safe_dump(data_dict, f)
    st.success("Saved!")