Skip to content

Commit

Permalink
Cleaned and documented cleaning-up regexes
Browse files Browse the repository at this point in the history
  • Loading branch information
trnila committed Aug 11, 2024
1 parent f4e2dee commit f864ed4
Showing 1 changed file with 8 additions and 4 deletions.
12 changes: 8 additions & 4 deletions lunches.py
Original file line number Diff line number Diff line change
Expand Up @@ -588,14 +588,18 @@ async def gather_restaurants(allowed_restaurants=None):
(re.compile(r"^\s*(Polévka|BUSINESS MENU|business|SALÁT TÝDNE)", re.IGNORECASE), ""),
(re.compile(r"k menu\s*$"), ""),
(re.compile(r"(s|š|S|Š)vestk"), "Trnk"),
# normalize spacing around commas and colons: no whitespace before, exactly one space after
(re.compile(r"\s*(,|:)\s*"), "\\1 "),
# HTML tags
(re.compile(r"<[^<]+?>"), ""),
# grammage
(re.compile(r"\d+\s*(g|ml|l|ks)( |,)"), ""),
# allergens pattern 'Al ('
(re.compile(r"\s*A?l?\.?\s*\("), "("),
# brackets
(re.compile(r"\([^)]+\)"), ""),
(re.compile(r"(\s*[0-9]+\s*,)+\s*$"), ""),
(re.compile(r"A?\s*[0-9]+(,[0-9]+)*,? "), ""),
(re.compile(r"\s+,"), ","),
(re.compile(r" +"), " "),
# multiple white-spaces
(re.compile(r"\s+"), " "),
]
# Matches one uppercase character: ASCII A-Z or one of the accented capitals
# listed in the class (these appear to be the Czech diacritic letters).
UPPER_REGEXP = re.compile(r"[A-ZÁČĎÉĚÍŇÓŘŠŤÚŮÝŽ]")

Expand Down

0 comments on commit f864ed4

Please sign in to comment.