-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add reconcile command to add new bars from gemeente api
- Loading branch information
1 parent
7d19afe
commit ffada18
Showing
14 changed files
with
516 additions
and
253 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
__pycache__/ |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
bars.json | ||
cafes.json | ||
gmaps_cache/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
from dataclasses import dataclass | ||
from enum import Enum | ||
from typing import List | ||
|
||
import requests | ||
from requests import Session | ||
|
||
from data.bars.models import LocationResponse, Feature, NewLocation | ||
|
||
# Base URLs of the kroeg API, one per deployment target.
KROEG_ENDPOINT_LIVE = "https://kroeg.jan.tf"
KROEG_ENDPOINT_LOCAL = "http://localhost:8080"

# WFS GetFeature query against the Amsterdam "horeca" dataset. The URL is split
# per query parameter for readability; adjacent literals are concatenated by
# the parser, so the resulting string is a single URL.
AMS_ENDPOINT = (
    "https://api.data.amsterdam.nl/v1/wfs/horeca/"
    "?REQUEST=GetFeature"
    "&SERVICE=WFS"
    "&version=2.0.0"
    "&count=5000"
    "&typenames=exploitatievergunning"
    "&BBOX=4.58565,52.03560,5.31360,52.48769,urn:ogc:def:crs:EPSG::4326"
    "&outputformat=geojson"
    "&srsName=urn:ogc:def:crs:EPSG::4326"
)
|
||
|
||
class Env(Enum):
    """Deployment target that the API helpers in this module talk to."""

    # Selects KROEG_ENDPOINT_LOCAL and the built-in development credentials.
    LOCAL = "local"
    # Selects KROEG_ENDPOINT_LIVE; credentials are prompted for.
    LIVE = "live"
|
||
|
||
@dataclass
class Credentials:
    """Email/password pair used to log in to the kroeg API."""

    email: str
    password: str

    def to_dict(self) -> dict:
        """Return the credentials as the mapping posted to the login endpoint."""
        payload = dict(email=self.email, password=self.password)
        return payload
|
||
|
||
def kroeg_endpoint(env: Env) -> str:
    """Return the kroeg API base URL for ``env``.

    ``Env.LOCAL`` maps to the local endpoint; every other value maps to the
    live endpoint.
    """
    if env == Env.LOCAL:
        return KROEG_ENDPOINT_LOCAL
    return KROEG_ENDPOINT_LIVE
|
||
|
||
def kroeg_credentials(env: Env) -> Credentials:
    """Return the credentials to use for the kroeg API in ``env``.

    For ``Env.LOCAL`` a fixed development account is returned. For any other
    environment the user is prompted interactively for email and password.

    The password is read with ``getpass.getpass`` so that it is not echoed to
    the terminal (and does not end up in scrollback or screen recordings).
    """
    # Imported locally so the module's top-level import block stays untouched.
    import getpass

    if env == Env.LOCAL:
        # NOTE(review): the email literal looks like an obfuscated address
        # ("[email protected]") - confirm the intended development account.
        return Credentials(email="[email protected]", password="somepassw0rdthatisok")

    email = input("Email: ")
    password = getpass.getpass("Password: ")
    return Credentials(email=email, password=password)
|
||
|
||
def load_remote_kroegen_dataset(session: Session, env: Env) -> List[LocationResponse]:
    """Fetch all bars (published or not) from the kroeg API for ``env``.

    Raises the ``requests`` HTTP error produced by ``raise_for_status`` when
    the server answers with an error status.
    """
    url = kroeg_endpoint(env) + "/bars?only_published=false"
    response = session.get(url)
    response.raise_for_status()

    locations = []
    for raw_location in response.json():
        locations.append(LocationResponse.from_dict(raw_location))
    return locations
|
||
|
||
def load_gemeente_amsterdam_dataset() -> List[Feature]:
    """Download the Amsterdam dataset from ``AMS_ENDPOINT`` and parse its features.

    Raises the ``requests`` HTTP error produced by ``raise_for_status`` when
    the server answers with an error status.
    """
    response = requests.get(AMS_ENDPOINT)
    response.raise_for_status()

    # The payload is a GeoJSON-style document; each entry under "features"
    # is handed to Feature.from_json.
    raw_features = response.json()["features"]
    features = []
    for raw_feature in raw_features:
        features.append(Feature.from_json(raw_feature))
    return features
|
||
|
||
def authenticate_api(session: Session, env: Env) -> None:
    """Log ``session`` in to the kroeg API unless it already holds a ``user_id`` cookie.

    Raises the ``requests`` HTTP error produced by ``raise_for_status`` when
    the login request is rejected.
    """
    already_logged_in = "user_id" in session.cookies
    if not already_logged_in:
        login_url = kroeg_endpoint(env) + "/session/login"
        form = kroeg_credentials(env).to_dict()
        response = session.post(login_url, data=form)
        response.raise_for_status()
|
||
|
||
def add_location(f: NewLocation, session: Session, env: Env) -> None:
    """Create a new bar in the kroeg API from ``f``.

    Ensures the session is authenticated first, then posts the location as
    JSON. Raises the ``requests`` HTTP error produced by ``raise_for_status``
    when the server answers with an error status.
    """
    authenticate_api(session, env)

    target = kroeg_endpoint(env) + "/bar"
    payload = f.to_dict()
    response = session.post(target, json=payload)
    response.raise_for_status()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,188 @@ | ||
from typing import ( | ||
Dict, | ||
List, | ||
Final, | ||
Set, | ||
Iterator, | ||
Iterable, | ||
Callable, | ||
Tuple, | ||
) | ||
import json | ||
|
||
from data.bars.models import Feature | ||
from gmaps import get_likeliest_place | ||
|
||
|
||
# Exact-match overrides for "zaaknaam" values: the key is the name as it
# appears in the source data, the value is the name to use instead.
MANUAL_ZAAK_NAAM_REPLACEMENTS: Final[Dict[str, str]] = {
    "Café Beurre B.V., Van Limburg Stirumstraat 115": "Café Beurre",
    "Café Bloemers/Colak Holding B.V.": "Café Bloemers",
    "Mooy (eig. B.C. van Baaijen)": "Café Mooy",
    "Mediacentrum de Kroon B.V. (Zn)": "Café Restaurant De Kroon",
    "Hannekes Boom...Sinds 1662": "Hannekes Boom",
    "M.B.M. Horeca (Club Smokey)": "Club Smokey",
    "Gollem Proeflokaal - Gollem D. Stalpertstraat B.V": "Gollem Proeflokaal",
}

# "zaaknummer" values that filter_manual_exclusions rejects.
MANUAL_EXCLUDED_ZAAK_NUMMER: Final[Set[int]] = {
    10005,  # Lovers Horeca
    12386,  # Dubai Lounge
    18279,  # Smashburgers (actually restaurant)
    17984,  # Cannibale Royale Amstelveenseweg
}

# "zaaknummer" values that filter_on_restaurant_name accepts even when the
# name itself does not look like a cafe or bar.
MANUAL_INCLUDED_ZAAK_NUMMER: Final[Set[int]] = {
    15742,  # Clos
}
|
||
|
||
def apply_filter(
    data: Iterable[Feature], fn: Callable[[Feature], Tuple[bool, Feature]]
) -> Iterator[Feature]:
    """Lazily run ``fn`` over ``data`` and yield the features it accepts.

    ``fn`` returns ``(keep, feature)``; when ``keep`` is true the returned
    feature (which ``fn`` may have modified) is yielded, otherwise the item
    is dropped.
    """
    for candidate in data:
        keep, transformed = fn(candidate)
        if not keep:
            continue
        yield transformed
|
||
|
||
def filter_on_cafes(f: Feature) -> Tuple[bool, Feature]:
    """Accept features whose category is "Café" or "Café met zaalverhuur"."""
    cafe_categories = {"Café", "Café met zaalverhuur"}
    is_cafe = f.properties.zaak_categorie in cafe_categories
    return is_cafe, f
|
||
|
||
def filter_on_nachtzaak(f: Feature) -> Tuple[bool, Feature]:
    """Accept features whose category is "Nachtzaak"."""
    nachtzaak_categories = {"Nachtzaak"}
    category = f.properties.zaak_categorie
    return category in nachtzaak_categories, f
|
||
|
||
def filter_on_restaurants(f: Feature) -> Tuple[bool, Feature]:
    """Accept features whose category is exactly "Restaurant"."""
    is_restaurant = f.properties.zaak_categorie == "Restaurant"
    return is_restaurant, f
|
||
|
||
def filter_on_restaurant_name(f: Feature) -> Tuple[bool, Feature]:
    """Accept features whose name suggests a cafe or bar.

    A feature passes when its lower-cased name contains "cafe", "café" or
    "bar", or when its zaaknummer is listed in MANUAL_INCLUDED_ZAAK_NUMMER.
    Features without a name are rejected.
    """
    raw_name = f.properties.zaaknaam
    if raw_name is None:
        return False, f

    lowered = raw_name.lower()
    if any(keyword in lowered for keyword in ("cafe", "café", "bar")):
        return True, f

    # No keyword hit: fall back to the manual allow-list.
    return f.properties.zaaknummer in MANUAL_INCLUDED_ZAAK_NUMMER, f
|
||
|
||
def filter_manual_exclusions(f: Feature) -> Tuple[bool, Feature]:
    """Reject features whose zaaknummer is listed in MANUAL_EXCLUDED_ZAAK_NUMMER."""
    is_allowed = f.properties.zaaknummer not in MANUAL_EXCLUDED_ZAAK_NUMMER
    return is_allowed, f
|
||
|
||
def filter_coffeeshops(f: Feature) -> Tuple[bool, Feature]:
    """Reject features without a name or whose name contains "coffee" (case-insensitive)."""
    name = f.properties.zaaknaam
    keep = name is not None and "coffee" not in name.lower()
    return keep, f
|
||
|
||
def filter_hotels(f: Feature) -> Tuple[bool, Feature]:
    """Reject features without a name or whose name contains "hotel" (case-insensitive)."""
    name = f.properties.zaaknaam
    keep = name is not None and "hotel" not in name.lower()
    return keep, f
|
||
|
||
def _filter_and_enrich_using_gmaps(
    f: Feature, enforce_bar_type: bool
) -> Tuple[bool, Feature]:
    """
    Look the feature up with ``get_likeliest_place`` and adopt the match's name and place id.

    Enable "enforce_bar_type" for stricter search which yields more actual bars, but also yields
    fewer results than a general address/name search. (Some bars don't show up when searching for
    the "bar" type)

    Returns ``(False, f)`` when the feature has no name or no place was found; otherwise
    ``(True, f)`` with ``f.properties.zaaknaam`` and ``f.properties.google_place_id`` overwritten
    in place from the lookup result.
    """
    # Cheap guard first: without a name there is nothing to search for, so do
    # not bother parsing (or failing on) the geometry.
    if not f.properties.zaaknaam:
        return False, f

    # The coordinates are unpacked as (lng, lat) and handed on as (lat, lng).
    lng, lat = json.loads(f.geometry)["coordinates"]

    result = get_likeliest_place(
        f.properties.zaaknaam,
        f.properties.adres,
        (lat, lng),
        enforce_bar_type=enforce_bar_type,
    )

    if result is None:
        return False, f

    f.properties.zaaknaam = result["name"]
    f.properties.google_place_id = result["place_id"]

    return True, f
|
||
|
||
def filter_and_enrich_using_gmaps(f: Feature) -> Tuple[bool, Feature]:
    """
    Cross-reference a Gemeente Amsterdam feature with the Google Maps Place API.

    Google generally has better location names and keeps better track of which bars are still
    in business. This variant does not restrict the search to the "bar" type.
    """
    outcome = _filter_and_enrich_using_gmaps(f, enforce_bar_type=False)
    return outcome
|
||
|
||
def filter_and_enrich_using_gmaps_enforce_bar(f: Feature) -> Tuple[bool, Feature]:
    """Same lookup as ``_filter_and_enrich_using_gmaps`` with the "bar" type enforced."""
    outcome = _filter_and_enrich_using_gmaps(f, enforce_bar_type=True)
    return outcome
|
||
|
||
def manual_substitutions_zaaknaam(f: Feature) -> Tuple[bool, Feature]:
    """Replace the feature's name when it has an entry in MANUAL_ZAAK_NAAM_REPLACEMENTS.

    Never rejects a feature; the name is only touched on an exact match.
    """
    current_name = f.properties.zaaknaam
    if current_name not in MANUAL_ZAAK_NAAM_REPLACEMENTS:
        return True, f

    f.properties.zaaknaam = MANUAL_ZAAK_NAAM_REPLACEMENTS[current_name]
    return True, f
|
||
|
||
def beautify_zaaknaam(f: Feature) -> Tuple[bool, Feature]:
    """Strip company-form noise (B.V., VOF, ...) from the feature's name.

    Features without a name are rejected; otherwise the cleaned, whitespace
    trimmed name is written back and the feature is accepted.
    """
    name = f.properties.zaaknaam
    if name is None:
        return False, f

    # Order matters: the longer " Amsterdam B.V." must go before " B.V.",
    # which in turn must go before the dot-less " B.V".
    noise_fragments = (
        " Amsterdam B.V.",
        " B.V.",
        " B.V",
        " VOF",
        "V.O.F. ",
    )
    for fragment in noise_fragments:
        name = name.replace(fragment, "")

    f.properties.zaaknaam = name.strip()
    return True, f
|
||
|
||
def prepare_data(data: Iterable[Feature]) -> List[Feature]:
    """Filter and sanitize data.

    Runs three independent pipelines over the same input - night venues,
    cafes, and restaurants that look like bars - and returns their results
    concatenated in that order.

    ``data`` may be any iterable, including a one-shot iterator or generator:
    it is materialised once up front so that every pipeline sees all features.
    (Feeding a generator straight into three lazy pipelines would let the
    first pipeline exhaust it and leave the other two empty.)
    """
    features = list(data)

    nachtzaak_operations = [
        filter_on_nachtzaak,
        filter_and_enrich_using_gmaps,
    ]

    cafe_operations = [
        filter_on_cafes,
        filter_coffeeshops,
        filter_manual_exclusions,
        manual_substitutions_zaaknaam,
        beautify_zaaknaam,
        filter_and_enrich_using_gmaps,
    ]

    # Some restaurants are also bars, we try to pull some additional bars from there
    restaurant_operations = [
        filter_on_restaurants,
        filter_on_restaurant_name,
        beautify_zaaknaam,
        filter_and_enrich_using_gmaps_enforce_bar,
    ]

    def run_pipeline(
        operations: Iterable[Callable[[Feature], Tuple[bool, Feature]]]
    ) -> List[Feature]:
        # Chain the operations lazily, then drain the chain into a list.
        stream: Iterable[Feature] = features
        for operation in operations:
            stream = apply_filter(stream, operation)
        return list(stream)

    # Evaluation order (nachtzaak, cafe, restaurant) matches the result order.
    nachtzaak_data = run_pipeline(nachtzaak_operations)
    cafe_data = run_pipeline(cafe_operations)
    restaurant_data = run_pipeline(restaurant_operations)

    return nachtzaak_data + cafe_data + restaurant_data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.