Skip to content

Commit

Permalink
feat: add reconcile command to add new bars from gemeente api
Browse files Browse the repository at this point in the history
  • Loading branch information
SchutteJan committed Aug 4, 2024
1 parent 7d19afe commit ffada18
Show file tree
Hide file tree
Showing 14 changed files with 516 additions and 253 deletions.
1 change: 1 addition & 0 deletions data/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__pycache__/
Empty file added data/__init__.py
Empty file.
1 change: 1 addition & 0 deletions data/bars/.gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
bars.json
cafes.json
gmaps_cache/
8 changes: 5 additions & 3 deletions data/bars/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ https://www.amsterdam.nl/ondernemen/horeca/horeca-kaart/

https://api.data.amsterdam.nl/dcatd/datasets/GsY50tEkoJKCGw

## Fallacies

Some assumptions that you might have about this data that turn out not to be true:

1. When a location is no longer in the dataset, it no longer has a license. (Not true: a location can disappear from the dataset while still holding a license.)


## Explore
Expand Down Expand Up @@ -39,7 +44,4 @@ jq '.features[] | select(.properties.zaak_categorie == "Onbekend") | .properties
```bash
# Filter on Cafes
jq '.features[] | select(.properties.zaak_categorie == "Café")' bars.json > cafes.json

poetry run python render_template.py

```
Empty file added data/bars/__init__.py
Empty file.
66 changes: 66 additions & 0 deletions data/bars/datasets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from dataclasses import dataclass
from enum import Enum
from typing import List

import requests
from requests import Session

from data.bars.models import LocationResponse, Feature, NewLocation

# Base URL of the kroeg API used for Env.LIVE.
KROEG_ENDPOINT_LIVE = "https://kroeg.jan.tf"
# Base URL of the kroeg API used for Env.LOCAL (a server on this machine).
KROEG_ENDPOINT_LOCAL = "http://localhost:8080"
# Gemeente Amsterdam WFS "GetFeature" query for the "exploitatievergunning"
# feature type, returned as GeoJSON. The query caps the result at 5000
# features and restricts it to the bounding box given in BBOX.
AMS_ENDPOINT = "https://api.data.amsterdam.nl/v1/wfs/horeca/?REQUEST=GetFeature&SERVICE=WFS&version=2.0.0&count=5000&typenames=exploitatievergunning&BBOX=4.58565,52.03560,5.31360,52.48769,urn:ogc:def:crs:EPSG::4326&outputformat=geojson&srsName=urn:ogc:def:crs:EPSG::4326"


class Env(Enum):
    """Which kroeg API deployment a command should talk to."""

    # A server running on the developer's own machine.
    LOCAL = "local"
    # The publicly hosted server.
    LIVE = "live"


@dataclass
class Credentials:
    """E-mail/password pair used to log in to the kroeg API.

    The ``__repr__`` that ``@dataclass`` would generate prints every field,
    including the plaintext password, which then ends up in logs, tracebacks
    and debugger output. It is overridden here to redact the password;
    equality and construction are unchanged.
    """

    email: str
    password: str

    def __repr__(self) -> str:
        """Return a debug representation that never contains the password."""
        return f"{type(self).__name__}(email={self.email!r}, password='***')"

    def to_dict(self) -> dict:
        """Return the credentials as a plain ``{"email", "password"}`` mapping."""
        return {"email": self.email, "password": self.password}


def kroeg_endpoint(env: Env) -> str:
    """Return the kroeg API base URL for ``env``.

    ``Env.LOCAL`` maps to the local endpoint; any other value maps to the
    live endpoint.
    """
    if env == Env.LOCAL:
        return KROEG_ENDPOINT_LOCAL
    return KROEG_ENDPOINT_LIVE


def kroeg_credentials(env: Env) -> Credentials:
    """Return the login credentials to use for ``env``.

    For ``Env.LOCAL`` a fixed development account is returned. For any other
    environment the user is prompted interactively; the password is read with
    ``getpass`` so it is not echoed to the terminal (and does not linger in
    the scrollback), unlike a plain ``input()`` prompt.
    """
    if env == Env.LOCAL:
        # NOTE(review): this e-mail literal is reproduced exactly as it
        # appears in the reviewed source; it looks like it was mangled by
        # e-mail obfuscation in that view -- confirm against the repository.
        return Credentials(email="[email protected]", password="somepassw0rdthatisok")

    # Imported locally: only the interactive path needs it.
    from getpass import getpass

    email = input("Email: ")
    password = getpass("Password: ")
    return Credentials(email=email, password=password)


def load_remote_kroegen_dataset(session: Session, env: Env) -> List[LocationResponse]:
    """Fetch all bars from the kroeg API, including unpublished ones.

    Raises whatever ``raise_for_status`` raises when the API answers with an
    error status.
    """
    url = kroeg_endpoint(env) + "/bars?only_published=false"
    response = session.get(url)
    response.raise_for_status()
    return list(map(LocationResponse.from_dict, response.json()))


def load_gemeente_amsterdam_dataset() -> List[Feature]:
    """Download the Gemeente Amsterdam dataset and parse its features.

    Returns one ``Feature`` per entry of the response's ``"features"`` array.
    Raises whatever ``raise_for_status`` raises on an HTTP error status, and a
    ``requests`` timeout error when the server stops responding.
    """
    # requests waits forever by default; bound the wait so a stalled
    # connection fails the command instead of hanging it indefinitely.
    r = requests.get(AMS_ENDPOINT, timeout=60)
    r.raise_for_status()
    return [Feature.from_json(x) for x in r.json()["features"]]


def authenticate_api(session: Session, env: Env) -> None:
    """Log ``session`` in to the kroeg API unless it is already logged in.

    A session that already carries a ``user_id`` cookie is left untouched.
    Otherwise the credentials for ``env`` are posted as form data to the
    login endpoint; an error status raises via ``raise_for_status``.
    """
    needs_login = "user_id" not in session.cookies
    if needs_login:
        payload = kroeg_credentials(env).to_dict()
        login_url = kroeg_endpoint(env) + "/session/login"
        response = session.post(login_url, data=payload)
        response.raise_for_status()


def add_location(f: NewLocation, session: Session, env: Env) -> None:
    """Create a new bar in the kroeg API from ``f``.

    Makes sure the session is authenticated first, then posts ``f`` as JSON.
    An error status raises via ``raise_for_status``.
    """
    authenticate_api(session, env)
    create_url = kroeg_endpoint(env) + "/bar"
    response = session.post(create_url, json=f.to_dict())
    response.raise_for_status()
188 changes: 188 additions & 0 deletions data/bars/filters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
from typing import (
Dict,
List,
Final,
Set,
Iterator,
Iterable,
Callable,
Tuple,
)
import json

from data.bars.models import Feature
from gmaps import get_likeliest_place


# Hand-curated name fixes: maps a raw "zaaknaam" exactly as it appears in the
# source data to the name that should be used instead. Lookups are
# exact-match on the whole string.
MANUAL_ZAAK_NAAM_REPLACEMENTS: Final[Dict[str, str]] = {
"Café Beurre B.V., Van Limburg Stirumstraat 115": "Café Beurre",
"Café Bloemers/Colak Holding B.V.": "Café Bloemers",
"Mooy (eig. B.C. van Baaijen)": "Café Mooy",
"Mediacentrum de Kroon B.V. (Zn)": "Café Restaurant De Kroon",
"Hannekes Boom...Sinds 1662": "Hannekes Boom",
"M.B.M. Horeca (Club Smokey)": "Club Smokey",
"Gollem Proeflokaal - Gollem D. Stalpertstraat B.V": "Gollem Proeflokaal",
}

# "zaaknummer" values that must never be kept, even though they pass the
# automatic filters.
MANUAL_EXCLUDED_ZAAK_NUMMER: Final[Set[int]] = {
10005, # Lovers Horeca
12386, # Dubai Lounge
18279, # Smashburgers (actually restaurant)
17984, # Cannibale Royale Amstelveenseweg
}

# "zaaknummer" values that are kept by the restaurant-name filter even though
# their name contains none of the bar/cafe keywords.
MANUAL_INCLUDED_ZAAK_NUMMER: Final[Set[int]] = {
15742, # Clos
}


def apply_filter(
    data: Iterable[Feature], fn: Callable[[Feature], Tuple[bool, Feature]]
) -> Iterator[Feature]:
    """Lazily run ``fn`` over ``data`` and yield the features it accepts.

    ``fn`` returns a ``(keep, feature)`` pair; the returned feature (which may
    differ from the input) is yielded only when ``keep`` is truthy.
    """
    outcomes = map(fn, data)
    yield from (feature for keep, feature in outcomes if keep)


def filter_on_cafes(f: Feature) -> Tuple[bool, Feature]:
    """Keep features whose category is "Café" or "Café met zaalverhuur"."""
    cafe_categories = {"Café", "Café met zaalverhuur"}
    is_cafe = f.properties.zaak_categorie in cafe_categories
    return is_cafe, f


def filter_on_nachtzaak(f: Feature) -> Tuple[bool, Feature]:
    """Keep features whose category is "Nachtzaak"."""
    nachtzaak_categories = {"Nachtzaak"}
    is_nachtzaak = f.properties.zaak_categorie in nachtzaak_categories
    return is_nachtzaak, f


def filter_on_restaurants(f: Feature) -> Tuple[bool, Feature]:
    """Keep features whose category is exactly "Restaurant"."""
    is_restaurant = f.properties.zaak_categorie == "Restaurant"
    return is_restaurant, f


def filter_on_restaurant_name(f: Feature) -> Tuple[bool, Feature]:
    """Keep features whose name suggests a bar, or that are manually included.

    The name check is a case-insensitive substring match on "cafe", "café" or
    "bar". Features without a name are rejected before the manual include
    list is consulted.
    """
    raw_name = f.properties.zaaknaam
    if raw_name is None:
        return False, f

    lowered = raw_name.lower()
    if any(keyword in lowered for keyword in ("cafe", "café", "bar")):
        return True, f

    return f.properties.zaaknummer in MANUAL_INCLUDED_ZAAK_NUMMER, f


def filter_manual_exclusions(f: Feature) -> Tuple[bool, Feature]:
    """Drop features whose zaaknummer is on the manual exclusion list."""
    is_excluded = f.properties.zaaknummer in MANUAL_EXCLUDED_ZAAK_NUMMER
    return not is_excluded, f


def filter_coffeeshops(f: Feature) -> Tuple[bool, Feature]:
    """Drop features without a name or whose name contains "coffee".

    The match is a case-insensitive substring test.
    """
    name = f.properties.zaaknaam
    keep = name is not None and "coffee" not in name.lower()
    return keep, f


def filter_hotels(f: Feature) -> Tuple[bool, Feature]:
    """Drop features without a name or whose name contains "hotel".

    The match is a case-insensitive substring test.
    """
    name = f.properties.zaaknaam
    keep = name is not None and "hotel" not in name.lower()
    return keep, f


def _filter_and_enrich_using_gmaps(
f: Feature, enforce_bar_type: bool
) -> Tuple[bool, Feature]:
"""
Enable "enforce_bar_types" for stricter search which yields more actual bars, but also yields
fewer results than a general address/name search. (Some bars don't show up when searching for
the "bar" type)
"""
lng, lat = json.loads(f.geometry)["coordinates"]
if not f.properties.zaaknaam:
return False, f

result = get_likeliest_place(
f.properties.zaaknaam,
f.properties.adres,
(lat, lng),
enforce_bar_type=enforce_bar_type,
)

if result is None:
return False, f

# print(f.properties.zaaknaam, " -> ", result["name"])
f.properties.zaaknaam = result["name"]
f.properties.google_place_id = result["place_id"]

return True, f


def filter_and_enrich_using_gmaps(f: Feature) -> Tuple[bool, Feature]:
    """
    Cross-reference a Gemeente Amsterdam feature with the Google Maps Place API.

    Google generally has better location names and keeps better track of which bars are still
    in business. This variant does not enforce the "bar" place type.
    """
    return _filter_and_enrich_using_gmaps(f, enforce_bar_type=False)


def filter_and_enrich_using_gmaps_enforce_bar(f: Feature) -> Tuple[bool, Feature]:
    """
    Same Google Maps cross-reference, but with the "bar" place type enforced.
    """
    return _filter_and_enrich_using_gmaps(f, enforce_bar_type=True)


def manual_substitutions_zaaknaam(f: Feature) -> Tuple[bool, Feature]:
    """Replace the name with its hand-curated substitute, if one exists.

    Never rejects a feature: the first element of the result is always True.
    """
    substitute = MANUAL_ZAAK_NAAM_REPLACEMENTS.get(f.properties.zaaknaam)
    if substitute is not None:
        f.properties.zaaknaam = substitute
    return True, f


def beautify_zaaknaam(f: Feature) -> Tuple[bool, Feature]:
    """Strip legal-entity markers (B.V., VOF, ...) from the feature's name.

    Features without a name are rejected. Otherwise the name is cleaned in
    place and the feature is kept.
    """
    name = f.properties.zaaknaam
    if name is None:
        return False, f

    # Order matters: the longer " Amsterdam B.V." must go before " B.V.",
    # and " B.V." before the dot-less " B.V".
    legal_markers = (" Amsterdam B.V.", " B.V.", " B.V", " VOF", "V.O.F. ")
    for marker in legal_markers:
        name = name.replace(marker, "")

    f.properties.zaaknaam = name.strip()
    return True, f


def prepare_data(data: Iterable[Feature]) -> List[Feature]:
    """Filter and sanitize data.

    Runs three independent filter pipelines over the same input (night
    venues, cafes, and restaurants that look like bars) and returns their
    results concatenated in that order.

    ``data`` may be any iterable, including a one-shot iterator or generator:
    it is materialised once so that every pipeline sees all features.
    """
    # Each pipeline iterates the input on its own. Without this copy a
    # generator would be exhausted by the first pipeline that is evaluated,
    # leaving the other two silently empty.
    features = list(data)

    nachtzaak_operations = [
        filter_on_nachtzaak,
        filter_and_enrich_using_gmaps,
    ]

    cafe_operations = [
        filter_on_cafes,
        filter_coffeeshops,
        filter_manual_exclusions,
        manual_substitutions_zaaknaam,
        beautify_zaaknaam,
        filter_and_enrich_using_gmaps,
    ]

    # Some restaurants are also bars, we try to pull some additional bars from there
    restaurant_operations = [
        filter_on_restaurants,
        filter_on_restaurant_name,
        beautify_zaaknaam,
        filter_and_enrich_using_gmaps_enforce_bar,
    ]

    prepared: List[Feature] = []
    # Evaluated in this order so the result order stays: nachtzaak, cafe, restaurant.
    for operations in (nachtzaak_operations, cafe_operations, restaurant_operations):
        stream: Iterable[Feature] = features
        for operation in operations:
            stream = apply_filter(stream, operation)
        prepared.extend(stream)

    return prepared
5 changes: 5 additions & 0 deletions data/bars/gmaps.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ def get_distance(a: Tuple[float, float], b: Tuple[float, float]) -> float:
return distance


def gmaps_place_details(place_id: str) -> dict:
    """Return the gmaps client's ``place`` response for ``place_id``."""
    return get_gmaps_client().place(place_id)


@cache.memoize(typed=True)
def gmaps_place_search(
address: str,
Expand Down
Loading

0 comments on commit ffada18

Please sign in to comment.