diff --git a/data/bars/datasets.py b/data/bars/datasets.py
index 00704ba..17e2551 100644
--- a/data/bars/datasets.py
+++ b/data/bars/datasets.py
@@ -1,6 +1,6 @@
 from dataclasses import dataclass
 from enum import Enum
-from typing import List
+from typing import List, Dict, Any
 
 import requests
 from requests import Session
@@ -39,8 +39,12 @@ def kroeg_credentials(env: Env) -> Credentials:
     return Credentials(email=email, password=password)
 
 
-def load_remote_kroegen_dataset(session: Session, env: Env) -> List[LocationResponse]:
-    r = session.get(kroeg_endpoint(env) + "/bars?only_published=false")
+def load_remote_kroegen_dataset(
+    session: Session, env: Env, only_published: bool = False
+) -> List[LocationResponse]:
+    r = session.get(
+        kroeg_endpoint(env) + "/bars?only_published=" + str(only_published).lower()
+    )
     r.raise_for_status()
     return [LocationResponse.from_dict(x) for x in r.json()]
 
@@ -64,3 +68,11 @@ def add_location(f: NewLocation, session: Session, env: Env) -> None:
     authenticate_api(session, env)
     r = session.post(kroeg_endpoint(env) + "/bar", json=f.to_dict())
     r.raise_for_status()
+
+
+def update_properties_bar(
+    bar_id: int, session: Session, env: Env, properties: Dict[str, Any]
+) -> None:
+    authenticate_api(session, env)
+    r = session.patch(kroeg_endpoint(env) + f"/bar/{bar_id}", json=properties)
+    r.raise_for_status()
diff --git a/data/bars/gmaps.py b/data/bars/gmaps.py
index dc47eb2..c3230b1 100644
--- a/data/bars/gmaps.py
+++ b/data/bars/gmaps.py
@@ -1,5 +1,7 @@
 import math
 from typing import Tuple, Optional, Dict
+
+import requests
 from diskcache import Cache
 
 import googlemaps
@@ -8,9 +10,11 @@ cache = Cache("./gmaps_cache")
 
 
 
-def get_gmaps_client():
-    GMAPS_API: str = os.environ.get("GMAPS_API")
-    return googlemaps.Client(key=GMAPS_API)
+def get_gmaps_client() -> googlemaps.Client:
+    api_key = os.environ.get("GMAPS_API")
+    if not api_key:
+        raise ValueError("No Google Maps API key found")
+    return googlemaps.Client(key=api_key)
 
 
 # returns distance in meters between two lat/long points
@@ -39,6 +43,7 @@ def get_distance(a: Tuple[float, float], b: Tuple[float, float]) -> float:
     return distance
 
 
+@cache.memoize(typed=True)
 def gmaps_place_details(place_id: str) -> dict:
     gmaps = get_gmaps_client()
     return gmaps.place(place_id)
@@ -55,6 +60,35 @@ def gmaps_place_search(
     return gmaps.places(address, location=location, language=language, type=type)
 
 
+@cache.memoize(typed=True)
+def get_url_redirect(url: str) -> str:
+    r = requests.get(url, headers={"Range": "bytes=0-0"}, allow_redirects=False)
+    r.raise_for_status()
+    return r.headers["Location"]
+
+
+def get_image_url(place_id: str) -> Optional[str]:
+    """Return the redirected URL of the first Google Maps photo for place_id.
+
+    Returns None when no photo reference can be read for the place. Raises
+    ValueError when the GMAPS_API environment variable is not set.
+    """
+    try:
+        place_details = gmaps_place_details(place_id)
+        photo_reference = place_details["result"]["photos"][0]["photo_reference"]
+    except Exception as e:
+        print(f"No photos found for {place_id}: {e}")
+        return None
+
+    api_key = os.environ.get("GMAPS_API")
+    if not api_key:
+        raise ValueError("No Google Maps API key found")
+
+    # Don't want to expose the Google Maps API key, so we return the redirected image URL instead
+    gmaps_url = f"https://maps.googleapis.com/maps/api/place/photo?maxwidth=512&photo_reference={photo_reference}&key={api_key}"
+    return get_url_redirect(gmaps_url)
+
+
 def get_likeliest_place(
     expected_name: str,
     address: str,
diff --git a/data/bars/reconcile.py b/data/bars/reconcile.py
index ed48364..c4d9ef9 100644
--- a/data/bars/reconcile.py
+++ b/data/bars/reconcile.py
@@ -2,14 +2,17 @@
 
 import click
 import requests
+from requests import Session
 
 from data.bars.datasets import (
     load_remote_kroegen_dataset,
     load_gemeente_amsterdam_dataset,
     add_location,
     Env,
+    update_properties_bar,
 )
 from data.bars import filters
+from data.bars.gmaps import get_image_url
 from data.bars.models import Feature, LocationResponse, NewLocation
 
 
@@ -23,21 +26,8 @@ def id_to_location(
     return {loc.gem_ams_id: loc for loc in locations if loc.gem_ams_id is not None}
 
 
-@click.command()
-@click.option("--apply", is_flag=True)
-@click.option(
-    "--env", type=click.Choice([e.value for e in Env]), default=Env.LOCAL.value
-)
-def main(apply: bool, env: str):
-    e = Env(env)
-    if not apply:
-        print("DRY RUN - NO DB CHANGES")
-
-    print(f"Operating on {e} environment")
-
-    session = requests.session()
-
-    current_bars = load_remote_kroegen_dataset(session, e)
+def process_new_bars(apply: bool, env: Env, session: Session):
+    current_bars = load_remote_kroegen_dataset(session, env)
     gem_ams_bars = filters.prepare_data(load_gemeente_amsterdam_dataset())
 
     current_id_to_bar = id_to_location(current_bars)
@@ -50,10 +40,54 @@ def main(apply: bool, env: str):
             new_loc = NewLocation.from_feature(ams_id_to_bar[new_id])
             print(new_loc)
             if apply:
-                add_location(new_loc, session, e)
+                add_location(new_loc, session, env)
     else:
         print("No new bars")
 
 
+def update_images(apply: bool, env: Env, session: Session, only_missing: bool = True):
+    current_bars = load_remote_kroegen_dataset(session, env, only_published=False)
+
+    for bar in current_bars:
+        if bar.imageurl is not None and only_missing:
+            continue
+
+        if bar.google_place_id is None:
+            continue
+
+        image_url = get_image_url(bar.google_place_id)
+
+        if image_url is not None:
+            print(f"Updating image for {bar.name}: {image_url}")
+            if apply:
+                update_properties_bar(bar.id, session, env, {"imageurl": image_url})
+        else:
+            print(f"No image found for {bar.name}")
+
+
+@click.command()
+@click.option("--apply", is_flag=True)
+@click.option(
+    "--env", type=click.Choice([e.value for e in Env]), default=Env.LOCAL.value
+)
+@click.option(
+    "--operation", type=click.Choice(["new_bars", "update_images"]), default="new_bars"
+)
+def main(apply: bool, env: str, operation: str):
+    e = Env(env)
+    if not apply:
+        print("DRY RUN - NO DB CHANGES")
+
+    print(f"Performing {operation} on {e} environment")
+
+    session = requests.session()
+
+    if operation == "new_bars":
+        process_new_bars(apply, e, session)
+
+    elif operation == "update_images":
+        update_images(apply, e, session)
+
+
 if __name__ == "__main__":
     main()