From 1057435af01e6291800ce07f545b7774e61ce3f8 Mon Sep 17 00:00:00 2001 From: spwoodcock Date: Wed, 25 Oct 2023 06:56:16 +0100 Subject: [PATCH] refactor: update get_osm_extracts func for clarity --- src/backend/app/projects/project_crud.py | 31 ++++++++++++++++++------ 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/src/backend/app/projects/project_crud.py b/src/backend/app/projects/project_crud.py index 10d59fd1da..a972d644ff 100644 --- a/src/backend/app/projects/project_crud.py +++ b/src/backend/app/projects/project_crud.py @@ -576,6 +576,12 @@ def remove_z_dimension(coord): def get_osm_extracts(boundary: str): + """Request an extract from raw-data-api and extract the file contents. + + - The query is posted to raw-data-api and job initiated for fetching the extract. + - The status of the job is polled every few seconds, until 'SUCCESS' is returned. + - The resulting zip file is downloaded, extracted, and data returned. + """ # Filters for osm extracts query = { "filters": { @@ -587,19 +593,27 @@ def get_osm_extracts(boundary: str): } } + # Boundary to extract data for json_boundary = json.loads(boundary) - if json_boundary.get("features", None) is not None: query["geometry"] = json_boundary # query["geometry"] = json_boundary["features"][0]["geometry"] - else: query["geometry"] = json_boundary + # Filename to generate + query["fileName"] = "extract" + # File format to generate + query["outputType"] = "geojson" + extract_filename = f'{query["fileName"]}.{query["outputType"]}' + log.debug(f"Setting data extract file name to: {extract_filename}") + + log.debug(f"Query for raw data api: {query}") base_url = settings.UNDERPASS_API_URL query_url = f"{base_url}/snapshot/" headers = {"accept": "application/json", "Content-Type": "application/json"} + # Send the request to raw data api result = requests.post(query_url, data=json.dumps(query), headers=headers) if result.status_code == 200: @@ -607,23 +621,26 @@ def get_osm_extracts(boundary: str): else: return False + # Check status of task (PENDING, or SUCCESS) task_url = f"{base_url}/tasks/status/{task_id}" - # extracts = requests.get(task_url) while True: result = requests.get(task_url, headers=headers) if result.json()["status"] == "PENDING": - time.sleep(1) + # Wait 2 seconds before polling again + time.sleep(2) elif result.json()["status"] == "SUCCESS": break + # TODO update code to generate fgb file format + # then input the download_url directly into our database + # (no need to download the file and extract) zip_url = result.json()["result"]["download_url"] - zip_url result = requests.get(zip_url, headers=headers) # result.content fp = BytesIO(result.content) zfp = zipfile.ZipFile(fp, "r") - zfp.extract("Export.geojson", "/tmp/") - data = json.loads(zfp.read("Export.geojson")) + zfp.extract(extract_filename, "/tmp/") + data = json.loads(zfp.read(extract_filename)) for feature in data["features"]: properties = feature["properties"]