Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert "Feature/post processing" #284

Merged
merged 1 commit into from
Dec 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,3 @@ Pipfile.lock
#backend
backend/data
backend/.env

.DS_Store
85 changes: 6 additions & 79 deletions API/api_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

# Reader imports
from src.app import CustomExport, PolygonStats, RawData, S3FileTransfer
from src.post_processing.processor import PostProcessor
from src.config import ALLOW_BIND_ZIP_FILTER
from src.config import CELERY_BROKER_URL as celery_broker_uri
from src.config import CELERY_RESULT_BACKEND as celery_backend
Expand All @@ -40,7 +39,6 @@
RawDataCurrentParams,
RawDataOutputType,
)
from src.post_processing.processor import PostProcessor

if ENABLE_SOZIP:
# Third party imports
Expand Down Expand Up @@ -77,12 +75,7 @@ def create_readme_content(default_readme, polygon_stats):


def zip_binding(
working_dir,
exportname_parts,
geom_dump,
polygon_stats,
geojson_stats,
default_readme,
working_dir, exportname_parts, geom_dump, polygon_stats, default_readme
):
logging.debug("Zip Binding Started!")
upload_file_path = os.path.join(
Expand All @@ -95,9 +88,6 @@ def zip_binding(
),
}

if geojson_stats:
additional_files["stats.json"] = geojson_stats

for name, content in additional_files.items():
temp_path = os.path.join(working_dir, name)
with open(temp_path, "w") as f:
Expand Down Expand Up @@ -219,60 +209,11 @@ def process_raw_data(self, params, user=None):
file_parts,
)

# Post-processing: Generate GeoJSON/HTML stats and transliterations
polygon_stats = None
geojson_stats_html = None
geojson_stats_json = None
download_html_url = None
if "include_stats" or "include_translit" in params.dict():
post_processor = PostProcessor(
{
"include_stats": params.include_stats,
"include_translit": params.include_translit,
}
)

if params.include_stats:
post_processor.filters = params.filters

post_processor.init()

geom_area, geom_dump, working_dir = RawData(
params, str(self.request.id)
).extract_current_data(file_parts, post_processor.post_process_line)

if params.include_stats:
geojson_stats_json = json.dumps(post_processor.geoJSONStats.dict())

# Create a HTML summary of stats
if params.include_stats_html:
tpl = "stats"
if "waterway" in post_processor.geoJSONStats.config.keys:
tpl = "stats_waterway"
elif "highway" in post_processor.geoJSONStats.config.keys:
tpl = "stats_highway"
elif "building" in post_processor.geoJSONStats.config.keys:
tpl = "stats_building"
project_root = pathlib.Path(__file__).resolve().parent
tpl_path = os.path.join(
project_root,
"../src/post_processing/{tpl}_tpl.html".format(tpl=tpl),
)
geojson_stats_html = post_processor.geoJSONStats.html(
tpl_path
).build()
upload_html_path = os.path.join(
working_dir, os.pardir, f"{exportname_parts[-1]}.html"
)
with open(upload_html_path, "w") as f:
f.write(geojson_stats_html)

else:
geom_area, geom_dump, working_dir = RawData(
params, str(self.request.id)
).extract_current_data(file_parts)

geom_area, geom_dump, working_dir = RawData(
params, str(self.request.id)
).extract_current_data(file_parts)
inside_file_size = 0
polygon_stats = None
if "include_stats" in params.dict():
if params.include_stats:
feature = {
Expand All @@ -281,14 +222,12 @@ def process_raw_data(self, params, user=None):
"properties": {},
}
polygon_stats = PolygonStats(feature).get_summary_stats()

if bind_zip:
upload_file_path, inside_file_size = zip_binding(
working_dir=working_dir,
exportname_parts=exportname_parts,
geom_dump=geom_dump,
polygon_stats=polygon_stats,
geojson_stats=geojson_stats_json,
default_readme=DEFAULT_README_TEXT,
)

Expand All @@ -301,7 +240,6 @@ def process_raw_data(self, params, user=None):
upload_file_path = file_path
inside_file_size += os.path.getsize(file_path)
break  # only take one file inside the dir; if it contains many, they should be inside a zip

# check if download url will be generated from s3 or not from config
if use_s3_to_upload:
file_transfer_obj = S3FileTransfer()
Expand All @@ -315,6 +253,7 @@ def process_raw_data(self, params, user=None):
pattern = r"(hotosm_project_)(\d+)"
match = re.match(pattern, exportname)
if match:
prefix = match.group(1)
project_number = match.group(2)
if project_number:
upload_name = f"TM/{project_number}/{exportname}"
Expand All @@ -333,15 +272,6 @@ def process_raw_data(self, params, user=None):
upload_name,
file_suffix="zip" if bind_zip else params.output_type.lower(),
)

# If there's an HTML file, upload it too
if geojson_stats_html:
download_html_url = file_transfer_obj.upload(
upload_html_path,
upload_name,
file_suffix="html",
)

else:
# give the static file download url back to user served from fastapi static export path
download_url = str(upload_file_path)
Expand All @@ -367,9 +297,6 @@ def process_raw_data(self, params, user=None):
}
if polygon_stats:
final_response["stats"] = polygon_stats
if download_html_url:
final_response["download_html_url"] = download_html_url

return final_response

except Exception as ex:
Expand Down
1 change: 1 addition & 0 deletions backend/field_update
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ class Database:
try:
self.cursor.execute(query)
self.conn.commit()
# print(query)
try:
result = self.cursor.fetchall()

Expand Down
2 changes: 1 addition & 1 deletion backend/raw_backend
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ if __name__ == "__main__":

if not args.replication:
osm2pgsql.append("--drop")

print(osm2pgsql)
run_subprocess_cmd(osm2pgsql)

basic_index_cmd = [
Expand Down
7 changes: 0 additions & 7 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,3 @@ psutil==5.9.8

## logging
tqdm==4.66.2

# stats for geojson data
geojson-stats==0.2.4

# transliterations
transliterate==1.10.2

33 changes: 4 additions & 29 deletions src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@
from psycopg2.extras import DictCursor
from slugify import slugify
from tqdm import tqdm
from .post_processing.processor import PostProcessor

# Reader imports
from src.config import (
Expand Down Expand Up @@ -641,7 +640,7 @@ def ogr_export(query, outputtype, working_dir, dump_temp_path, params):
os.remove(query_path)

@staticmethod
def query2geojson(con, extraction_query, dump_temp_file_path, plugin_fn=None):
def query2geojson(con, extraction_query, dump_temp_file_path):
"""Function written from scratch without being dependent on any library, Provides better performance for geojson binding"""
# creating geojson file
pre_geojson = """{"type": "FeatureCollection","features": ["""
Expand All @@ -661,12 +660,10 @@ def query2geojson(con, extraction_query, dump_temp_file_path, plugin_fn=None):
for row in cursor:
if first:
first = False
f.write(row[0])
else:
f.write(",")
if plugin_fn:
f.write(plugin_fn(row[0]))
else:
f.write((row[0]))
f.write(row[0])
cursor.close() # closing connection to avoid memory issues
# close the writing geojson with last part
f.write(post_geojson)
Expand Down Expand Up @@ -714,7 +711,7 @@ def get_grid_id(geom, cur):
country_export,
)

def extract_current_data(self, exportname, plugin_fn=None):
def extract_current_data(self, exportname):
"""Responsible for Extracting rawdata current snapshot, Initially it creates a geojson file , Generates query , run it with 1000 chunk size and writes it directly to the geojson file and closes the file after dump
Args:
exportname: takes filename as argument to create geojson file passed from routers
Expand Down Expand Up @@ -780,7 +777,6 @@ def extract_current_data(self, exportname, plugin_fn=None):
country_export=country_export,
),
dump_temp_file_path,
plugin_fn,
) # uses own conversion class
if output_type == RawDataOutputType.SHAPEFILE.value:
(
Expand Down Expand Up @@ -1492,29 +1488,8 @@ def process_export_format(export_format):
layer_creation_options=layer_creation_options_str,
query_dump_path=export_format_path,
)

run_ogr2ogr_cmd(ogr2ogr_cmd)

# Post-processing GeoJSON files
# Adds: stats, HTML stats summary and transliterations
if export_format.driver_name == "GeoJSON" and (
self.params.include_stats or self.params.include_translit
):
post_processor = PostProcessor(
{
"include_stats": self.params.include_stats,
"include_translit": self.params.include_translit,
"include_stats_html": self.params.include_stats_html,
}
)
post_processor.init()
post_processor.custom(
categories=self.params.categories,
export_format_path=export_format_path,
export_filename=export_filename,
file_export_path=file_export_path,
)

zip_file_path = os.path.join(file_export_path, f"{export_filename}.zip")
zip_path = self.file_to_zip(export_format_path, zip_file_path)

Expand Down
1 change: 0 additions & 1 deletion src/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,6 @@ def not_raises(func, *args, **kwargs):
logging.error(
"Error creating HDX configuration: %s, Disabling the hdx exports feature", e
)

ENABLE_HDX_EXPORTS = False

if ENABLE_HDX_EXPORTS:
Expand Down
Empty file removed src/post_processing/__init__.py
Empty file.
61 changes: 0 additions & 61 deletions src/post_processing/geojson_stats.py

This file was deleted.

Loading
Loading