Commit

Merge pull request #204 from hotosm/feature/hdx_custom_exports
kshitijrajsharma authored Jan 20, 2024
2 parents 73c1b76 + a9d2fce commit 587a8dc
Showing 12 changed files with 1,759 additions and 1,127 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/Unit-Test.yml
@@ -33,6 +33,7 @@ jobs:
       ALLOW_BIND_ZIP_FILTER: ${{ secrets.ALLOW_BIND_ZIP_FILTER }}
       APP_SECRET_KEY: ${{ secrets.APP_SECRET_KEY }}
       ENABLE_HDX_EXPORTS: ${{ secrets.ENABLE_HDX_EXPORTS }}
+      ENABLE_CUSTOM_EXPORTS: True
       ENABLE_POLYGON_STATISTICS_ENDPOINTS: ${{ secrets.ENABLE_POLYGON_STATISTICS_ENDPOINTS }}
       POLYGON_STATISTICS_API_URL: ${{ secrets.POLYGON_STATISTICS_API_URL }}
       HDX_API_KEY: ${{ secrets.HDX_API_KEY }}
@@ -58,6 +59,7 @@ jobs:
       export PGPASSWORD='admin';
       psql -U postgres -h localhost -p 5434 raw < tests/fixtures/pokhara.sql
       psql -U postgres -h localhost -p 5434 raw < backend/sql/countries.sql
+      psql -U postgres -h localhost -p 5434 raw < API/data/hdx.sql
     - name: Install gdal
       run: sudo apt-get update && sudo apt-get -y install gdal-bin python3-gdal && sudo apt-get -y autoremove && sudo apt-get clean
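Note that the workflow enables the new flag inline (`ENABLE_CUSTOM_EXPORTS: True`) rather than through a repository secret like the neighboring variables. As a minimal sketch of how a boolean environment flag like this is commonly parsed, assuming nothing about the project's actual src/config.py — the helper name `env_flag` and its truthy-string list are illustrative only:

```python
import os


def env_flag(name: str, default: bool = False) -> bool:
    """Hypothetical helper: interpret common truthy strings from the environment."""
    value = os.environ.get(name)
    if value is None:
        return default
    return value.strip().lower() in ("1", "true", "yes", "on")


# In the CI job above the variable is set to the literal string "True",
# so a parser along these lines would report the feature as enabled.
ENABLE_CUSTOM_EXPORTS = env_flag("ENABLE_CUSTOM_EXPORTS")
```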
40 changes: 4 additions & 36 deletions API/api_worker.py
@@ -12,7 +12,7 @@
 import sozipfile.sozipfile as zipfile
 from celery import Celery
 
-from src.app import HDX, PolygonStats, RawData, S3FileTransfer
+from src.app import CustomExport, PolygonStats, RawData, S3FileTransfer
 from src.config import ALLOW_BIND_ZIP_FILTER
 from src.config import CELERY_BROKER_URL as celery_broker_uri
 from src.config import CELERY_RESULT_BACKEND as celery_backend
@@ -178,9 +178,6 @@ def process_raw_data(self, params):
 
     # getting the file size of the zip; units are bytes, converted to MB in the response
     zip_file_size = os.path.getsize(upload_file_path)
-    # watches the status code of the link provided and deletes the file if it is 200
-    if use_s3_to_upload:
-        watch_s3_upload(download_url, upload_file_path)
     if use_s3_to_upload or bind_zip:
         # remove the working dir from the machine; if it's inside the zip / uploaded we no longer need it
         remove_file(working_dir)
@@ -217,11 +214,11 @@ def process_custom_request(self, params):
 
     if not params.dataset:
         params.dataset = DatasetConfig()
-    hdx_object = HDX(params)
+    custom_object = CustomExport(params)
     try:
-        return hdx_object.process_hdx_tags()
+        return custom_object.process_custom_categories()
     except Exception as ex:
-        hdx_object.clean_resources()
+        custom_object.clean_resources()
         raise ex
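The hunk above swaps the HDX-specific worker path for the generic custom-export one: build the exporter, run it, and release its resources if anything fails. Below is a minimal standalone sketch of that lifecycle, assuming only the CustomExport constructor and the two methods visible in the diff; the wrapper function name is hypothetical:

```python
# Sketch of the construct / process / clean-on-failure pattern used above.
# CustomExport, process_custom_categories and clean_resources come from the
# diff; everything else here is illustrative scaffolding.
from src.app import CustomExport


def run_custom_export(params):
    exporter = CustomExport(params)
    try:
        # Runs the configured category exports and returns the result payload.
        return exporter.process_custom_categories()
    except Exception:
        # On any failure, drop temp files / partial outputs before re-raising,
        # so a crashed task does not leak working directories.
        exporter.clean_resources()
        raise
```

A bare `raise` is the usual idiom for re-raising and preserves the original traceback; the diff's `raise ex` re-raises the same exception object and behaves equivalently here.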


@@ -231,32 +228,3 @@ def remove_file(path: str) -> None:
         shutil.rmtree(path)
     except OSError as ex:
         logging.error("Error: %s - %s.", ex.filename, ex.strerror)
-
-
-def watch_s3_upload(url: str, path: str) -> None:
-    """Watches the S3 upload until it finishes, or times out, and removes the temp file on completion
-    Args:
-        url (str): URL generated by the script where the data will be available
-        path (str): path where the temp file is located
-    """
-    start_time = time.time()
-    remove_temp_file = True
-    check_call = requests.head(url).status_code
-    if check_call != 200:
-        logging.debug("Upload is not done yet, waiting ...")
-        while check_call != 200:  # keep checking until the status is 200
-            check_call = requests.head(url).status_code
-            if time.time() - start_time > 300:
-                logging.error(
-                    "Upload took more than 5 min, killing watch : %s , URL : %s",
-                    path,
-                    url,
-                )
-                remove_temp_file = False  # don't remove the file if the upload fails
-                break
-            time.sleep(3)  # check every 3 seconds
-    # once it is verified that the file was uploaded, finally remove it
-    if remove_temp_file:
-        logging.debug("File is uploaded at %s, flushing out from %s", url, path)
-        os.unlink(path)
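The watcher deleted above polled the download URL with HEAD requests until it returned 200, capping the wait at five minutes and keeping the temp file on failure; its call site in process_raw_data is removed in the same commit. For reference, a compact, hypothetical restatement of that pattern with a per-request timeout and network-error handling added — the name `wait_until_uploaded` and everything except the 300 s deadline and 3 s interval are illustrative, not part of the codebase:

```python
import logging
import os
import time

import requests


def wait_until_uploaded(url: str, path: str, deadline: float = 300.0, interval: float = 3.0) -> None:
    """Hypothetical poller: delete the local temp file once the URL serves HTTP 200."""
    start = time.monotonic()
    while time.monotonic() - start < deadline:
        try:
            if requests.head(url, timeout=10).status_code == 200:
                logging.debug("File is uploaded at %s, flushing out from %s", url, path)
                os.unlink(path)  # upload confirmed; drop the local copy
                return
        except requests.RequestException:
            pass  # transient network error; retry until the deadline
        time.sleep(interval)  # re-check every few seconds, as the removed code did
    # Mirror the removed behavior: keep the temp file if the upload never went green.
    logging.error("Upload watch timed out after %s s for %s (URL: %s)", deadline, path, url)
```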
