Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature : HDX Table within database #204

Merged
merged 7 commits into from
Jan 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/Unit-Test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ jobs:
ALLOW_BIND_ZIP_FILTER: ${{ secrets.ALLOW_BIND_ZIP_FILTER }}
APP_SECRET_KEY: ${{ secrets.APP_SECRET_KEY }}
ENABLE_HDX_EXPORTS: ${{ secrets.ENABLE_HDX_EXPORTS }}
ENABLE_CUSTOM_EXPORTS: True
ENABLE_POLYGON_STATISTICS_ENDPOINTS: ${{ secrets.ENABLE_POLYGON_STATISTICS_ENDPOINTS }}
POLYGON_STATISTICS_API_URL: ${{ secrets.POLYGON_STATISTICS_API_URL }}
HDX_API_KEY: ${{ secrets.HDX_API_KEY }}
Expand All @@ -58,6 +59,7 @@ jobs:
export PGPASSWORD='admin';
psql -U postgres -h localhost -p 5434 raw < tests/fixtures/pokhara.sql
psql -U postgres -h localhost -p 5434 raw < backend/sql/countries.sql
psql -U postgres -h localhost -p 5434 raw < API/data/hdx.sql

- name: Install gdal
run: sudo apt-get update && sudo apt-get -y install gdal-bin python3-gdal && sudo apt-get -y autoremove && sudo apt-get clean
Expand Down
40 changes: 4 additions & 36 deletions API/api_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import sozipfile.sozipfile as zipfile
from celery import Celery

from src.app import HDX, PolygonStats, RawData, S3FileTransfer
from src.app import CustomExport, PolygonStats, RawData, S3FileTransfer
from src.config import ALLOW_BIND_ZIP_FILTER
from src.config import CELERY_BROKER_URL as celery_broker_uri
from src.config import CELERY_RESULT_BACKEND as celery_backend
Expand Down Expand Up @@ -178,9 +178,6 @@ def process_raw_data(self, params):

# getting file size of zip , units are in bytes converted to mb in response
zip_file_size = os.path.getsize(upload_file_path)
# watches the status code of the link provided and deletes the file if it is 200
if use_s3_to_upload:
watch_s3_upload(download_url, upload_file_path)
if use_s3_to_upload or bind_zip:
# remove working dir from the machine , if its inside zip / uploaded we no longer need it
remove_file(working_dir)
Expand Down Expand Up @@ -217,11 +214,11 @@ def process_custom_request(self, params):

if not params.dataset:
params.dataset = DatasetConfig()
hdx_object = HDX(params)
custom_object = CustomExport(params)
try:
return hdx_object.process_hdx_tags()
return custom_object.process_custom_categories()
except Exception as ex:
hdx_object.clean_resources()
custom_object.clean_resources()
raise ex


Expand All @@ -231,32 +228,3 @@ def remove_file(path: str) -> None:
shutil.rmtree(path)
except OSError as ex:
logging.error("Error: %s - %s.", ex.filename, ex.strerror)


def watch_s3_upload(url: str, path: str) -> None:
    """Poll an S3 download URL until the upload completes, then remove the local temp file.

    Sends a HEAD request to *url* every 3 seconds until it returns HTTP 200,
    at which point the local copy at *path* is deleted. Gives up after 5
    minutes and keeps the temp file so a failed upload can be inspected or
    retried.

    Args:
        url (str): url generated by the script where data will be available
        path (str): path where temp file is located at
    """
    # Use a monotonic deadline: time.time() can jump backwards/forwards
    # (NTP adjustments), which would corrupt a wall-clock timeout.
    deadline = time.monotonic() + 300  # allow at most 5 minutes
    while True:
        # Single HEAD probe per iteration (the original issued a redundant
        # extra probe before entering the loop).
        status = requests.head(url).status_code
        if status == 200:
            # Once it is verified the file is uploaded, remove the temp copy.
            logging.debug("File is uploaded at %s , flushing out from %s", url, path)
            os.unlink(path)
            return
        if time.monotonic() > deadline:
            logging.error(
                "Upload time took more than 5 min , Killing watch : %s , URL : %s",
                path,
                url,
            )
            return  # don't remove the file if upload fails
        logging.debug("Upload is not done yet waiting ...")
        time.sleep(3)  # check each 3 second
Loading
Loading