Skip to content

Commit

Permalink
refactor(api-worker): logic changes for memory optimized zip
Browse files Browse the repository at this point in the history
  • Loading branch information
kshitijrajsharma committed Mar 20, 2024
1 parent 070a2ec commit 4cab5b8
Showing 1 changed file with 20 additions and 15 deletions.
35 changes: 20 additions & 15 deletions API/api_worker.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# Standard library imports
import json
import os
import pathlib
Expand All @@ -7,11 +8,13 @@
from datetime import datetime as dt
from datetime import timedelta, timezone

# Third party imports
import humanize
import psutil
import zipfly
from celery import Celery

# Reader imports
from src.app import CustomExport, PolygonStats, RawData, S3FileTransfer
from src.config import ALLOW_BIND_ZIP_FILTER
from src.config import CELERY_BROKER_URL as celery_broker_uri
Expand All @@ -37,8 +40,10 @@
)

if ENABLE_SOZIP:
# Third party imports
import sozipfile.sozipfile as zipfile
else:
# Standard library imports
import zipfile

celery = Celery("Raw Data API")
Expand Down Expand Up @@ -96,21 +101,8 @@ def zip_binding(

system_ram = psutil.virtual_memory().total # system RAM in bytes
if (
inside_file_size < 0.8 * system_ram or inside_file_size < 3 * 1024**3
): # if file size is less than 80% of ram or less than 3 gb
logging.debug("Using default zipfile module for zipping")
with zipfile.ZipFile(
upload_file_path,
"w",
compression=zipfile.ZIP_DEFLATED,
compresslevel=9,
allowZip64=True,
) as zf:
for file_path in pathlib.Path(working_dir).iterdir():
if file_path.is_file():
zf.write(file_path, arcname=file_path.name)

else:
inside_file_size > 0.8 * system_ram or inside_file_size > 3 * 1024**3
): # if file size is greater than 80% of ram or greater than 3 gb
logging.debug(
"Using memory optimized zip",
)
Expand All @@ -127,6 +119,19 @@ def zip_binding(
for chunk in generator:
f.write(chunk)

else:
logging.debug("Using default zipfile module for zipping")
with zipfile.ZipFile(
upload_file_path,
"w",
compression=zipfile.ZIP_DEFLATED,
compresslevel=9,
allowZip64=True,
) as zf:
for file_path in pathlib.Path(working_dir).iterdir():
if file_path.is_file():
zf.write(file_path, arcname=file_path.name)

logging.debug("Zip Binding Done!")
return upload_file_path, inside_file_size

Expand Down

0 comments on commit 4cab5b8

Please sign in to comment.