Skip to content

Commit

Permalink
Sort zip in exporter (#2311)
Browse files Browse the repository at this point in the history
Might fix
google/osv-scanner#1007 (comment)

Sort the entries before adding to the zip archive.
  • Loading branch information
another-rex authored Jun 14, 2024
1 parent d619c8f commit 2c31244
Showing 1 changed file with 9 additions and 5 deletions.
14 changes: 9 additions & 5 deletions docker/exporter/exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ def _export_ecosystem_to_bucket(self, ecosystem: str, tmp_dir: str):

zip_path = os.path.join(tmp_dir, 'all.zip')
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zip_file:
files_to_zip = []

@ndb.tasklet
def _export_to_file_and_zipfile(bug):
Expand All @@ -112,15 +113,18 @@ def _export_to_file_and_zipfile(bug):
file_path = os.path.join(tmp_dir, bug.id() + '.json')
vulnerability = yield bug.to_vulnerability_async(include_source=True)
osv.write_vulnerability(vulnerability, file_path)
# Tasklets are not truly multiple threads; they are actually
# event loops, which makes it safe to write to ZIP files.
# Details: https://cloud.google.com/appengine/docs/legacy/
# standard/python/ndb/async#tasklets
zip_file.write(file_path, os.path.basename(file_path))

files_to_zip.append(file_path)

# This *should* pause here until
# all the exports have been written to disk.
osv.Bug.query(
osv.Bug.ecosystem == ecosystem).map(_export_to_file_and_zipfile)

files_to_zip.sort()
for file_path in files_to_zip:
zip_file.write(file_path, os.path.basename(file_path))

with concurrent.futures.ThreadPoolExecutor(
max_workers=_EXPORT_WORKERS) as executor:
# Note: all.zip is included here
Expand Down

0 comments on commit 2c31244

Please sign in to comment.