Skip to content

Commit

Permalink
ENH: update to disable sync for instance-level download
Browse files Browse the repository at this point in the history
We do not currently support instance-level manifests as input
to the command-line download tool. The sync operation uses series-level
size to estimate progress. For now, I think it is safe to
assume instance-level download will only be invoked by passing
SOPInstanceUID to the functions/download tool.
  • Loading branch information
fedorov committed Oct 8, 2024
1 parent ef0101d commit 9cafab9
Showing 1 changed file with 16 additions and 8 deletions.
24 changes: 16 additions & 8 deletions idc_index/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1605,6 +1605,11 @@ def download_from_selection(
raise ValueError(
"Instance-level access not possible because instance-level index not installed."
)
if use_s5cmd_sync:
logger.warning(
"s5cmd sync is not supported for downloading individual files. Disabling sync."
)
use_s5cmd_sync = False
elif crdc_series_uuid is not None:
download_df = pd.concat(
[
Expand Down Expand Up @@ -1699,9 +1704,8 @@ def download_from_selection(
)
SELECT
series_aws_url,
CONCAT(TRIM('*' FROM series_aws_url), crdc_instance_uuid, '.dcm') as instance_url,
CONCAT(TRIM('*' FROM series_aws_url), crdc_instance_uuid, '.dcm') as instance_aws_url,
REGEXP_EXTRACT(series_aws_url, '(?:.*?\\/){{3}}([^\\/?#]+)', 1) index_crdc_series_uuid,
series_size_MB,
{hierarchy} as path
FROM
temp
Expand Down Expand Up @@ -1735,13 +1739,13 @@ def download_from_selection(
with tempfile.NamedTemporaryFile(mode="w", delete=False) as manifest_file:
# Determine column containing the URL for instance / series-level access
if sopInstanceUID:
if not "instance_url" in result_df:
result_df["instance_url"] = (
if not "instance_aws_url" in result_df:
result_df["instance_aws_url"] = (
result_df["series_aws_url"].replace("/*", "/")
+ result_df["crdc_instance_uuid"]
+ ".dcm"
)
url_column = "instance_url"
url_column = "instance_aws_url"
else:
url_column = "series_aws_url"

Expand Down Expand Up @@ -1776,6 +1780,12 @@ def download_from_selection(
Temporary download manifest is generated and is passed to self._s5cmd_run
"""
)
if sopInstanceUID:
s5cmd_sync_helper_df = None
else:
s5cmd_sync_helper_df = result_df[
["index_crdc_series_uuid", "s5cmd_cmd", "series_size_MB", "path"]
]
self._s5cmd_run(
endpoint_to_use=aws_endpoint_url,
manifest_file=Path(manifest_file.name),
Expand All @@ -1786,9 +1796,7 @@ def download_from_selection(
use_s5cmd_sync=use_s5cmd_sync,
dirTemplate=dirTemplate,
list_of_directories=list_of_directories,
s5cmd_sync_helper_df=result_df[
["index_crdc_series_uuid", "s5cmd_cmd", "series_size_MB", "path"]
],
s5cmd_sync_helper_df=s5cmd_sync_helper_df,
)

def download_dicom_instance(
Expand Down

0 comments on commit 9cafab9

Please sign in to comment.