-
Notifications
You must be signed in to change notification settings - Fork 27
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
🐛 unarchiving_utils are now based on 7zip cli #6959
base: master
Are you sure you want to change the base?
Changes from 2 commits
97fd911
29533e9
ca87d0b
7f3e41e
be58b0c
53e0728
65b5291
e6d118d
2a2f917
56d7fe1
9c82bc5
5a4cfb5
0f315e0
7ede41a
9d67dd1
0f1642d
71adc5b
01ce053
79c1648
02c9497
c01e35a
13c1485
653e168
6dfcd95
c3b0c1c
56d1871
30346af
a9145e2
46a0c29
c04999c
d48de81
9ff2374
0364e8b
7205c2f
79cc7e8
e69bc9a
e282c95
7279155
571a411
da30d13
9ca1090
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,7 +20,12 @@ | |
from models_library.projects_nodes_io import NodeIDStr | ||
from models_library.services_types import ServicePortKey | ||
from pydantic import ByteSize | ||
from servicelib.archiving_utils import PrunableFolder, archive_dir, unarchive_dir | ||
from servicelib.archiving_utils import ( | ||
ArchiveError, | ||
PrunableFolder, | ||
archive_dir, | ||
unarchive_dir, | ||
) | ||
from servicelib.async_utils import run_sequentially_in_context | ||
from servicelib.file_utils import remove_directory | ||
from servicelib.logging_utils import log_context | ||
|
@@ -46,7 +51,7 @@ class PortTypeName(str, Enum): | |
_FILE_TYPE_PREFIX = "data:" | ||
_KEY_VALUE_FILE_NAME = "key_values.json" | ||
|
||
logger = logging.getLogger(__name__) | ||
_logger = logging.getLogger(__name__) | ||
|
||
# OUTPUTS section | ||
|
||
|
@@ -95,7 +100,7 @@ async def upload_outputs( # pylint:disable=too-many-statements # noqa: PLR0915 | |
port_notifier: PortNotifier, | ||
) -> None: | ||
# pylint: disable=too-many-branches | ||
logger.debug("uploading data to simcore...") | ||
_logger.debug("uploading data to simcore...") | ||
start_time = time.perf_counter() | ||
|
||
settings: ApplicationSettings = get_settings() | ||
|
@@ -138,7 +143,7 @@ async def upload_outputs( # pylint:disable=too-many-statements # noqa: PLR0915 | |
if is_file_type(port.property_type): | ||
src_folder = outputs_path / port.key | ||
files_and_folders_list = list(src_folder.rglob("*")) | ||
logger.debug("Discovered files to upload %s", files_and_folders_list) | ||
_logger.debug("Discovered files to upload %s", files_and_folders_list) | ||
|
||
if not files_and_folders_list: | ||
ports_values[port.key] = (None, None) | ||
|
@@ -213,9 +218,9 @@ async def _archive_dir_notified( | |
if port.key in data and data[port.key] is not None: | ||
ports_values[port.key] = (data[port.key], None) | ||
else: | ||
logger.debug("Port %s not found in %s", port.key, data) | ||
_logger.debug("Port %s not found in %s", port.key, data) | ||
else: | ||
logger.debug("No file %s to fetch port values from", data_file) | ||
_logger.debug("No file %s to fetch port values from", data_file) | ||
|
||
if archiving_tasks: | ||
await limited_gather(*archiving_tasks, limit=4) | ||
|
@@ -228,8 +233,8 @@ async def _archive_dir_notified( | |
|
||
elapsed_time = time.perf_counter() - start_time | ||
total_bytes = sum(_get_size_of_value(x) for x in ports_values.values()) | ||
logger.info("Uploaded %s bytes in %s seconds", total_bytes, elapsed_time) | ||
logger.debug(_CONTROL_TESTMARK_DY_SIDECAR_NODEPORT_UPLOADED_MESSAGE) | ||
_logger.info("Uploaded %s bytes in %s seconds", total_bytes, elapsed_time) | ||
_logger.debug(_CONTROL_TESTMARK_DY_SIDECAR_NODEPORT_UPLOADED_MESSAGE) | ||
|
||
|
||
# INPUTS section | ||
|
@@ -243,14 +248,32 @@ def _is_zip_file(file_path: Path) -> bool: | |
_shutil_move = aiofiles.os.wrap(shutil.move) | ||
|
||
|
||
async def _move_file_to_input_port( | ||
final_path: Path, downloaded_file: Path, dest_folder: PrunableFolder | ||
) -> None: | ||
Comment on lines
+245
to
+247
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. this function named so this function
|
||
_logger.debug("moving %s", downloaded_file) | ||
GitHK marked this conversation as resolved.
Show resolved
Hide resolved
|
||
final_path = final_path / downloaded_file.name | ||
|
||
# ensure parent exists | ||
final_path.parent.mkdir(exist_ok=True, parents=True) | ||
|
||
await _shutil_move(downloaded_file, final_path) | ||
|
||
# NOTE: after the download the current value of the port | ||
# makes sure previously downloaded files are removed | ||
dest_folder.prune(exclude={final_path}) | ||
GitHK marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
_logger.debug("file moved to %s", final_path) | ||
|
||
|
||
async def _get_data_from_port( | ||
port: Port, *, target_dir: Path, progress_bar: ProgressBarData | ||
) -> tuple[Port, ItemConcreteValue | None, ByteSize]: | ||
async with progress_bar.sub_progress( | ||
steps=2 if is_file_type(port.property_type) else 1, | ||
description=IDStr("getting data"), | ||
) as sub_progress: | ||
with log_context(logger, logging.DEBUG, f"getting {port.key=}"): | ||
with log_context(_logger, logging.DEBUG, f"getting {port.key=}"): | ||
port_data = await port.get(sub_progress) | ||
|
||
if is_file_type(port.property_type): | ||
|
@@ -261,7 +284,7 @@ async def _get_data_from_port( | |
if not downloaded_file or not downloaded_file.exists(): | ||
# the link may be empty | ||
# remove files all files from disk when disconnecting port | ||
logger.debug("removing contents of dir %s", final_path) | ||
_logger.debug("removing contents of dir %s", final_path) | ||
GitHK marked this conversation as resolved.
Show resolved
Hide resolved
|
||
await remove_directory( | ||
final_path, only_children=True, ignore_errors=True | ||
) | ||
|
@@ -270,33 +293,38 @@ async def _get_data_from_port( | |
transferred_bytes = downloaded_file.stat().st_size | ||
|
||
# in case of valid file, it is either uncompressed and/or moved to the final directory | ||
with log_context(logger, logging.DEBUG, "creating directory"): | ||
with log_context(_logger, logging.DEBUG, "creating directory"): | ||
final_path.mkdir(exist_ok=True, parents=True) | ||
port_data = f"{final_path}" | ||
dest_folder = PrunableFolder(final_path) | ||
|
||
if _is_zip_file(downloaded_file): | ||
# unzip updated data to dest_path | ||
logger.debug("unzipping %s", downloaded_file) | ||
unarchived: set[Path] = await unarchive_dir( | ||
archive_to_extract=downloaded_file, | ||
destination_folder=final_path, | ||
progress_bar=sub_progress, | ||
) | ||
|
||
dest_folder.prune(exclude=unarchived) | ||
_logger.debug("unzipping %s", downloaded_file) | ||
try: | ||
unarchived: set[Path] = await unarchive_dir( | ||
archive_to_extract=downloaded_file, | ||
destination_folder=final_path, | ||
progress_bar=sub_progress, | ||
) | ||
dest_folder.prune(exclude=unarchived) | ||
|
||
_logger.debug("all unzipped in %s", final_path) | ||
except ArchiveError: | ||
_logger.warning( | ||
GitHK marked this conversation as resolved.
Show resolved
Hide resolved
|
||
"Could not extract archive '%s' to '%s' moving it to: '%s'", | ||
downloaded_file, | ||
final_path, | ||
final_path / downloaded_file.name, | ||
) | ||
await _move_file_to_input_port( | ||
final_path, downloaded_file, dest_folder | ||
) | ||
|
||
logger.debug("all unzipped in %s", final_path) | ||
else: | ||
logger.debug("moving %s", downloaded_file) | ||
final_path = final_path / Path(downloaded_file).name | ||
await _shutil_move(str(downloaded_file), final_path) | ||
|
||
# NOTE: after the download the current value of the port | ||
# makes sure previously downloaded files are removed | ||
dest_folder.prune(exclude={final_path}) | ||
await _move_file_to_input_port(final_path, downloaded_file, dest_folder) | ||
|
||
logger.debug("all moved to %s", final_path) | ||
_logger.debug("all moved to %s", final_path) | ||
else: | ||
transferred_bytes = sys.getsizeof(port_data) | ||
|
||
|
@@ -312,7 +340,7 @@ async def download_target_ports( | |
progress_bar: ProgressBarData, | ||
port_notifier: PortNotifier | None, | ||
) -> ByteSize: | ||
logger.debug("retrieving data from simcore...") | ||
_logger.debug("retrieving data from simcore...") | ||
start_time = time.perf_counter() | ||
|
||
settings: ApplicationSettings = get_settings() | ||
|
@@ -386,7 +414,7 @@ async def _get_date_from_port_notified( | |
data_file.write_text(json.dumps(data)) | ||
|
||
elapsed_time = time.perf_counter() - start_time | ||
logger.info( | ||
_logger.info( | ||
"Downloaded %s in %s seconds", | ||
total_transfered_bytes.human_readable(decimal=True), | ||
elapsed_time, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For this kind of log, I guess you could use the log decorator and save one line of code.