From 7dafda93f4973a01f0b893cfa99a93f0e8fe2d8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Paul=20M=C3=BCller?= Date: Tue, 14 Nov 2023 12:27:49 +0100 Subject: [PATCH] fix: delete temporary files when they are not needed anymore (close #24) --- CHANGELOG | 1 + mpl_data_cast/recipe.py | 20 ++++++++++++++++---- tests/test_recipe.py | 20 +++++++++++++++++++- 3 files changed, 36 insertions(+), 5 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index afb8262..61ce200 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,6 @@ 0.5.0 - fix: fast file size check for detecting invalid upload did not take place + - fix: delete temporary files when they are not needed anymore (#24) - setup: migrate to pyproject.toml 0.4.2: - fix: properly check output directory (#20) diff --git a/mpl_data_cast/recipe.py b/mpl_data_cast/recipe.py index bb93890..04954e3 100644 --- a/mpl_data_cast/recipe.py +++ b/mpl_data_cast/recipe.py @@ -81,7 +81,9 @@ def cast(self, path_callback: Callable = None, **kwargs) -> dict: errors.append((path_list[0], traceback.format_exc())) continue ok = self.transfer_to_target_path(temp_path=temp_path, - target_path=targ_path) + target_path=targ_path, + delete_after=True, # [sic!] + ) if not ok: raise ValueError(f"Transfer to {targ_path} failed!") # Walk the directory tree and copy any other files @@ -98,9 +100,12 @@ def cast(self, path_callback: Callable = None, **kwargs) -> dict: target_path = self.path_tar / prel target_path.parent.mkdir(parents=True, exist_ok=True) ok = self.transfer_to_target_path(temp_path=pp, - target_path=target_path) + target_path=target_path, + delete_after=False, # [sic!] + ) if not ok: raise ValueError(f"Transfer to {target_path} failed!") + return { "success": not bool(errors), "errors": errors, @@ -153,7 +158,9 @@ def get_temp_path(self, path_list: list) -> pathlib.Path: @staticmethod def transfer_to_target_path(temp_path: pathlib.Path, target_path: pathlib.Path, - check_existing: bool = True) -> bool: + check_existing: bool = True, + delete_after: bool = False, + ) -> bool: """Transfer a file to another location Parameters @@ -165,6 +172,8 @@ def transfer_to_target_path(temp_path: pathlib.Path, check_existing: bool if `target_path` already exists, perform an MD5sum check and re-copy the file if the check fails + delete_after: bool + whether to delete `temp_path` after transfer Returns ------- @@ -184,7 +193,8 @@ def transfer_to_target_path(temp_path: pathlib.Path, success = Recipe.transfer_to_target_path( temp_path=temp_path, target_path=target_path, - check_existing=False + check_existing=False, + delete_after=False, # [sic!] ) else: # The file is the same, everything is good. @@ -200,6 +210,8 @@ def transfer_to_target_path(temp_path: pathlib.Path, hash_cp = hashfile(target_path) # compare md5hashes (verification) success = hash_ok == hash_cp + if success and delete_after: + temp_path.unlink(missing_ok=True) return success diff --git a/tests/test_recipe.py b/tests/test_recipe.py index 83324a7..c791fc9 100644 --- a/tests/test_recipe.py +++ b/tests/test_recipe.py @@ -21,7 +21,7 @@ def make_example_data(): class DummyRecipe(Recipe): """A pipeline that just concatenates text files""" - def convert_dataset(self, path_list, temp_path): + def convert_dataset(self, path_list, temp_path, **kwargs): data = "" for pp in path_list: data += pp.read_text() @@ -58,6 +58,24 @@ def test_pipeline_cast(): assert text2 == "lorem ipsum dolor sit amet." +def test_pipeline_cast_delete_after(): + path_raw = make_example_data() + path_tar = pathlib.Path(tempfile.mkdtemp()) / "test" + pl = DummyRecipe(path_raw, path_tar) + input_list = sorted(pl.get_raw_data_iterator()) + output_list = [pl.get_target_path(pi) for pi in input_list] + for po in output_list: + assert not po.exists() + ret = pl.cast() + assert ret["success"] + for po in output_list: + assert po.exists() + # This is the actual important test. The instance should remove + # temporary files as it transfers them. + temp_files = sorted(pl.tempdir.rglob("*.txt")) + assert len(temp_files) == 0 + + def test_pipeline_get_target_path(): path_raw = make_example_data() path_tar = pathlib.Path(tempfile.mkdtemp()) / "test"