Skip to content

Commit

Permalink
fix: delete temporary files when they are not needed anymore (close #24)
Browse files Browse the repository at this point in the history
  • Loading branch information
paulmueller committed Nov 14, 2023
1 parent b94ce76 commit 7dafda9
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 5 deletions.
1 change: 1 addition & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
0.5.0
- fix: fast file size check for detecting invalid upload did not take place
- fix: delete temporary files when they are not needed anymore (#24)
- setup: migrate to pyproject.toml
0.4.2:
- fix: properly check output directory (#20)
Expand Down
20 changes: 16 additions & 4 deletions mpl_data_cast/recipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,9 @@ def cast(self, path_callback: Callable = None, **kwargs) -> dict:
errors.append((path_list[0], traceback.format_exc()))
continue
ok = self.transfer_to_target_path(temp_path=temp_path,
target_path=targ_path)
target_path=targ_path,
delete_after=True, # [sic!]
)
if not ok:
raise ValueError(f"Transfer to {targ_path} failed!")
# Walk the directory tree and copy any other files
Expand All @@ -98,9 +100,12 @@ def cast(self, path_callback: Callable = None, **kwargs) -> dict:
target_path = self.path_tar / prel
target_path.parent.mkdir(parents=True, exist_ok=True)
ok = self.transfer_to_target_path(temp_path=pp,
target_path=target_path)
target_path=target_path,
delete_after=False, # [sic!]
)
if not ok:
raise ValueError(f"Transfer to {target_path} failed!")

return {
"success": not bool(errors),
"errors": errors,
Expand Down Expand Up @@ -153,7 +158,9 @@ def get_temp_path(self, path_list: list) -> pathlib.Path:
@staticmethod
def transfer_to_target_path(temp_path: pathlib.Path,
target_path: pathlib.Path,
check_existing: bool = True) -> bool:
check_existing: bool = True,
delete_after: bool = False,
) -> bool:
"""Transfer a file to another location
Parameters
Expand All @@ -165,6 +172,8 @@ def transfer_to_target_path(temp_path: pathlib.Path,
check_existing: bool
if `target_path` already exists, perform an MD5sum check
and re-copy the file if the check fails
delete_after: bool
whether to delete `temp_path` after transfer
Returns
-------
Expand All @@ -184,7 +193,8 @@ def transfer_to_target_path(temp_path: pathlib.Path,
success = Recipe.transfer_to_target_path(
temp_path=temp_path,
target_path=target_path,
check_existing=False
check_existing=False,
delete_after=False, # [sic!]
)
else:
# The file is the same, everything is good.
Expand All @@ -200,6 +210,8 @@ def transfer_to_target_path(temp_path: pathlib.Path,
hash_cp = hashfile(target_path)
# compare md5hashes (verification)
success = hash_ok == hash_cp
if success and delete_after:
temp_path.unlink(missing_ok=True)
return success


Expand Down
20 changes: 19 additions & 1 deletion tests/test_recipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def make_example_data():

class DummyRecipe(Recipe):
"""A pipeline that just concatenates text files"""
def convert_dataset(self, path_list, temp_path):
def convert_dataset(self, path_list, temp_path, **kwargs):
data = ""
for pp in path_list:
data += pp.read_text()
Expand Down Expand Up @@ -58,6 +58,24 @@ def test_pipeline_cast():
assert text2 == "lorem ipsum dolor sit amet."


def test_pipeline_cast_delete_after():
path_raw = make_example_data()
path_tar = pathlib.Path(tempfile.mkdtemp()) / "test"
pl = DummyRecipe(path_raw, path_tar)
input_list = sorted(pl.get_raw_data_iterator())
output_list = [pl.get_target_path(pi) for pi in input_list]
for po in output_list:
assert not po.exists()
ret = pl.cast()
assert ret["success"]
for po in output_list:
assert po.exists()
# This is the actual important test. The instance should remove
# temporary files as it transfers them.
temp_files = sorted(pl.tempdir.rglob("*.txt"))
assert len(temp_files) == 0


def test_pipeline_get_target_path():
path_raw = make_example_data()
path_tar = pathlib.Path(tempfile.mkdtemp()) / "test"
Expand Down

0 comments on commit 7dafda9

Please sign in to comment.