From e019b911e13c26b1ecee8f4cdd24f3dbf31c40b7 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Tue, 10 Dec 2024 22:30:12 +0000 Subject: [PATCH] Implement `require_hash` --- CHANGELOG.md | 3 +- src/jupytext/contentsmanager.py | 31 ++++++++++++++--- .../contents_manager/test_contentsmanager.py | 34 +++++++++++++++++++ 3 files changed, 63 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 809c2eca..6b23c8de 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,8 @@ Jupytext ChangeLog ------------------- **Fixed** -- We have fixed the notebook corruption issue when using Jupytext with Jupyter-Collaboration ([#1124](https://github.com/mwouts/jupytext/issues/1124), [jupyter-collaboration #214](https://github.com/jupyterlab/jupyter-collaboration/issues/214)) +- We have fixed the notebook corruption issue when using Jupytext with Jupyter-Collaboration ([#1124](https://github.com/mwouts/jupytext/issues/1124), [jupyter-collaboration #214](https://github.com/jupyterlab/jupyter-collaboration/issues/214)) +- We have added the `require_hash` argument on the Jupytext contents manager. 
The hash of a paired file is the concatenation of the hash of the text file and the hash for the `.ipynb` file ([#1165](https://github.com/mwouts/jupytext/issues/1165)) - The `rst2md` tests have been fixed by requiring `sphinx<8` ([#1266](https://github.com/mwouts/jupytext/issues/1266)) **Added** diff --git a/src/jupytext/contentsmanager.py b/src/jupytext/contentsmanager.py index dd6a9564..5d17e626 100644 --- a/src/jupytext/contentsmanager.py +++ b/src/jupytext/contentsmanager.py @@ -190,29 +190,37 @@ def get( content=True, type=None, format=None, + require_hash=False, load_alternative_format=True, ): """Takes a path for an entity and returns its model""" path = path.strip("/") ext = os.path.splitext(path)[1] + super_kwargs = {"content": content, "type": type, "format": format} + if require_hash: + super_kwargs["require_hash"] = require_hash + # Not a notebook? if ( not self.file_exists(path) or self.dir_exists(path) or (type is not None and type != "notebook") ): - return self.super.get(path, content, type, format) + return self.super.get(path, **super_kwargs) config = self.get_config(path, use_cache=content is False) if ext not in self.all_nb_extensions(config): - return self.super.get(path, content, type, format) + return self.super.get(path, **super_kwargs) fmt = preferred_format(ext, config.preferred_jupytext_formats_read) if ext == ".ipynb": - model = self.super.get(path, content, type="notebook", format=format) + super_kwargs["type"] = "notebook" + model = self.super.get(path, **super_kwargs) else: - model = self.super.get(path, content, type="file", format="text") + super_kwargs["type"] = "file" + super_kwargs["format"] = "text" + model = self.super.get(path, **super_kwargs) model["type"] = "notebook" if content: # We may need to update these keys, inherited from text files formats @@ -314,6 +322,21 @@ def read_one_file(alt_path, alt_fmt): # Modification time of a paired notebook is the timestamp of inputs #118 #978 model["last_modified"] = inputs.timestamp 
+ if require_hash: + if inputs.path is None or outputs.path is None: + return model + model_other = self.super.get( + inputs.path if path == outputs.path else outputs.path, + require_hash=True, + ) + # The hash of a paired file is the concatenation of + # the hashes of the input and output files + if path == outputs.path: + model["hash"] = model_other["hash"] + model["hash"] + else: + model["hash"] = model["hash"] + model_other["hash"] + return model + if not content: return model diff --git a/tests/integration/contents_manager/test_contentsmanager.py b/tests/integration/contents_manager/test_contentsmanager.py index 355a67d2..737ddb49 100644 --- a/tests/integration/contents_manager/test_contentsmanager.py +++ b/tests/integration/contents_manager/test_contentsmanager.py @@ -1851,3 +1851,37 @@ def test_move_paired_notebook_to_subdir_1059(tmp_path, python_notebook): model = cm.get("scripts/subdir/my_notebook.py") nb = model["content"] compare_notebooks(nb, python_notebook, fmt="py:percent") + + +def test_hash_changes_if_paired_file_is_edited(tmp_path, python_notebook): + # 1. 
write py ipynb + cm = jupytext.TextFileContentsManager() + cm.formats = "ipynb,py:percent" + cm.root_dir = str(tmp_path) + + # save ipynb + nb = python_notebook + nb_name = "notebook.ipynb" + cm.save(model=notebook_model(nb), path=nb_name) + org_model = cm.get(nb_name, require_hash=True) + + py_file = tmp_path / "notebook.py" + + text = py_file.read_text() + assert "# %% [markdown]" in text.splitlines(), text + + # modify the timestamp of the paired file + time.sleep(0.5) + py_file.write_text(text) + model = cm.get(nb_name, require_hash=True) + assert model["hash"] == org_model["hash"] + + # modify the paired file + py_file.write_text(text + "\n# %%\n1 + 1\n") + + new_model = cm.get(nb_name, require_hash=True) + assert new_model["hash"] != org_model["hash"] + + # the hash is for the pair (inputs first) + model_from_py_file = cm.get("notebook.py", require_hash=True) + assert model_from_py_file["hash"] == new_model["hash"]