From 284855ab230116a519e6875e86d82acc6cea1e89 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Fri, 1 Mar 2024 10:46:56 -0500 Subject: [PATCH 1/3] Pass `--raw` or `--raw-except=datalad` to addurl --- src/backups2datalad/asyncer.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/backups2datalad/asyncer.py b/src/backups2datalad/asyncer.py index 4a7b32d..b1ddbe4 100644 --- a/src/backups2datalad/asyncer.py +++ b/src/backups2datalad/asyncer.py @@ -379,6 +379,12 @@ async def get_annex_hash(self, filepath: Path) -> str: async def ensure_addurl(self) -> None: async with self.addurl_lock: if self.addurl is None: + env = os.environ.copy() + if self.embargoed: + env["DATALAD_dandi_token"] = self.manager.token + opts = ["--raw-except=datalad"] + else: + opts = ["--raw"] self.addurl = await open_git_annex( "addurl", "-c", @@ -390,8 +396,9 @@ async def ensure_addurl(self) -> None: "--json", "--json-error-messages", "--json-progress", + *opts, path=self.ds.pathobj, - env={**os.environ, "DATALAD_dandi_token": self.manager.token}, + env=env, ) self.nursery.start_soon(self.feed_addurl) self.nursery.start_soon(self.read_addurl) From fc598a13b4685761a63ec8aab9149ce7f07407b5 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Mon, 4 Mar 2024 15:44:43 -0500 Subject: [PATCH 2/3] Update minimum required git-annex version in README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5872e97..b09cb78 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ Before running `backups2datalad`, the following setup must be performed: required. - [git-annex](https://git-annex.branchable.com) must be installed. At least - version 10.20230126 is required, though you should endeavor to obtain the + version 10.20240227 is required, though you should endeavor to obtain the latest version. - An API token needs to be obtained for the DANDI instance that is being From c81b62b384a8ff7ea8656906641ce12cdeb2d172 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Thu, 11 Jul 2024 13:44:07 -0400 Subject: [PATCH 3/3] Add check for minimum required git-annex version --- src/backups2datalad/__main__.py | 10 +++++++++- src/backups2datalad/consts.py | 2 ++ src/backups2datalad/util.py | 17 +++++++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/src/backups2datalad/__main__.py b/src/backups2datalad/__main__.py index 358a5bd..93c2707 100644 --- a/src/backups2datalad/__main__.py +++ b/src/backups2datalad/__main__.py @@ -24,7 +24,7 @@ from .datasetter import DandiDatasetter from .logging import log from .register_s3 import register_s3urls -from .util import format_errors, pdb_excepthook, quantify +from .util import check_git_annex_version, format_errors, pdb_excepthook, quantify @click.group(context_settings={"help_option_names": ["-h", "--help"]}) @@ -210,6 +210,7 @@ async def update_from_backup( instance, but it can be restricted to only operate on specific Dandisets by giving the IDs of the desired Dandisets as command-line arguments. """ + check_git_annex_version() async with datasetter: if asset_filter is not None: datasetter.config.asset_filter = asset_filter @@ -251,6 +252,7 @@ async def backup_zarrs( """ Create (but do not update) local mirrors of Zarrs for a single Dandiset """ + check_git_annex_version() async with datasetter: if datasetter.config.zarrs is None: raise click.UsageError("Zarr backups not configured in config file") @@ -285,6 +287,7 @@ async def update_github_metadata( This is a maintenance command that should rarely be necessary to run. """ + check_git_annex_version() async with datasetter: await datasetter.update_github_metadata(dandisets, exclude=exclude) @@ -331,6 +334,7 @@ async def release( If the mirror is configured to be pushed to GitHub, a GitHub release will be created for the tag as well. """ + check_git_annex_version() async with datasetter: if asset_filter is not None: datasetter.config.asset_filter = asset_filter @@ -381,6 +385,7 @@ async def populate_cmd( mirrors by giving the IDs of the desired Dandisets as command-line arguments. """ + check_git_annex_version() async with datasetter: if (r := datasetter.config.dandisets.remote) is not None: backup_remote = r.name @@ -440,6 +445,7 @@ async def populate_zarrs( directory, but it can be restricted to only operate on specific mirrors by giving the asset IDs of the desired Zarrs as command-line arguments. """ + check_git_annex_version() async with datasetter: zcfg = datasetter.config.zarrs if zcfg is None: @@ -487,6 +493,7 @@ async def zarr_checksum(dirpath: Path) -> None: Compute the Zarr checksum for the git-annex dataset at `dirpath` using the hashes stored in the annexed files' keys """ + check_git_annex_version() ds = AsyncDataset(dirpath) print(await ds.compute_zarr_checksum()) @@ -503,6 +510,7 @@ async def register_s3urls_cmd(datasetter: DandiDatasetter, dandiset_id: str) -> This command should only be necessary if something went wrong when processing the unembargoing of a Dandiset. """ + check_git_annex_version() async with datasetter: p = datasetter.config.dandiset_root / dandiset_id ds = AsyncDataset(p) diff --git a/src/backups2datalad/consts.py b/src/backups2datalad/consts.py index bb137db..0ba7e9e 100644 --- a/src/backups2datalad/consts.py +++ b/src/backups2datalad/consts.py @@ -10,6 +10,8 @@ DEFAULT_WORKERS = 5 +MINIMUM_GIT_ANNEX_VERSION = "10.20240227" + # Maximum number of Zarrs to process at once ZARR_LIMIT = 10 diff --git a/src/backups2datalad/util.py b/src/backups2datalad/util.py index f7ba664..3c96760 100644 --- a/src/backups2datalad/util.py +++ b/src/backups2datalad/util.py @@ -22,6 +22,7 @@ from ruamel.yaml import YAML from .config import BackupConfig +from .consts import MINIMUM_GIT_ANNEX_VERSION from .logging import PrefixedLogger if TYPE_CHECKING: @@ -281,3 +282,19 @@ def yaml_dump(data: Any) -> str: class UnexpectedChangeError(Exception): pass + + +def check_git_annex_version() -> None: + # Call this function at the start of subcommand functions rather than in + # `main()` so that it doesn't run if a user does `backups2datalad + # subcommand --help` + from datalad.support.external_versions import external_versions + + gaversion = external_versions["cmd:annex"] + if gaversion is None: + raise RuntimeError("git-annex not installed") + elif gaversion < MINIMUM_GIT_ANNEX_VERSION: + raise RuntimeError( + f"git-annex {MINIMUM_GIT_ANNEX_VERSION} or later required, but" + f" version {gaversion} found" + )