Skip to content

Commit

Permalink
Merge pull request #34 from dandi/gh-26
Browse files Browse the repository at this point in the history
Pass `--raw` or `--raw-except=datalad` to addurl
  • Loading branch information
yarikoptic authored Jul 12, 2024
2 parents a7cf320 + c81b62b commit a064ac3
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 3 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ Before running `backups2datalad`, the following setup must be performed:
required.

- [git-annex](https://git-annex.branchable.com) must be installed. At least
version 10.20230126 is required, though you should endeavor to obtain the
version 10.20240227 is required, though you should endeavor to obtain the
latest version.

- An API token needs to be obtained for the DANDI instance that is being
Expand Down
10 changes: 9 additions & 1 deletion src/backups2datalad/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from .datasetter import DandiDatasetter
from .logging import log
from .register_s3 import register_s3urls
from .util import format_errors, pdb_excepthook, quantify
from .util import check_git_annex_version, format_errors, pdb_excepthook, quantify


@click.group(context_settings={"help_option_names": ["-h", "--help"]})
Expand Down Expand Up @@ -210,6 +210,7 @@ async def update_from_backup(
instance, but it can be restricted to only operate on specific Dandisets by
giving the IDs of the desired Dandisets as command-line arguments.
"""
check_git_annex_version()
async with datasetter:
if asset_filter is not None:
datasetter.config.asset_filter = asset_filter
Expand Down Expand Up @@ -251,6 +252,7 @@ async def backup_zarrs(
"""
Create (but do not update) local mirrors of Zarrs for a single Dandiset
"""
check_git_annex_version()
async with datasetter:
if datasetter.config.zarrs is None:
raise click.UsageError("Zarr backups not configured in config file")
Expand Down Expand Up @@ -285,6 +287,7 @@ async def update_github_metadata(
This is a maintenance command that should rarely be necessary to run.
"""
check_git_annex_version()
async with datasetter:
await datasetter.update_github_metadata(dandisets, exclude=exclude)

Expand Down Expand Up @@ -331,6 +334,7 @@ async def release(
If the mirror is configured to be pushed to GitHub, a GitHub release will
be created for the tag as well.
"""
check_git_annex_version()
async with datasetter:
if asset_filter is not None:
datasetter.config.asset_filter = asset_filter
Expand Down Expand Up @@ -381,6 +385,7 @@ async def populate_cmd(
mirrors by giving the IDs of the desired Dandisets as command-line
arguments.
"""
check_git_annex_version()
async with datasetter:
if (r := datasetter.config.dandisets.remote) is not None:
backup_remote = r.name
Expand Down Expand Up @@ -440,6 +445,7 @@ async def populate_zarrs(
directory, but it can be restricted to only operate on specific mirrors by
giving the asset IDs of the desired Zarrs as command-line arguments.
"""
check_git_annex_version()
async with datasetter:
zcfg = datasetter.config.zarrs
if zcfg is None:
Expand Down Expand Up @@ -487,6 +493,7 @@ async def zarr_checksum(dirpath: Path) -> None:
Compute the Zarr checksum for the git-annex dataset at `dirpath` using the
hashes stored in the annexed files' keys
"""
check_git_annex_version()
ds = AsyncDataset(dirpath)
print(await ds.compute_zarr_checksum())

Expand All @@ -503,6 +510,7 @@ async def register_s3urls_cmd(datasetter: DandiDatasetter, dandiset_id: str) ->
This command should only be necessary if something went wrong when
processing the unembargoing of a Dandiset.
"""
check_git_annex_version()
async with datasetter:
p = datasetter.config.dandiset_root / dandiset_id
ds = AsyncDataset(p)
Expand Down
9 changes: 8 additions & 1 deletion src/backups2datalad/asyncer.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,12 @@ async def get_annex_hash(self, filepath: Path) -> str:
async def ensure_addurl(self) -> None:
async with self.addurl_lock:
if self.addurl is None:
env = os.environ.copy()
if self.embargoed:
env["DATALAD_dandi_token"] = self.manager.token
opts = ["--raw-except=datalad"]
else:
opts = ["--raw"]
self.addurl = await open_git_annex(
"addurl",
"-c",
Expand All @@ -390,8 +396,9 @@ async def ensure_addurl(self) -> None:
"--json",
"--json-error-messages",
"--json-progress",
*opts,
path=self.ds.pathobj,
env={**os.environ, "DATALAD_dandi_token": self.manager.token},
env=env,
)
self.nursery.start_soon(self.feed_addurl)
self.nursery.start_soon(self.read_addurl)
Expand Down
2 changes: 2 additions & 0 deletions src/backups2datalad/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

DEFAULT_WORKERS = 5

MINIMUM_GIT_ANNEX_VERSION = "10.20240227"

# Maximum number of Zarrs to process at once
ZARR_LIMIT = 10

Expand Down
17 changes: 17 additions & 0 deletions src/backups2datalad/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from ruamel.yaml import YAML

from .config import BackupConfig
from .consts import MINIMUM_GIT_ANNEX_VERSION
from .logging import PrefixedLogger

if TYPE_CHECKING:
Expand Down Expand Up @@ -281,3 +282,19 @@ def yaml_dump(data: Any) -> str:

class UnexpectedChangeError(Exception):
    """
    Exception subclass that adds no behavior of its own.

    NOTE(review): judging by the name, this is presumably raised when a
    change is detected that should not have occurred -- confirm against the
    call sites.
    """


def check_git_annex_version() -> None:
    """
    Ensure that git-annex is installed and is recent enough.

    The version reported by DataLad's ``external_versions`` registry for
    ``"cmd:annex"`` is compared against `MINIMUM_GIT_ANNEX_VERSION`.

    This is meant to be invoked at the top of each subcommand function
    instead of from `main()`, so that the check is skipped when a user only
    runs ``backups2datalad subcommand --help``.

    :raises RuntimeError:
        if no git-annex version is reported, or if the reported version is
        older than `MINIMUM_GIT_ANNEX_VERSION`
    """
    from datalad.support.external_versions import external_versions

    installed = external_versions["cmd:annex"]

    # Guard clause: the registry yields None when it has no version to report
    if installed is None:
        raise RuntimeError("git-annex not installed")

    # Guard clause: a version was found, but it predates the required minimum
    if installed < MINIMUM_GIT_ANNEX_VERSION:
        raise RuntimeError(
            f"git-annex {MINIMUM_GIT_ANNEX_VERSION} or later required, but"
            f" version {installed} found"
        )

0 comments on commit a064ac3

Please sign in to comment.