diff --git a/.coveragerc b/.coveragerc
index f66d607..834f197 100644
--- a/.coveragerc
+++ b/.coveragerc
@@ -1,7 +1,7 @@
 # .coveragerc to control coverage.py
 [run]
 branch = True
-source = dcqc
+source = fs-synapse
 # omit = bad_file.py
 
 [paths]
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index 5caf379..7783e78 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -6,14 +6,11 @@ name: Build and publish container to Docker Hub
 on:
   push:
     tags: ['v[0-9]*', '[0-9]+.[0-9]+*']  # Match tags that resemble a version
-  workflow_dispatch:
+    branches:
 
 jobs:
   build:
     runs-on: ubuntu-latest
-    env:
-      DOCKER_ORG: sagebionetworks
-      DOCKER_REPO: dcqc
 
     steps:
       - name: Checkout
@@ -25,34 +22,31 @@ jobs:
        name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2
 
      -
-       name: Login to DockerHub
+       name: Login to GHCR
        uses: docker/login-action@v2
        with:
-         username: ${{ secrets.DOCKER_USERNAME }}
-         password: ${{ secrets.DOCKER_PASSWORD }}
+         registry: ghcr.io
+         username: ${{ github.actor }}
+         password: ${{ secrets.GITHUB_TOKEN }}
 
      -
-       name: Compute short commit SHA ID
-       id: vars
-       run: echo "::set-output name=sha_short::$(git rev-parse --short HEAD)"
-
-       name: Build and push (tagged release)
-       uses: docker/build-push-action@v3
-       if: ${{ github.event_name == 'push' }}
+       name: Docker meta
+       id: meta
+       uses: docker/metadata-action@v4
        with:
-         platforms: linux/amd64,linux/arm64
-         context: .
-         push: true
+         images: |
+           ghcr.io/${{ github.repository }}
          tags: |
-           ${{ env.DOCKER_ORG }}/${{ env.DOCKER_REPO }}:latest
-           ${{ env.DOCKER_ORG }}/${{ env.DOCKER_REPO }}:${{ github.ref_name }}
-           ${{ env.DOCKER_ORG }}/${{ env.DOCKER_REPO }}:commit-${{ steps.vars.outputs.sha_short }}
+           type=semver,pattern={{version}}
+           type=semver,pattern={{major}}.{{minor}}
+           type=semver,pattern={{major}}
+           type=ref,event=branch
+           type=sha
 
      -
-       name: Build and push (manual release)
+       name: Build and push (all tags and branches)
        uses: docker/build-push-action@v3
-       if: ${{ github.event_name == 'workflow_dispatch' }}
        with:
          platforms: linux/amd64,linux/arm64
          context: .
          push: true
-         tags: |
-           ${{ env.DOCKER_ORG }}/${{ env.DOCKER_REPO }}:commit-${{ steps.vars.outputs.sha_short }}
+         tags: ${{ steps.meta.outputs.tags }}
+         labels: ${{ steps.meta.outputs.labels }}
diff --git a/.isort.cfg b/.isort.cfg
index 2d3838f..bfc0f65 100644
--- a/.isort.cfg
+++ b/.isort.cfg
@@ -1,3 +1,3 @@
 [settings]
 profile = black
-known_first_party = dcqc
+known_first_party = synapsefs
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 575b804..8d00c26 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -23,7 +23,7 @@ repos:
     - id: autoflake
 
   - repo: https://github.com/pycqa/isort
-    rev: 5.11.4
+    rev: 5.12.0
    hooks:
     - id: isort
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 15a0c6e..65221a9 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,6 +1,6 @@
 # Contributing
 
-Welcome to `dcqc` contributor's guide.
+Welcome to `fs-synapse` contributor's guide.
 
 This document focuses on getting any potential contributor familiarized
 with the development processes, but [other kinds of contributions] are
 also appreciated.
@@ -16,7 +16,7 @@ behavior guidelines.
 
 ## Issue Reports
 
-If you experience bugs or general issues with `dcqc`, please have a look
+If you experience bugs or general issues with `fs-synapse`, please have a look
 on the [issue tracker].
 If you don't see anything useful there, please feel free
 to file an issue report.
@@ -34,17 +34,17 @@
 ## Documentation Improvements
 
-You can help improve `dcqc` docs by making them more readable and coherent, or
+You can help improve `fs-synapse` docs by making them more readable and coherent, or
 by adding missing information and correcting mistakes.
 
-`dcqc` documentation uses [Sphinx] as its main documentation compiler.
+`fs-synapse` documentation uses [Sphinx] as its main documentation compiler.
 This means that the docs are kept in the same repository as the project code,
 and that any documentation update is done in the same way as a code contribution.
 The documentation is written using [CommonMark] with [MyST] extensions.
 
 :::{tip}
    Please notice that the [GitHub web interface] provides a quick way of
-   propose changes in `dcqc`'s files. While this mechanism can
+   proposing changes in `fs-synapse`'s files. While this mechanism can
    be tricky for normal code contributions, it works perfectly fine for
    contributing to the docs, and can be quite handy.
@@ -97,8 +97,8 @@ This often provides additional considerations and avoids unnecessary work.
 
 3. Clone this copy to your local disk:
 
    ```console
-   git clone git@github.com:Sage-Bionetworks-Workflows/py-dcqc.git
-   cd dcqc
+   git clone git@github.com:Sage-Bionetworks-Workflows/fs-synapse.git
+   cd fs-synapse
    ```
 
 4. You should run:
@@ -116,7 +116,7 @@ This often provides additional considerations and avoids unnecessary work.
    pipenv run pre-commit install
    ```
 
-   `dcqc` comes with a lot of hooks configured to automatically help the
+   `fs-synapse` comes with a lot of hooks configured to automatically help the
    developer to check the code being written.
 
 ### Implement your changes
@@ -245,7 +245,7 @@ package:
 
 If you are part of the group of maintainers and have correct user permissions
 on [PyPI], the following steps can be used to release a new version for
-`dcqc`:
+`fs-synapse`:
 
 1. Make sure all unit tests are successful.
 2. Tag the current commit on the main branch with a release tag, e.g., `v1.2.3`.
@@ -291,5 +291,5 @@ on [PyPI], the following steps can be used to release a new version for
 [tox]: https://tox.readthedocs.io/en/stable/
 [virtual environment]: https://realpython.com/python-virtual-environments-a-primer/
 [virtualenv]: https://virtualenv.pypa.io/en/stable/
-[repository]: https://github.com/sage-bionetworks-workflows/py-dcqc
-[issue tracker]: https://github.com/sage-bionetworks-workflows/py-dcqc/issues
+[repository]: https://github.com/sage-bionetworks-workflows/fs-synapse
+[issue tracker]: https://github.com/sage-bionetworks-workflows/fs-synapse/issues
diff --git a/Dockerfile b/Dockerfile
index 5aead46..7784ee1 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -12,4 +12,4 @@ COPY src ./src/
 RUN SETUPTOOLS_SCM_PRETEND_VERSION_FOR_SAGETASKS=${PKG_VERSION} \
     python -m pip install .
-CMD [ "python", "-c", "import dcqc" ] +CMD [ "python", "-c", "import synapsefs" ] diff --git a/Pipfile b/Pipfile index 4b54386..22f6645 100644 --- a/Pipfile +++ b/Pipfile @@ -4,10 +4,10 @@ verify_ssl = true name = "pypi" [packages] -dcqc = {editable = true, path = "."} +fs-synapse = {editable = true, path = "."} [dev-packages] -dcqc = {editable = true, path = ".", extras = ["testing", "dev"]} +fs-synapse = {editable = true, path = ".", extras = ["testing", "dev"]} [requires] python_version = "3.11" diff --git a/Pipfile.lock b/Pipfile.lock index b28a132..4419e35 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "0d7dc0d6eaf114d26ace7be67b886ade99a821308ba874cc3b1951f5e3dcea2d" + "sha256": "310a7566224000a6bec069170fef50d9b839e1ca913734fcc0a84a3f828eb3b5" }, "pipfile-spec": 6, "requires": { @@ -122,17 +122,8 @@ "sha256:f9d0c5c045a3ca9bedfc35dca8526798eb91a07aa7a2c0fee134c6c6f321cbd7", "sha256:ff6f3db31555657f3163b15a6b7c6938d08df7adbfc9dd13d9d19edad678f1e8" ], - "markers": "python_full_version >= '3.6.0'", "version": "==3.0.1" }, - "click": { - "hashes": [ - "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e", - "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48" - ], - "markers": "python_version >= '3.7'", - "version": "==8.1.3" - }, "dcqc": { "editable": true, "path": "." @@ -197,7 +188,7 @@ "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'", "version": "==1.16.0" }, "synapseclient": { @@ -208,14 +199,6 @@ "markers": "python_version >= '3.7'", "version": "==2.7.0" }, - "typer": { - "hashes": [ - "sha256:b5e704f4e48ec263de1c0b3a2387cd405a13767d2f907f44c1a08cbad96f606d", - "sha256:ff797846578a9f2a201b53442aedeb543319466870fbe1c701eab66dd7681165" - ], - "markers": "python_version >= '3.6'", - "version": "==0.7.0" - }, "urllib3": { "hashes": [ "sha256:076907bf8fd355cde77728471316625a4d2f7e713c125f51953bb5b3eecf4f72", @@ -296,11 +279,11 @@ }, "zipp": { "hashes": [ - "sha256:83a28fcb75844b5c0cdaf5aa4003c2d728c77e05f5aeabe8e95e56727005fbaa", - "sha256:a7a22e05929290a67401440b39690ae6563279bced5f314609d9d03798f56766" + "sha256:73efd63936398aac78fd92b6f4865190119d6c91b531532e798977ea8dd402eb", + "sha256:9eb0a4c5feab9b08871db0d672745b53450d7f26992fd1e4653aa43345e97b86" ], "markers": "python_version >= '3.7'", - "version": "==3.11.0" + "version": "==3.12.0" } }, "develop": { @@ -482,7 +465,6 @@ "sha256:f9d0c5c045a3ca9bedfc35dca8526798eb91a07aa7a2c0fee134c6c6f321cbd7", "sha256:ff6f3db31555657f3163b15a6b7c6938d08df7adbfc9dd13d9d19edad678f1e8" ], - "markers": "python_full_version >= '3.6.0'", "version": "==3.0.1" }, "click": { @@ -628,14 +610,6 @@ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", "version": "==0.17.1" }, - "entrypoints": { - "hashes": [ - "sha256:b706eddaa9218a19ebcd67b56818f05bb27589b1ca9e8d797b74affad4ccacd4", - "sha256:f174b5ff827504fd3cd97cc3f8649f3693f51538c7e4bdf3ef002c8429d42f9f" - ], - "markers": "python_version >= '3.6'", - "version": "==0.4" - }, "executing": { "hashes": [ "sha256:0314a69e37426e3608aada02473b4161d4caf5a4b244d1d0c48072b8fee7bacc", @@ -746,11 +720,11 @@ }, "ipython": { "hashes": [ - "sha256:da01e6df1501e6e7c32b5084212ddadd4ee2471602e2cf3e0190f4de6b0ea481", - 
"sha256:f3bf2c08505ad2c3f4ed5c46ae0331a8547d36bf4b21a451e8ae80c0791db95b" + "sha256:71618e82e6d59487bea059626e7c79fb4a5b760d1510d02fab1160db6fdfa1f7", + "sha256:9c207b0ef2d276d1bfcfeb9a62804336abbe4b170574ea061500952319b1d78c" ], "markers": "python_version >= '3.8'", - "version": "==8.8.0" + "version": "==8.9.0" }, "isort": { "hashes": [ @@ -786,11 +760,11 @@ }, "jupyter-client": { "hashes": [ - "sha256:214668aaea208195f4c13d28eb272ba79f945fc0cf3f11c7092c20b2ca1980e7", - "sha256:52be28e04171f07aed8f20e1616a5a552ab9fee9cbbe6c1896ae170c3880d392" + "sha256:3f67b1c8b7687e6db09bef10ff97669932b5e6ef6f5a8ee56d444b89022c5007", + "sha256:6016b874fd1111d721bc5bee30624399e876e79e6f395d1a559e6dce9fb2e1ba" ], - "markers": "python_version >= '3.7'", - "version": "==7.4.9" + "markers": "python_version >= '3.8'", + "version": "==8.0.1" }, "jupyter-core": { "hashes": [ @@ -1204,7 +1178,7 @@ "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86", "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'", "version": "==2.8.2" }, "pytz": { @@ -1364,7 +1338,7 @@ "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'", "version": "==1.16.0" }, "snowballstemmer": { @@ -1481,7 +1455,7 @@ "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f" ], - "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2'", "version": "==0.10.2" }, "tornado": { @@ -1517,14 +1491,6 @@ "markers": "python_version >= '3.7'", "version": "==5.8.1" }, - "typer": { - "hashes": [ - "sha256:b5e704f4e48ec263de1c0b3a2387cd405a13767d2f907f44c1a08cbad96f606d", - "sha256:ff797846578a9f2a201b53442aedeb543319466870fbe1c701eab66dd7681165" - ], - "markers": "python_version >= '3.6'", - "version": "==0.7.0" - }, "typing-extensions": { "hashes": [ "sha256:1511434bb92bf8dd198c12b1cc812e800d4181cfcb867674e0f8279cc93087aa", @@ -1628,11 +1594,11 @@ }, "zipp": { "hashes": [ - "sha256:83a28fcb75844b5c0cdaf5aa4003c2d728c77e05f5aeabe8e95e56727005fbaa", - "sha256:a7a22e05929290a67401440b39690ae6563279bced5f314609d9d03798f56766" + "sha256:73efd63936398aac78fd92b6f4865190119d6c91b531532e798977ea8dd402eb", + "sha256:9eb0a4c5feab9b08871db0d672745b53450d7f26992fd1e4653aa43345e97b86" ], "markers": "python_version >= '3.7'", - "version": "==3.11.0" + "version": "==3.12.0" } } } diff --git a/README.md b/README.md index d716c9d..cf511ea 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,93 @@ -# py-dcqc +# fs-synapse -[![PyPI-Server](https://img.shields.io/pypi/v/dcqc.svg)](https://pypi.org/project/dcqc/) -[![codecov](https://codecov.io/gh/Sage-Bionetworks-Workflows/py-dcqc/branch/main/graph/badge.svg?token=OCC4MOUG5P)](https://codecov.io/gh/Sage-Bionetworks-Workflows/py-dcqc) +[![ReadTheDocs](https://readthedocs.org/projects/fs-synapse/badge/?version=latest)](https://sage-bionetworks-workflows.github.io/fs-synapse/) 
+[![PyPI-Server](https://img.shields.io/pypi/v/fs-synapse.svg)](https://pypi.org/project/fs-synapse/)
+[![codecov](https://codecov.io/gh/Sage-Bionetworks-Workflows/fs-synapse/branch/main/graph/badge.svg?token=OCC4MOUG5P)](https://codecov.io/gh/Sage-Bionetworks-Workflows/fs-synapse)
 [![Project generated with PyScaffold](https://img.shields.io/badge/-PyScaffold-005CA0?logo=pyscaffold)](#pyscaffold)
+-->
+
+> A Synapse implementation of the [PyFileSystem2](http://docs.pyfilesystem.org/) interface.
+
+`fs-synapse` allows us to leverage the [PyFileSystem API](https://docs.pyfilesystem.org/en/latest/interface.html) to interface with Synapse files, folders, and projects. By learning this API, you can write code that is agnostic to where your files are physically located. This is achieved by referring to Synapse entities using URLs. Commented examples are included below, but more details can be found [here](https://docs.pyfilesystem.org/en/latest/openers.html).
+
+```
+syn://syn50545516                               # Synapse project
+
+syn://syn50557597                               # Folder in the above Synapse project
+syn://syn50545516/syn50557597                   # Same folder, but using a full path
+syn://syn50545516/TestSubDir                    # Same folder, but referenced by name
+
+syn://syn50555279                               # File in the above Synapse project
+syn://syn50545516/syn50555279                   # Same file, but using a full path
+syn://syn50545516/test.txt                      # Same file, but referenced by name
+
+syn://syn50545516/ExploratoryTests/report.json  # Nested file
+```
+
+## Benefits
+
+There are several benefits to using the `fs-synapse` API over `synapseclient`.
 
-> Python package for performing quality control (QC) for data coordination (DC)
+```python
+from fs import open_fs
+
+fs = open_fs("syn://")
+```
 
-This Python package provides a framework for performing quality control (QC) on data files. Quality control can range from low-level integrity checks (_e.g._ MD5 checksum, file extension) to high-level checks such as conformance to a format specification and consistency with associated metadata.
+### Interact with Synapse using a Pythonic interface
+
+This [guide](https://docs.pyfilesystem.org/en/latest/guide.html) provides several code examples for various use cases.
+
+```python
+file_url = "syn://syn50555279"
+
+with fs.open(file_url, "a") as fp:
+    fp.write("Appending some text to a Synapse file")
+```
 
-Early versions of this package were developed to be used by its sibling, the [nf-dcqc](https://github.com/Sage-Bionetworks-Workflows/nf-dcqc) Nextflow workflow. You can see examples of how to leverage py-dcqc there. Note that the initial command-line interface (CLI) was developed with nf-dcqc in mind, so smaller steps were favored to enable parallelism in Nextflow. Future iterations of this package will include user-friendly, high-level CLI commands.
+### Access to several convenience functions
+
+The full list of available functions is listed [here](https://docs.pyfilesystem.org/en/latest/interface.html).
+
+```python
+folder_url = "syn://syn50696438"
+
+fs.makedirs(f"{folder_url}/creating/nested/folders/with/one/operation")
+```
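+
+As a sketch of what this enables (reusing the example entities above; the copied file's destination name is hypothetical), `exists()` and `copy()` from the same interface work on the same URLs:
+
+```python
+# Check that the nested folders from the previous call are now present
+fs.exists(f"{folder_url}/creating/nested")  # -> True
+
+# Copy a file to a new name within the same project
+fs.copy("syn://syn50545516/test.txt", "syn://syn50545516/test_copy.txt")
+```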
+
+### Refer to Synapse files and folders by name
+
+You don't have to track as many Synapse IDs. You only need to care about the top-level projects or folders and refer to subfolders and files by name.
+
+```python
+project_url = "syn://syn50545516"
+
+data_url = f"{project_url}/data/raw.csv"
+output_url = f"{project_url}/outputs/processed.csv"
+
+with fs.open(data_url, "r") as data_fp, fs.open(output_url, "a") as output_fp:
+    results = number_cruncher(data_fp.read())
+    output_fp.write(results)
+```
+
+### Write Synapse-agnostic code
+
+Unfortunately, every time you use `synapseclient` for file and folder operations, you are hard-coding a dependency on Synapse into your project. Leveraging `fs-synapse` helps avoid this hard dependency and makes your code more portable to other file backends (_e.g._ S3). You can swap in any other file system by using its URL scheme (_e.g._ `s3://`). Here's [an index](https://www.pyfilesystem.org/page/index-of-filesystems/) of available file systems to choose from.
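+
+For example, moving data between backends is a single function call, and neither side of the call is Synapse-specific. A minimal sketch, assuming (as the URL examples above suggest) that a project URL can serve as a filesystem root and that it contains `test.txt`:
+
+```python
+from fs.copy import copy_file
+
+# Copy out of Synapse into an in-memory filesystem; swapping either URL
+# (e.g., for "s3://my-bucket" or "osfs://.") changes the backend, not the code.
+copy_file("syn://syn50545516", "test.txt", "mem://", "test.txt")
+```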
+
+### Rely on code covered by integration tests
+
+So you don't have to write the Synapse integration tests yourself! These tests tend to be slow, so delegating that responsibility to an externally managed package like `fs-synapse` keeps your test suite fast and focused on what you care about.
+
+In your test code, you can use `mem://` or `temp://` URLs for faster I/O instead of storing and retrieving files on Synapse ([MemoryFS](https://docs.pyfilesystem.org/en/latest/reference/memoryfs.html) and [TempFS](https://docs.pyfilesystem.org/en/latest/reference/tempfs.html)).
+
+```python
+def test_some_feature_of_your_code():
+    output_url = "mem://report.json"
+    cruncher = NumberCruncher()
+    cruncher.save(output_url)
+    assert cruncher.fs.exists(output_url)
+```
 
 # PyScaffold
 
 This project has been set up using PyScaffold 4.3. For details and usage
 information on PyScaffold see https://pyscaffold.org/.
 
 ```console
-putup --name dcqc --markdown --github-actions --pre-commit --license Apache-2.0 py-dcqc
+putup --name fs-synapse --markdown --github-actions --pre-commit --license Apache-2.0 fs-synapse
 ```
diff --git a/demos/SynapseFS.ipynb b/demos/SynapseFS.ipynb
index c1b8bc1..a57dff2 100644
--- a/demos/SynapseFS.ipynb
+++ b/demos/SynapseFS.ipynb
@@ -27,7 +27,7 @@
    "outputs": [],
    "source": [
     "from fs import open_fs\n",
-    "from dcqc import SynapseFS"
+    "from synapsefs import SynapseFS"
    ]
   },
@@ -248,7 +248,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": ".venv",
+   "display_name": "fs-synapse-M-_Ymbtp",
    "language": "python",
    "name": "python3"
   },
@@ -262,11 +262,11 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.4 (main, Jul 7 2022, 15:39:05) [Clang 12.0.5 (clang-1205.0.22.11)]"
+   "version": "3.11.1 (main, Jan 25 2023, 11:24:21) [Clang 13.1.6 (clang-1316.0.21.2.5)]"
   },
   "vscode": {
    "interpreter": {
-    "hash": "b3cc5ad3e41ecdab922a1c2f15e033ac62b3a1f322e57b79d2f2f637461ca57a"
+    "hash": "8255ec51be6b5c8a74f7639e3611bd7db4ab8af9848d9023d77f879d865684d0"
    }
   }
  },
diff --git a/docs/conf.py b/docs/conf.py
index 7bc21d3..2ea4ea3 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -34,7 +34,7 @@ from sphinx import apidoc
 
 output_dir = os.path.join(__location__, "api")
-module_dir = os.path.join(__location__, "../src/dcqc")
+module_dir = os.path.join(__location__, "../src/synapsefs")
 try:
     shutil.rmtree(output_dir)
 except FileNotFoundError:
@@ -112,7 +112,7 @@ master_doc = "index"
 
 # General information about the project.
-project = "dcqc"
+project = "fs-synapse"
 copyright = "2022, Bruno Grande"
 
 # The version info for the project you're documenting, acts as replacement for
@@ -124,7 +124,7 @@
 # If you don’t need the separation provided between version and release,
 # just set them both to the same value.
 try:
-    from dcqc import __version__ as version
+    from synapsefs import __version__ as version
 except ImportError:
     version = ""
@@ -251,7 +251,7 @@
 # html_file_suffix = None
 
 # Output file base name for HTML help builder.
-htmlhelp_basename = "dcqc-doc"
+htmlhelp_basename = "fs-synapse-doc"
 
 # -- Options for LaTeX output ------------------------------------------------
@@ -268,7 +268,7 @@
 # Grouping the document tree into LaTeX files. List of tuples
 # (source start file, target name, title, author, documentclass [howto/manual]).
 latex_documents = [
-    ("index", "user_guide.tex", "dcqc Documentation", "Bruno Grande", "manual")
+    ("index", "user_guide.tex", "fs-synapse Documentation", "Bruno Grande", "manual")
 ]
 
 # The name of an image file (relative to this directory) to place at the top of
diff --git a/docs/index.md b/docs/index.md
index 1c9c38a..1b12618 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -1,6 +1,6 @@
-# dcqc
+# fs-synapse
 
-Add a short description here!
+A Synapse implementation of the [PyFileSystem2](http://docs.pyfilesystem.org/) interface.
 
 ## Note
diff --git a/pyproject.toml b/pyproject.toml
index 87e1e93..586ad05 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,11 +8,8 @@ build-backend = "setuptools.build_meta"
 # check out https://github.com/pypa/setuptools_scm
 version_scheme = "no-guess-dev"
 
-[tool.mypy]
-disable_error_code = "type-abstract"
-
 [[tool.mypy.overrides]]
-module = "dcqc"
+module = "synapsefs"
 disallow_untyped_calls = true
 disallow_untyped_defs = true
 disallow_incomplete_defs = true
@@ -22,9 +19,5 @@ check_untyped_defs = true
 module = "synapseclient.*"
 ignore_missing_imports = true
 
-[[tool.mypy.overrides]]
-module = "dcqc.suites.suites"
-disable_error_code = "assignment"
-
 [tool.interrogate]
 ignore-init-method = true
diff --git a/setup.cfg b/setup.cfg
index 7ba8278..d11d6bf 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -4,19 +4,19 @@
 # https://setuptools.pypa.io/en/latest/references/keywords.html
 
 [metadata]
-name = dcqc
-description = Add a short description here!
+name = fs-synapse
+description = A Synapse implementation of the PyFileSystem2 interface
 author = Bruno Grande
 author_email = bruno.grande@sagebase.org
 license = Apache-2.0
 license_files = LICENSE.txt
 long_description = file: README.md
 long_description_content_type = text/markdown; charset=UTF-8; variant=GFM
-url = https://github.com/pyscaffold/pyscaffold/
+url = https://github.com/Sage-Bionetworks-Workflows/fs-synapse
 # Add here related links, for example:
 project_urls =
-    Documentation = https://pyscaffold.org/
-#    Source = https://github.com/pyscaffold/pyscaffold/
+#    Documentation = https://pyscaffold.org/
+    Source = https://github.com/Sage-Bionetworks-Workflows/fs-synapse
 #    Changelog = https://pyscaffold.org/en/latest/changelog.html
 #    Tracker = https://github.com/pyscaffold/pyscaffold/issues
 #    Conda-Forge = https://anaconda.org/conda-forge/pyscaffold
@@ -50,9 +50,8 @@ python_requires = >=3.8
 # Updates here should be reflected in `docs/requirements.txt`
 install_requires =
-    synapseclient~=2.7
     fs~=2.4
-    typer~=0.7.0
+    synapseclient~=2.7
 
 [options.packages.find]
 where = src
@@ -61,7 +60,7 @@ exclude =
 
 [options.extras_require]
 # Add here additional requirements for extra features, to install with:
-# `pip install dcqc[PDF]` like:
+# `pip install fs-synapse[PDF]` like:
 # PDF = ReportLab; RXP
 
 # Dependencies for testing (used by tox and Pipenv)
@@ -89,15 +88,13 @@ dev =
 [options.entry_points]
 # Add here console scripts like:
 # console_scripts =
-#     script_name = dcqc.module:function
+#     script_name = synapsefs.module:function
 # For example:
 # console_scripts =
-#     fibonacci = dcqc.skeleton:run
+#     fibonacci = synapsefs.skeleton:run
 # And any other entry points, for example:
-console_scripts =
-    dcqc = dcqc.main:app
 fs.opener =
-    syn = dcqc.filesystems.openers:SynapseFSOpener
+    syn = synapsefs.opener:SynapseFSOpener
 
 [tool:pytest]
 # Specify command line options as you would do when invoking pytest directly.
@@ -106,7 +103,7 @@ fs.opener =
 # CAUTION: --cov flags may prohibit setting breakpoints while debugging.
 # Comment those flags to avoid this pytest issue.
 addopts =
-    --cov dcqc --cov-report term-missing --cov-report xml
+    --cov synapsefs --cov-report term-missing --cov-report xml
     --verbose
 norecursedirs =
     dist
@@ -148,7 +145,7 @@ per-file-ignores =
 # PyScaffold's parameters when the project was created.
 # This will be used when updating. Do not change!
 version = 4.3
-package = dcqc
+package = fs-synapse
 extensions =
     github_actions
     markdown
diff --git a/setup.py b/setup.py
index d65a61b..af81c58 100644
--- a/setup.py
+++ b/setup.py
@@ -1,5 +1,5 @@
 """
-    Setup file for dcqc.
+    Setup file for fs-synapse.
     Use setup.cfg to configure your project.
 
     This file was generated with PyScaffold 4.3.
diff --git a/src/dcqc/__main__.py b/src/dcqc/__main__.py
deleted file mode 100644
index 458699c..0000000
--- a/src/dcqc/__main__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from dcqc.main import app
-
-app(prog_name="dcqc")
diff --git a/src/dcqc/file.py b/src/dcqc/file.py
deleted file mode 100644
index fdabcbe..0000000
--- a/src/dcqc/file.py
+++ /dev/null
@@ -1,381 +0,0 @@
-"""Represent local and remote files and their metadata.
-
-See module dcqc.target for the multi-file target class.
-
-Classes:
-
-    FileType: For collecting file type-specific information.
-    File: For bundling file location and metadata as well as
-        operations for retrieving file contents.
-""" - -from __future__ import annotations - -import os -from collections.abc import Collection, Mapping -from copy import deepcopy -from dataclasses import dataclass -from pathlib import Path -from tempfile import mkdtemp -from typing import Any, ClassVar, Optional -from warnings import warn - -from fs.base import FS - -from dcqc.mixins import SerializableMixin, SerializedObject -from dcqc.utils import is_url_local, open_parent_fs - - -@dataclass -class FileType: - """Bundle information for a given file type.""" - - _registry: ClassVar[dict[str, FileType]] - _registry = dict() - - name: str - file_extensions: tuple[str, ...] - edam_iri: Optional[str] - - def __init__( - self, - name: str, - file_extensions: Collection[str], - edam_iri: Optional[str] = None, - ): - """Construct a FileType object. - - Args: - name: File type name. - file_extensions: Valid file extensions. - edam_iri: EDAM format ontology identifier. - """ - self.name = name - self.file_extensions = tuple(file_extensions) - self.edam_iri = edam_iri - self.register_file_type() - - def register_file_type(self) -> None: - """Register instantiated file type for later retrieval. - - Raises: - ValueError: If the file type's name has already - been registered previously. - """ - name = self.name.lower() - if name in self._registry: - message = f"File type ({name}) is already registered ({self._registry})." - raise ValueError(message) - self._registry[name] = self - - @classmethod - def get_file_type(cls, file_type: str) -> FileType: - """Retrieve file type object based on its name. - - Args: - file_type: File type name. - - Raises: - ValueError: If the given file type name has - not been registered previously. - - Returns: - The file type object with the given name. - """ - file_type = file_type.lower() - if file_type not in cls._registry: - types = list(cls._registry) - message = f"File type ({file_type}) not among available options ({types})." - raise ValueError(message) - return cls._registry[file_type] - - -# TODO: These file types could be moved to an external file -# Instantiated file types are automatically tracked by the FileType class -FileType("*", ()) # To represent all file types -FileType("TXT", (".txt",), "format_1964") -FileType("TIFF", (".tif", ".tiff"), "format_3591") -FileType("OME-TIFF", (".ome.tif", ".ome.tiff"), "format_3727") - - -@dataclass -class File(SerializableMixin): - """Construct a File object. - - Args: - url: Local or remote location of a file. - metadata: File metadata. - relative_to: Used to update any local URLs if they - are relative to a directory other than the - current work directory (default). - """ - - url: str - metadata: dict[str, Any] - type: str - local_path: Optional[Path] - - def __init__( - self, - url: str, - metadata: Mapping[str, Any], - relative_to: Optional[Path] = None, - local_path: Optional[Path] = None, - ): - self.url = self._relativize_url(url, relative_to) - self.metadata = dict(metadata) - self.type = self._pop_file_type() - - self._fs: Optional[FS] - self._fs = None - self._fs_path: Optional[str] - self._fs_path = None - self._name: Optional[str] - self._name = None - - self.local_path = local_path or self._init_local_path() - - def __hash__(self): - return hash((self.url, self.type, tuple(self.metadata.items()))) - - def __eq__(self, other): - return hash(self) == hash(other) - - def _relativize_url(self, url: str, relative_to: Optional[Path]) -> str: - """Update local URLs if relative to a directory other than CWD. - - Args: - url: Local or remote location of a file. 
- relative_to: Used to update any local URLs if they - are relative to a directory other than the - current work directory (default). - - Returns: - The relativized URL. - """ - if self.is_url_local(url): - relative_to = relative_to or Path.cwd() - scheme, separator, resource = url.rpartition("://") - path = Path(resource) - if not path.is_absolute(): - resource = os.path.relpath(relative_to / resource) - url = f"{scheme}{separator}{resource}" - elif not self.is_url_local(url) and relative_to is not None: - message = f"URL ({url}) is remote. Ignoring relative_to ({relative_to})." - warn(message) - return url - - def _pop_file_type(self) -> str: - """Extract and remove file type from metadata. - - This function defaults to the generic file type - ("*") if the key is absent from the metadata. - - Returns: - The name of the file type in the metadata. - """ - file_type = self.metadata.pop("file_type", "*") - return file_type - - def _init_local_path(self) -> Optional[Path]: - """Initialize local path depending on URL - - Returns: - The local path, if applicable. - """ - if self.is_url_local(): - local_path_str = self.fs.getsyspath(self.fs_path) - local_path = Path(local_path_str) - # Use relative paths for portability (e.g., in Nextflow) - local_path = local_path.relative_to(Path.cwd()) - else: - local_path = None - return local_path - - def _init_fs(self) -> tuple[FS, str]: - """Initialize file system to access URL. - - All queries with this file system should use - `self._fs_path` as the path, not `self.url`. - - Returns: - A file system + basename pair. - """ - fs, fs_path = open_parent_fs(self.url) - self._fs_path = fs_path - self._fs = fs - return fs, fs_path - - @property - def fs(self) -> FS: - """The file system that can access the URL.""" - fs = self._fs - if fs is None: - fs, _ = self._init_fs() - return fs - - @property - def fs_path(self) -> str: - """The path that can be used with the file system.""" - fs_path = self._fs_path - if fs_path is None: - _, fs_path = self._init_fs() - return fs_path - - @property - def name(self) -> str: - """The file name according to the file system.""" - name = self._name - if name is None: - info = self.fs.getinfo(self.fs_path) - name = info.name - return name - - def get_file_type(self) -> FileType: - """Retrieve the relevant file type object. - - Returns: - FileType: File type object - """ - return FileType.get_file_type(self.type) - - def get_metadata(self, key: str) -> Any: - """Retrieve file metadata using a key. - - Args: - key: Metadata key name. - - Raises: - KeyError: If the metadata key doesn't exist. - - Returns: - The metadata value associated with the given key. - """ - if key not in self.metadata: - url = self.url - md = self.metadata - message = f"File ({url}) does not have '{key}' in its metadata ({md})." - raise KeyError(message) - return self.metadata[key] - - def is_url_local(self, url: Optional[str] = None) -> bool: - """Check whether a URL refers to a local location. - - Args: - url: Local or remote location of a file. - Defaults to URL associated with file. - - Returns: - Whether the URL refers to a local location. - """ - url = url or self.url - return is_url_local(url) - - def is_file_local(self) -> bool: - """Check if the file (or a copy) is available locally. - - Unlike :func:`~dcqc.file.File.is_url_local`, this method - considers if a locally staged copy is available regardless - of whether the URL is local or remote. - - To retrieve the location of the local copy, you can use - :func:`~dcqc.file.File.get_local_path`. 
- - Returns: - Whether the file has a copy available locally. - """ - return self.local_path is not None - - def get_local_path(self) -> Path: - """Retrieve the path of a local copy, if applicable. - - Raises: - FileNotFoundError: If there is no local copy available. - - Returns: - The path to the local copy. - """ - if self.local_path is None: - message = "Local path is unavailable. Use stage() to create a local copy." - raise FileNotFoundError(message) - return Path(self.local_path) - - def stage( - self, - destination: Optional[Path] = None, - overwrite: bool = False, - ) -> Path: - """Create local copy of local or remote file. - - A destination is not required for remote files; it - defaults to a temporary directory. - Local files aren't moved if a destination is omitted. - - Args: - destination: File or folder where to store the file. - Defaults to None. - overwrite: Whether to ignore existing file at the - target destination. Defaults to False. - - Raises: - ValueError: If the parent directory of the - destination does not exist. - FileExistsError: If the destination file already - exists and ``overwrite`` was not enabled. - - Returns: - The path of the local copy. - """ - if not destination: - if self.local_path is not None: - return self.get_local_path() - else: - destination_str = mkdtemp() - destination = Path(destination_str) - - # By this point, destination is defined (not None) - if destination.is_dir(): - destination = destination / self.name - - if not destination.parent.exists(): - dest = str(destination) - message = f"Parent folder of destination ({dest}) does not exist." - raise ValueError(message) - - if destination.exists() and not overwrite: - dest = str(destination) - message = f"Destination ({dest}) already exists. Enable overwrite." - raise FileExistsError(message) - - # By this point, the file either doesn't exist or overwrite is enabled - destination.unlink(missing_ok=True) - - if self.is_url_local(): - local_path = self.get_local_path() - destination.symlink_to(local_path.resolve()) - else: - with destination.open("wb") as dest_file: - self.fs.download(self.fs_path, dest_file) - - self.local_path = destination - return destination - - @classmethod - def from_dict(cls, dictionary: SerializedObject) -> File: - """Deserialize a dictionary into a file. - - Args: - dictionary: A serialized file object. - - Returns: - The reconstructed file object. 
- """ - dictionary = deepcopy(dictionary) - - file_type = dictionary.pop("type") - dictionary["metadata"]["file_type"] = file_type - - if dictionary["local_path"] is not None: - dictionary["local_path"] = Path(dictionary["local_path"]) - - return cls(**dictionary) diff --git a/src/dcqc/main.py b/src/dcqc/main.py deleted file mode 100644 index fb0fab6..0000000 --- a/src/dcqc/main.py +++ /dev/null @@ -1,184 +0,0 @@ -import os -import sys -from csv import DictWriter -from pathlib import Path -from typing import List - -from typer import Argument, Exit, Option, Typer - -from dcqc import __version__ -from dcqc.file import FileType -from dcqc.parsers import CsvParser, JsonParser -from dcqc.reports import JsonReport -from dcqc.suites.suite_abc import SuiteABC -from dcqc.target import Target -from dcqc.tests.test_abc import ExternalTestMixin, TestABC -from dcqc.utils import is_url_local - -# Make commands optional to allow for `dcqc --version` -app = Typer(invoke_without_command=True) - - -# Common arguments -# Distinguishing between path and path/url arguments until I/O is consistent -input_path_arg = Argument(..., help="Input file") -input_path_list_arg = Argument(..., help="List of input files") -output_arg = Argument(..., help="Path or (remote) URL for output file") -output_dir_arg = Argument(..., help="Directory path or (remote) URL for output files") -output_path_arg = Argument(..., help="Path for output file") -output_dir_path_arg = Argument(..., help="Directory path for output files") - -# Common options -overwrite_opt = Option(False, "--overwrite", "-f", help="Ignore existing files") -required_tests_opt = Option(None, "--required-tests", "-r", help="Required tests") -skipped_tests_opt = Option(None, "--skipped-tests", "-s", help="Skipped tests") - - -@app.callback() -def main(version: bool = False): - """DCQC Python Package""" - if version: - print(f"DCQC Python Package Version: {__version__}") - raise Exit() - - -@app.command() -def create_targets( - input_csv: Path = input_path_arg, - output_dir: str = output_dir_arg, - overwrite: bool = overwrite_opt, -): - """Create target JSON files from a targets CSV file""" - if is_url_local(output_dir): - _, _, resource = output_dir.rpartition("://") - os.makedirs(resource) - - parser = CsvParser(input_csv) - targets = parser.create_targets() - - # Naming the targets by index to ensure no clashes - named_targets = {f"target-{target.id}.json": target for target in targets} - - report = JsonReport() - report.save_many(named_targets, output_dir, overwrite) - - -# TODO: Add `--absolute-paths` option to avoid relative paths in JSON files -@app.command() -def stage_target( - input_json: Path = input_path_arg, - output_dir: Path = output_dir_path_arg, - overwrite: bool = overwrite_opt, -): - """Create local file copies from a target JSON file""" - output_dir.mkdir(parents=True, exist_ok=True) - - target = JsonParser.parse_object(input_json, Target) - for path in target.stage(output_dir, overwrite): - print(f"Finished staging {path!s}...") - - -@app.command() -def create_tests( - input_json: Path = input_path_arg, - output_dir: Path = output_dir_path_arg, - required_tests: List[str] = required_tests_opt, - skipped_tests: List[str] = skipped_tests_opt, - overwrite: bool = overwrite_opt, -): - """Create test JSON files from a target JSON file""" - output_dir.mkdir(parents=True, exist_ok=True) - - target = JsonParser.parse_object(input_json, Target) - suite = SuiteABC.from_target(target, required_tests, skipped_tests) - - report = JsonReport() - for test in 
suite.tests: - output_path = output_dir / f"{input_json.stem}.{test.type}.json" - output_url = output_path.as_posix() - report.save(test, output_url, overwrite) - - -@app.command() -def create_process( - input_json: Path = input_path_arg, - output_path: Path = output_path_arg, - overwrite: bool = overwrite_opt, -): - """Create external process JSON file from a test JSON file""" - output_path.parent.mkdir(parents=True, exist_ok=True) - - test = JsonParser.parse_object(input_json, ExternalTestMixin) - process = test.generate_process() - output_url = output_path.as_posix() - - report = JsonReport() - report.save(process, output_url, overwrite) - - -@app.command() -def compute_test( - input_json: Path = input_path_arg, - output_path: Path = output_path_arg, - overwrite: bool = overwrite_opt, -): - """Compute the test status from a test JSON file""" - output_path.parent.mkdir(parents=True, exist_ok=True) - - test = JsonParser.parse_object(input_json, TestABC) - test.get_status() - output_url = output_path.as_posix() - - report = JsonReport() - report.save(test, output_url, overwrite) - - -@app.command() -def create_suite( - output: str = output_arg, - input_jsons: List[Path] = input_path_list_arg, - required_tests: List[str] = required_tests_opt, - skipped_tests: List[str] = skipped_tests_opt, - overwrite: bool = overwrite_opt, -): - """Create a suite from a set of test JSON files sharing the same target""" - tests = [JsonParser.parse_object(test_json, TestABC) for test_json in input_jsons] - suite = SuiteABC.from_tests(tests, required_tests, skipped_tests) - report = JsonReport() - report.save(suite, output, overwrite) - - -@app.command() -def combine_suites( - output: str = output_arg, - input_jsons: List[Path] = input_path_list_arg, - overwrite: bool = overwrite_opt, -): - """Combine several suite JSON files into a single JSON report""" - suites = [JsonParser.parse_object(json_, SuiteABC) for json_ in input_jsons] - report = JsonReport() - report.save(suites, output, overwrite) - - -@app.command() -def list_tests(): - """List the tests available for each file type""" - test_classes_by_file_type = SuiteABC.list_test_classes_by_file_type() - - rows = list() - for file_type_name, test_classes in test_classes_by_file_type.items(): - file_type = FileType.get_file_type(file_type_name) - for test_cls in test_classes: - test_dict = { - "file_type": file_type_name, - "edam_iri": file_type.edam_iri, - "test_name": test_cls.__name__, - "test_tier": test_cls.tier, - "test_type": "external" if test_cls.is_external_test else "internal", - } - rows.append(test_dict) - - fieldnames = list(rows[0]) - writer = DictWriter(sys.stdout, fieldnames) - writer.writeheader() - writer.writerows(rows) diff --git a/src/dcqc/mixins.py b/src/dcqc/mixins.py deleted file mode 100644 index fcb6fc7..0000000 --- a/src/dcqc/mixins.py +++ /dev/null @@ -1,79 +0,0 @@ -from __future__ import annotations - -from abc import ABC, abstractmethod -from dataclasses import asdict -from pathlib import PurePath -from typing import Any, TypeVar, cast - -SerializedObject = dict[str, Any] - -T = TypeVar("T", bound="SerializableMixin") - - -class SerializableMixin(ABC): - @classmethod - def from_dict_prepare(cls, dictionary: SerializedObject) -> SerializedObject: - """Validate and prepare dictionary for deserialization.""" - type_ = dictionary.pop("type") - if type_ != cls.__name__: - message = f"Type ({type_}) does not match the class ({cls.__name__})." 
- raise ValueError(message) - return dictionary - - @staticmethod - def dict_factory(iterable: list[tuple[str, Any]]) -> dict[str, Any]: - """Generate dictionary from dataclass. - - Unlike the built-in version, this function will - handle Path objects. This assumes that the OS - will not change between the serialization and - deserialization steps. - - Args: - iterable: List of attribute name-value pairs. - - Returns: - Dictionary of JSON-serializable attributes. - """ - # Ensure that all values are JSON-serializable - kwargs = {} - for key, value in iterable: - if isinstance(value, PurePath): - kwargs[key] = str(value) - else: - kwargs[key] = value - - return dict(**kwargs) - - def to_dict(self) -> SerializedObject: - """Serialize the file to a dictionary. - - Returns: - A file serialized as a dictionary. - """ - return asdict(self, dict_factory=self.dict_factory) - - @classmethod - @abstractmethod - def from_dict(cls, dictionary: SerializedObject) -> SerializableMixin: - """Deserialize a dictionary into a SerializableMixin object. - - Args: - dictionary: A serialized object. - - Returns: - The reconstructed object. - """ - - def copy(self: T) -> T: - """Create a copy of a serializable object. - - Returns: - A copied object. - """ - dictionary = self.to_dict() - copy = self.from_dict(dictionary) - # Required to prevent this mypy error: - # Incompatible return value type (got "SerializableMixin", expected "T") - copy = cast(T, copy) - return copy diff --git a/src/dcqc/parsers.py b/src/dcqc/parsers.py deleted file mode 100644 index 1a732d4..0000000 --- a/src/dcqc/parsers.py +++ /dev/null @@ -1,137 +0,0 @@ -import csv -import json -from collections.abc import Collection, Iterator -from pathlib import Path -from typing import Any, Optional, Type, TypeVar, cast - -from dcqc.file import File -from dcqc.mixins import SerializableMixin -from dcqc.suites.suite_abc import SuiteABC -from dcqc.target import Target -from dcqc.tests.test_abc import TestABC - -# For context on TypeVar, check out this GitHub PR comment: -# https://github.com/Sage-Bionetworks-Workflows/py-dcqc/pull/8#discussion_r1087141497 -T = TypeVar("T", bound=SerializableMixin) - - -# TODO: Add support for URLs instead of paths -# TODO: Add support for a `unique_id` column -class CsvParser: - path: Path - stage_files: bool - - def __init__(self, path: Path, stage_files: bool = False): - self.path = path - self.stage_files = stage_files - - def list_rows(self) -> Iterator[tuple[int, dict]]: - with self.path.open(newline="") as file: - reader = csv.DictReader(file) - for index, row in enumerate(reader, start=1): - yield index, row - - def _row_to_file(self, row: dict[str, str]) -> File: - url = row.pop("url") - file = File(url, row, relative_to=self.path.parent) - return file - - def create_files(self) -> Iterator[tuple[int, File]]: - for index, row in self.list_rows(): - file = self._row_to_file(row) - if not file.is_file_local() and self.stage_files: - destination = self.path.parent / "staged_files" / f"index_{index}" - destination.mkdir(parents=True, exist_ok=True) - file.stage(destination, overwrite=True) - yield index, file - - def create_targets(self, stage_files: bool = True) -> Iterator[Target]: - for index, file in self.create_files(): - if stage_files: - file.stage() - yield Target(file, id=f"{index:04}") - - def create_suites( - self, - required_tests: Optional[Collection[str]] = None, - skipped_tests: Optional[Collection[str]] = None, - stage_files: bool = True, - ) -> Iterator[SuiteABC]: - for target in 
self.create_targets(stage_files): - yield SuiteABC.from_target(target, required_tests, skipped_tests) - - -class JsonParser: - path: Path - - def __init__(self, path: Path): - self.path = path - - def load_json(self) -> Any: - with self.path.open("r") as infile: - contents = json.load(infile) - return contents - - def check_expected_cls(self, instance: Any, expected_cls: Type[T]) -> T: - if not isinstance(instance, expected_cls): - cls_name = expected_cls.__name__ - message = f"JSON file ({self.path!s}) is not expected type ({cls_name})." - raise ValueError(message) - instance = cast(T, instance) - return instance - - @classmethod - def get_class(cls, cls_name: str) -> Type[SerializableMixin]: - test_classes = TestABC.list_subclasses() - test_cls_map = {cls.__name__: cls for cls in test_classes} - - suite_classes = SuiteABC.list_subclasses() - suite_cls_map = {cls.__name__: cls for cls in suite_classes} - - if cls_name == "File": - return File - elif cls_name == "Target": - return Target - elif cls_name in test_cls_map: - return test_cls_map[cls_name] - elif cls_name in suite_cls_map: - return suite_cls_map[cls_name] - else: - message = f"Type ({cls_name}) is not recognized." - raise ValueError(message) - - @classmethod - def from_dict(cls, dictionary) -> SerializableMixin: - if "type" not in dictionary: - message = f"Cannot parse JSON object due to missing type ({dictionary})." - raise ValueError(message) - type_name = dictionary["type"] - type_class = cls.get_class(type_name) - object_ = type_class.from_dict(dictionary) - return object_ - - @classmethod - def parse_object(cls, path: Path, expected_cls: Type[T]) -> T: - parser = cls(path) - contents = parser.load_json() - - if isinstance(contents, list): - message = f"JSON file ({parser.path}) contains a list of objects." - raise ValueError(message) - - object_ = cls.from_dict(contents) - expected = parser.check_expected_cls(object_, expected_cls) - return expected - - @classmethod - def parse_objects(cls, path: Path, expected_cls: Type[T]) -> list[T]: - parser = cls(path) - contents = parser.load_json() - - if not isinstance(contents, list): - message = f"JSON file ({cls.path}) does not contain a list of objects." 
- raise ValueError(message) - - objects = [cls.from_dict(dictionary) for dictionary in contents] - expected = [parser.check_expected_cls(obj, expected_cls) for obj in objects] - return expected diff --git a/src/dcqc/reports.py b/src/dcqc/reports.py deleted file mode 100644 index 829c460..0000000 --- a/src/dcqc/reports.py +++ /dev/null @@ -1,96 +0,0 @@ -import json -from collections.abc import Iterable, Mapping -from typing import Any, Optional, overload - -from fs.base import FS -from fs.errors import ResourceNotFound - -from dcqc.mixins import SerializableMixin, SerializedObject -from dcqc.utils import open_parent_fs - - -# TODO: Refactor instance methods to class methods -class JsonReport: - def __init__(self) -> None: - self._url: Optional[str] = None - self._fs: Optional[FS] = None - self._fs_path: Optional[str] = None - - def _init_fs(self, url) -> tuple[FS, str]: - if self._url != url or self._fs is None or self._fs_path is None: - self._url = url - self._fs, self._fs_path = open_parent_fs(url) - return self._fs, self._fs_path - - def _create_parent_directories(self, url: str): - scheme, separator, resource = url.rpartition("://") - parent_resource, _, _ = resource.rpartition("/") - parent_url = f"{scheme}{separator}{parent_resource}" - fs, fs_path = self._init_fs(parent_url) - try: - info = fs.getinfo(fs_path) - except ResourceNotFound: - fs.makedirs(fs_path, recreate=True) - info = fs.getinfo(fs_path) - if not info.is_dir: - message = f"Parent URL ({url}) does not refer to a directory." - raise NotADirectoryError(message) - - def to_file(self, obj: Any, url: str, overwrite: bool): - fs, fs_path = self._init_fs(url) - self._create_parent_directories(url) - if fs.exists(fs_path) and not overwrite: - message = f"URL ({url}) already exists. Enable `overwrite` to ignore." - raise FileExistsError(message) - # TODO: Implement custom serializer that handles Paths - # (e.g., relativize them based on output JSON path) - with fs.open(fs_path, "w") as outfile: - json.dump(obj, outfile, indent=2) - - # The overloads are necessary to convey the relationship between - # the inputs and outputs: single to single, and many to many. - @overload - def generate(self, items: SerializableMixin) -> SerializedObject: - ... - - @overload - def generate(self, items: Iterable[SerializableMixin]) -> list[SerializedObject]: - ... - - def generate(self, items): - if isinstance(items, Iterable): - report = [item.to_dict() for item in items] - else: - report = items.to_dict() - return report - - @overload - def save( - self, items: SerializableMixin, url: str, overwrite: bool = False - ) -> SerializedObject: - ... - - @overload - def save( - self, items: Iterable[SerializableMixin], url: str, overwrite: bool = False - ) -> list[SerializedObject]: - ... 
- - def save(self, items, url: str, overwrite: bool = False): - report = self.generate(items) - self.to_file(report, url, overwrite) - return report - - def save_many( - self, - named_items: Mapping[str, SerializableMixin], - parent_url: str, - overwrite: bool = False, - ) -> dict[str, SerializedObject]: - reports = dict() - for name, item in named_items.items(): - report = self.generate(item) - reports[name] = report - report_url = f"{parent_url}/{name}" - self.to_file(report, report_url, overwrite) - return reports diff --git a/src/dcqc/suites/__init__.py b/src/dcqc/suites/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/dcqc/suites/suite_abc.py b/src/dcqc/suites/suite_abc.py deleted file mode 100644 index 295f260..0000000 --- a/src/dcqc/suites/suite_abc.py +++ /dev/null @@ -1,290 +0,0 @@ -from __future__ import annotations - -from abc import ABC -from collections.abc import Collection, Sequence -from copy import deepcopy -from itertools import chain -from typing import ClassVar, Optional, Type, Union - -from dcqc.file import FileType -from dcqc.mixins import SerializableMixin, SerializedObject -from dcqc.target import Target -from dcqc.tests.test_abc import TestABC, TestStatus - - -# TODO: Consider the Composite design pattern once -# we have higher-level QC suites -class SuiteABC(SerializableMixin, ABC): - """Abstract base class for QC test suites. - - Args: - target (Target): Single- or multi-file target. - required_tests (Optional[Collection[str]]): - List of tests that must pass for the - overall suite to pass. Defaults to None, - which requires tier-1 and tier-2 tests. - skipped_tests (Optional[Collection[str]]): - List of tests that should not be - evaluated. Defaults to None. - """ - - # Class attributes - file_type: ClassVar[FileType] - add_tests: ClassVar[tuple[Type[TestABC], ...]] - del_tests: ClassVar[tuple[Type[TestABC], ...]] - - # Instance attributes - type: str - target: Target - required_tests: set[str] - skipped_tests: set[str] - - def __init__( - self, - target: Target, - required_tests: Optional[Collection[str]] = None, - skipped_tests: Optional[Collection[str]] = None, - ): - self.type = self.__class__.__name__ - self.target = target - - test_classes = self.list_test_classes() - test_names = set(test.__name__ for test in test_classes) - - required_tests = required_tests or self._default_required_tests() - self.required_tests = set(required_tests).intersection(test_names) - - skipped_tests = skipped_tests or list() - self.skipped_tests = set(skipped_tests).intersection(test_names) - - self.tests = self.init_test_classes() - self._status = TestStatus.NONE - - @classmethod - def from_target( - cls, - target: Target, - required_tests: Optional[Collection[str]] = None, - skipped_tests: Optional[Collection[str]] = None, - ) -> SuiteABC: - """Generate a suite from a target. - - The suite is selected based on the target file type. - - Args: - target: A QC target. - required_tests: List of requires tests. - Defaults to None, which requires tier-1 - and tier-2 tests. - skipped_tests: List of skipped tests. - Defaults to None. - - Returns: - SuiteABC: An initialized test suite. 
- """ - file_type = target.get_file_type() - suite_cls = SuiteABC.get_subclass_by_file_type(file_type) - suite = suite_cls(target, required_tests, skipped_tests) - return suite - - @classmethod - def from_tests( - cls, - tests: Sequence[TestABC], - required_tests: Optional[Collection[str]] = None, - skipped_tests: Optional[Collection[str]] = None, - ) -> SuiteABC: - """Generate a suite from a set of tests. - - The tests must all have the same target. - - Args: - tests: Set of tests with the same target. - required_tests: List of requires tests. - Defaults to None, which requires tier-1 - and tier-2 tests. - skipped_tests: List of skipped tests. - Defaults to None. - - Returns: - SuiteABC: An initialized test suite. - """ - targets = list() - suite_tests = list() - skipped_tests = skipped_tests or list() - skipped_tests = set(skipped_tests) - for test in tests: - test_copy = test.copy() - if test_copy.type in skipped_tests: - test_copy.skip() - targets.append(test_copy.target) - suite_tests.append(test_copy) - - representative_target = targets[0] - if not all(representative_target == target for target in targets): - message = f"Not all tests refer to the same target ({targets})." - raise ValueError(message) - suite = cls.from_target(representative_target, required_tests, skipped_tests) - suite.tests = suite_tests - - return suite - - @classmethod - def list_test_classes(cls) -> tuple[Type[TestABC], ...]: - """List all applicable test classes""" - all_tests: set[Type[TestABC]] - all_tests = set() - - superclasses = cls.__mro__ - for cls in reversed(superclasses): # Start from the base class - if hasattr(cls, "add_tests"): - add_tests = set(cls.add_tests) # type: ignore - all_tests.update(add_tests) - if hasattr(cls, "del_tests"): - del_tests = set(cls.del_tests) # type: ignore - all_tests.difference_update(del_tests) - - return tuple(all_tests) - - @classmethod - def list_test_classes_by_file_type(cls) -> dict[str, list[Type[TestABC]]]: - """List test classes by file type.""" - result = dict() - suite_classes = cls.list_subclasses() - for suite_cls in suite_classes: - file_type = suite_cls.file_type.name - test_classes = suite_cls.list_test_classes() - result[file_type] = list(test_classes) - return result - - @classmethod - def _default_required_tests(cls) -> list[str]: - test_classes = cls.list_test_classes() - required_tests = filter(lambda test: test.tier <= 2, test_classes) - required_test_names = [test.__name__ for test in required_tests] - return required_test_names - - def init_test_classes(self) -> list[TestABC]: - """Initialize applicable test classes with target.""" - test_classes = self.list_test_classes() - tests = [] - for test_cls in test_classes: - test_name = test_cls.__name__ - skip = test_name in self.skipped_tests - test = test_cls(self.target, skip) - tests.append(test) - return tests - - @classmethod - def list_subclasses(cls) -> tuple[Type[SuiteABC], ...]: - """List all subclasses.""" - subclasses: list[Type[SuiteABC]] - subclasses = cls.__subclasses__() - - subsubclasses_list = [subcls.list_subclasses() for subcls in subclasses] - subclasses_chain = chain(subclasses, *subsubclasses_list) - all_subclasses = tuple(dict.fromkeys(subclasses_chain)) - return all_subclasses - - @classmethod - def get_subclass_by_name(cls, name: str) -> Type[SuiteABC]: - """Retrieve a subclass by name.""" - subclasses = cls.list_subclasses() - registry = {subcls.__name__: subcls for subcls in subclasses} - if name not in registry: - options = list(registry) - message = f"Suite ({name}) not 
available ({options})." - raise ValueError(message) - return registry[name] - - @classmethod - def get_subclass_by_file_type( - cls, file_type: Union[str, FileType] - ) -> Type[SuiteABC]: - """Retrieve a subclass by file type.""" - if isinstance(file_type, str): - try: - file_type = FileType.get_file_type(file_type) - except ValueError: - file_type = FileType.get_file_type("*") - name = file_type.name - subclasses = cls.list_subclasses() - registry = {subcls.file_type.name: subcls for subcls in subclasses} - if name not in registry: - # TODO: This might have to be changed if we introduce - # composite file types (e.g., BAM/BAI file pair) - return registry["*"] - return registry[name] - - def compute_tests(self) -> None: - """Compute the status for each initialized test.""" - for test in self.tests: - test.get_status() - - def compute_status(self) -> TestStatus: - """Compute the overall suite status.""" - self.compute_tests() - if self._status is not TestStatus.NONE: - return self._status - for test in self.tests: - test_name = test.type - if test_name not in self.required_tests: - continue - test_status = test.get_status() - self._status = test_status - if self._status == TestStatus.FAIL: - break - return self._status - - def to_dict(self) -> SerializedObject: - suite_status = self.compute_status() - test_dicts = [] - for test in self.tests: - test_dict = test.to_dict() - test_dict.pop("target", None) # Remove redundant `target` info - test_dicts.append(test_dict) - suite_dict = { - "type": self.type, - "target": self.target.to_dict(), - "suite_status": { - "required_tests": list(self.required_tests), - "skipped_tests": list(self.skipped_tests), - "status": suite_status.value, - }, - "tests": test_dicts, - } - return suite_dict - - @classmethod - def from_dict(cls, dictionary: SerializedObject) -> SuiteABC: - """Deserialize a dictionary into a suite. - - Args: - dictionary: A serialized suite object. - - Returns: - The reconstructed suite object. 
- """ - dictionary = deepcopy(dictionary) - - suite_cls_name = dictionary["type"] - suite_cls = SuiteABC.get_subclass_by_name(suite_cls_name) - - target_dict = dictionary["target"] - target = Target.from_dict(target_dict) - - required_tests = dictionary["suite_status"]["required_tests"] - skipped_tests = dictionary["suite_status"]["skipped_tests"] - suite = suite_cls(target, required_tests, skipped_tests) - - suite_status = TestStatus(dictionary["suite_status"]["status"]) - suite._status = suite_status - - tests = list() - for test_dict in dictionary["tests"]: - test_dict["target"] = target_dict - test = TestABC.from_dict(test_dict) - tests.append(test) - suite.tests = tests - - return suite diff --git a/src/dcqc/suites/suites.py b/src/dcqc/suites/suites.py deleted file mode 100644 index 1d6f099..0000000 --- a/src/dcqc/suites/suites.py +++ /dev/null @@ -1,20 +0,0 @@ -from dcqc.file import FileType -from dcqc.suites.suite_abc import SuiteABC -from dcqc.tests import tests - - -# TODO: Consider moving the filetype-test association logic -# to the file types -class FileSuite(SuiteABC): - file_type = FileType.get_file_type("*") - add_tests = (tests.FileExtensionTest, tests.Md5ChecksumTest) - - -class TiffSuite(FileSuite): - file_type = FileType.get_file_type("TIFF") - add_tests = (tests.LibTiffInfoTest,) - - -class OmeTiffSuite(TiffSuite): - file_type = FileType.get_file_type("OME-TIFF") - add_tests = (tests.OmeXmlSchemaTest, tests.BioFormatsInfoTest) diff --git a/src/dcqc/target.py b/src/dcqc/target.py deleted file mode 100644 index 1fa95e9..0000000 --- a/src/dcqc/target.py +++ /dev/null @@ -1,92 +0,0 @@ -from __future__ import annotations - -from copy import deepcopy -from dataclasses import dataclass -from functools import wraps -from pathlib import Path -from typing import Iterator, Optional - -from dcqc.file import File, FileType -from dcqc.mixins import SerializableMixin, SerializedObject - - -# TODO: Eventually, there might be target-specific metadata -# TODO: Now that Target is much simpler, it might make sense -# to rename the class to FileSet since it currently -# really is just a wrapper for a group of files -# TODO: Maybe the Composite pattern would work here? -@dataclass -class Target(SerializableMixin): - """Construct a multi-file Target. - - Targets ensure support for both single-file - and multi-file tests. - - Args: - *files: Sequence of files objects. - id: A unique identifier for the target. - Defaults to None. - """ - - type: str - id: Optional[str] - files: list[File] - - def __init__(self, *files: File, id: Optional[str] = None): - self.type = self.__class__.__name__ - self.files = list(files) - self.id = id - - def __hash__(self): - return hash(tuple(self.files)) - - def __eq__(self, other): - return hash(self) == hash(other) - - def get_file_type(self) -> FileType: - """Retrieve the file type for the target. - - This function currently only supports targets - composed of a single file. - - Raises: - NotImplementedError: If the target has - more or less than one file. - - Returns: - The file type object. - """ - num_files = len(self.files) - if num_files == 1: - file = self.files[0] - file_type = file.get_file_type() - else: - message = f"Target has {num_files} files, which isn't supported yet." 
- raise NotImplementedError(message) - return file_type - - @wraps(File.stage) - def stage( - self, - destination: Optional[Path] = None, - overwrite: bool = False, - ) -> Iterator[Path]: - for file in self.files: - yield file.stage(destination, overwrite) - - @classmethod - def from_dict(cls, dictionary: SerializedObject) -> Target: - """Deserialize a dictionary into a target. - - Args: - dictionary: A serialized target object. - - Returns: - The reconstructed target object. - """ - dictionary = deepcopy(dictionary) - dictionary = cls.from_dict_prepare(dictionary) - files = [File.from_dict(d) for d in dictionary["files"]] - id = dictionary["id"] - target = cls(*files, id=id) - return target diff --git a/src/dcqc/tests/__init__.py b/src/dcqc/tests/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/dcqc/tests/test_abc.py b/src/dcqc/tests/test_abc.py deleted file mode 100644 index 161218b..0000000 --- a/src/dcqc/tests/test_abc.py +++ /dev/null @@ -1,215 +0,0 @@ -from __future__ import annotations - -import shlex -from abc import ABC, abstractmethod -from collections.abc import Sequence -from dataclasses import dataclass -from enum import Enum -from pathlib import Path -from typing import ClassVar, Optional, Type - -from dcqc.file import File -from dcqc.mixins import SerializableMixin, SerializedObject -from dcqc.target import Target - - -class TestStatus(Enum): - NONE = "pending" - FAIL = "failed" - PASS = "passed" - SKIP = "skipped" - - -# TODO: Look into the @typing.final decorator -class TestABC(SerializableMixin, ABC): - """Abstract base class for QC tests. - - Args: - target (Target): Single- or multi-file target. - skip (bool, optional): Whether to skip this test, - resulting in ``TestStatus.SKIP`` as status. - Defaults to False. - - Raises: - ValueError: If the test expects a single file and - the given target features multiple files. - """ - - # Class attributes - tier: ClassVar[int] - is_external_test: ClassVar[bool] - is_external_test = False - only_one_file_targets: ClassVar[bool] - only_one_file_targets = True - - # Instance attributes - type: str - target: Target - - def __init__(self, target: Target, skip: bool = False): - self.type = self.__class__.__name__ - self.target = target - self._status = TestStatus.SKIP if skip else TestStatus.NONE - - files = self.target.files - if self.only_one_file_targets and len(files) > 1: - message = f"Test ({self.type}) expected one file, not multiple ({files})." - raise ValueError(message) - - def skip(self): - """Force the test to be skipped.""" - self._status = TestStatus.SKIP - - def get_status(self) -> TestStatus: - """Compute (if applicable) and return the test status.""" - if self._status == TestStatus.NONE: - self._status = self.compute_status() - return self._status - - def _get_single_target_file(self) -> File: - files = self.target.files - return files[0] - - @classmethod - def get_subclass_by_name(cls, test: str) -> Type[TestABC]: - """Retrieve subclass by name.""" - test_classes = TestABC.__subclasses__() - registry = {test_class.__name__: test_class for test_class in test_classes} - if test not in registry: - test_names = list(registry) - message = f"Test ({test}) not among available options ({test_names})." 
- raise ValueError(message) - return registry[test] - - @classmethod - def list_subclasses(cls) -> list[Type[TestABC]]: - """List all subclasses.""" - test_classes = TestABC.__subclasses__() - return test_classes - - @abstractmethod - def compute_status(self) -> TestStatus: - """Compute the status of the test.""" - - def to_dict(self) -> SerializedObject: - test_dict = { - "type": self.type, - "status": self._status.value, - "target": self.target.to_dict(), - "tier": self.tier, - "is_external_test": self.is_external_test, - } - return test_dict - - @classmethod - def from_dict(cls, dictionary: SerializedObject) -> TestABC: - """Deserialize a dictionary into a test. - - Args: - dictionary: A serialized test object. - - Returns: - The reconstructed test object. - """ - test_cls_name = dictionary.pop("type") - test_cls = cls.get_subclass_by_name(test_cls_name) - - target_dict = dictionary["target"] - target = Target.from_dict(target_dict) - - test = test_cls(target) - - status = TestStatus(dictionary["status"]) - test._status = status - - return test - - -@dataclass -class Process(SerializableMixin): - container: str - command_args: Sequence[str] - cpus: int = 1 - memory: int = 2 # In GB - - def get_command(self) -> str: - return shlex.join(self.command_args) - - def to_dict(self): - dictionary = super(Process, self).to_dict() - del dictionary["command_args"] - dictionary["command"] = self.get_command() - return dictionary - - @classmethod - def from_dict(cls, dictionary: SerializedObject) -> Process: - """Deserialize a dictionary into a process. - - Args: - dictionary: A serialized process object. - - Returns: - The reconstructed process object. - """ - command = dictionary.pop("command") - command_args = shlex.split(command) - dictionary["command_args"] = command_args - process = cls(**dictionary) - return process - - -class ExternalTestMixin(TestABC): - # Class attributes - is_external_test = True - - # Class constants - STDOUT_PATH: ClassVar[Path] - STDOUT_PATH = Path("std_out.txt") - STDERR_PATH: ClassVar[Path] - STDERR_PATH = Path("std_err.txt") - EXITCODE_PATH: ClassVar[Path] - EXITCODE_PATH = Path("exit_code.txt") - - def compute_status(self) -> TestStatus: - """Compute the status of the test.""" - outputs = self._find_process_outputs() - return self._interpret_process_outputs(outputs) - - @abstractmethod - def generate_process(self) -> Process: - """Generate the process that needs to be run.""" - - @classmethod - def _find_process_outputs( - cls, search_dir: Optional[Path] = None - ) -> dict[str, Path]: - """Locate the output files from the executed process.""" - search_dir = search_dir or Path(".") - outputs = { - "std_out": search_dir / cls.STDOUT_PATH, - "std_err": search_dir / cls.STDERR_PATH, - "exit_code": search_dir / cls.EXITCODE_PATH, - } - - for path in outputs.values(): - if not path.exists(): - message = f"Expected process output ({path}) does not exist."
- raise FileNotFoundError(message) - return outputs - - def _interpret_process_outputs(self, outputs: dict[str, Path]) -> TestStatus: - """Interpret the process output files to yield a test status.""" - exit_code = outputs["exit_code"].read_text() - exit_code = exit_code.strip() - if exit_code == "0": - status = TestStatus.PASS - else: - status = TestStatus.FAIL - return status - - # TODO: Include process in dict (add `to_dict()` to Process class) - # def to_dict(self): - # dictionary = super(ExternalTestMixin, self).to_dict() - # process = self.generate_process() - # dictionary["process"] = process - # return dictionary diff --git a/src/dcqc/tests/tests.py b/src/dcqc/tests/tests.py deleted file mode 100644 index f205ee3..0000000 --- a/src/dcqc/tests/tests.py +++ /dev/null @@ -1,98 +0,0 @@ -import hashlib - -from dcqc.file import File -from dcqc.tests.test_abc import ExternalTestMixin, Process, TestABC, TestStatus - - -class FileExtensionTest(TestABC): - tier = 1 - only_one_file_targets = False - - def compute_status(self) -> TestStatus: - status = TestStatus.PASS - for file in self.target.files: - file_type = file.get_file_type() - file_extensions = file_type.file_extensions - if not file.url.endswith(file_extensions): - status = TestStatus.FAIL - break - return status - - -class Md5ChecksumTest(TestABC): - tier = 1 - only_one_file_targets = False - - def compute_status(self) -> TestStatus: - status = TestStatus.PASS - for file in self.target.files: - expected_md5 = file.get_metadata("md5_checksum") - actual_md5 = self._compute_md5_checksum(file) - if expected_md5 != actual_md5: - status = TestStatus.FAIL - break - return status - - def _compute_md5_checksum(self, file: File) -> str: - local_path = file.get_local_path() - hash_md5 = hashlib.md5() - with local_path.open("rb") as f: - for chunk in iter(lambda: f.read(4096), b""): - hash_md5.update(chunk) - actual_md5 = hash_md5.hexdigest() - return actual_md5 - - -class LibTiffInfoTest(ExternalTestMixin, TestABC): - tier = 2 - - def generate_process(self) -> Process: - file = self._get_single_target_file() - path = file.get_local_path().as_posix() - command_args = ["tiffinfo", path] - process = Process( - container="autamus/libtiff:4.4.0", - command_args=command_args, - ) - return process - - -class BioFormatsInfoTest(ExternalTestMixin, TestABC): - tier = 2 - - def generate_process(self) -> Process: - file = self._get_single_target_file() - path = file.get_local_path().as_posix() - command_args = [ - 'export PATH="$PATH:/opt/bftools"', - ";", - "showinf", - "-nopix", - "-novalid", - "-nocore", - path, - ] - process = Process( - container="openmicroscopy/bftools:latest", - command_args=command_args, - ) - return process - - -class OmeXmlSchemaTest(ExternalTestMixin, TestABC): - tier = 2 - - def generate_process(self) -> Process: - file = self._get_single_target_file() - path = file.get_local_path().as_posix() - command_args = [ - 'export PATH="$PATH:/opt/bftools"', - ";", - "xmlvalid", - path, - ] - process = Process( - container="openmicroscopy/bftools:latest", - command_args=command_args, - ) - return process diff --git a/src/dcqc/__init__.py b/src/synapsefs/__init__.py similarity index 50% rename from src/dcqc/__init__.py rename to src/synapsefs/__init__.py index a501334..898020f 100644 --- a/src/dcqc/__init__.py +++ b/src/synapsefs/__init__.py @@ -1,6 +1,4 @@ -"""Top-level dcqc module.""" - -# isort: skip_file +"""Top-level fs-synapse module.""" from importlib.metadata import PackageNotFoundError, version # pragma: no cover @@ -14,19 
+12,11 @@ import logging -from fs.opener import registry - -# Import suites to ensure that they are defined and thus discoverable -# It is located here to avoid a circular import -from dcqc.suites import suite_abc -from dcqc.suites import suites - -from dcqc.filesystems.openers import SynapseFSOpener -from dcqc.filesystems.synapsefs import SynapseFS +from synapsefs.open_parent_fs import open_parent_fs +from synapsefs.synapsefs import SynapseFS # Set default logging handler to avoid "No handler found" warnings logging.getLogger(__name__).addHandler(logging.NullHandler()) logging.captureWarnings(True) -# Register PyFileSystem SynapseFS opener -registry.install(SynapseFSOpener) +__all__ = ["SynapseFS", "open_parent_fs"] diff --git a/src/dcqc/utils.py b/src/synapsefs/open_parent_fs.py similarity index 70% rename from src/dcqc/utils.py rename to src/synapsefs/open_parent_fs.py index ca26689..273cd05 100644 --- a/src/dcqc/utils.py +++ b/src/synapsefs/open_parent_fs.py @@ -1,23 +1,9 @@ -import re - from fs import open_fs from fs.base import FS -LOCAL_URL_REGEX = re.compile(r"((file|osfs)://)?/?[^:]+") - - -def is_url_local(url: str) -> bool: - """Check whether a URL refers to a local location. - - Args: - url: Local or remote location of a file. - - Returns: - Whether the URL refers to a local location. - """ - return LOCAL_URL_REGEX.fullmatch(url) is not None - +# TODO: Switch to using parse_fs_url() and register.open() +# https://github.com/PyFilesystem/pyfilesystem2/pull/561#issuecomment-1407436951 def open_parent_fs(url: str) -> tuple[FS, str]: # Split off prefix to avoid issues with `rpartition("/")` scheme, separator, resource = url.rpartition("://") diff --git a/src/dcqc/filesystems/openers.py b/src/synapsefs/opener.py similarity index 90% rename from src/dcqc/filesystems/openers.py rename to src/synapsefs/opener.py index 92d8e71..d0d761e 100644 --- a/src/dcqc/filesystems/openers.py +++ b/src/synapsefs/opener.py @@ -3,7 +3,7 @@ from fs.opener import Opener from fs.opener.parse import ParseResult -from dcqc.filesystems.synapsefs import SynapseFS +from synapsefs.synapsefs import SynapseFS class SynapseFSOpener(Opener): diff --git a/src/dcqc/filesystems/remote_file.py b/src/synapsefs/remote_file.py similarity index 100% rename from src/dcqc/filesystems/remote_file.py rename to src/synapsefs/remote_file.py diff --git a/src/dcqc/filesystems/synapsefs.py b/src/synapsefs/synapsefs.py similarity index 99% rename from src/dcqc/filesystems/synapsefs.py rename to src/synapsefs/synapsefs.py index 15e6d58..1e7b97d 100644 --- a/src/dcqc/filesystems/synapsefs.py +++ b/src/synapsefs/synapsefs.py @@ -33,7 +33,7 @@ from synapseclient.core.utils import iso_to_datetime from synapseclient.entity import Entity, File, Folder, Project, is_container -from dcqc.filesystems.remote_file import RemoteFile +from synapsefs.remote_file import RemoteFile RawInfo = Mapping[str, Mapping[str, object]] diff --git a/tests/conftest.py b/tests/conftest.py index 7bcc649..6447d3a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,27 +1,12 @@ -""" - Dummy conftest.py for dcqc. - - If you don't know what this is for, just leave it empty. 
- Read more about conftest.py under: - - https://docs.pytest.org/en/stable/fixture.html - - https://docs.pytest.org/en/stable/writing_plugins.html -""" +"""conftest.py for fs-synapse.""" from datetime import datetime from getpass import getuser -from pathlib import Path from uuid import uuid4 import pytest -from dcqc.file import File - -CNFPATH = Path(__file__).resolve() -TESTDIR = CNFPATH.parent -DATADIR = TESTDIR / "data" -OUTDIR = TESTDIR / "outputs" - -OUTDIR.mkdir(exist_ok=True) +from synapsefs import SynapseFS UUID = str(uuid4()) USER = getuser() @@ -29,59 +14,10 @@ RUNID = f"{USER} - {UTCTIME} - {UUID}" # Valid characters: [A-Za-z0-9 .+'()_-] -# Track the list of output files to avoid clashes between tests -outputs = set() - - def pytest_configure(): pytest.RUNID = RUNID # type: ignore -@pytest.fixture -def get_data(): - def _get_data(filename: str) -> Path: - path = DATADIR / filename - if not path.exists(): - raise ValueError(f"Path ({path}) does not exist.") - return path - - yield _get_data - - -@pytest.fixture -def test_files(get_data): - txt_path = get_data("test.txt").as_posix() - tiff_path = get_data("circuit.tif").as_posix() - syn_path = "syn://syn50555279" - good_metadata = { - "file_type": "txt", - "md5_checksum": "14758f1afd44c09b7992073ccf00b43d", - } - bad_metadata = { - "file_type": "tiff", - "md5_checksum": "definitelynottherightmd5checksum", - } - tiff_metadata = { - "file_type": "tiff", - "md5_checksum": "c7b08f6decb5e7572efbe6074926a843", - } - test_files = { - "good": File(txt_path, good_metadata), - "bad": File(txt_path, bad_metadata), - "tiff": File(tiff_path, tiff_metadata), - "synapse": File(syn_path, good_metadata), - } - yield test_files - - -@pytest.fixture -def get_output(): - def _get_output(filename: str) -> Path: - output = OUTDIR / filename - if output in outputs: - message = f"Output ({output}) has already been used in another test." 
- raise ValueError(message) - outputs.add(output) - return output - - yield _get_output +@pytest.fixture(scope="session") +def synapse_fs(): + yield SynapseFS() diff --git a/tests/data/circuit.tif b/tests/data/circuit.tif deleted file mode 100644 index 88a7bfa..0000000 Binary files a/tests/data/circuit.tif and /dev/null differ diff --git a/tests/data/files.csv b/tests/data/files.csv deleted file mode 100644 index 8c6f51e..0000000 --- a/tests/data/files.csv +++ /dev/null @@ -1,6 +0,0 @@ -url,file_type,md5_checksum -test.txt,TXT,14758f1afd44c09b7992073ccf00b43d -test.txt,TIFF,14758f1afd44c09b7992073ccf00b43d -osfs://test.txt,TXT,definitelynottherightmd5checksum -syn://syn50555279,TXT,14758f1afd44c09b7992073ccf00b43d -circuit.tif,TIFF,c7b08f6decb5e7572efbe6074926a843 diff --git a/tests/data/generate.py b/tests/data/generate.py deleted file mode 100755 index 310faf2..0000000 --- a/tests/data/generate.py +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env python3 - -"""Generate test files using latest version of dcqc.""" - -import os -import sys - -from dcqc.file import File -from dcqc.mixins import SerializableMixin -from dcqc.reports import JsonReport -from dcqc.suites.suite_abc import SuiteABC -from dcqc.target import Target -from dcqc.tests import tests - -# Shared values -data_dir = sys.path[0] -data_dir = os.path.relpath(data_dir) -report = JsonReport() - - -# Shared functions -def export(obj: SerializableMixin, filename: str): - output_url = os.path.join(data_dir, filename) - report.save(obj, output_url, overwrite=True) - - -# target.json -file_url = os.path.join(data_dir, "test.txt") -metadata = {"file_type": "TIFF", "md5_checksum": "14758f1afd44c09b7992073ccf00b43d"} -file = File(file_url, metadata) -target = Target(file, id="001") -export(target, "target.json") - -# test.internal.json -internal_test = tests.Md5ChecksumTest(target) -export(internal_test, "test.internal.json") - -# test.external.json -external_test = tests.LibTiffInfoTest(target) -export(external_test, "test.external.json") - -# test.computed.json -computed_test = tests.Md5ChecksumTest(target) -computed_test.get_status() -export(computed_test, "test.computed.json") - -# suite.json -suite_tests = [internal_test, external_test] -required_tests = ["Md5ChecksumTest"] -skipped_tests = ["LibTiffInfoTest"] -suite = SuiteABC.from_tests(suite_tests, required_tests, skipped_tests) -export(suite, "suite.json") diff --git a/tests/data/small.csv b/tests/data/small.csv deleted file mode 100644 index e239b87..0000000 --- a/tests/data/small.csv +++ /dev/null @@ -1,3 +0,0 @@ -url,file_type,md5_checksum -test.txt,TXT,14758f1afd44c09b7992073ccf00b43d -test.txt,TIFF,14758f1afd44c09b7992073ccf00b43d diff --git a/tests/data/suite.json b/tests/data/suite.json deleted file mode 100644 index e8cb03d..0000000 --- a/tests/data/suite.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "type": "TiffSuite", - "target": { - "type": "Target", - "id": "001", - "files": [ - { - "url": "tests/data/test.txt", - "metadata": { - "md5_checksum": "14758f1afd44c09b7992073ccf00b43d" - }, - "type": "TIFF", - "local_path": "tests/data/test.txt" - } - ] - }, - "suite_status": { - "required_tests": [ - "Md5ChecksumTest" - ], - "skipped_tests": [ - "LibTiffInfoTest" - ], - "status": "passed" - }, - "tests": [ - { - "type": "Md5ChecksumTest", - "status": "passed" - }, - { - "type": "LibTiffInfoTest", - "status": "skipped" - } - ] -} diff --git a/tests/data/target.json b/tests/data/target.json deleted file mode 100644 index 163afc4..0000000 --- a/tests/data/target.json +++ /dev/null 
@@ -1,14 +0,0 @@ -{ - "type": "Target", - "id": "001", - "files": [ - { - "url": "tests/data/test.txt", - "metadata": { - "md5_checksum": "14758f1afd44c09b7992073ccf00b43d" - }, - "type": "TIFF", - "local_path": "tests/data/test.txt" - } - ] -} diff --git a/tests/data/test.computed.json b/tests/data/test.computed.json deleted file mode 100644 index ec9597c..0000000 --- a/tests/data/test.computed.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "type": "Md5ChecksumTest", - "status": "passed", - "target": { - "type": "Target", - "id": "001", - "files": [ - { - "url": "tests/data/test.txt", - "metadata": { - "md5_checksum": "14758f1afd44c09b7992073ccf00b43d" - }, - "type": "TIFF", - "local_path": "tests/data/test.txt" - } - ] - } -} diff --git a/tests/data/test.external.json b/tests/data/test.external.json deleted file mode 100644 index 46c14de..0000000 --- a/tests/data/test.external.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "type": "LibTiffInfoTest", - "status": "pending", - "target": { - "type": "Target", - "id": "001", - "files": [ - { - "url": "tests/data/test.txt", - "metadata": { - "md5_checksum": "14758f1afd44c09b7992073ccf00b43d" - }, - "type": "TIFF", - "local_path": "tests/data/test.txt" - } - ] - } -} diff --git a/tests/data/test.internal.json b/tests/data/test.internal.json deleted file mode 100644 index e6d97a4..0000000 --- a/tests/data/test.internal.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "type": "Md5ChecksumTest", - "status": "pending", - "target": { - "type": "Target", - "id": "001", - "files": [ - { - "url": "tests/data/test.txt", - "metadata": { - "md5_checksum": "14758f1afd44c09b7992073ccf00b43d" - }, - "type": "TIFF", - "local_path": "tests/data/test.txt" - } - ] - } -} diff --git a/tests/data/test.txt b/tests/data/test.txt deleted file mode 100644 index 323fae0..0000000 --- a/tests/data/test.txt +++ /dev/null @@ -1 +0,0 @@ -foobar diff --git a/tests/data/tiffinfo/exit_code.txt b/tests/data/tiffinfo/exit_code.txt deleted file mode 100644 index 573541a..0000000 --- a/tests/data/tiffinfo/exit_code.txt +++ /dev/null @@ -1 +0,0 @@ -0 diff --git a/tests/data/tiffinfo/std_err.txt b/tests/data/tiffinfo/std_err.txt deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/tiffinfo/std_out.txt b/tests/data/tiffinfo/std_out.txt deleted file mode 100644 index ac1266d..0000000 --- a/tests/data/tiffinfo/std_out.txt +++ /dev/null @@ -1,12 +0,0 @@ -=== TIFF directory 0 === -TIFF Directory at offset 0x12a70 (76400) - Image Width: 272 Image Length: 280 - Resolution: 72, 72 - Bits/Sample: 8 - Compression Scheme: PackBits - Photometric Interpretation: min-is-black - Orientation: row 0 top, col 0 lhs - Samples/Pixel: 1 - Rows/Strip: 30 - Planar Configuration: single image plane - ImageDescription: Micrograph of 16-bit A/D converter circuit, courtesy of Steve Decker and Shujaat Nadeem, MIT, 1993. 
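The `tiffinfo/` fixtures deleted above (`std_out.txt`, `std_err.txt`, `exit_code.txt`) exercise the `ExternalTestMixin` convention removed earlier in this diff: an external QC process leaves its streams and exit code behind in three well-known files, and `_interpret_process_outputs()` maps an exit code of "0" to a pass and anything else to a failure. Below is a minimal, self-contained sketch of that convention; the `Status` enum and `interpret_exit_code` helper are illustrative names, not part of dcqc or fs-synapse.

```python
from enum import Enum
from pathlib import Path
from tempfile import TemporaryDirectory


class Status(Enum):
    PASS = "passed"
    FAIL = "failed"


def interpret_exit_code(search_dir: Path) -> Status:
    """Read exit_code.txt from a process output directory and map '0' to PASS."""
    exit_code_path = search_dir / "exit_code.txt"
    if not exit_code_path.exists():
        # Mirrors the FileNotFoundError raised by _find_process_outputs()
        message = f"Expected process output ({exit_code_path}) does not exist."
        raise FileNotFoundError(message)
    exit_code = exit_code_path.read_text().strip()
    return Status.PASS if exit_code == "0" else Status.FAIL


with TemporaryDirectory() as tmp_dir:
    tmp_path = Path(tmp_dir)
    (tmp_path / "exit_code.txt").write_text("0")  # same payload as the deleted fixture
    assert interpret_exit_code(tmp_path) is Status.PASS
```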
diff --git a/tests/test_acceptance.py b/tests/test_acceptance.py deleted file mode 100644 index 42a21fa..0000000 --- a/tests/test_acceptance.py +++ /dev/null @@ -1,41 +0,0 @@ -import json - -import pytest - -from dcqc.parsers import CsvParser -from dcqc.reports import JsonReport -from dcqc.tests.test_abc import TestABC -from dcqc.utils import open_parent_fs - - -@pytest.mark.integration -def test_json_report_generation(get_data): - # GIVEN a list of external tests to skip (to remain self-contained) - all_tests = TestABC.list_subclasses() - skipped_tests = [test.__name__ for test in all_tests if test.is_external_test] - - # AND a subset of internal tests to be required (to verify suite status behavior) - required_tests = ["Md5ChecksumTest"] - - # AND a CSV file of TXT and TIFF files - csv_path = get_data("files.csv") - - # AND a remote destination for the JSON report - report_url = "syn://syn50696607/report.json" - - # WHEN the CSV file is parsed to generate the relevant QC suites - parser = CsvParser(csv_path) - suites = parser.create_suites(required_tests, skipped_tests) - - # AND those suites are used to generate a JSON report - report = JsonReport() - report.save(suites, report_url, overwrite=True) - - # THEN the file exists - fs, basename = open_parent_fs(report_url) - assert fs.exists(basename) - - # AND the file can be loaded by the `json` module - with fs.open(basename) as infile: - contents = json.load(infile) - assert contents diff --git a/tests/test_file.py b/tests/test_file.py deleted file mode 100644 index f0f1cbc..0000000 --- a/tests/test_file.py +++ /dev/null @@ -1,94 +0,0 @@ -import os -from pathlib import Path -from tempfile import TemporaryDirectory - -import pytest - -from dcqc.file import File, FileType - - -def test_for_an_error_if_registering_a_duplicate_file_type(): - with pytest.raises(ValueError): - FileType("txt", (".foo",)) - - -def test_for_an_error_when_requesting_for_an_unregistered_file_type(): - with pytest.raises(ValueError): - FileType.get_file_type("foo") - - -def test_for_an_error_when_retrieving_missing_metadata_on_a_file(test_files): - test_file = test_files["good"] - with pytest.raises(KeyError): - test_file.get_metadata("foo") - - -def test_that_a_local_file_is_not_moved_when_requesting_a_local_path(test_files): - test_file = test_files["good"] - url_before = test_file.url - local_path = test_file.get_local_path() - url_after = test_file.url - assert url_before == url_after - assert os.path.exists(local_path) - - -@pytest.mark.integration -def test_for_an_error_when_getting_local_path_for_an_unstaged_remote_file(test_files): - file = test_files["synapse"] - with pytest.raises(FileNotFoundError): - file.get_local_path() - - -@pytest.mark.integration -def test_that_a_local_file_is_not_moved_when_staged_without_a_destination(test_files): - test_file = test_files["good"] - path_before = test_file.get_local_path() - path_after = test_file.stage() - assert path_before == path_after - - -@pytest.mark.integration -def test_that_a_local_file_is_symlinked_when_staged_with_a_destination(test_files): - test_file = test_files["good"] - with TemporaryDirectory() as tmp_dir: - original_path = Path(test_file.get_local_path()) - tmp_dir_path = Path(tmp_dir) - test_file.stage(tmp_dir_path) - staged_path = Path(test_file.get_local_path()) - assert staged_path.is_symlink() - assert staged_path.resolve() == original_path.resolve() - - -@pytest.mark.integration -def test_that_a_local_temporary_path_is_created_when_staging_a_remote_file(test_files): - file = 
test_files["synapse"] - file.stage() - assert file.get_local_path() is not None - - -@pytest.mark.integration -def test_that_a_remote_file_is_created_when_staged_with_a_destination(test_files): - test_file = test_files["synapse"] - with TemporaryDirectory() as tmp_dir: - tmp_dir_path = Path(tmp_dir) - test_file.stage(tmp_dir_path) - local_path = test_file.get_local_path() - assert local_path.exists() - assert not local_path.is_symlink() - - -def test_that_a_file_can_be_saved_and_restored_without_changing(test_files): - file_1 = test_files["good"] - file_1_dict = file_1.to_dict() - file_2 = File.from_dict(file_1_dict) - file_2_dict = file_2.to_dict() - assert file_1 == file_2 - assert file_1_dict == file_2_dict - - -def test_that_an_absolute_local_url_is_unchanged_when_using_relative_to(get_data): - test_path = get_data("test.txt") - test_url = test_path.resolve().as_posix() - metadata = {"file_type": "TXT"} - file = File(test_url, metadata, relative_to=Path.cwd()) - assert file.url == test_url diff --git a/tests/test_main.py b/tests/test_main.py deleted file mode 100644 index 9e6caed..0000000 --- a/tests/test_main.py +++ /dev/null @@ -1,90 +0,0 @@ -import shutil -from typing import Any - -import pytest -from typer.testing import CliRunner - -from dcqc.main import app - - -# Using a test class to mark all tests as "integration" -@pytest.mark.integration -class TestCLI: - def run_command(self, arguments: list[Any]): - runner = CliRunner() - str_arguments = [str(arg) for arg in arguments] - result = runner.invoke(app, str_arguments) - assert result.exit_code == 0 - return result - - def test_create_targets(self, get_data, get_output): - input_csv = get_data("small.csv") - output_dir = get_output("create_targets") - shutil.rmtree(output_dir, ignore_errors=True) - - assert not output_dir.exists() - args = ["create-targets", input_csv, output_dir] - self.run_command(args) - assert len(list(output_dir.iterdir())) > 0 - - def test_stage_target(self, get_data, get_output): - input_json = get_data("target.json") - output_dir = get_output("stage_target") - shutil.rmtree(output_dir, ignore_errors=True) - - assert not output_dir.exists() - args = ["stage-target", input_json, output_dir] - self.run_command(args) - assert len(list(output_dir.iterdir())) > 0 - - def test_create_tests(self, get_data, get_output): - input_json = get_data("target.json") - output_dir = get_output("create_tests") - shutil.rmtree(output_dir, ignore_errors=True) - - assert not output_dir.exists() - args = ["create-tests", "-r", "Md5ChecksumTest", input_json, output_dir] - self.run_command(args) - assert len(list(output_dir.iterdir())) > 0 - - def test_create_process(self, get_data, get_output): - input_json = get_data("test.external.json") - output_path = get_output("create_process") / "process.json" - output_path.unlink(missing_ok=True) - - assert not output_path.exists() - args = ["create-process", input_json, output_path] - self.run_command(args) - assert output_path.exists() - - def test_compute_test(self, get_data, get_output): - input_json = get_data("test.internal.json") - output_path = get_output("compute_test") / "test.json" - output_path.unlink(missing_ok=True) - - assert not output_path.exists() - args = ["compute-test", input_json, output_path] - self.run_command(args) - assert output_path.exists() - - def test_create_suite(self, get_data, get_output): - input_json = get_data("test.computed.json") - output_path = get_output("create_suite") / "suite.json" - output_path.unlink(missing_ok=True) - - args = 
["create-suite", output_path, input_json, input_json, input_json] - self.run_command(args) - assert output_path.exists() - - def test_combine_suites(self, get_data, get_output): - input_json = get_data("suite.json") - output_path = get_output("combine_suites") / "suites.json" - output_path.unlink(missing_ok=True) - - args = ["combine-suites", output_path, input_json, input_json, input_json] - self.run_command(args) - assert output_path.exists() - - def test_list_tests(self): - args = ["list-tests"] - self.run_command(args) diff --git a/tests/test_parsers.py b/tests/test_parsers.py deleted file mode 100644 index a28be19..0000000 --- a/tests/test_parsers.py +++ /dev/null @@ -1,14 +0,0 @@ -from collections.abc import Generator - -from dcqc.parsers import CsvParser -from dcqc.target import Target - - -def test_that_parsing_a_targets_csv_file_yields_qc_targets(get_data): - csv_path = get_data("files.csv") - parser = CsvParser(csv_path) - result = parser.create_targets() - assert isinstance(result, Generator) - result = list(result) - assert len(result) > 1 - assert all(isinstance(x, Target) for x in result) diff --git a/tests/test_reports.py b/tests/test_reports.py deleted file mode 100644 index 57fd878..0000000 --- a/tests/test_reports.py +++ /dev/null @@ -1,18 +0,0 @@ -import pytest - -from dcqc.reports import JsonReport - - -def test_for_error_when_creating_report_if_file_already_exists(get_data, test_files): - existing_url = get_data("test.txt").as_posix() - file = test_files["good"] - report = JsonReport() - with pytest.raises(FileExistsError): - report.save(file, existing_url) - - -def test_that_a_single_object_can_be_reported_on(test_files): - test_file = test_files["good"] - report_url = "mem://report.json" - report = JsonReport() - report.save(test_file, report_url, overwrite=True) diff --git a/tests/test_suites.py b/tests/test_suites.py deleted file mode 100644 index 93dcece..0000000 --- a/tests/test_suites.py +++ /dev/null @@ -1,76 +0,0 @@ -import pytest - -from dcqc.file import FileType -from dcqc.suites.suite_abc import SuiteABC -from dcqc.suites.suites import FileSuite, OmeTiffSuite, TiffSuite -from dcqc.target import Target -from dcqc.tests.test_abc import TestABC, TestStatus -from dcqc.tests.tests import LibTiffInfoTest - - -class RedundantFileSuite(TiffSuite): - del_tests = (LibTiffInfoTest,) - - -FileType("Unpaired", ()) - - -class DummyTest(TestABC): - def compute_status(self) -> TestStatus: - return TestStatus.NONE - - -def test_that_a_file_suite_results_in_multiple_tests(): - tests = FileSuite.list_test_classes() - assert len(tests) > 0 - assert all(issubclass(test, TestABC) for test in tests) - - -def test_that_deleting_a_just_added_test_results_in_the_same_test_list(): - tests_1 = FileSuite.list_test_classes() - tests_2 = RedundantFileSuite.list_test_classes() - assert set(tests_1) == set(tests_2) - - -def test_that_the_ome_tiff_suite_has_a_superset_of_the_tiff_suite_tests(): - tiff_tests = TiffSuite.list_test_classes() - ome_tiff_tests = OmeTiffSuite.list_test_classes() - assert set(ome_tiff_tests) > set(tiff_tests) - - -def test_that_a_test_suite_can_be_retrieved_by_name(): - actual = SuiteABC.get_subclass_by_name("OmeTiffSuite") - assert actual is OmeTiffSuite - - -def test_for_an_error_when_retrieving_a_nonexistent_test_suite_by_name(): - with pytest.raises(ValueError): - SuiteABC.get_subclass_by_name("FooBarSuite") - - -def test_that_a_test_suite_can_be_retrieved_by_file_type_class(): - file_type = FileType.get_file_type("OME-TIFF") - actual = 
SuiteABC.get_subclass_by_file_type(file_type) - assert actual is OmeTiffSuite - - -def test_that_a_test_suite_can_be_retrieved_by_file_type_str(): - actual = SuiteABC.get_subclass_by_file_type("OME-TIFF") - assert actual is OmeTiffSuite - - -def test_that_the_generic_file_suite_is_retrieved_for_a_random_file_type(): - actual = SuiteABC.get_subclass_by_file_type("Foo-Bar") - assert actual is FileSuite - - -def test_that_the_generic_file_suite_is_retrieved_for_an_unpaired_file_type(): - actual = SuiteABC.get_subclass_by_file_type("Unpaired") - assert actual is FileSuite - - -def test_that_the_default_required_tests_are_only_tiers_1_and_2(test_files): - tiff_file = test_files["tiff"] - tiff_target = Target(tiff_file) - tiff_suite = TiffSuite(tiff_target) - assert all(test.tier <= 2 for test in tiff_suite.tests) diff --git a/tests/test_filesystems.py b/tests/test_synapsefs.py similarity index 96% rename from tests/test_filesystems.py rename to tests/test_synapsefs.py index 37813a9..a2f77c3 100644 --- a/tests/test_filesystems.py +++ b/tests/test_synapsefs.py @@ -1,3 +1,5 @@ +# TODO: This test suite can probably benefit from being broken up + import json import os import unittest @@ -11,51 +13,13 @@ from synapseclient import Folder, Synapse from synapseclient.core.exceptions import SynapseFileNotFoundError, SynapseHTTPError -from dcqc.filesystems.remote_file import RemoteFile -from dcqc.filesystems.synapsefs import SynapseFS, synapse_errors - - -@pytest.fixture(scope="session") -def synapse_fs(): - yield SynapseFS() - - -@pytest.mark.integration -def test_that_synapsefs_can_be_initialized_with_different_roots(): - # Rootless - SynapseFS() - SynapseFS("") - - # Synapse ID for a project or folder - SynapseFS("syn50545516") - SynapseFS("syn50557597") - - # Synapse ID and path to a subfolder - SynapseFS("syn50545516/TestSubDir") - - # Synapse and path to a file - with pytest.raises(CreateFailed): - SynapseFS("syn50545516/test.txt") - - # Path with no Synapse ID - with pytest.raises(CreateFailed): - SynapseFS("DCQC Test Project") - - # Path that doesn't start with a Synapse ID - with pytest.raises(CreateFailed): - SynapseFS("DCQC Test Project/syn50557597") - - -@pytest.mark.integration -def test_that_a_rootless_synapsefs_can_open_a_random_file_by_id(synapse_fs): - with synapse_fs.open("syn50555279") as infile: - contents = infile.read() - assert contents == "foobar\n" +from synapsefs.remote_file import RemoteFile +from synapsefs.synapsefs import SynapseFS, synapse_errors def test_for_an_error_with_a_path_that_does_not_start_with_a_synapse_id(synapse_fs): with pytest.raises(ValueError): - synapse_fs._path_to_synapse_id("DCQC Test Project/syn50555279") + synapse_fs._path_to_synapse_id("SynapseFS Test Project/syn50555279") def test_that_a_path_with_multiple_synapse_ids_can_be_traversed(synapse_fs): @@ -106,8 +70,8 @@ def test_that_a_remote_file_without_a_close_on_callable_can_be_closed(): remote_file.close() -def test_that_staging_a_local_file_creates_a_copy(get_data): - path = get_data("test.txt") +def test_that_staging_a_local_file_creates_a_copy(): + path = Path(__file__) local_fs = open_fs(f"osfs://{path.parent}") with TemporaryDirectory() as tmp_dir_name: tmp_dir_path = Path(tmp_dir_name) @@ -119,6 +83,39 @@ def test_that_staging_a_local_file_creates_a_copy(get_data): assert target_path.exists() +@pytest.mark.integration +def test_that_synapsefs_can_be_initialized_with_different_roots(): + # Rootless + SynapseFS() + SynapseFS("") + + # Synapse ID for a project or folder + SynapseFS("syn50545516") + 
SynapseFS("syn50557597") + + # Synapse ID and path to a subfolder + SynapseFS("syn50545516/TestSubDir") + + # Synapse and path to a file + with pytest.raises(CreateFailed): + SynapseFS("syn50545516/test.txt") + + # Path with no Synapse ID + with pytest.raises(CreateFailed): + SynapseFS("SynapseFS Test Project") + + # Path that doesn't start with a Synapse ID + with pytest.raises(CreateFailed): + SynapseFS("SynapseFS Test Project/syn50557597") + + +@pytest.mark.integration +def test_that_a_rootless_synapsefs_can_open_a_random_file_by_id(synapse_fs): + with synapse_fs.open("syn50555279") as infile: + contents = infile.read() + assert contents == "foobar\n" + + # Not technically an integration test, but I'm reusing the same mark since it's slow @pytest.mark.integration def test_that_staging_a_synapse_file_creates_a_copy(mocker): diff --git a/tests/test_target.py b/tests/test_target.py deleted file mode 100644 index 7dbac66..0000000 --- a/tests/test_target.py +++ /dev/null @@ -1,22 +0,0 @@ -import pytest - -from dcqc.target import Target - - -def test_that_a_target_can_be_saved_and_restored_without_changing(test_files): - test_file = test_files["good"] - target_1 = Target(test_file) - target_1_dict = target_1.to_dict() - target_2 = Target.from_dict(target_1_dict) - target_2_dict = target_2.to_dict() - assert target_1 == target_2 - assert target_1_dict == target_2_dict - - -def test_for_an_error_when_restoring_a_target_with_a_discordant_type(test_files): - test_file = test_files["good"] - target_1 = Target(test_file) - target_1_dict = target_1.to_dict() - target_1_dict["type"] = "UnexpectedQcTarget" - with pytest.raises(ValueError): - Target.from_dict(target_1_dict) diff --git a/tests/test_tests.py b/tests/test_tests.py deleted file mode 100644 index 1a4f17d..0000000 --- a/tests/test_tests.py +++ /dev/null @@ -1,139 +0,0 @@ -from pathlib import Path -from tempfile import TemporaryDirectory - -import pytest - -from dcqc.target import Target -from dcqc.tests import tests -from dcqc.tests.test_abc import ExternalTestMixin, TestABC, TestStatus - - -def test_that_the_file_extension_test_works_on_correct_files(test_files): - good_file = test_files["good"] - target = Target(good_file) - test = tests.FileExtensionTest(target) - test_status = test.get_status() - assert test_status == TestStatus.PASS - - -def test_that_the_file_extension_test_works_on_incorrect_files(test_files): - good_file = test_files["good"] - bad_file = test_files["bad"] - target = Target(good_file, bad_file) - test = tests.FileExtensionTest(target) - test_status = test.get_status() - assert test_status == TestStatus.FAIL - - -def test_that_the_md5_checksum_test_works_on_a_correct_file(test_files): - good_file = test_files["good"] - target = Target(good_file) - test = tests.Md5ChecksumTest(target) - test_status = test.get_status() - assert test_status == TestStatus.PASS - - -def test_that_the_md5_checksum_test_works_on_incorrect_files(test_files): - good_file = test_files["good"] - bad_file = test_files["bad"] - target = Target(good_file, bad_file) - test = tests.Md5ChecksumTest(target) - test_status = test.get_status() - assert test_status == TestStatus.FAIL - - -def test_that_all_external_tests_inherit_from_the_mixin_first(): - tests = TestABC.list_subclasses() - for test in tests: - if issubclass(test, ExternalTestMixin): - mro = test.__mro__ - mixin_index = mro.index(ExternalTestMixin) - abc_index = mro.index(TestABC) - assert mixin_index < abc_index - - -def 
test_that_the_libtiff_info_test_correctly_interprets_exit_code_0_and_1( - test_files, mocker -): - tiff_file = test_files["tiff"] - target = Target(tiff_file) - with TemporaryDirectory() as tmp_dir: - path_0 = Path(tmp_dir, "code_0.txt") - path_1 = Path(tmp_dir, "code_1.txt") - path_0.write_text("0") - path_1.write_text("1") - good_outputs = {"std_out": path_1, "std_err": path_1, "exit_code": path_0} - bad_outputs = {"std_out": path_0, "std_err": path_0, "exit_code": path_1} - - test = tests.LibTiffInfoTest(target) - mocker.patch.object(test, "_find_process_outputs", return_value=good_outputs) - test_status = test.get_status() - assert test_status == TestStatus.PASS - - test = tests.LibTiffInfoTest(target) - mocker.patch.object(test, "_find_process_outputs", return_value=bad_outputs) - test_status = test.get_status() - assert test_status == TestStatus.FAIL - - -def test_that_the_libtiff_info_test_command_is_produced(test_files): - tiff_file = test_files["tiff"] - target = Target(tiff_file) - test = tests.LibTiffInfoTest(target) - process = test.generate_process() - assert "tiffinfo" in process.get_command() - - -def test_that_the_bioformats_info_test_command_is_produced(test_files): - tiff_file = test_files["tiff"] - target = Target(tiff_file) - test = tests.BioFormatsInfoTest(target) - process = test.generate_process() - assert "showinf" in process.get_command() - - -def test_that_the_ome_xml_schema_test_command_is_produced(test_files): - tiff_file = test_files["tiff"] - target = Target(tiff_file) - test = tests.OmeXmlSchemaTest(target) - process = test.generate_process() - assert "xmlvalid" in process.get_command() - - -def test_that_the_md5_checksum_test_can_be_retrieved_by_name(): - test = TestABC.get_subclass_by_name("Md5ChecksumTest") - assert test is tests.Md5ChecksumTest - - -def test_for_an_error_when_retrieving_a_random_test_by_name(): - with pytest.raises(ValueError): - TestABC.get_subclass_by_name("FooBar") - - -def test_for_an_error_when_a_libtiff_info_test_is_given_multiple_files(test_files): - tiff_file = test_files["tiff"] - target = Target(tiff_file, tiff_file) - - assert not tests.Md5ChecksumTest.only_one_file_targets - tests.Md5ChecksumTest(target) - - assert tests.LibTiffInfoTest.only_one_file_targets - with pytest.raises(ValueError): - tests.LibTiffInfoTest(target) - - -def test_that_process_output_files_can_be_found(get_data): - std_out = get_data("tiffinfo/std_out.txt") - std_err = get_data("tiffinfo/std_err.txt") - exit_code = get_data("tiffinfo/exit_code.txt") - with TemporaryDirectory() as tmp_dir: - tmp_path = Path(tmp_dir) - - (tmp_path / std_out.name).symlink_to(std_out) - (tmp_path / std_err.name).symlink_to(std_err) - - with pytest.raises(FileNotFoundError): - ExternalTestMixin._find_process_outputs(tmp_path) - - (tmp_path / exit_code.name).symlink_to(exit_code) - ExternalTestMixin._find_process_outputs(tmp_path)
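With the QC layers gone, the surviving package is intentionally small: `synapsefs` exposes `SynapseFS` (a PyFilesystem2 implementation backed by Synapse) and the `open_parent_fs()` helper. A rough usage sketch, assuming valid Synapse credentials; the `syn` IDs are the fixtures used in the test suite above, so substitute entities you can access:

```python
from synapsefs import SynapseFS, open_parent_fs

# Rootless filesystem: every path must start with a Synapse ID
fs = SynapseFS()
with fs.open("syn50555279") as infile:  # text-file fixture from the tests
    print(infile.read())  # "foobar\n"

# Rooted at a project or folder, optionally with a subfolder path
rooted = SynapseFS("syn50545516/TestSubDir")
print(rooted.listdir("/"))

# open_parent_fs() splits a URL into the parent filesystem and a basename,
# which the removed JsonReport code used to write reports to any FS URL
parent_fs, basename = open_parent_fs("mem://report.json")
with parent_fs.open(basename, "w") as outfile:
    outfile.write("{}")
assert parent_fs.exists(basename)
```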