diff --git a/.ci/self_hosted_conda_creation.sh b/.ci/self_hosted_conda_creation.sh
new file mode 100755
index 0000000..37a57a3
--- /dev/null
+++ b/.ci/self_hosted_conda_creation.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+# Check that the env folder exists or create it
+if [ -d ~/envs ]
+then
+    echo "Found existing envs folder"
+else
+    echo "Did not find envs folder, creating"
+    mkdir ~/envs
+fi
+
+# Check if conda environment exists. If it does, remove it.
+if [ -d "$HOME/envs/fedomics_python_$1" ]
+then
+    echo "Found existing fedomics conda environment, removing"
+    conda env remove --prefix "$HOME/envs/fedomics_python_$1" -y
+fi
+conda init bash
+. ~/.bashrc
+#
+echo "Creating environment"
+yes | conda create --prefix "$HOME/envs/fedomics_python_$1" python="$1"
+echo "Created env fedomics_python_$1"
+eval "$(conda shell.bash hook)"
+conda activate "$HOME/envs/fedomics_python_$1"
diff --git a/.ci/self_hosted_conda_removal.sh b/.ci/self_hosted_conda_removal.sh
new file mode 100755
index 0000000..675caf6
--- /dev/null
+++ b/.ci/self_hosted_conda_removal.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
+
+# Check if conda environment exists. If it does, remove it.
+if [ -d "$HOME/envs/fedomics_python_$1" ]
+then
+    echo "Found existing fedomics conda environment, removing"
+    conda env remove --prefix "$HOME/envs/fedomics_python_$1" -y
+fi
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
new file mode 100644
index 0000000..5009781
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,28 @@
+---
+name: Bug report
+about: Create a report to help us improve
+title: "[BUG] "
+labels: bug
+assignees: ''
+
+---
+
+**Describe the bug**
+A clear and concise description of what the bug is.
+
+**To Reproduce**
+Provide snippets of code and steps on how to reproduce the behavior.
+Please also specify the version you are using.
+
+**Expected behavior**
+A clear and concise description of what you expected to happen.
+
+**Screenshots**
+If applicable, add screenshots to help explain your problem.
+
+**Desktop (please complete the following information):**
+ - OS: [e.g. iOS]
+ - Version [e.g. 0.02]
+
+**Additional context**
+Add any other context about the problem here.
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
new file mode 100644
index 0000000..bbcbbe7
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,20 @@
+---
+name: Feature request
+about: Suggest an idea for this project
+title: ''
+labels: ''
+assignees: ''
+
+---
+
+**Is your feature request related to a problem? Please describe.**
+A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
+
+**Describe the solution you'd like**
+A clear and concise description of what you want to happen.
+
+**Describe alternatives you've considered**
+A clear and concise description of any alternative solutions or features you've considered.
+
+**Additional context**
+Add any other context or screenshots about the feature request here.
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
new file mode 100644
index 0000000..6222fe2
--- /dev/null
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,16 @@
+
+
+#### Reference Issue or PRs
+
+
+
+#### What does your PR implement? Be specific.
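The two `.ci` scripts above take the target Python version as their only positional argument. A minimal usage sketch (the version number is illustrative):

```sh
# Create (or recreate) the per-version environment on a self-hosted runner,
# run the job inside it, then tear it down afterwards.
.ci/self_hosted_conda_creation.sh 3.11
# ... run the CI job inside ~/envs/fedomics_python_3.11 ...
.ci/self_hosted_conda_removal.sh 3.11
```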
diff --git a/.github/workflows/pr_push_validation.yml b/.github/workflows/pr_push_validation.yml new file mode 100644 index 0000000..a1115b5 --- /dev/null +++ b/.github/workflows/pr_push_validation.yml @@ -0,0 +1,66 @@ +name: Python dev + +on: + pull_request: + push: + branches: + - main + +jobs: + testing: + runs-on: ubuntu-latest + strategy: + matrix: + python: ["3.10", "3.11", "3.12"] + name: Testing Python ${{ matrix.python }} + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python }} + + - name: Install Poetry + run: | + python --version + pip install poetry==1.8.2 + + - name: Install dependencies + run: | + python --version + poetry install --with testing + + - name: Download data + run: | + mkdir -p /opt/conda + wget https://repo.anaconda.com/miniconda/Miniconda3-py39_24.5.0-0-Linux-x86_64.sh -O /opt/conda/miniconda.sh + bash /opt/conda/miniconda.sh -b -p /opt/miniconda + poetry run fedpydeseq2-download-data --only_luad --raw_data_output_path /home/runner/work/fedpydeseq2-datasets/fedpydeseq2-datasets/data/raw --conda_activate_path /opt/miniconda/bin/activate + + - name: Testing + run: | + poetry run pytest -v tests + + linting: + runs-on: ubuntu-latest + name: Test Linting + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + - name: Install Poetry + run: pip install poetry==1.8.2 + + - name: Install dependencies + run: | + which python + python --version + poetry install --with linting + + - name: Pre-commit checks + run: | + poetry run pre-commit run --all-files --show-diff-on-failure diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e4a2a98 --- /dev/null +++ b/.gitignore @@ -0,0 +1,184 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. 
+#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +.idea/ + +# VSCode +.vscode/ + +# DS_Store +.DS_Store + +# Data +data/centers_data/* +data/pooled_data/* +data/processed/* +data/raw/tcga/* + + +fedpydeseq2_datasets/download_data/.snakemake/* +fedpydeseq2_datasets/download_data/workflow/.snakemake +fedpydeseq2_datasets/download_data/logs/ +fedpydeseq2_datasets/download_data/results/ + +!data/raw/tcga/centers.csv + +*logs/ + +data/* diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..223e5b9 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,33 @@ +default_language_version: + python: python3.11 +repos: + - repo: https://github.com/sirosen/check-jsonschema + rev: 0.27.0 + hooks: + - id: check-github-actions + - id: check-github-workflows + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: trailing-whitespace + name: Trim trailing whitespace + - id: end-of-file-fixer + name: Fix end of files + exclude: \.ipynb$ + - repo: https://github.com/psf/black + rev: 23.11.0 + hooks: + - id: black + additional_dependencies: ["click==8.0.4"] + args: # arguments to configure black + - --line-length=88 + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.1.5 + hooks: + - id: ruff + args: [--fix, --exit-non-zero-on-fix] + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.7.0 + hooks: + - id: mypy + exclude: ^(tests/|docs/source/conf.py|datasets/) diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..b48d852 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Owkin + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished 
to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..ddcff28
--- /dev/null
+++ b/README.md
@@ -0,0 +1,239 @@
+# Datasets organisation
+
+This directory contains the data, assets and scripts necessary to:
+- download the raw data necessary to run the tests and experiments, when not
+  available in the repository, in the `download_data` directory;
+- open the data when performing a Substra experiment in the `assets` directory;
+- store the data in the `data` directory.
+
+## Data download
+
+For a detailed description of the data download process, please refer to the
+[README](fedpydeseq2_datasets/download_data/README.md).
+
+If you want to run the pipeline directly, you can use the script which is available in the distribution: `fedpydeseq2-download-data`
+
+
+
+```bash
+fedpydeseq2-download-data
+```
+
+By default, this script downloads the data into the `data/raw` directory at the root of the GitHub repo.
+
+To change the location of the raw data download, add the following option:
+```bash
+fedpydeseq2-download-data --raw_data_output_path <path to raw data>
+```
+
+If you only want the LUAD dataset, add the `--only_luad` flag.
+
+You can pass the `conda` activation path as an argument as well, for example:
+
+```bash
+fedpydeseq2-download-data --raw_data_output_path <path to raw data> --conda_activate_path /opt/miniconda/bin/activate
+```
+
+
+**Origin of the data**
+- The `Counts_raw.parquet` and `recount3_metadata.tsv.gz` files are downloaded from
+  the [RECOUNT3](https://rna.recount.bio/) database.
+- The `tumor_purity_metadata.csv` file is downloaded from the
+  [Systematic pan-cancer analysis of tumour purity](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4671203/) paper.
+- The `cleaned_clinical_metadata.csv` file is downloaded from the
+  [An Integrated TCGA Pan-Cancer Clinical Data Resource to Drive High-Quality Survival Outcome Analytics](https://www.sciencedirect.com/science/article/pii/S0092867418302290#app2) paper.
+
+For more detailed references, see the [References](#references) section.
+
+## Assets
+
+The `assets` directory contains a TCGA opener necessary to open the data on each center
+when performing a federated experiment with [Substra](https://docs.substra.org/en/stable/).
+
+In particular, the `fedpydeseq2_datasets/assets/tcga` directory contains the following files:
+```
+assets/tcga
+├── description.md
+└── opener.py
+```
+The opener is a Python script that opens the data and makes it available to the
+Substra platform. The `description.md` file contains a description of the data.
+
+For more details on how the opener works, please refer to
+the [Substra documentation](https://docs.substra.org/en/stable/).
+
+## Raw data organisation
+
+The `data` directory contains the raw data.
+The `raw` directory contains the data downloaded from the original sources,
+with the `download_data` scripts.
+
+It is organized as follows:
+```
+data
+├── raw
+│   └── tcga
+│       ├── <COHORT>
+│       │   ├── Counts_raw.parquet
+│       │   └── recount3_metadata.tsv.gz
+│       ├── centers.csv
+│       ├── cleaned_clinical_metadata.csv
+│       └── tumor_purity_metadata.csv
+```
+
+## Data preprocessing
+
+This module not only provides the raw data on which to test `fed-pydeseq2`; it also provides the
+preprocessing functions needed to organise the data according to their center of origin, and to aggregate the raw
+data into metadata and counts suitable for running pydeseq2 or fed-pydeseq2.
+
+These preprocessing functions usually create the preprocessed data in a `processed_data_path` directory, with the following
+structure (the files shown below are created by different preprocessing functions):
+
+```
+└── <processed_data_path>
+    ├── tcga
+    │   └── <COHORT>
+    │       ├── counts.parquet
+    │       └── clinical_data.csv
+    ├── centers_data
+    │   └── tcga
+    │       └── <experiment_id>
+    │           └── center_0
+    │               ├── counts_data.csv
+    │               ├── metadata.csv
+    │               └── ground_truth_dds.pkl
+    └── pooled_data
+        └── tcga
+            └── <experiment_id>
+                ├── counts_data.csv
+                ├── metadata.csv
+                └── ground_truth_dds.pkl
+```
+
+These files are automatically generated from the raw files if they are not
+already present, or if the `force` option is on.
+
+
+Note that the centers are always indexed by an integer, starting from 0. For example, one would
+have `center_0,...,center_3` if there are
+4 centers in the experiment.
+
+The `<experiment_id>` is an identifier of a differential gene expression task (and its specific hyperparameters)
+and a TCGA dataset. `fed-pydeseq2` or `deseq2` can then be run on the data corresponding to that DGE task.
+
+#### Details on the processed data
+
+In this repository, we study the following cofactors:
+- the `gender` of the patients, which is obtained from the `cleaned_clinical_metadata.csv` file;
+- the `CPE` of the samples, which is obtained from the `tumor_purity_metadata.csv` file;
+- the `stage` of the patients, which is obtained from the `cleaned_clinical_metadata.csv` file.
+This stage is originally a stage between `I` and `IV`, but we have grouped the stages into `I-II-III`
+(`Non-advanced`) and `IV` (`Advanced`), to have a binary covariate;
+- the `center_id` of the samples, which is obtained from the `centers.csv` file and used
+to create natural centers for the federated experiments.
+
+
+The processing is done by functions in the `fedpydeseq2_datasets` directory. There are three main functions:
+
+- the `common_preprocessing_tcga` function in the `fedpydeseq2_datasets/aggregate_raw_data.py`
+file;
+- the `setup_tcga_dataset` function in the `fedpydeseq2_datasets/process_and_split_data.py` file;
+- the `setup_tcga_ground_truth_dds` function in the `fedpydeseq2_datasets/create_reference_dds.py` file.
+
+
+The role of the `common_preprocessing_tcga` function is to generate counts and processed
+clinical data for a given cohort (e.g. `LUAD`) from the raw data:
+```
+└── processed
+    ├── tcga
+    │   └── <COHORT>
+    │       ├── counts.parquet
+    │       └── clinical_data.csv
+```
+The `counts.parquet` file contains the counts data, indexed by TCGA sample barcode,
+and with columns corresponding to the gene ids in the ENSEMBL convention.
+Note that we filter out the `PAR_Y` genes, as they are not common to all patients.
+The `clinical_data.csv` file aggregates the different metadata from the different sources
+described above in a per-cohort fashion. This `csv` is indexed by the TCGA sample barcode.
+It contains the following columns:
+- `gender`: the gender of the patient;
+- `CPE`: CPE stands for "consensus measurement of purity estimations", and is an
+aggregate of different purity estimations for the sample;
+- `stage`: the stage of the patient, as an integer between 1 and 4;
+- `center_id`: the center id of the sample, as an integer;
+- `is_normal_tissue`: a boolean indicating whether the sample is normal tissue;
+- `T`: the tumor (T) category of the patient, as an integer between 1 and 4;
+- `N`: the nodal status of the patient, as an integer between 0 and 3;
+- `M`: the metastasis status of the patient, as an integer between 0 and 1.
+
+
+The role of the `setup_tcga_dataset` function and the `setup_tcga_ground_truth_dds` function
+is to generate the data necessary for the federated AND corresponding pooled experiments, creating
+this part of the directory tree:
+```
+└── processed
+    ├── centers_data
+    │   └── tcga
+    │       └── <experiment_id>
+    │           └── center_0
+    │               ├── counts_data.csv
+    │               ├── metadata.csv
+    │               └── ground_truth_dds.pkl
+    └── pooled_data
+        └── tcga
+            └── <experiment_id>
+                ├── counts_data.csv
+                ├── metadata.csv
+                └── ground_truth_dds.pkl
+```
+The `<experiment_id>` identifies an experiment. It concatenates
+not only the dataset name (TCGA cohort), but also the design factors, the continuous factors,
+as well as other parameters used to filter the data.
+The `counts_data.csv` file contains the counts data, indexed by TCGA sample barcode,
+and with columns corresponding to the gene ids in the ENSEMBL convention.
+The `metadata.csv` file contains the clinical data, indexed by the TCGA sample barcode, and
+containing only the columns corresponding to a design factor.
+The `ground_truth_dds.pkl` file contains the ground truth for the differential expression
+analysis, as a `DeseqDataSet` object from the `pydeseq2` package.
+
+For more details on these functions, please refer to their respective documentation.
+
+> **Note**: the `setup_tcga_dataset` function
+> will binarize the `stage` into two categories: `Advanced` and `Non-advanced`.
+> `Advanced` corresponds to stage `IV`, and `Non-advanced` corresponds to stages `I`, `II` and `III`.
+> For the TCGA-PRAD cohort, we do not have the stage information, but we infer the stage
+> from the `T`, `N` and `M` columns. If the `N` or `M` columns are > 0, the stage is IV (see the
+> following [reference](https://www.cancer.org/cancer/types/prostate-cancer/detection-diagnosis-staging/staging.html)),
+> and hence the `Advanced` stage. Otherwise, it is `Non-advanced`.
+
+## References
+
+The data downloaded here has mainly been obtained from TCGA and processed by the following
+works.
+
+[1] Aran D, Sirota M, Butte AJ.
+    Systematic pan-cancer analysis of tumour purity.
+    Nat Commun. 2015 Dec 4;6:8971.
+    doi: 10.1038/ncomms9971.
+    Erratum in: Nat Commun. 2016 Feb 05;7:10707.
+    doi: 10.1038/ncomms10707.
+    PMID: 26634437; PMCID: PMC4671203.
+
+
+[2] Jianfang Liu, Tara Lichtenberg, Katherine A. Hoadley, Laila M. Poisson, Alexander J. Lazar, Andrew D. Cherniack, Albert J. Kovatich, Christopher C. Benz, Douglas A. Levine, Adrian V. Lee, Larsson Omberg, Denise M. Wolf, Craig D. Shriver, Vesteinn Thorsson et al.
+    An Integrated TCGA Pan-Cancer Clinical Data Resource to Drive High-Quality Survival Outcome Analytics,
+    Cell,
+    Volume 173, Issue 2, 2018, Pages 400-416.e11,
+    ISSN 0092-8674.
+
+
+[3] Wilks C, Zheng SC, Chen FY, Charles R, Solomon B, Ling JP, Imada EL,
+    Zhang D, Joseph L, Leek JT, Jaffe AE, Nellore A, Collado-Torres L,
+    Hansen KD, Langmead B (2021).
+ "recount3: summaries and queries for + large-scale RNA-seq expression and splicing." + _Genome Biol_. + doi:10.1186/s13059-021-02533-6 + , + . diff --git a/fedpydeseq2_datasets/__init__.py b/fedpydeseq2_datasets/__init__.py new file mode 100644 index 0000000..9795584 --- /dev/null +++ b/fedpydeseq2_datasets/__init__.py @@ -0,0 +1,6 @@ +"""Module to preprocess the TCGA data for fedpydeseq2.""" + +from fedpydeseq2_datasets.process_and_split_data import setup_tcga_dataset +from fedpydeseq2_datasets.create_reference_dds import setup_tcga_ground_truth_dds +from fedpydeseq2_datasets.utils import get_experiment_id +from fedpydeseq2_datasets.utils import get_ground_truth_dds_name diff --git a/fedpydeseq2_datasets/aggregate_raw_data.py b/fedpydeseq2_datasets/aggregate_raw_data.py new file mode 100644 index 0000000..6087d00 --- /dev/null +++ b/fedpydeseq2_datasets/aggregate_raw_data.py @@ -0,0 +1,372 @@ +from pathlib import Path + +import pandas as pd + +from fedpydeseq2_datasets.utils import tnm_to_series + +LIST_COL_WITH_UNSPECIFIED_TYPE = [ + "tcga.xml_her2_and_centromere_17_positive_finding_other_measurement_scale_text", + "tcga.xml_fluorescence_in_situ_hybridization_diagnostic_procedure_chromosome_17" + "_signal_result_range", + "tcga.xml_metastatic_breast_carcinoma_immunohistochemistry_er_pos_cell_score", + "tcga.xml_metastatic_breast_carcinoma_immunohistochemistry_pr_pos_cell_score", + "tcga.xml_metastatic_breast_carcinoma_erbb2_immunohistochemistry_level_result", + "tcga.xml_metastatic_breast_carcinoma_lab_proc_her2_neu_in_situ_hybridization" + "_outcome_type", +] + + +def run_sanity_checks_raw_data( + dataset_name: str, + raw_data_path: str | Path, +): + """ + Run sanity checks on the raw data. + + This function runs sanity checks on the raw data to ensure that the data is + correctly formatted and that the data is not corrupted. + + It does so by checking the following conditions: + - The Counts_raw.parquet for the given dataset exists and contains + between 40 000 and 70 000 genes and between 10 and 1300 samples. + - The recount3_metadata.tsv.gz file for the given dataset exists and contains + the columns "external_id" and "tcga.tcga_barcode". + - The cleaned_clinical_metadata.csv file for the given dataset exists and contains + the columns "bcr_patient_barcode", "gender" and "ajcc_pathologic_tumor_stage". + - The tumor_purity_metadata.csv file for the given dataset exists and contains + the columns "Sample ID" and "CPE". + - The centers.csv file for the given dataset exists and contains the columns + "TSS Code" and "Region". + + Parameters + ---------- + dataset_name : str + The TCGA dataset name in the format of "tcga-cohort" (capitalized). + For example, "TCGA-BRCA" for the breast cancer cohort. + + raw_data_path : str or Path + The path to the raw data folder. + This raw data folder is assumed to have the following structure and sub files + + ├── tcga + │ ├── COHORT + │ │ ├── Counts_raw.parquet + │ │ └── recount3_metadata.tsv.gz + │ ├── cleaned_clinical_metadata.csv + │ ├── tumor_purity_metadata.csv + │ └── centers.csv + Note that the `centers` file is already in the repository. The rest of the + files can be downloaded with the snakemake pipeline, and are + already available for the LUAD dataset. 
+
+    """
+    dataset, cohort = dataset_name.split("-")[:2]
+    # Convert to path
+    raw_data_path = Path(raw_data_path)
+    assert dataset.lower() == "tcga"
+    # Check that the centers file exists
+    assert (raw_data_path / "tcga" / "centers.csv").exists()
+    # Load it
+    centers = pd.read_csv(
+        raw_data_path / "tcga" / "centers.csv",
+    )
+    assert "TSS Code" in centers.columns
+    assert "Region" in centers.columns
+    # Check that the cleaned clinical metadata file exists
+
+    assert (raw_data_path / "tcga" / "cleaned_clinical_metadata.csv").exists()
+    # Load it
+    cleaned_clinical = pd.read_csv(
+        raw_data_path / "tcga" / "cleaned_clinical_metadata.csv"
+    )
+    # Check it contains the right columns
+    assert "bcr_patient_barcode" in cleaned_clinical.columns
+    assert "gender" in cleaned_clinical.columns
+    assert "ajcc_pathologic_tumor_stage" in cleaned_clinical.columns
+    # Check that the tumor purity metadata file exists
+    assert (raw_data_path / "tcga" / "tumor_purity_metadata.csv").exists()
+    # Load it
+    tumor_purity = pd.read_csv(
+        raw_data_path / "tcga" / "tumor_purity_metadata.csv",
+    )
+    assert "Sample ID" in tumor_purity.columns
+    assert "CPE" in tumor_purity.columns
+    # Check that the recount3 metadata file exists
+    assert (raw_data_path / "tcga" / cohort / "recount3_metadata.tsv.gz").exists()
+    # Load it
+
+    # specify the dtype of the columns to avoid warnings
+    dtype_dict = {col: "object" for col in LIST_COL_WITH_UNSPECIFIED_TYPE}
+    recount3_metadata = pd.read_csv(
+        raw_data_path / "tcga" / cohort / "recount3_metadata.tsv.gz",
+        sep="\t",
+        dtype=dtype_dict,
+    )
+    # Check the columns
+    assert "external_id" in recount3_metadata.columns
+    assert "tcga.tcga_barcode" in recount3_metadata.columns
+
+    # Check that the counts file exists
+    assert (raw_data_path / "tcga" / cohort / "Counts_raw.parquet").exists()
+    # Load it
+    counts = pd.read_parquet(raw_data_path / "tcga" / cohort / "Counts_raw.parquet")
+    # Check that the number of genes is roughly between 40 000 and 70 000
+    assert 40000 < counts.shape[0] < 70000
+    # Check that the number of samples is greater than 10 and less than 1300
+    assert 10 < counts.shape[1] < 1300
+    return
+
+
+def common_preprocessing_tcga(
+    dataset_name: str,
+    raw_data_path: str | Path,
+    processed_data_path: str | Path,
+    force: bool = False,
+):
+    """
+    Preprocess the TCGA data and merge all different metadata files.
+
+    This function preprocesses the TCGA data and merges all the different metadata
+    files into a single clinical data file.
+
+    It also indexes the count matrix by the barcodes and removes all genes ending with
+    PAR_Y. It also removes the gene version by taking the first one.
+
+    Parameters
+    ----------
+    dataset_name : str
+        The TCGA dataset name in the format of "tcga-cohort" (capitalized).
+        For example, "TCGA-BRCA" for the breast cancer cohort.
+
+    raw_data_path : str or Path
+        The path to the raw data folder.
+        This raw data folder is assumed to have the following structure and sub files
+
+        ├── tcga
+        │   ├── COHORT
+        │   │   ├── Counts_raw.parquet
+        │   │   └── recount3_metadata.tsv.gz
+        │   ├── cleaned_clinical_metadata.csv
+        │   ├── tumor_purity_metadata.csv
+        │   └── centers.csv
+        Note that the `centers` file is already in the repository. The rest of the
+        files can be downloaded with the snakemake pipeline, and are
+        already available for the LUAD dataset.
+
+    processed_data_path : str or Path
+        The path to the processed data folder.
This function will create the following + files in this folder: + + ├── tcga + │ └── COHORT + │ ├── counts.parquet + │ └── clinical_data.csv + + force : bool + If True, the function will run the preprocessing even if the processed data + already exists. Default is False. + + Raises + ------ + ValueError + If there are missing TSS codes in the centers.csv file. + + """ + if dataset_name in ["TCGA-NSCLC", "TCGA-CRC"]: + if dataset_name == "TCGA-NSCLC": + dataset_1, dataset_2 = "TCGA-LUAD", "TCGA-LUSC" + elif dataset_name == "TCGA-CRC": + dataset_1, dataset_2 = "TCGA-COAD", "TCGA-READ" + + common_preprocessing_tcga( + dataset_name=dataset_1, + raw_data_path=raw_data_path, + processed_data_path=processed_data_path, + force=force, + ) + common_preprocessing_tcga( + dataset_name=dataset_2, + raw_data_path=raw_data_path, + processed_data_path=processed_data_path, + force=force, + ) + return + + run_sanity_checks_raw_data(dataset_name, raw_data_path) + dataset, cohort = dataset_name.split("-")[:2] + # Convert to path + raw_data_path = Path(raw_data_path) + processed_data_path = Path(processed_data_path) + assert dataset.lower() == "tcga" + # Load the data from the cohort + # Check if the outputs exist. If they both do, return + if ( + (processed_data_path / "tcga" / cohort / "counts.parquet").exists() + and (processed_data_path / "tcga" / cohort / "clinical_data.csv").exists() + and not force + ): + return + + counts = pd.read_parquet(raw_data_path / "tcga" / cohort / "Counts_raw.parquet") + # specify the dtype of the columns to avoid warnings + dtype_dict = {col: "object" for col in LIST_COL_WITH_UNSPECIFIED_TYPE} + recount3_metadata = pd.read_csv( + raw_data_path / "tcga" / cohort / "recount3_metadata.tsv.gz", + sep="\t", + dtype=dtype_dict, + ) + # Load only the barcodes and the external ids in the recount3 data + + recount3_metadata = recount3_metadata[ + ["external_id", "tcga.tcga_barcode", "tcga.xml_stage_event_tnm_categories"] + ] + # Now create a mapping between the external id and the barcodes + recount3_metadata = recount3_metadata.set_index("external_id") + # apply this to the columns + counts.columns = counts.columns.map(recount3_metadata["tcga.tcga_barcode"]) + # remove the gene_name index + counts = counts.droplevel("gene_name") + # Remove all genes ending with PAR_Y + counts = counts.loc[~counts.index.str.endswith("PAR_Y")] + counts = counts.T + counts.index.name = "barcode" + # Now we filter the gene version by taking the first one. 
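+    # e.g. a versioned id "ENSG00000000003.15" becomes "ENSG00000000003"; when
+    # several versions collapse to the same unversioned id, only the first
+    # occurrence is kept by the deduplication below.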
+    counts.columns = counts.columns.str.split(".").str[0]
+    counts = counts.loc[:, ~counts.columns.duplicated()]
+    counts = counts[~counts.index.duplicated(keep="first")]
+    # We now have a clean count matrix
+    # we create a column indicating if normal tissue
+    is_normal_tissue = (
+        recount3_metadata["tcga.tcga_barcode"]
+        .map(lambda x: x[:15])
+        .map(lambda x: 10 <= int(x[-2:]) <= 29)
+    )
+    is_normal_tissue.index = recount3_metadata["tcga.tcga_barcode"]
+    is_normal_tissue = is_normal_tissue[
+        ~is_normal_tissue.index.duplicated(keep="first")
+    ]
+    # We create T, N, M columns
+    tnm_columns = (
+        recount3_metadata["tcga.xml_stage_event_tnm_categories"]
+        .apply(tnm_to_series)
+        .astype("Int8")
+    )
+    tnm_columns.index = recount3_metadata["tcga.tcga_barcode"]
+    tnm_columns = tnm_columns[~tnm_columns.index.duplicated(keep="first")]
+
+    # Now we load the clinical data
+    cleaned_clinical = pd.read_csv(
+        raw_data_path / "tcga" / "cleaned_clinical_metadata.csv"
+    )
+    # We filter 3 columns
+    cleaned_clinical = cleaned_clinical[
+        ["bcr_patient_barcode", "ajcc_pathologic_tumor_stage", "gender"]
+    ]
+    # We create a correspondence between sample and patient
+    cleaned_clinical = cleaned_clinical.set_index("bcr_patient_barcode")
+
+    # Filter the patients that are not in cleaned_clinical
+    counts = counts.loc[counts.index.map(lambda x: x[:12] in cleaned_clinical.index)]
+    is_normal_tissue = is_normal_tissue.loc[counts.index]
+    # create a dataframe
+
+    barcode_to_patient = pd.DataFrame(
+        index=counts.index,
+        data=counts.index.str[:12].to_list(),
+        columns=["bcr_patient_barcode"],
+    )
+
+    cleaned_clinical = cleaned_clinical.loc[barcode_to_patient["bcr_patient_barcode"]]
+    cleaned_clinical.index = counts.index
+    # lower case the gender
+    cleaned_clinical.loc[:, "gender"] = cleaned_clinical["gender"].str.lower()
+
+    # Now we define the stage in 1,2,3,4 or NA if it is normal tissue
+    def process_stage(stage_str):
+        if stage_str.startswith("Stage IV"):
+            return 4
+        elif stage_str.startswith("Stage III"):
+            return 3
+        elif stage_str.startswith("Stage II"):
+            return 2
+        elif stage_str.startswith("Stage I"):
+            return 1
+        else:
+            return pd.NA
+
+    cleaned_clinical.loc[:, "ajcc_pathologic_tumor_stage"] = (
+        cleaned_clinical["ajcc_pathologic_tumor_stage"]
+        .apply(process_stage)
+        .astype("Int16")
+    )
+    # Add a is_normal_tissue column
+    cleaned_clinical.loc[:, "is_normal_tissue"] = is_normal_tissue
+    # Set the tumor stage to NA if normal tissue
+    cleaned_clinical.loc[is_normal_tissue, "ajcc_pathologic_tumor_stage"] = pd.NA
+    # Add the T, N, M columns
+    cleaned_clinical = cleaned_clinical.join(tnm_columns)
+    # Set T, N, M columns to pd.NA if normal tissue
+    cleaned_clinical.loc[is_normal_tissue, ["T", "N", "M"]] = pd.NA
+    # now load the tumor purity metadata
+    tumor_purity = pd.read_csv(
+        raw_data_path / "tcga" / "tumor_purity_metadata.csv",
+    )
+    barcode_to_sample = pd.DataFrame(
+        index=counts.index,
+        data=counts.index.str[:16].to_list(),
+        columns=["Sample ID"],
+    )
+    tumor_purity = tumor_purity.set_index("Sample ID")
+    # Add nan rows for all sample ids which are in the counts but not in
+    # the tumor purity
+    missing_sample_ids = list(
+        set(barcode_to_sample["Sample ID"]) - set(tumor_purity.index)
+    )
+    missing_samples = pd.DataFrame(
+        index=missing_sample_ids,
+        columns=tumor_purity.columns,
+    ).astype(tumor_purity.dtypes)
+    tumor_purity = pd.concat([tumor_purity, missing_samples], axis=0)
+
+    tumor_purity = tumor_purity.loc[barcode_to_sample["Sample ID"]]
+    tumor_purity.index = counts.index
+    # We add the CPE column of the tumor purity to the clinical data
+    cleaned_clinical.loc[:, "CPE"] = tumor_purity["CPE"]
+    # Getting the centers info
+    centers_metadata = pd.read_csv(
+        raw_data_path / "tcga" / "centers.csv",
+    )
+    centers_metadata = centers_metadata[["TSS Code", "Region"]]
+    centers_metadata["TSS Code"] = centers_metadata["TSS Code"].apply(
+        lambda x: x.lstrip("0")
+    )
+    centers_metadata = centers_metadata.set_index("TSS Code")
+    # Get the TSS code
+    cleaned_clinical["TSS"] = cleaned_clinical.index.to_series().apply(
+        lambda x: x.split("-")[1]
+    )
+    # Remove leading zeros in both indexes
+    cleaned_clinical["TSS"] = cleaned_clinical["TSS"].apply(lambda x: x.lstrip("0"))
+    diff = set(cleaned_clinical["TSS"]) - set(centers_metadata.index)
+    if len(diff) > 0:
+        raise ValueError(
+            f"Missing TSS codes {diff} in the centers.csv file. "
+            "Please add these missing codes."
+        )
+    cleaned_clinical = cleaned_clinical.join(centers_metadata, on="TSS")
+    cleaned_clinical.drop(columns="TSS", inplace=True)
+    # encode the center
+    cleaned_clinical["center_id"] = pd.Categorical(cleaned_clinical.Region).codes
+
+    cleaned_clinical.drop(columns="Region", inplace=True)
+    # Rename ajcc_pathologic_tumor_stage to stage
+    cleaned_clinical.rename(
+        columns={"ajcc_pathologic_tumor_stage": "stage"}, inplace=True
+    )
+    # Now we save the data
+
+    output_folder = processed_data_path / "tcga" / cohort
+    output_folder.mkdir(parents=True, exist_ok=True)
+    counts.to_parquet(output_folder / "counts.parquet")
+    cleaned_clinical.to_csv(output_folder / "clinical_data.csv")
+    return
diff --git a/fedpydeseq2_datasets/assets/tcga/description.md b/fedpydeseq2_datasets/assets/tcga/description.md
new file mode 100644
index 0000000..370f741
--- /dev/null
+++ b/fedpydeseq2_datasets/assets/tcga/description.md
@@ -0,0 +1 @@
+TCGA RNA data used for fed-pydeseq2
diff --git a/fedpydeseq2_datasets/assets/tcga/opener.py b/fedpydeseq2_datasets/assets/tcga/opener.py
new file mode 100644
index 0000000..e6a3586
--- /dev/null
+++ b/fedpydeseq2_datasets/assets/tcga/opener.py
@@ -0,0 +1,63 @@
+import pathlib
+
+import anndata as ad
+import pandas as pd
+import substratools as tools
+from pydeseq2.utils import load_example_data
+
+
+class TCGAOpener(tools.Opener):
+    """Opener class for TCGA RNA-seq datasets.
+
+    Creates an AnnData object from a path containing a counts_data.csv and a
+    metadata.csv.
+    """
+
+    def fake_data(self, n_samples=None):
+        """Create a fake AnnData object for testing purposes.
+
+        Parameters
+        ----------
+        n_samples : int
+            Number of samples to generate. If None, generate 100 samples.
+
+        Returns
+        -------
+        AnnData
+            An AnnData object with fake counts and metadata.
+        """
+        N_SAMPLES = n_samples if n_samples and n_samples <= 100 else 100
+        fake_counts = load_example_data(modality="raw_counts").iloc[:N_SAMPLES]
+        fake_metadata = load_example_data(modality="metadata").iloc[:N_SAMPLES]
+        return ad.AnnData(X=fake_counts, obs=fake_metadata)
+
+    def get_data(self, folders):
+        """Open the TCGA dataset.
+
+        Parameters
+        ----------
+        folders : list
+            list of paths to the dataset folders, whose first element should contain a
+            counts_data.csv and a metadata.csv file.
+
+        Returns
+        -------
+        AnnData
+            An AnnData object containing the counts and metadata loaded for the FL
+            pipeline.
+ """ + # get .csv files + data_path = pathlib.Path(folders[0]).resolve() + counts_data = pd.read_csv(data_path / "counts_data.csv", index_col=0) + metadata = pd.read_csv(data_path / "metadata.csv", index_col=0) + center_id = metadata["center_id"].iloc[0] + # We assume that the center id is not present in the counts data, if it is + # present, we raise an error (it should have been removed in an earlier + # step) + if "center_id" in counts_data.columns: + raise ValueError("center_id column found in counts data") + metadata.drop(columns=["center_id"], inplace=True) + # Build an Anndata object + adata = ad.AnnData(X=counts_data, obs=metadata) + # Add the center id to be accessible within the local states + adata.uns["center_id"] = center_id + return adata diff --git a/fedpydeseq2_datasets/constants.py b/fedpydeseq2_datasets/constants.py new file mode 100644 index 0000000..1c8e482 --- /dev/null +++ b/fedpydeseq2_datasets/constants.py @@ -0,0 +1,29 @@ +from typing import Literal +from typing import cast + +TCGADatasetNames = Literal[ + "TCGA-LUAD", + "TCGA-PAAD", + "TCGA-BRCA", + "TCGA-COAD", + "TCGA-LUSC", + "TCGA-READ", + "TCGA-SKCM", + "TCGA-PRAD", + "TCGA-NSCLC", + "TCGA-CRC", +] + +TCGA_DATASET_NAMES = [ + cast(TCGADatasetNames, dataset) + for dataset in [ + "TCGA-LUAD", + "TCGA-PAAD", + "TCGA-BRCA", + "TCGA-COAD", + "TCGA-LUSC", + "TCGA-READ", + "TCGA-SKCM", + "TCGA-PRAD", + ] +] diff --git a/fedpydeseq2_datasets/create_reference_dds.py b/fedpydeseq2_datasets/create_reference_dds.py new file mode 100644 index 0000000..a14321a --- /dev/null +++ b/fedpydeseq2_datasets/create_reference_dds.py @@ -0,0 +1,367 @@ +import pickle as pkl +from inspect import signature +from pathlib import Path +from typing import Any + +import numpy as np +import pandas as pd +from loguru import logger +from pydeseq2.dds import DeseqDataSet + +from fedpydeseq2_datasets.constants import TCGADatasetNames +from fedpydeseq2_datasets.utils import get_experiment_id +from fedpydeseq2_datasets.utils import get_ground_truth_dds_name + +ALLOWED_DESIGN_FACTORS_TCGA = {"stage", "gender", "CPE"} +ALLOWED_CONTINUOUS_FACTORS_TCGA = {"CPE"} + + +def setup_tcga_ground_truth_dds( + processed_data_path: str | Path, + dataset_name: TCGADatasetNames = "TCGA-LUAD", + small_samples: bool = False, + small_genes: bool = False, + only_two_centers: bool = False, + design_factors: str | list[str] = "stage", + continuous_factors: list[str] | None = None, + reference_dds_ref_level: tuple[str, ...] | None = ("stage", "Advanced"), + force: bool = False, + heterogeneity_method: str | None = None, + heterogeneity_method_param: float | None = None, + pooled: bool = True, + default_refit_cooks: bool = False, + **pydeseq2_kwargs: Any, +): + """Set the ground truth DeseqDataSet for the TCGA dataset. + + This function is given the path to the processed data. + Then it preprocesses the data and initializes the DeseqDataSet. + WARNING: by default, the cooks outliers are NOT refitted. + Afterward, it performs the Deseq2 pipeline. + Finally, it saves the pooled_dds and all the local_dds on the disk. + The local dds is simply the restriction of the global dds to the center id, for + each center. 
+
+
+    The file structure for the processed data is the following (result
+    from the `setup_tcga_dataset` function):
+
+    ```
+    <processed_data_path>
+    ├── centers_data
+    │   └── tcga
+    │       └── <experiment_id>
+    │           └── center_0
+    │               ├── counts_data.csv
+    │               └── metadata.csv
+    └── pooled_data
+        └── tcga
+            └── <experiment_id>
+                ├── counts_data.csv
+                └── metadata.csv
+    ```
+
+    The file structure output for the processed data is the following:
+
+    ```
+    <processed_data_path>
+    ├── centers_data
+    │   └── tcga
+    │       └── <experiment_id>
+    │           └── center_0
+    │               ├── counts_data.csv
+    │               ├── metadata.csv
+    │               ├── ground_truth_dds.pkl         # if pooled
+    │               └── ground_truth_dds-center.pkl  # if not pooled
+    └── pooled_data
+        └── tcga
+            └── <experiment_id>
+                ├── counts_data.csv
+                ├── metadata.csv
+                └── ground_truth_dds.pkl             # if pooled
+    ```
+
+    Parameters
+    ----------
+    processed_data_path : str or Path
+        Path to processed data.
+    dataset_name : TCGADatasetNames
+        The dataset to preprocess, by default "TCGA-LUAD".
+    small_samples : bool
+        If True, only preprocess a small subset of the data, by default False.
+        This small subset is composed of 10 samples per center.
+    small_genes : bool
+        If True, only preprocess a small subset of the data features (genes),
+        by default False. This small subset is composed of 100 genes.
+    only_two_centers : bool
+        If True, split the data in two centers only, by default False.
+    design_factors : str or list
+        The design factors.
+    continuous_factors : list[str] or None
+        The continuous design factors.
+    reference_dds_ref_level : tuple or None
+        The reference level for the design factor.
+    force : bool
+        If True, force the setup of the dataset even if the metadata already exists.
+        Default is False.
+    heterogeneity_method : str or None
+        The method to use to generate heterogeneity in the dataset. If None, no
+        heterogeneity is generated. Default is None.
+    heterogeneity_method_param : float or None
+        The parameter of the heterogeneity method. Default is None.
+    pooled : bool
+        If True, we compute the pooled dds, and then restrict it to the
+        center to get the local dds. If False, we compute the local dds
+        from the center data only. Default is True.
+    default_refit_cooks : bool
+        If True, refit the cooks outliers. Default is False.
+    **pydeseq2_kwargs
+        Additional arguments to pass to the pydeseq2 and fed-pydeseq2 classes and
+        strategies.
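+
+    Examples
+    --------
+    A minimal sketch; the path, dataset and design factor are illustrative:
+
+    >>> setup_tcga_ground_truth_dds(
+    ...     processed_data_path="data/processed",
+    ...     dataset_name="TCGA-LUAD",
+    ...     design_factors="stage",
+    ... )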
+ + """ + processed_data_path = Path(processed_data_path).resolve() + logger.info(f"Design factors in setup_tcga_ground_truth_dds: {design_factors}") + + refit_cooks = pydeseq2_kwargs.get("refit_cooks", default_refit_cooks) + + experiment_id = get_experiment_id( + dataset_name, + small_samples, + small_genes, + only_two_centers, + design_factors, + continuous_factors, + heterogeneity_method=heterogeneity_method, + heterogeneity_method_param=heterogeneity_method_param, + **pydeseq2_kwargs, + ) + + pooled_data_path = processed_data_path / "pooled_data" / "tcga" / experiment_id + + ground_truth_dds_name = get_ground_truth_dds_name( + reference_dds_ref_level, refit_cooks=refit_cooks, pooled=pooled + ) + + pooled_dds_file_path = pooled_data_path / f"{ground_truth_dds_name}.pkl" + + # Check that all centers dds files were generated + metadata = pd.read_csv( + processed_data_path / "pooled_data" / "tcga" / experiment_id / "metadata.csv", + index_col=0, + ) + + if pooled and (not pooled_dds_file_path.exists() or force): + # -- Process the data and initialize the DeseqDataSet + dds_file_name = get_ground_truth_dds_name( + reference_dds_ref_level, refit_cooks=refit_cooks, pooled=True + ) + + pooled_dds_file_path = pooled_data_path / f"{dds_file_name}.pkl" + + # We pass the default refit_cooks to the setup_ground_truth_dds function + # It is overwritten by the refit_cooks parameter if it is passed + # as a pydesq2_kwargs + setup_ground_truth_dds_kwargs = {"refit_cooks": refit_cooks, **pydeseq2_kwargs} + + _setup_ground_truth_dds( + pooled_data_path, + pooled_dds_file_path, + design_factors, + continuous_factors, + reference_dds_ref_level, + **setup_ground_truth_dds_kwargs, + ) + + for center_id in metadata.center_id.unique(): + center_data_path = ( + processed_data_path + / "centers_data" + / "tcga" + / experiment_id + / f"center_{center_id}" + ) + center_dds_file_path = center_data_path / f"{ground_truth_dds_name}.pkl" + if not center_dds_file_path.exists() or force: + if pooled: + # In that case we need to reprocess the data + _setup_local_ground_truth_dds( + pooled_dds_file_path, + center_data_path.parent, + metadata, + ) + break + # Else, build the local dds from the center data only + try: + _setup_ground_truth_dds( + center_data_path, + center_dds_file_path, + design_factors, + continuous_factors, + reference_dds_ref_level, + **{"refit_cooks": refit_cooks, **pydeseq2_kwargs}, + ) + except ValueError as e: + logger.warning( + f"Error while setting up the local dds for center {center_id}: " + f"{e}, will set None for this center" + ) + with open(center_dds_file_path, "wb") as file: + pkl.dump(None, file) + + +def _setup_local_ground_truth_dds( + pooled_dds_file_path: Path, + center_data_path: Path, + metadata: pd.DataFrame, +): + """Set the local ground truth DeseqDataSet for the TCGA dataset. + + This function is given the path to the pooled_dds and the path to the center data. + Then it loads the pooled_dds and initializes the local DeseqDataSet for each center. + Finally, it saves the local_dds on the disk. + + For the file structure description, see the `setup_tcga_ground_truth_dds` function. + + Parameters + ---------- + pooled_dds_file_path : Path + Path to the pooled_dds. + center_data_path : Path + Path to the center data. + metadata : pd.DataFrame + Metadata of the pooled data. 
+ """ + with open(pooled_dds_file_path, "rb") as file: + pooled_dds = pkl.load(file) + + for k in metadata.center_id.unique(): + local_reference_dds = pooled_dds[pooled_dds.obs.center_id == k].copy() + path = center_data_path / f"center_{k}" + path.mkdir(parents=True, exist_ok=True) + with open(path / pooled_dds_file_path.name, "wb") as file: + pkl.dump(local_reference_dds, file) + + +def _setup_ground_truth_dds( + data_path: Path, + output_file_path: Path, + design_factors: str | list[str], + continuous_factors: list[str] | None, + reference_dds_ref_level: tuple[str, ...] | None, + **pydeseq2_kwargs, +): + """Process the data and initialize the DeseqDataSet. + + This function is given the path to the pooled data. + Then it loads the data and initializes the DeseqDataSet. + Afterward, it performs the Deseq2 pipeline. + Finally, it saves the pooled_dds on the disk. + + For the file structure description, see the `setup_tcga_ground_truth_dds` function. + + Parameters + ---------- + data_path : Path + Path to the data necessary to initialize the DeseqDataSet. + We require that the data path be a directory containing the following files: + - counts_data.csv + - metadata.csv + + output_file_path : Path + Path to save the DeseqDataSet. + It is expected to be a pkl file. + + design_factors : str or list + The design factors. + + continuous_factors : list[str] or None + The list of continuous factors. Factors not in this list will be considered + as categorical. + + reference_dds_ref_level : tuple or None + The reference level for the design factor. + + **pydeseq2_kwargs + Additional arguments to pass to the pydeseq2 and fed-pydeseq2 classes and + strategies. + + """ + counts_data = pd.read_csv( + data_path / "counts_data.csv", + index_col=0, + ) + metadata = pd.read_csv( + data_path / "metadata.csv", + index_col=0, + ) + if "center_id" in counts_data.columns: + counts_data.drop(columns="center_id", inplace=True) + + dds_kwargs = { + parameter_name: parameter_value + for parameter_name, parameter_value in pydeseq2_kwargs.items() + if parameter_name in signature(DeseqDataSet).parameters + } + + ref_level = ( + list(reference_dds_ref_level) if reference_dds_ref_level is not None else None + ) + pooled_dds = DeseqDataSet( + counts=counts_data, + metadata=metadata, + design_factors=design_factors, + ref_level=ref_level, + continuous_factors=continuous_factors, + **dds_kwargs, + ) + + # log the dds parameters + logger.info("Creating a DeseqDataSet with the following parameters:") + logger.info(f"Counts path { data_path / 'counts_data.csv'}") + logger.info(f"Metadata path { data_path / 'metadata.csv'}") + logger.info(f"Design factors {design_factors}") + logger.info(f"Continuous factors {continuous_factors}") + logger.info(f"Reference level {ref_level}") + logger.info(f"PyDESeq2 kwargs {pydeseq2_kwargs}") + + # -- Perform the Deseq2 pipeline. 
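+    # The explicit calls below mirror the stages of DeseqDataSet.deseq2() one
+    # by one, so that each step can be logged and inspected individually.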
+
+    # Compute DESeq2 normalization factors using the Median-of-ratios method
+    pooled_dds.fit_size_factors()
+    # Fit an independent negative binomial model per gene
+    pooled_dds.fit_genewise_dispersions()
+    # Fit a parameterized trend curve for dispersions, of the form
+    # f(\mu) = \alpha_1/\mu + a_0
+    pooled_dds.fit_dispersion_trend()
+    disp_function_type = pooled_dds.uns["disp_function_type"]
+    logger.info(f"Dispersion function type: {disp_function_type}")
+    # Compute prior dispersion variance
+    pooled_dds.fit_dispersion_prior()
+    # Refit genewise dispersions a posteriori (shrinks estimates towards trend curve)
+    pooled_dds.fit_MAP_dispersions()
+    # Fit log-fold changes (in natural log scale)
+    pooled_dds.fit_LFC()
+
+    pooled_dds.calculate_cooks()
+
+    if pooled_dds.refit_cooks:
+        logger.info("DESeq2: Refitting cooks outliers")
+        pooled_dds.refit()
+        # Here, we must change the replaced genes to a boolean array
+        # As AnnData does not support the series version of replaced
+        # defined in deseq2 (issues when copying)
+        if pooled_dds.obsm["replaceable"].sum() == 0:
+            pooled_dds.varm["replaced"] = np.zeros((pooled_dds.n_vars,), dtype=bool)
+
+    # -- Save the pooled_dds
+
+    output_file_path.parent.mkdir(parents=True, exist_ok=True)
+
+    logger.info(f"Saving the pooled dds at {output_file_path}")
+    with open(output_file_path, "wb") as file:
+        pkl.dump(pooled_dds, file)
diff --git a/fedpydeseq2_datasets/download_data/README.md b/fedpydeseq2_datasets/download_data/README.md
new file mode 100644
index 0000000..69b7c81
--- /dev/null
+++ b/fedpydeseq2_datasets/download_data/README.md
@@ -0,0 +1,217 @@
+# Data download
+
+Repository for downloading the raw data necessary to run the tests and experiments.
+
+This directory contains a [snakemake](https://snakemake.readthedocs.io/) pipeline for downloading RNA-seq data from [RECOUNT3](https://rna.recount.bio/),
+tumor purity metadata for TCGA from the [Systematic pan-cancer analysis of tumour purity](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4671203/) paper,
+and cleaned clinical metadata for TCGA from the [An Integrated TCGA Pan-Cancer Clinical Data Resource to Drive High-Quality Survival Outcome Analytics](https://www.sciencedirect.com/science/article/pii/S0092867418302290#app2)
+paper.
+
+
+## Setup
+The only configuration needed is the list of projects to be downloaded, specified in [config/config.yaml](config/config.yaml).
+The gene nomenclature is "ENSEMBL".
+By default the pipeline downloads the data necessary to run the experiments of the paper,
+that is the following TCGA cohorts:
+```yaml
+datasets :
+  - LUAD
+  - LUSC
+  - PAAD
+  - COAD
+  - BRCA
+  - PRAD
+  - READ
+  - SKCM
+```
+
+
+## Execution
+For execution, you will need to clone this repository
+and go to the directory containing this README, that is `download_data`.
+
+Note that this pipeline requires a working installation of `conda`,
+and requires access to certain `R` packages through `curl`.
+
+### Directly running the full pipeline with the `fedpydeseq2-download-data` command
+If you want to run the pipeline directly, you can use the script which is available in the distribution: `fedpydeseq2-download-data`
+
+
+
+```bash
+fedpydeseq2-download-data
+```
+
+By default, this script downloads the data into the `data/raw` directory at the root of the GitHub repo.
+
+To change the location of the raw data download, add the following option:
+```bash
+fedpydeseq2-download-data --raw_data_output_path <path to raw data>
+```
+
+If you only want the LUAD dataset, add the `--only_luad` flag.
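+
+Flags can be combined; for instance, to fetch only the LUAD cohort into a custom directory:
+
+```bash
+fedpydeseq2-download-data --only_luad --raw_data_output_path <path to raw data>
+```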
+
+You can pass the `conda` activation path as an argument as well, for example:
+
+```bash
+fedpydeseq2-download-data --raw_data_output_path <path to raw data> --conda_activate_path /opt/miniconda/bin/activate
+```
+
+
+
+
+If you encounter errors, we recommend you use the step by step protocol below.
+
+### A step by step run of the pipeline
+
+
+To run the pipeline you will need to create a snakemake conda environment:
+```bash
+conda create -c conda-forge -c bioconda -n snakemake snakemake
+conda activate snakemake
+conda install -c conda-forge mamba
+```
+
+To launch the pipeline:
+```bash
+snakemake --cores all --resources download_concurrent=3 --use-conda
+```
+- `--cores all` specifies to use all available cores for parallelization.
+- `--resources download_concurrent=3` means that only 3 download scripts can be run in parallel per physical host. A download script has to be run for each project in the config. It is limited due to some weird behavior of the recount3 R package, and because the download speed is the bottleneck for this rule.
+- `--use-conda` means that each rule will use the corresponding conda env, as specified in [workflow/envs/](workflow/envs/). The envs will be created on the first pipeline run, which can take some time.
+
+### Options
+See the [snakemake documentation](https://snakemake.readthedocs.io/en/stable/executing/cli.html) for all useful command line arguments.
+
+## Details on the `fedpydeseq2-download-data` script
+
+### Overview
+
+This script, `download_data.py`, is part of the `fedpydeseq2_datasets` package and is designed to download data using Snakemake workflows. It sets up a temporary environment, configures the necessary paths, and runs Snakemake to download the specified datasets.
+
+### Prerequisites
+
+- Python 3.7+
+- Conda
+- Snakemake
+
+### Usage
+
+#### Command Line Arguments
+
+- `--only_luad`: Optional flag to download only the LUAD dataset.
+- `--raw_data_output_path`: Optional argument to specify the path to the raw data output directory.
+- `--conda_activate_path`: Optional argument to specify the path to the Conda activate script.
+
+#### Example Commands
+
+1. **Download all datasets**:
+   ```sh
+   fedpydeseq2-download-data
+   ```
+
+2. **Download only the LUAD dataset**:
+   ```sh
+   fedpydeseq2-download-data --only_luad
+   ```
+
+3. **Specify a custom raw data output path**:
+   ```sh
+   fedpydeseq2-download-data --raw_data_output_path /path/to/raw_data
+   ```
+
+4. **Specify a custom Conda activate script path**:
+   ```sh
+   fedpydeseq2-download-data --conda_activate_path /path/to/conda_activate.sh
+   ```
+
+### Script Details
+
+
+#### `download_data` Function
+
+This function handles the main logic for downloading data.
+
+##### Parameters
+
+- `config_path`: The path to the configuration file.
+- `download_data_directory`: The path to the download data directory.
+- `raw_data_output_path`: The path to the raw data output directory.
+- `snakemake_env_name`: The name of the Snakemake environment.
+- `conda_activate_path`: The path to the Conda activate script (optional).
+
+#### `create_conda_env` Function
+
+This function creates a Conda environment based on the provided environment file.
+
+##### Parameters
+
+- `env_file`: The path to the environment file.
+- `env_prefix`: The prefix (location) where the Conda environment will be created.
+
+#### `main` Function
+
+This function parses command line arguments and calls the `download_data` function with the appropriate parameters.
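+
+A minimal sketch of how `main` might tie these pieces together. The argparse wiring and the import path of `download_data` are assumptions for illustration, not the exact implementation:
+
+```python
+import argparse
+
+# Assumed import path: `download_data` is the function documented above.
+from fedpydeseq2_datasets.download_data.download_data import download_data
+
+
+def main():
+    """Parse the CLI flags documented above and delegate to download_data."""
+    parser = argparse.ArgumentParser(description="Download fedpydeseq2 raw data")
+    parser.add_argument("--only_luad", action="store_true")
+    parser.add_argument("--raw_data_output_path", default=None)
+    parser.add_argument("--conda_activate_path", default=None)
+    args = parser.parse_args()
+
+    # Use the LUAD-only configuration when requested (see the
+    # "Configuration Files" section below).
+    config_name = "config_luad.yaml" if args.only_luad else "config.yaml"
+    download_data(
+        config_path=f"config/{config_name}",
+        raw_data_output_path=args.raw_data_output_path,
+        conda_activate_path=args.conda_activate_path,
+    )
+```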
+
+### Configuration Files
+
+- `config/config.yaml`: General configuration file for downloading all datasets.
+- `config/config_luad.yaml`: Configuration file for downloading only the LUAD dataset.
+
+### Example Workflow
+
+1. **Set up the environment**:
+   - The script creates a temporary directory and copies the specified download data directory to it.
+   - It reads the configuration file and updates it with the raw data output path.
+
+2. **Create Conda environment**:
+   - The script creates a Conda environment using the `snakemake_env.yaml` file.
+
+3. **Run Snakemake**:
+   - The script runs Snakemake to download the data, using the specified number of cores and resources.
+
+4. **Clean up**:
+   - The script removes the temporary directory after the download is complete.
+
+### Error Handling
+
+- The script prints an error message if any exception occurs during the execution.
+- It ensures that the temporary directory is cleaned up even if an error occurs.
+
+### Notes
+
+- Ensure that Conda is installed and properly configured on your system.
+- The script assumes that Snakemake is available in the Conda environment specified by `snakemake_env.yaml`.
+
+
+## References
+
+The data downloaded here has mainly been obtained from TCGA and processed by the following
+works.
+
+[1] Aran D, Sirota M, Butte AJ.
+    Systematic pan-cancer analysis of tumour purity.
+    Nat Commun. 2015 Dec 4;6:8971.
+    doi: 10.1038/ncomms9971.
+    Erratum in: Nat Commun. 2016 Feb 05;7:10707.
+    doi: 10.1038/ncomms10707.
+    PMID: 26634437; PMCID: PMC4671203.
+
+
+[2] Jianfang Liu, Tara Lichtenberg, Katherine A. Hoadley, Laila M. Poisson, Alexander J. Lazar, Andrew D. Cherniack, Albert J. Kovatich, Christopher C. Benz, Douglas A. Levine, Adrian V. Lee, Larsson Omberg, Denise M. Wolf, Craig D. Shriver, Vesteinn Thorsson et al.
+    An Integrated TCGA Pan-Cancer Clinical Data Resource to Drive High-Quality Survival Outcome Analytics,
+    Cell,
+    Volume 173, Issue 2, 2018, Pages 400-416.e11,
+    ISSN 0092-8674.
+
+
+[3] Wilks C, Zheng SC, Chen FY, Charles R, Solomon B, Ling JP, Imada EL,
+    Zhang D, Joseph L, Leek JT, Jaffe AE, Nellore A, Collado-Torres L,
+    Hansen KD, Langmead B (2021).
+    "recount3: summaries and queries for
+    large-scale RNA-seq expression and splicing."
+    _Genome Biol_.
+    doi:10.1186/s13059-021-02533-6.
diff --git a/fedpydeseq2_datasets/download_data/__init__.py b/fedpydeseq2_datasets/download_data/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/fedpydeseq2_datasets/download_data/assets/centers.csv b/fedpydeseq2_datasets/download_data/assets/centers.csv new file mode 100644 index 0000000..ad933b1 --- /dev/null +++ b/fedpydeseq2_datasets/download_data/assets/centers.csv @@ -0,0 +1,266 @@ +,TSS Code,Source Site,Country,State,Region +0,5,Indivumed,Germany,Other,Europe +1,18,Princess Margaret Hospital (Canada),Canada,Other,Canada +2,21,Fox Chase Cancer Center,USA,PA,Northeast +3,22,Mayo Clinic - Rochester,USA,MN,Midwest +4,2A,Memorial Sloan Kettering Cancer Center,USA,NY,Northeast +5,2J,Mayo Clinic,USA,MN,Midwest +6,2L,Technical University of Munich,Germany,Other,Europe +7,33,Johns Hopkins,USA,MD,South +8,34,University of Pittsburgh,USA,PA,Northeast +9,35,Cureline,USA,CA,West +10,37,Cureline,USA,CA,West +11,38,UNC,USA,NC,South +12,39,MSKCC,USA,NY,Northeast +13,3A,Moffitt Cancer Center,USA,FL,South +14,3C,Columbia University,USA,NY,Northeast +15,3E,Columbia University,USA,NY,Northeast +16,3L,Albert Einstein Medical Center,USA,PA,Northeast +17,3N,Greenville Health System,USA,SC,South +18,43,Christiana Healthcare,USA,DE,South +19,44,Christiana Healthcare,USA,DE,South +20,46,St. Joseph's Medical Center (MD),USA,MD,South +21,49,Johns Hopkins,USA,MD,South +22,4B,Mary Bird Perkins Cancer Center - Our Lady of the Lake,USA,LA,South +23,4H,"Proteogenex, Inc.",USA,CA,West +24,4L,"Proteogenex, Inc.",USA,CA,West +25,4N,Mary Bird Perkins Cancer Center - Our Lady of the Lake,USA,LA,South +26,4T,Duke University,USA,NC,South +27,50,University of Pittsburgh,USA,PA,Northeast +28,51,UNC,USA,NC,South +29,52,University of Miami,USA,FL,South +30,53,University of Miami,USA,FL,South +31,55,International Genomics Consortium,USA,AZ,West +32,56,International Genomics Consortium,USA,AZ,West +33,58,Thoraxklinik at University Hospital Heidelberg,Germany,Other,Europe +34,5L,University of Sao Paulo,Brazil,Other,Other +35,5M,University of Sao Paulo,Brazil,Other,Other +36,5T,Holy Cross,USA,FL,South +37,60,Roswell Park,USA,NY,Northeast +38,62,Thoraxklinik at University Hospital Heidelberg,Germany,Other,Europe +39,63,Ontario Institute for Cancer Research,Canada,Other,Canada +40,64,Fox Chase,USA,PA,Northeast +41,66,Indivumed,Germany,Other,Europe +42,67,St Joseph's Medical Center (MD),USA,MD,South +43,68,Washington University - Cleveland Clinic,USA,OH,Midwest +44,69,Washington University - Cleveland Clinic,USA,OH,Midwest +45,70,ILSBio,Canada,Other,Canada +46,71,ILSBio,Canada,Other,Canada +47,73,Roswell Park,USA,NY,Northeast +48,75,Ontario Institute for Cancer Research (OICR),Canada,Other,Canada +49,77,Prince Charles Hospital,Australia,Other,Other +50,78,Prince Charles Hospital,Australia,Other,Other +51,80,Ontario Institute for Cancer Research (OICR)/Ottawa,Canada,Other,Canada +52,83,CHI-Penrose Colorado,USA,CO,West +53,85,Asterand,USA,MI,Midwest +54,86,Asterand,USA,MI,Midwest +55,90,ABS - IUPUI,USA,IN,Midwest +56,91,ABS - IUPUI,USA,IN,Midwest +57,92,Washington University - St. Louis,USA,MO,Midwest +58,93,Washington University - St. 
Louis,USA,MO,Midwest +59,94,Washington University - Emory,USA,GA,South +60,95,Washington University - Emory,USA,GA,South +61,96,Washington University - NYU,USA,NY,Northeast +62,97,Washington University - NYU,USA,NY,Northeast +63,98,Washington University - Alabama,USA,AL,South +64,99,Washington University - Alabama,USA,AL,South +65,A1,UCSF,USA,CA,West +66,A2,Walter Reed,USA,WA,West +67,A6,Christiana Healthcare,USA,DE,South +68,A7,Christiana Healthcare,USA,DE,South +69,A8,Indivumed,Germany,Other,Europe +70,AA,Indivumed,Germany,Other,Europe +71,AC,International Genomics Consortium,USA,AZ,West +72,AD,International Genomics Consortium,USA,AZ,West +73,AF,Christiana Healthcare,USA,DE,South +74,AG,Indivumed,Germany,Other,Europe +75,AH,International Genomics Consortium,USA,AZ,West +76,AM,Cureline,USA,CA,West +77,AN,Cureline,USA,CA,West +78,AO,MSKCC,USA,NY,Northeast +79,AQ,UNC,USA,NC,South +80,AR,Mayo,USA,MN,Midwest +81,AU,St. Joseph's Medical Center-(MD),USA,MD,South +82,AY,UNC,USA,NC,South +83,AZ,University of Pittsburgh,USA,PA,Northeast +84,B6,Duke,USA,NC,South +85,BF,Cureline,USA,CA,West +86,BH,University of Pittsburgh,USA,PA,Northeast +87,BM,UNC,USA,NC,South +88,C8,ILSBio,Canada,Other,Canada +89,CA,ILSBio,Canada,Other,Canada +90,CH,Indivumed,Germany,Other,Europe +91,CI,University of Pittsburgh,USA,PA,Northeast +92,CK,Harvard,USA,MA,Northeast +93,CL,Harvard,USA,MA,Northeast +94,CM,MSKCC,USA,NY,Northeast +95,D3,MD Anderson,USA,TX,South +96,D5,Greater Poland Cancer Center,Poland,Other,Europe +97,D8,Greater Poland Cancer Center,Poland,Other,Europe +98,D9,Greater Poland Cancer Center,Poland,Other,Europe +99,DA,Yale,USA,CT,Northeast +100,DC,MSKCC,USA,NY,Northeast +101,DM,University Of Michigan,USA,MI,Midwest +102,DT,ILSBio,Canada,Other,Canada +103,DY,University Of Michigan,USA,MI,Midwest +104,E2,Roswell Park,USA,NY,Northeast +105,E9,Asterand,USA,MI,Midwest +106,EB,Asterand,USA,MI,Midwest +107,EE,University of Sydney,Australia,Other,Other +108,EF,Cureline,USA,CA,West +109,EI,Greater Poland Cancer Center,Poland,Other,Europe +110,EJ,University of Pittsburgh,USA,PA,Northeast +111,ER,University of Pittsburgh,USA,PA,Northeast +112,EW,University of Miami,USA,FL,South +113,F2,UNC,USA,NC,South +114,F4,Asterand,USA,MI,Midwest +115,F5,Asterand,USA,MI,Midwest +116,FB,Asterand,USA,MI,Midwest +117,FC,Asterand,USA,MI,Midwest +118,FR,University of North Carolina,USA,NC,South +119,FS,Essen,Germany,Other,Europe +120,FW,International Genomics Consortium,USA,AZ,West +121,G4,Roswell Park,USA,NY,Northeast +122,G5,Roswell Park,USA,NY,Northeast +123,G9,Roswell Park,USA,NY,Northeast +124,GF,ABS - IUPUI,USA,IN,Midwest +125,GI,ABS - IUPUI,USA,IN,Midwest +126,GM,MD Anderson,USA,TX,South +127,GN,Roswell,USA,NY,Northeast +128,H6,Christiana Healthcare,USA,DE,South +129,H8,ABS - IUPUI,USA,IN,Midwest +130,H9,ABS - IUPUI,USA,IN,Midwest +131,HC,International Genomics Consortium,USA,AZ,West +132,HI,Fox Chase,USA,PA,Northeast +133,HN,Ontario Institute for Cancer Research (OICR),Canada,Other,Canada +134,HR,Ontario Institute for Cancer Research (OICR),Canada,Other,Canada +135,HV,National Cancer Center Korea,Korea,Other,Other +136,HZ,International Genomics Consortium,USA,AZ,West +137,IB,Alberta Health Services,Canada,Other,Canada +138,IH,University of Miami,USA,FL,South +139,J1,ABS - Lahey Clinic,USA,MA,Northeast +140,J2,ABS - Lahey Clinic,USA,MA,Northeast +141,J4,ABS - Lahey Clinic,USA,MA,Northeast +142,JL,ABS - Research Metrics Pakistan,Pakistan,Other,Other +143,KK,MD Anderson Cancer Center,USA,TX,South +144,L1,Hartford,USA,CT,Northeast 
+145,L3,Gundersen Lutheran Health System,USA,WI,Midwest +146,L4,Gundersen Lutheran Health System,USA,WI,Midwest +147,L9,Candler,USA,GA,South +148,LA,Candler,USA,GA,South +149,LB,Candler,USA,GA,South +150,LD,Hartford Hospital,USA,CT,Northeast +151,LH,Hartford Hospital,USA,CT,Northeast +152,LL,Candler,USA,GA,South +153,LQ,Gundersen Lutheran Health System,USA,WI,Midwest +154,M7,University of North Carolina,USA,NC,South +155,M8,Ontario Institute for Cancer Research (OICR),Canada,Other,Canada +156,MF,University of Minnesota,USA,MN,Midwest +157,MG,BLN - Baylor,USA,TX,South +158,MN,BLN - Baylor,USA,TX,South +159,MP,Washington University - Mayo Clinic,USA,MN,Midwest +160,MS,University of Minnesota,USA,MN,Midwest +161,NC,Washington University - CHUV,Switzerland,Other,Europe +162,NH,Candler,USA,GA,South +163,NJ,Washington University - Rush University,USA,IL,Midwest +164,NK,Washington University - Rush University,USA,IL,Midwest +165,O1,Washington University - CALGB,USA,MO,South +166,O2,Washington University - CALGB,USA,MO,South +167,OD,Saint Mary's Health Care,Other,Other,Other +168,OE,Saint Mary's Health Care,Other,Other,Other +169,OK,Mount Sinai School of Medicine,USA,NY,Northeast +170,OL,University of Chicago,USA,IL,Midwest +171,PE,Fox Chase,USA,PA,Northeast +172,PL,Institute of Human Virology Nigeria,Nigeria,Other,Other +173,PZ,ABS - Lahey Clinic,USA,MA,Northeast +174,Q3,University of Oklahoma HSC,USA,OK,South +175,QB,Emory University,USA,GA,South +176,QG,BLN - Baylor,USA,TX,South +177,QL,University of Chicago,USA,IL,Midwest +178,QU,Harvard Beth Israel,USA,MA,Northeast +179,RB,Emory University,USA,GA,South +180,RL,St. Joseph's Hospital AZ,USA,AZ,West +181,RP,St. Joseph's Hospital AZ,USA,AZ,West +182,S2,Albert Einstein Medical Center,USA,PA,Northeast +183,S3,Albert Einstein Medical Center,USA,PA,Northeast +184,S4,University of Chicago,USA,IL,Midwest +185,SS,Medical College of Georgia,USA,GA,South +186,SU,Global Bioclinical-Moldova,Moldova,Other,Europe +187,T9,Molecular Response,USA,CA,West +188,TK,Global BioClinical - Georgia,USA,GA,South +189,TP,Maine Medical Center,USA,ME,Northeast +190,UL,Boston Medical Center,USA,MA,Northeast +191,US,Garvan Institute of Medical Research,Australia,Other,Other +192,UU,Mary Bird Perkins Cancer Center - Our Lady of the Lake,USA,LA,South +193,V1,University of California San Francisco,USA,CA,West +194,VP,Washington University,USA,MI,South +195,W3,John Wayne Cancer Center,USA,CA,West +196,W8,Greenville Health System,USA,SC,South +197,WE,Norfolk and Norwich Hospital,UK,Other,Europe +198,WS,University of Kansas,USA,KS,Midwest +199,WT,University of Kansas,USA,KS,Midwest +200,WW,Wake Forest University,USA,NC,South +201,X4,Institute for Medical Research,Other,Other,Other +202,XA,University of Minnesota,USA,MN,Midwest +203,XC,Albert Einstein Medical Center,USA,PA,Northeast +204,XD,Providence Portland Medical Center,USA,OR,West +205,XJ,University of Kansas,USA,KS,Midwest +206,XK,Mayo Clinic Arizona,USA,AZ,West +207,XN,University of Sao Paulo,Brazil,Other,Other +208,XQ,University of Sao Paulo,Brazil,Other,Other +209,XV,Capital Biosciences,USA,MD,South +210,XX,Spectrum Health,USA,MI,Midwest +211,Y6,University of Arizona,USA,AZ,West +212,YB,Spectrum Health,USA,MI,Midwest +213,YD,Spectrum Health,USA,MI,Midwest +214,YG,University of Puerto Rico,USA,PR,South +215,YL,PROCURE Biobank,Canada,Other,Canada +216,YY,Roswell Park,USA,NY,Northeast +217,Z2,IDI-IRCCS,Italy,Other,Europe +218,Z5,Cureline,USA,CA,West +219,Z7,John Wayne Cancer Center,USA,CA,West +220,DK,Memorial Sloan Kettering 
Cancer Center,USA,NY,Northeast +221,FD,BLN – University of Chicago,USA,IL,Midwest +222,UY,University of California San Francisco,USA,CA,West +223,4Z,Barretos Cancer Hospital,Brazil,Other,Other +224,XF,University of Southern California,USA,CA,West +225,YF,University of Puerto Rico ,USA,PR,South +226,G2,MD Anderson,USA,TX,South +227,ZF,University of Sheffield,UK,Other,Europe +228,HQ,Ontario Institute for Cancer Research,Canada,Other,Canada +229,H4,Medical College of Georgia,USA,GA,South +230,GV,BLN - Cleveland Clinic,USA,OH,Midwest +231,E7,Asterand,USA,MI,Midwest +232,PQ,University of Colorado Denver,USA,CO,West +233,C4,Indivumed,Germany,Other,Europe +234,GU,UT Southwestern Medical Center at Dallas,USA,TX,South +235,CU,UNC,USA,NC,South +236,BT,University of Pittsburgh,USA,PA,Northeast +237,CF,ILSBio,Canada,Other,Canada +238,BL,Christiana Healthcare,USA,DE,South +239,E5,Roswell Park,USA,NY,Northeast +240,GC,International Genomics Consortium,USA,AZ,West +241,K4,ABS - Lahey Clinic,USA,MA,Northeast +242,2F,Erasmus MC,Netherlands,Other,Europe +243,GD,ABS - IUPUI,USA,IN,Midwest +244,LC,Hartford,USA,CT,Northeast +245,SY,University Hospital Motol,Czech Republic,Other,Europe +246,5N,University Hospital Erlangen,Germany,Other,Europe +247,KQ,Cornell Medical College,USA,NY,Northeast +248,R3,CHI-Penrose Colorado,USA,CO,West +249,FJ,BLN - Baylor,USA,TX,South +250,S5,University of Oklahoma HSC,USA,OK,South +251,FT,BLN – University of Miami,USA,FL,South +252,YC,Spectrum Health,USA,MI,Midwest +253,LT,Gundersen Lutheran Health System,USA,WI,Midwest +254,MV,University of Minnesota,USA,MN,Midwest +255,6A,University of Kansas,USA,KS,Midwest +256,79,Ontario Institute for Cancer Research,Canada,Other,Canada +257,V7,Medical College of Georgia,USA,GA,South +258,RU,Northwestern University,USA,IL,Midwest +259,J9,Melbourne Health,Australia,Other,Other +260,YJ,Stanford University,USA,CA,West +261,ZG,University Medical Center Hamburg-Eppendorf,Germany,Other,Europe +262,KC,Cornell Medical College,USA,NY,Northeast +263,VN,NCI Urologic Oncology Branch,USA,MD,South +264,YH,Stanford University,USA,CA,West diff --git a/fedpydeseq2_datasets/download_data/assets/ncomms9971-s2.xlsx b/fedpydeseq2_datasets/download_data/assets/ncomms9971-s2.xlsx new file mode 100644 index 0000000..a0b13b4 Binary files /dev/null and b/fedpydeseq2_datasets/download_data/assets/ncomms9971-s2.xlsx differ diff --git a/fedpydeseq2_datasets/download_data/config/config.yaml b/fedpydeseq2_datasets/download_data/config/config.yaml new file mode 100644 index 0000000..72cd2b5 --- /dev/null +++ b/fedpydeseq2_datasets/download_data/config/config.yaml @@ -0,0 +1,9 @@ +datasets : + - LUAD + - LUSC + - PAAD + - COAD + - BRCA + - PRAD + - READ + - SKCM diff --git a/fedpydeseq2_datasets/download_data/config/config_luad.yaml b/fedpydeseq2_datasets/download_data/config/config_luad.yaml new file mode 100644 index 0000000..b5dd8d1 --- /dev/null +++ b/fedpydeseq2_datasets/download_data/config/config_luad.yaml @@ -0,0 +1,2 @@ +datasets : + - LUAD diff --git a/fedpydeseq2_datasets/download_data/config/config_tmp.yaml b/fedpydeseq2_datasets/download_data/config/config_tmp.yaml new file mode 100644 index 0000000..68f386c --- /dev/null +++ b/fedpydeseq2_datasets/download_data/config/config_tmp.yaml @@ -0,0 +1,10 @@ +datasets : + - LUAD + - LUSC + - PAAD + - COAD + - BRCA + - PRAD + - READ + - SKCM +output_path : /Users/umarteau/Projects/fedpydeseq2/fed-pydeseq2-datasets/data/raw diff --git a/fedpydeseq2_datasets/download_data/download_data.py 
b/fedpydeseq2_datasets/download_data/download_data.py
new file mode 100644
index 0000000..7d4b49d
--- /dev/null
+++ b/fedpydeseq2_datasets/download_data/download_data.py
@@ -0,0 +1,166 @@
+import argparse
+import shutil
+import subprocess
+import tempfile
+from pathlib import Path
+
+import yaml  # type: ignore
+
+
+def download_data(
+    config_path: str | Path,
+    download_data_directory: str | Path,
+    raw_data_output_path: str | Path,
+    snakemake_env_name: str,
+    conda_activate_path: str | Path | None = None,
+):
+    """
+    Download the data.
+
+    Parameters
+    ----------
+    config_path : str or Path
+        The path to the configuration file.
+    download_data_directory : str or Path
+        The path to the download data directory.
+    raw_data_output_path : str or Path
+        The path to the raw data output directory.
+    snakemake_env_name : str
+        The name of the Snakemake environment.
+    conda_activate_path : str or Path or None, optional
+        The path to the conda activate script, by default None.
+    """
+    temp_dir = tempfile.mkdtemp()
+    try:
+        # Copy the specified directory to the temporary directory
+        tmp_download_data_path = Path(temp_dir, "download_data")
+        shutil.copytree(download_data_directory, tmp_download_data_path)
+
+        # Open the configuration file
+        with open(config_path) as f:
+            config = yaml.safe_load(f)
+
+        # Add a field "output_path" to the configuration with the raw data output path
+        config["output_path"] = str(raw_data_output_path)
+        config_file = Path(tmp_download_data_path, "config", "config.yaml")
+        config_file.parent.mkdir(parents=True, exist_ok=True)
+        with open(config_file, "w") as f:
+            yaml.dump(config, f)
+
+        print("Config file", config_file)
+
+        # Create a conda env
+        # Make an envs directory in the temporary directory
+        envs_dir = Path(temp_dir, "envs")
+        envs_dir.mkdir(parents=True, exist_ok=True)
+
+        env_prefix = Path(envs_dir, snakemake_env_name)
+
+        create_conda_env(
+            env_file=Path(tmp_download_data_path, "snakemake_env.yaml"),
+            env_prefix=env_prefix,
+        )
+
+        # Shell snippet that makes `conda activate` available in the subshell
+        access_conda_command = (
+            f"""
+            cd {tmp_download_data_path}
+            echo {tmp_download_data_path}
+            conda init bash
+            if [ -f ~/.bashrc ]; then
+                . ~/.bashrc
+            fi
+            if [ -f ~/.bash_profile ]; then
+                . ~/.bash_profile
+            fi
+            """
+            if conda_activate_path is None
+            else f"""
+            . {conda_activate_path}
+            """
+        )
+        command = f"""
+        {access_conda_command}
+        conda activate {env_prefix}
+        cd {tmp_download_data_path}
+        snakemake --cores all --resources download_concurrent=3 \
+            --use-conda
+        """
+
+        subprocess.run(command, shell=True, check=True, executable="/bin/bash")
+
+    except Exception as e:  # noqa: BLE001
+        print(f"An error occurred: {e}")
+    finally:
+        # Clean up the temporary directory
+        shutil.rmtree(temp_dir)
+
+
+def create_conda_env(env_file: str | Path, env_prefix: str | Path):
+    """
+    Create a Conda environment.
+
+    Parameters
+    ----------
+    env_file : str or Path
+        The path to the environment file.
+    env_prefix : str or Path
+        The prefix (location) where the Conda environment will be created.
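+
+    Examples
+    --------
+    A minimal sketch (the paths are illustrative):
+
+    >>> create_conda_env(
+    ...     env_file="fedpydeseq2_datasets/download_data/snakemake_env.yaml",
+    ...     env_prefix="/tmp/envs/snakemake_env",
+    ... )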
+
+    """
+    try:
+        # Create the Conda environment
+        create_env_cmd = (
+            f"yes | conda env create --prefix {env_prefix} --file {env_file}"
+        )
+        subprocess.run(create_env_cmd, shell=True, check=True, executable="/bin/bash")
+        print(f"Conda environment created successfully at '{env_prefix}'.")
+    except subprocess.CalledProcessError as e:
+        print(f"An error occurred while creating the Conda environment: {e}")
+
+
+def main():
+    """Parse the command line arguments and download the data."""
+    parser = argparse.ArgumentParser(description="Download the data.")
+    parser.add_argument(
+        "--only_luad", action="store_true", help="Only download the LUAD dataset"
+    )
+    parser.add_argument(
+        "--raw_data_output_path",
+        type=str,
+        required=False,
+        help="Path to the raw data output directory",
+    )
+    parser.add_argument(
+        "--conda_activate_path",
+        type=str,
+        help="Path to the conda activate script",
+        required=False,
+    )
+    args = parser.parse_args()
+
+    if args.only_luad:
+        config_path = Path(__file__).parent / "config" / "config_luad.yaml"
+    else:
+        config_path = Path(__file__).parent / "config" / "config.yaml"
+
+    if args.conda_activate_path is not None:
+        conda_activate_path = Path(args.conda_activate_path)
+    else:
+        conda_activate_path = None
+
+    if args.raw_data_output_path is None:
+        raw_data_output_path = Path(__file__).parent.parent.parent / "data/raw"
+    else:
+        raw_data_output_path = Path(args.raw_data_output_path)
+    download_data_directory = Path(__file__).parent
+
+    download_data(
+        config_path=config_path,
+        download_data_directory=download_data_directory,
+        raw_data_output_path=raw_data_output_path,
+        snakemake_env_name="snakemake_env",
+        conda_activate_path=conda_activate_path,
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/fedpydeseq2_datasets/download_data/snakemake_env.yaml b/fedpydeseq2_datasets/download_data/snakemake_env.yaml
new file mode 100644
index 0000000..4455a06
--- /dev/null
+++ b/fedpydeseq2_datasets/download_data/snakemake_env.yaml
@@ -0,0 +1,7 @@
+channels:
+  - conda-forge
+  - defaults
+  - bioconda
+dependencies:
+  - snakemake=8.20.3
+  - mamba=1.5.10
diff --git a/fedpydeseq2_datasets/download_data/workflow/Snakefile b/fedpydeseq2_datasets/download_data/workflow/Snakefile
new file mode 100644
index 0000000..828d0f8
--- /dev/null
+++ b/fedpydeseq2_datasets/download_data/workflow/Snakefile
@@ -0,0 +1,24 @@
+from snakemake.utils import min_version
+
+##### set minimum snakemake version #####
+min_version("7.30.0")
+
+##### setup report #####
+configfile: "config/config.yaml"
+report: "report/workflow.rst"
+
+##### load rules #####
+wildcard_constraints:
+    dataset="[A-Za-z_]+",
+    output_path=".*"
+
+include: "rules/common.smk"
+include: "rules/download_data.smk"
+include: "rules/parquet_check_data.smk"
+include: "rules/move_data.smk"
+include: "rules/check_csv_data.smk"
+
+##### target rules #####
+rule all:
+    input:
+        get_output
diff --git a/fedpydeseq2_datasets/download_data/workflow/envs/python.yaml b/fedpydeseq2_datasets/download_data/workflow/envs/python.yaml
new file mode 100644
index 0000000..641c187
--- /dev/null
+++ b/fedpydeseq2_datasets/download_data/workflow/envs/python.yaml
@@ -0,0 +1,9 @@
+channels:
+  - conda-forge
+  - defaults
+  - bioconda
+dependencies:
+  - python = 3.10
+  - pandas = 1.5.3
+  - pyarrow = 12.0.1
+  - openpyxl = 3.1.2
diff --git a/fedpydeseq2_datasets/download_data/workflow/envs/recount3.yaml b/fedpydeseq2_datasets/download_data/workflow/envs/recount3.yaml
new file mode 100644
index 0000000..0ec328a
--- /dev/null
+++ b/fedpydeseq2_datasets/download_data/workflow/envs/recount3.yaml
@@ -0,0 +1,11 @@
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - r-base = 4.3.1
+  - bioconductor-snapcount = 1.12.0
+  - bioconductor-recount = 1.26.0
+  - bioconductor-recount3 = 1.10.2
+  - r-data.table = 1.14.8
+  - r-dbplyr = 2.3.4
diff --git a/fedpydeseq2_datasets/download_data/workflow/rules/check_csv_data.smk b/fedpydeseq2_datasets/download_data/workflow/rules/check_csv_data.smk
new file mode 100644
index 0000000..5ddd928
--- /dev/null
+++ b/fedpydeseq2_datasets/download_data/workflow/rules/check_csv_data.smk
@@ -0,0 +1,35 @@
+rule csvize_check_tcga_tumor_purity:
+    input:
+        "assets/ncomms9971-s2.xlsx",
+    output:
+        "results/ncomms9971-s2.csv",
+        touch("results/ncomms9971-s2/csv.done")
+    conda:
+        "../envs/python.yaml"
+    log:
+        "logs/ncomms9971-s2/csv.log"
+    script:
+        "../scripts/csvize_check_tcga_tumor_purity.py"
+
+rule csvize_check_tcga_cleaned_clinical:
+    input:
+        "results/1-s2.0-S0092867418302290-mmc1.xlsx",
+    output:
+        "results/1-s2.0-S0092867418302290-mmc1.csv",
+        touch("results/1-s2.0-S0092867418302290-mmc1/csv.done")
+    conda:
+        "../envs/python.yaml"
+    log:
+        "logs/1-s2.0-S0092867418302290-mmc1/csv.log"
+    script:
+        "../scripts/csvize_check_tcga_clinical_data.py"
+
+rule check_recount3_metadata_tcga:
+    input:
+        "results/{dataset}/metadata.tsv.gz",
+    output:
+        "results/{dataset}/checked/metadata.tsv.gz",
+    log:
+        "logs/{dataset}/check_recount3_metadata.log"
+    script:
+        "../scripts/check_recount3_metadata.py"
diff --git a/fedpydeseq2_datasets/download_data/workflow/rules/common.smk b/fedpydeseq2_datasets/download_data/workflow/rules/common.smk
new file mode 100644
index 0000000..f62951d
--- /dev/null
+++ b/fedpydeseq2_datasets/download_data/workflow/rules/common.smk
@@ -0,0 +1,28 @@
+from snakemake.utils import validate
+
+validate(config, "../schemas/config.schema.yaml")
+
+TCGA_DATASETS = ["ACC", "BLCA", "BRCA", "CESC", "CHOL", "COAD", "DLBC", "ESCA", "GBM",
+"HNSC", "KICH", "KIRC", "KIRP", "LAML", "LGG", "LIHC", "LUAD", "LUSC", "MESO", "OV",
+"PAAD", "PCPG", "PRAD", "READ", "SARC", "SKCM", "STAD", "TGCT", "THCA", "THYM", "UCEC",
+"UCS", "UVM"]
+
+def get_output(wildcards):
+    # Collect the final target files for every dataset listed in the config
+    files = []
+    datasets = config['datasets']
+    output_path = config['output_path']
+    for dataset in datasets:
+        if dataset.upper() in TCGA_DATASETS:
+            files += [
+                f"{output_path}/tcga/{dataset}/recount3_metadata.tsv.gz",
+                f"{output_path}/tcga/{dataset}/Counts_raw.parquet",
+            ]
+        else:
+            raise ValueError(f"Config Error\n\tThe dataset '{dataset}' from the config is not a supported TCGA project.")
+
+    files += [
+        f"{output_path}/tcga/tumor_purity_metadata.csv",
+        f"{output_path}/tcga/cleaned_clinical_metadata.csv",
+        f"{output_path}/tcga/centers.csv"
+    ]
+    return files
diff --git a/fedpydeseq2_datasets/download_data/workflow/rules/download_data.smk b/fedpydeseq2_datasets/download_data/workflow/rules/download_data.smk
new file mode 100644
index 0000000..54e2353
--- /dev/null
+++ b/fedpydeseq2_datasets/download_data/workflow/rules/download_data.smk
@@ -0,0 +1,20 @@
+rule download_recount3_data:
+    output:
+        "results/{dataset}/Counts_raw.tsv.gz",
+        "results/{dataset}/metadata.tsv.gz",
+        "results/{dataset}/gene_names.tsv.gz",
+    retries: 3
+    resources: download_concurrent=1
+    conda:
+        "../envs/recount3.yaml"
+    log:
+        "logs/{dataset}/download.log",
+    script:
+        "../scripts/download_recount3_cohort.R"
+
+rule download_cleaned_tcga_clinical_data:
+    output:
+        "results/1-s2.0-S0092867418302290-mmc1.xlsx",
+    retries: 3
+    shell:
+        "wget -P results/ https://ars.els-cdn.com/content/image/1-s2.0-S0092867418302290-mmc1.xlsx"
diff --git a/fedpydeseq2_datasets/download_data/workflow/rules/move_data.smk b/fedpydeseq2_datasets/download_data/workflow/rules/move_data.smk
new file mode 100644
index 0000000..23b6978
--- /dev/null
+++ b/fedpydeseq2_datasets/download_data/workflow/rules/move_data.smk
@@ -0,0 +1,53 @@
+output_path = config["output_path"]
+
+rule move_recount3_metadata_tcga:
+    input:
+        metadata = "results/{dataset}/checked/metadata.tsv.gz",
+    output:
+        f"{output_path}"+"/tcga/{dataset}/recount3_metadata.tsv.gz",
+    shell:
+        """
+        mv {input.metadata} {output}
+        """
+
+
+rule move_clinical_data_tcga:
+    input:
+        clinical_metadata = "results/1-s2.0-S0092867418302290-mmc1.csv",
+        clinical_metadata_done = "results/1-s2.0-S0092867418302290-mmc1/csv.done",
+    output:
+        f"{output_path}"+"/tcga/cleaned_clinical_metadata.csv",
+    shell:
+        """
+        mv {input.clinical_metadata} {output}
+        """
+
+rule move_tumor_purity_data_tcga:
+    input:
+        tumor_purity_metadata = "results/ncomms9971-s2.csv",
+        tumor_purity_metadata_done = "results/ncomms9971-s2/csv.done",
+    output:
+        f"{output_path}"+"/tcga/tumor_purity_metadata.csv",
+    shell:
+        """
+        mv {input.tumor_purity_metadata} {output}
+        """
+
+rule move_counts_tcga:
+    input:
+        counts = "results/{dataset}/Counts_raw.parquet",
+        parquet_done = "results/{dataset}/raw_parquet.done",
+    output:
+        f"{output_path}"+"/tcga/{dataset}/Counts_raw.parquet",
+    shell:
+        """
+        mv {input.counts} {output}
+        """
+
+rule copy_centers_csv:
+    output:
+        f"{output_path}"+"/tcga/centers.csv"
+    shell:
+        """
+        cp assets/centers.csv {output}
+        """
diff --git a/fedpydeseq2_datasets/download_data/workflow/rules/parquet_check_data.smk b/fedpydeseq2_datasets/download_data/workflow/rules/parquet_check_data.smk
new file mode 100644
index 0000000..ad47149
--- /dev/null
+++ b/fedpydeseq2_datasets/download_data/workflow/rules/parquet_check_data.smk
@@ -0,0 +1,13 @@
+rule parquetize_raw_counts:
+    input:
+        "results/{dataset}/Counts_raw.tsv.gz",
+        "results/{dataset}/gene_names.tsv.gz"
+    output:
+        "results/{dataset}/Counts_raw.parquet",
+        touch("results/{dataset}/raw_parquet.done")
+    conda:
+        "../envs/python.yaml"
+    log:
+        "logs/{dataset}/raw_parquet.log"
+    script:
+        "../scripts/parquetize_check_data.py"
diff --git a/fedpydeseq2_datasets/download_data/workflow/schemas/config.schema.yaml b/fedpydeseq2_datasets/download_data/workflow/schemas/config.schema.yaml
new file mode 100644
index 0000000..12f22ea
--- /dev/null
+++ b/fedpydeseq2_datasets/download_data/workflow/schemas/config.schema.yaml
@@ -0,0 +1,21 @@
+$schema: "http://json-schema.org/draft-06/schema#"
+description: snakemake recount3_data configuration file
+properties:
+  datasets:
+    type: array
+    items:
+      type: string
+    minItems: 1
+    uniqueItems: true
+  output_path:
+    type: string
+    minLength: 1
+    pattern: ^[a-zA-Z0-9_/.-]+$
+    description: Path to save the downloaded data
+
+required:
+  - datasets
+  - output_path
diff --git a/fedpydeseq2_datasets/download_data/workflow/scripts/check_recount3_metadata.py b/fedpydeseq2_datasets/download_data/workflow/scripts/check_recount3_metadata.py
new file mode 100644
index 0000000..287aaf2
--- /dev/null
+++ b/fedpydeseq2_datasets/download_data/workflow/scripts/check_recount3_metadata.py
@@ -0,0 +1,40 @@
+# %%
+import sys
+
+import pandas as pd
+
+
+def check_recount3_metadata(input_file: str, output_file: str):
+    """
+    Check the recount3 metadata and re-save it as a compressed TSV file.
+
+    The check asserts that the ``external_id`` and ``tcga.tcga_barcode``
+    columns are present before re-saving the file.
+
+    Parameters
+    ----------
+    input_file : str
+        Path to the input file, containing metadata.
+    output_file : str
+        Path to the output file, where the checked metadata is written as a
+        gzipped TSV.
+
+    """
+    df = pd.read_csv(input_file, sep="\t")
+    # Check the columns
+    assert "external_id" in df.columns
+    assert "tcga.tcga_barcode" in df.columns
+
+    # Save the dataframe to a gzipped TSV file
+    df.to_csv(output_file, index=False, sep="\t", compression="gzip")
+
+    return
+
+
+with open(snakemake.log[0], "w") as f:  # type: ignore # noqa: F821
+    sys.stderr = sys.stdout = f
+    try:
+        input_file = snakemake.input[0]  # type: ignore # noqa: F821
+        output_file = snakemake.output[0]  # type: ignore # noqa: F821
+        check_recount3_metadata(input_file, output_file)
+    except Exception as e:  # noqa: BLE001
+        print(e)
+        sys.exit(1)
+
+# %%
diff --git a/fedpydeseq2_datasets/download_data/workflow/scripts/csvize_check_tcga_clinical_data.py b/fedpydeseq2_datasets/download_data/workflow/scripts/csvize_check_tcga_clinical_data.py
new file mode 100644
index 0000000..336ffc3
--- /dev/null
+++ b/fedpydeseq2_datasets/download_data/workflow/scripts/csvize_check_tcga_clinical_data.py
@@ -0,0 +1,41 @@
+# %%
+import sys
+
+import pandas as pd
+
+
+def csvize_and_check_clinical(input_file: str, output_file: str):
+    """
+    Convert the TCGA clinical data to a CSV file and check its content.
+
+    Parameters
+    ----------
+    input_file : str
+        Path to the input file, containing metadata.
+    output_file : str
+        Path to the output file, containing the metadata in CSV format.
+
+    """
+    df = pd.read_excel(input_file, index_col=0)
+
+    # Check it contains the right columns
+    assert "bcr_patient_barcode" in df.columns
+    assert "gender" in df.columns
+    assert "ajcc_pathologic_tumor_stage" in df.columns
+
+    df.to_csv(output_file, index=False)
+
+    return
+
+
+with open(snakemake.log[0], "w") as f:  # type: ignore # noqa: F821
+    sys.stderr = sys.stdout = f
+    try:
+        input_file = snakemake.input[0]  # type: ignore # noqa: F821
+        output_file = snakemake.output[0]  # type: ignore # noqa: F821
+        csvize_and_check_clinical(input_file, output_file)
+    except Exception as e:  # noqa: BLE001
+        print(e)
+        sys.exit(1)
+
+# %%
diff --git a/fedpydeseq2_datasets/download_data/workflow/scripts/csvize_check_tcga_tumor_purity.py b/fedpydeseq2_datasets/download_data/workflow/scripts/csvize_check_tcga_tumor_purity.py
new file mode 100644
index 0000000..8e37fd2
--- /dev/null
+++ b/fedpydeseq2_datasets/download_data/workflow/scripts/csvize_check_tcga_tumor_purity.py
@@ -0,0 +1,46 @@
+# %%
+import sys
+
+import pandas as pd
+
+
+def csvize_and_check_tumor_purity(input_file: str, output_file: str):
+    """
+    Convert tumor purity metadata to a CSV file and check its content.
+
+    Parameters
+    ----------
+    input_file : str
+        Path to the input file, containing metadata.
+    output_file : str
+        Path to the output file, containing the metadata in CSV format.
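+
+    Examples
+    --------
+    Normally invoked through Snakemake; a standalone sketch with
+    illustrative paths:
+
+    >>> csvize_and_check_tumor_purity(
+    ...     "assets/ncomms9971-s2.xlsx", "results/ncomms9971-s2.csv"
+    ... )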
+ + """ + df = pd.read_excel(input_file) + # Remove first two rows + df = df.iloc[2:] + # Remove last column + df = df.iloc[:, :-1] + # Set first line as header + df.columns = df.iloc[0] + # Remove first line + df = df.iloc[1:] + # Check that the columns are the expected ones + assert "Sample ID" in df.columns + assert "CPE" in df.columns + + df.to_csv(output_file, index=False) + return + + +with open(snakemake.log[0], "w") as f: # type: ignore # noqa: F821 + sys.stderr = sys.stdout = f + try: + input_file = snakemake.input[0] # type: ignore # noqa: F821 + output_file = snakemake.output[0] # type: ignore # noqa: F821 + csvize_and_check_tumor_purity(input_file, output_file) + except Exception as e: # noqa: BLE001 + print(e) + sys.exit(1) + +# %% diff --git a/fedpydeseq2_datasets/download_data/workflow/scripts/download_recount3_cohort.R b/fedpydeseq2_datasets/download_data/workflow/scripts/download_recount3_cohort.R new file mode 100644 index 0000000..520eb52 --- /dev/null +++ b/fedpydeseq2_datasets/download_data/workflow/scripts/download_recount3_cohort.R @@ -0,0 +1,58 @@ +log <- file(snakemake@log[[1]], open = "wt") +sink(log) +sink(log, type = "message") + +library(snapcount) +library(recount3) +library(data.table) + +download_data = function(project, project_info){ + # Download project data + exp = recount3::create_rse( + project_info, + annotation='gencode_v29', + type='gene', + ) + # Save metadata + meta = as.data.frame(colData(exp)) + fwrite( + meta, + file=file.path("results", project, "metadata.tsv.gz"), + sep="\t", + ) + + # Scale coverage counts into read counts + read_counts = as.data.frame(recount3::compute_read_counts(exp)) + assays(exp)$counts = recount3::transform_counts(exp) + + read_counts$gene_id = rownames(read_counts) + read_counts = read_counts[,c('gene_id', colnames(read_counts)[!colnames(read_counts) %in% c('gene_id')])] + + # Save expression data + fwrite( + read_counts, + file=file.path("results", project, paste0("Counts_raw", ".tsv.gz")), + sep="\t", + ) + # Save gene names + fwrite( + data.frame( + gene_name=rowData(exp)[['gene_name']], + gene_id=rowData(exp)[['gene_id']] + ), + file=file.path("results", project, paste0("gene_names.tsv.gz")), + sep="\t", + ) +} + +split_path <- function(x) if (dirname(x)==x) x else c(basename(x),split_path(dirname(x))) +dataset = split_path(snakemake@output[[1]])[2] + +human_projects = recount3::available_projects() +project_info <- subset( + human_projects, + project == toupper(dataset), +) +print(project_info) + +download_data(dataset, project_info) diff --git a/fedpydeseq2_datasets/download_data/workflow/scripts/parquetize_check_data.py b/fedpydeseq2_datasets/download_data/workflow/scripts/parquetize_check_data.py new file mode 100644 index 0000000..4500b6c --- /dev/null +++ b/fedpydeseq2_datasets/download_data/workflow/scripts/parquetize_check_data.py @@ -0,0 +1,45 @@ +# %% +import sys + +import pandas as pd + + +def parquetize_and_check(input_file: str, gene_names: str, output_file: str): + """ + Convert a tabular file to a parquet file and check its content. + + Parameters + ---------- + input_file : str + Path to the input file, containing the counts. + gene_names : str + Path to the file containing the gene names, with two columns: + 'gene_name' corresponding to HGNC and 'gene_id' corresponding to ENSEMBL. + output_file : str + Path to the output file. 
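+
+    Examples
+    --------
+    Normally driven by Snakemake; a standalone sketch with illustrative
+    paths:
+
+    >>> parquetize_and_check(
+    ...     "results/LUAD/Counts_raw.tsv.gz",
+    ...     "results/LUAD/gene_names.tsv.gz",
+    ...     "results/LUAD/Counts_raw.parquet",
+    ... )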
+
+    """
+    df = pd.read_table(input_file, index_col=0)
+    # Use a multiindex to keep both HGNC and Ensembl gene names
+    df.index = pd.MultiIndex.from_frame(pd.read_table(gene_names))
+    df = df.astype("int32")
+    # Check that the number of genes is roughly between 40 000 and 70 000
+    assert 40000 < df.shape[0] < 70000
+    # Check that the number of samples is greater than 10 and less than 1300
+    assert 10 < df.shape[1] < 1300
+    df.to_parquet(output_file)
+    return
+
+
+with open(snakemake.log[0], "w") as f:  # type: ignore # noqa: F821
+    sys.stderr = sys.stdout = f
+    try:
+        input_file = snakemake.input[0]  # type: ignore # noqa: F821
+        gene_names = snakemake.input[1]  # type: ignore # noqa: F821
+        output_file = snakemake.output[0]  # type: ignore # noqa: F821
+        parquetize_and_check(input_file, gene_names, output_file)
+    except Exception as e:  # noqa: BLE001
+        print(e)
+        sys.exit(1)
+
+# %%
diff --git a/fedpydeseq2_datasets/process_and_split_data.py b/fedpydeseq2_datasets/process_and_split_data.py
new file mode 100644
index 0000000..a5ceb5c
--- /dev/null
+++ b/fedpydeseq2_datasets/process_and_split_data.py
@@ -0,0 +1,533 @@
+"""Generate the centers' TCGA datasets.
+
+These functions preprocess the TCGA dataset and split it into centers.
+"""
+import shutil
+from functools import partial
+from pathlib import Path
+from typing import cast
+
+import numpy as np
+import pandas as pd
+from loguru import logger
+
+from fedpydeseq2_datasets.aggregate_raw_data import common_preprocessing_tcga
+from fedpydeseq2_datasets.constants import TCGADatasetNames
+from fedpydeseq2_datasets.utils import get_experiment_id
+from fedpydeseq2_datasets.utils import mix_centers
+
+ALLOWED_DESIGN_FACTORS_TCGA = {"stage", "gender", "CPE"}
+ALLOWED_CONTINUOUS_FACTORS_TCGA = {"CPE"}
+
+
+def setup_tcga_dataset(
+    raw_data_path: str | Path,
+    processed_data_path: str | Path,
+    dataset_name: TCGADatasetNames = "TCGA-LUAD",
+    small_samples: bool = False,
+    small_genes: bool = False,
+    only_two_centers: bool = False,
+    design_factors: str | list[str] = "stage",
+    continuous_factors: list[str] | None = None,
+    heterogeneity_method: str | None = None,
+    heterogeneity_method_param: float | None = None,
+    force: bool = False,
+    **pydeseq2_kwargs,
+):
+    """Load, clean and split a TCGA dataset into clients.
+
+    This function is given the path to the raw data.
+    Afterwards, the natural split (centers) of the dataset is used
+    to separate the data into different dataframes, one per center.
+    Finally, the function saves those dataframes to disk as one CSV file per
+    center. These datasets correspond to the centers' datasets in substrafl.
+
+    The file structure expected as an input is the following:
+
+    ```
+    <raw_data_path>
+    ├── tcga
+    │   ├── COHORT
+    │   │   ├── Counts_raw.parquet
+    │   │   └── recount3_metadata.tsv.gz
+    │   ├── centers.csv
+    │   ├── tumor_purity_metadata.csv
+    │   └── cleaned_clinical_metadata.csv
+    └── ...
+    ```
+
+    The processed data path will be built, filled and checked against the
+    following structure. The cohort is the TCGA cohort, which we collect from
+    the dataset name. The experiment id is generated from the dataset name,
+    the design factors, the continuous factors, and the small samples, small
+    genes and only two centers parameters.
+
+    ```
+    <processed_data_path>
+    ├── tcga
+    │   └── COHORT
+    │       ├── counts.parquet
+    │       └── clinical_data.csv
+    ├── centers_data
+    │   └── tcga
+    │       └── <experiment_id>
+    │           └── center_0
+    │               ├── counts_data.csv
+    │               └── metadata.csv
+    ├── pooled_data
+    │   └── tcga
+    │       └── <experiment_id>
+    │           ├── counts_data.csv
+    │           └── metadata.csv
+    └── ...
+    ```
+
+    Parameters
+    ----------
+    raw_data_path : str or Path
+        Path to raw data.
+    processed_data_path : str or Path
+        Path to processed data.
+    dataset_name : TCGADatasetNames
+        The dataset to preprocess, by default "TCGA-LUAD".
+    small_samples : bool
+        If True, only preprocess a small subset of the data, by default False.
+        This small subset is composed of 10 samples per center (or all of the
+        center's samples if it contains fewer than 10).
+    small_genes : bool
+        If True, only preprocess a small subset of the data features (genes),
+        by default False. This small subset is composed of 100 genes.
+    only_two_centers : bool
+        If True, split the data in two centers only, by default False.
+    design_factors : str or list
+        The design factors.
+    continuous_factors : list[str] or None
+        The continuous design factors. Factors not in this list will be considered
+        as categorical.
+    heterogeneity_method : str or None
+        The method to use to generate heterogeneity in the dataset. If None, no
+        heterogeneity is generated. Default is None.
+    heterogeneity_method_param : float or None
+        The parameter of the heterogeneity method. Default is None.
+    force : bool
+        If True, force the setup of the dataset even if the metadata already exists.
+        Default is False.
+    **pydeseq2_kwargs
+        Additional arguments to pass to the pydeseq2 and fed-pydeseq2 classes and
+        strategies.
+    """
+    raw_data_path = Path(raw_data_path).resolve()
+    processed_data_path = Path(processed_data_path).resolve()
+
+    common_preprocessing_tcga(
+        dataset_name=dataset_name,
+        raw_data_path=raw_data_path,
+        processed_data_path=processed_data_path,
+        force=force,
+    )
+
+    if isinstance(design_factors, str):
+        design_factors = [design_factors]
+
+    experiment_id = get_experiment_id(
+        dataset_name,
+        small_samples,
+        small_genes,
+        only_two_centers,
+        design_factors,
+        continuous_factors,
+        heterogeneity_method=heterogeneity_method,
+        heterogeneity_method_param=heterogeneity_method_param,
+        **pydeseq2_kwargs,
+    )
+
+    logger.info(f"Setting up TCGA dataset: {experiment_id}")
+
+    center_data_path = processed_data_path / "centers_data" / "tcga" / experiment_id
+    first_center_metadata_path = center_data_path / "center_0" / "metadata.csv"
+
+    if not first_center_metadata_path.exists() or force:
+        logger.info(
+            "First center metadata does not exist or force=True. Setting up the "
+            "dataset."
+        )
+        return _setup_tcga_dataset(
+            processed_data_path,
+            dataset_name,
+            small_samples,
+            small_genes,
+            only_two_centers,
+            design_factors,
+            continuous_factors,
+            heterogeneity_method,
+            heterogeneity_method_param,
+            **pydeseq2_kwargs,
+        )
+    # Check if the metadata contains all the design factors
+    logger.info(
+        f"First center metadata exists at {first_center_metadata_path}. "
+        f"Checking if all design factors are present."
+    )
+    metadata = pd.read_csv(first_center_metadata_path, index_col=0)
+    for design_factor in design_factors:
+        if design_factor not in metadata.columns:
+            logger.info(
+                f"Design factor {design_factor} not present in the metadata."
+                f" Setting up the dataset."
+            )
+            return _setup_tcga_dataset(
+                processed_data_path,
+                dataset_name,
+                small_samples,
+                small_genes,
+                only_two_centers,
+                design_factors,
+                continuous_factors,
+                heterogeneity_method,
+                heterogeneity_method_param,
+                **pydeseq2_kwargs,
+            )
+
+
+def _setup_tcga_dataset(
+    processed_data_path: str | Path,
+    dataset_name: TCGADatasetNames = "TCGA-LUAD",
+    small_samples=False,
+    small_genes=False,
+    only_two_centers=False,
+    design_factors: str | list[str] = "stage",
+    continuous_factors: list[str] | None = None,
+    heterogeneity_method: str | None = None,
+    heterogeneity_method_param: float | None = None,
+    **pydeseq2_kwargs,
+):
+    """Load, clean and split a TCGA dataset into clients.
+
+    This is the main function performing these operations.
+
+    This function is given the path to the raw data.
+    Afterwards, the natural split (centers) of the dataset is used
+    to separate the data into different dataframes, one per center.
+    Finally, the function saves those dataframes to disk as one CSV
+    file per center.
+    These datasets correspond to the centers' datasets in substrafl.
+
+    For the file structure description, see the `setup_tcga_dataset` function.
+
+    Parameters
+    ----------
+    processed_data_path : str or Path
+        This path will contain a folder 'centers_data' and 'pooled_data'
+        to store the preprocessed data.
+    dataset_name : TCGADatasetNames
+        The dataset to preprocess, by default "TCGA-LUAD".
+    small_samples : bool
+        If True, only preprocess a small subset of the data, by default False.
+        This small subset is composed of 10 samples per center.
+    small_genes : bool
+        If True, only preprocess a small subset of the data features (genes),
+        by default False. This small subset is composed of 100 genes.
+    only_two_centers : bool
+        If True, split the data in two centers only, by default False.
+    design_factors : str or list
+        The design factors.
+    continuous_factors : list[str] or None
+        The continuous design factors. Factors not in this list will be considered
+        as categorical.
+    heterogeneity_method : str or None
+        The method to use to generate heterogeneity in the dataset. If None, no
+        heterogeneity is generated. Default is None.
+    heterogeneity_method_param : float or None
+        The parameter of the heterogeneity method. Default is None.
+    **pydeseq2_kwargs
+        Additional arguments to pass to the pydeseq2 and fed-pydeseq2 classes and
+        strategies.
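+
+    Examples
+    --------
+    A sketch (assumes `setup_tcga_dataset` has already run
+    `common_preprocessing_tcga`, so that ``<processed_data_path>/tcga/LUAD``
+    is populated; the path is illustrative):
+
+    >>> _setup_tcga_dataset(
+    ...     "data/processed",
+    ...     "TCGA-LUAD",
+    ...     design_factors="stage",
+    ... )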
+ + """ + # Check that the design factors and continuous factors are allowed + if isinstance(design_factors, str): + design_factors = [design_factors] + if continuous_factors is None: + continuous_factors = [] + assert set(design_factors).issubset( + ALLOWED_DESIGN_FACTORS_TCGA + ), f"Design factors should be in {ALLOWED_DESIGN_FACTORS_TCGA}" + assert set(continuous_factors).issubset( + ALLOWED_CONTINUOUS_FACTORS_TCGA + ), f"Continuous factors should be in {ALLOWED_CONTINUOUS_FACTORS_TCGA}" + + processed_data_path = Path(processed_data_path).resolve() + sampling_random_generator = np.random.default_rng(42) + + if small_genes: + n_genes = 100 + if small_samples: + n_samples = 10 + + experiment_id = get_experiment_id( + dataset_name, + small_samples, + small_genes, + only_two_centers, + design_factors, + continuous_factors, + heterogeneity_method, + heterogeneity_method_param, + **pydeseq2_kwargs, + ) + + center_data_path = processed_data_path / "centers_data" / "tcga" / experiment_id + + # -- Process the data + logger.info(f"Processing the data for the TCGA dataset: {experiment_id}") + counts_data, metadata = preprocess_tcga( + processed_data_path=processed_data_path, + dataset_name=dataset_name, + only_two_centers=only_two_centers, + design_factors=design_factors, + heterogeneity_method=heterogeneity_method, + heterogeneity_method_param=heterogeneity_method_param, + ) + + if small_genes: + counts_data = counts_data.sample( + n_genes, axis=1, random_state=sampling_random_generator + ) + + if small_samples: + new_counts_data_list: list[pd.DataFrame] = [] + new_metadata_list: list[pd.DataFrame] = [] + + # -- Split the data + logger.info(f"Saving the data for each center {center_data_path}") + for center_id in metadata.center_id.unique(): + counts_dataframe = counts_data.loc[metadata.center_id == center_id] + metadata_dataframe = metadata.loc[metadata.center_id == center_id] + if small_samples: + categorical_factors = list(set(design_factors) - set(continuous_factors)) + n_levels = len(metadata_dataframe[categorical_factors].drop_duplicates()) + n_samples_per_level = max(n_samples // n_levels, 1) + + def _sampling_function(df_, n_samples_, sampling_rng_): + return df_.sample( + min(len(df_), n_samples_), + random_state=sampling_rng_, + replace=False, + ) + + _partialized_sampling_function = partial( + _sampling_function, + n_samples_=n_samples_per_level, + sampling_rng_=sampling_random_generator, + ) + + metadata_dataframe = ( + metadata_dataframe.groupby(categorical_factors, dropna=False) + .apply( + _partialized_sampling_function, + include_groups=False, + ) + .reset_index(categorical_factors) + ) + + counts_dataframe = counts_dataframe.loc[metadata_dataframe.index] + new_counts_data_list.append(counts_dataframe) + new_metadata_list.append(metadata_dataframe) + path = center_data_path / f"center_{center_id}" + + # delete the folder if it exists + # This avoids having old files in the folder when registering the data + # in substra + if path.exists(): + for file in path.iterdir(): + if file.is_dir(): + if file.name == ".ipynb_checkpoints": + logger.info(f"Removing {file}") + shutil.rmtree(file) + else: + raise ValueError( + f"Unexpected directory in the center folder: {file}" + ) + else: + file.unlink() + path.rmdir() + path.mkdir(parents=True) + + counts_dataframe.to_csv(path / "counts_data.csv") + metadata_dataframe.to_csv(path / "metadata.csv") + + if small_samples: + counts_data = pd.concat(new_counts_data_list) + metadata = pd.concat(new_metadata_list) + # Now save to the pooled data 
folder + pooled_data_path = processed_data_path / "pooled_data" / "tcga" / experiment_id + logger.info(f"Saving the pooled data at {pooled_data_path}") + pooled_data_path.mkdir(parents=True, exist_ok=True) + counts_data.to_csv(pooled_data_path / "counts_data.csv") + metadata.to_csv(pooled_data_path / "metadata.csv") + + +def preprocess_tcga( + processed_data_path: Path, + dataset_name: TCGADatasetNames = "TCGA-LUAD", + only_two_centers=False, + design_factors: str | list[str] = "stage", + heterogeneity_method: str | None = None, + heterogeneity_method_param: float | None = None, +) -> tuple[pd.DataFrame, pd.DataFrame]: + """Preprocess the TCGA dataset. + + If the `stage` design factor is used, the function will binarize the `stage` design + into two categories: `Advanced` and `Non-advanced`. + `Advanced` corresponds to stage `IV`, + and `Non-advanced` corresponds to stages `I`, `II` and `III`. + For the TCGA-PRAD cohort, we do not have the stage information, but we infer the + stage from the `T`, `N` and `M` columns. If the `N` or `M` columns are > 0, + the stage is IV according to: + https://www.cancer.org/cancer/types/prostate-cancer\ + /detection-diagnosis-staging/staging.html + and hence the `Advanced` stage. Otherwise, it is `Non-advanced`. + + + Parameters + ---------- + processed_data_path : Path + Where to find the folder tcga with raw data. + dataset_name: TCGADatasetNames = "TCGA-LUAD", + The dataset to preprocess, by default "TCGA-LUAD". + only_two_centers : bool + If True, split the data in two centers only, by default False. + design_factors : str or list + The design factors. + heterogeneity_method : str or None + The method to use to generate heterogeneity in the dataset. If None, no + heterogeneity is generated. Default is None. + heterogeneity_method_param : float or None + The parameter of the heterogeneity method. Default is None. + + Returns + ------- + counts_data: pd.DataFrame + Processed pooled counts dataset. + metadata: pd.DataFrame + Processed pooled metadata dataset. 
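+
+    Examples
+    --------
+    A sketch (assumes the ``tcga/LUAD`` folder described above has been
+    populated by `common_preprocessing_tcga`; the path is illustrative):
+
+    >>> counts, meta = preprocess_tcga(
+    ...     processed_data_path=Path("data/processed"),
+    ...     dataset_name="TCGA-LUAD",
+    ...     design_factors="stage",
+    ... )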
+ + """ + _, cohort = dataset_name.split("-")[:2] + if cohort in ["NSCLC", "CRC"]: + if cohort == "NSCLC": + dataset_1, dataset_2 = cast(TCGADatasetNames, "TCGA-LUAD"), cast( + TCGADatasetNames, "TCGA-LUSC" + ) + elif cohort == "CRC": + dataset_1, dataset_2 = cast(TCGADatasetNames, "TCGA-COAD"), cast( + TCGADatasetNames, "TCGA-READ" + ) + counts_data_1, metadata_1 = preprocess_tcga( + processed_data_path=processed_data_path, + dataset_name=dataset_1, + only_two_centers=only_two_centers, + design_factors=design_factors, + ) + metadata_1["center_id"] = 0 + counts_data_2, metadata_2 = preprocess_tcga( + processed_data_path=processed_data_path, + dataset_name=dataset_2, + only_two_centers=only_two_centers, + design_factors=design_factors, + ) + metadata_2["center_id"] = 1 + counts_data = pd.concat([counts_data_1, counts_data_2]) + metadata = pd.concat([metadata_1, metadata_2]) + + metadata = mix_centers( + metadata, heterogeneity_method, heterogeneity_method_param + ) + return counts_data, metadata + + data_path = processed_data_path / "tcga" / cohort + path_to_counts = data_path / "counts.parquet" + path_to_metadata = data_path / "clinical_data.csv" + # -- Load the data + counts_data = pd.read_parquet(path_to_counts) + + metadata = pd.read_csv(path_to_metadata, index_col=0) + + # -- Process the metadata + + # If the cohort is PRAD, we need to add the stage information + if dataset_name == "TCGA-PRAD": + + def binarize_stage_prad(metadata_row): + if ( + pd.isna(metadata_row["M"]) + and pd.isna(metadata_row["N"]) + and pd.isna(metadata_row["T"]) + ): + return pd.NA + if (not pd.isna(metadata_row["N"])) and (metadata_row["N"] > 0): + return "Advanced" + elif (not pd.isna(metadata_row["M"])) and (metadata_row["M"] > 0): + return "Advanced" + else: + return "Non-advanced" + + metadata["stage"] = metadata.apply(binarize_stage_prad, axis=1) + + else: + # Binarize the stage + def binarize_stage(stage): + if pd.isna(stage): + return pd.NA + elif stage == 4: + return "Advanced" + else: + return "Non-advanced" + + metadata["stage"] = metadata["stage"].apply(binarize_stage) + + # -- Process the data + cols_to_keep = ( + design_factors.copy() if isinstance(design_factors, list) else [design_factors] + ) + cols_to_keep.append("center_id") + metadata = metadata[cols_to_keep] + + # remove samples with NaN design factor + metadata.dropna(subset=design_factors, inplace=True) + + if only_two_centers: + metadata = _merge_centers(metadata) + + counts_data = counts_data.loc[metadata.index] + + return counts_data, metadata + + +def _merge_centers(metadata: pd.DataFrame) -> pd.DataFrame: + """ + Merge the centers into two centers. 
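+
+    The sorted center ids are split in half: the lower half is mapped to
+    center 0 and the upper half to center 1.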
+
+    Parameters
+    ----------
+    metadata : pd.DataFrame
+        Metadata
+
+    Returns
+    -------
+    metadata : pd.DataFrame
+        Metadata
+    """
+    list_center_ids = np.sort(metadata.center_id.unique())
+    assert len(list_center_ids) > 2, "The dataset must have more than 2 centers"
+    dict_mapping = {list_center_ids[k]: 0 for k in range(len(list_center_ids) // 2)}
+    dict_mapping.update(
+        {
+            list_center_ids[k]: 1
+            for k in range(len(list_center_ids) // 2, len(list_center_ids))
+        }
+    )
+    logger.info(f"Merging centers using the mapping: {dict_mapping}")
+    metadata["center_id"] = metadata["center_id"].map(dict_mapping)
+
+    return metadata
diff --git a/fedpydeseq2_datasets/utils.py b/fedpydeseq2_datasets/utils.py
new file mode 100644
index 0000000..3ed5ef8
--- /dev/null
+++ b/fedpydeseq2_datasets/utils.py
@@ -0,0 +1,435 @@
+import copy
+import pickle
+import re
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+import pandas as pd
+from loguru import logger
+
+from fedpydeseq2_datasets.constants import TCGADatasetNames
+
+IDENTIFYING_PARAMETERS = [
+    "contrast",
+    "refit_cooks",
+    "alt_hypothesis",
+    "lfc_null",
+]
+
+
+def tnm_to_series(tnm_string: str | None) -> pd.Series:
+    """Convert a TNM string into a pandas Series with T, N, and M categories.
+
+    If there are multiple categories for the same letter, the maximum number is used.
+    A typical TNM string looks like 'T1bN2M0'.
+
+    Parameters
+    ----------
+    tnm_string : str or None
+        The TNM string to convert.
+
+    Returns
+    -------
+    pd.Series
+        A pandas Series with the T, N, and M categories.
+    """
+    if pd.isna(tnm_string):
+        return pd.Series({"T": pd.NA, "N": pd.NA, "M": pd.NA})
+
+    assert isinstance(tnm_string, str), "The TNM string must be a string."
+    # Split the string into separate TNM categories
+    tnm_categories = re.findall(r"[TNM]\d+[a-z]*", tnm_string)
+
+    # Initialize the maximum numbers for T, N, and M as None
+    max_t = max_n = max_m = None
+
+    # Iterate over each category
+    for category in tnm_categories:
+        # Extract the category letter and number
+        letter = category[0]
+        number = int(re.search(r"\d+", category).group())  # type: ignore
+
+        # Update the maximum number for the corresponding category
+        if letter == "T":
+            max_t = max(max_t, number) if max_t is not None else number  # type: ignore
+        elif letter == "N":
+            max_n = max(max_n, number) if max_n is not None else number  # type: ignore
+        elif letter == "M":
+            max_m = max(max_m, number) if max_m is not None else number  # type: ignore
+
+    # Convert the maximum numbers into a pandas Series
+    series = pd.Series(
+        {
+            "T": max_t if max_t is not None else pd.NA,
+            "N": max_n if max_n is not None else pd.NA,
+            "M": max_m if max_m is not None else pd.NA,
+        }
+    )
+
+    return series
+
+
+def get_experiment_id(
+    dataset_name: TCGADatasetNames | list[TCGADatasetNames],
+    small_samples: bool,
+    small_genes: bool,
+    only_two_centers: bool,
+    design_factors: str | list[str] = "stage",
+    continuous_factors: list[str] | None = None,
+    heterogeneity_method: str | None = None,
+    heterogeneity_method_param: float | None = None,
+    **pydeseq2_kwargs: Any,
+):
+    """
+    Generate the experiment id.
+
+    Parameters
+    ----------
+    dataset_name : TCGADatasetNames or list[TCGADatasetNames]
+        Dataset name
+    small_samples : bool
+        If True, only preprocess a small subset of the data, by default False.
+        This small subset is composed of 10 samples per center.
+ small_genes : bool + If True, only preprocess a small subset of the data features (genes) + only_two_centers : bool + If True, split the data in two centers only. + design_factors : str or list + The design factors. + continuous_factors : list or None + The continuous design factors. Factors not in this list will be considered + as categorical. + heterogeneity_method : str or None + The method used to generate heterogeneity in the data. + heterogeneity_method_param : float or None + The parameter for the heterogeneity method. + **pydeseq2_kwargs : Any + Additional arguments to pass to pydeseq2. + + Returns + ------- + experiment_id : str + The true dataset name + """ + if isinstance(dataset_name, list): + # Create a string with all the dataset names + # Get the tcga cohorts + all_cohorts = "-".join( + sorted([dataset_name.split("-")[1] for dataset_name in dataset_name]) + ) + full_dataset_name = f"TCGA-{all_cohorts}" + else: + full_dataset_name = dataset_name + if isinstance(design_factors, str): + design_factors = [design_factors] + if continuous_factors is None: + continuous_factors = [] + design_factor_str = "_".join(design_factors) + continuous_factor_str = "_".join(continuous_factors) + small_genes_postfix = "-small-genes" if small_genes else "" + small_samples_postfix = "-small-samples" if small_samples else "" + two_centers_postfix = "-two-centers" if only_two_centers else "" + + pydeseq2_kwargs = copy.deepcopy(pydeseq2_kwargs) + if len(design_factors) > 1: + # Add the contrast to the pydeseq2_kwargs + pydeseq2_kwargs["contrast"] = pydeseq2_kwargs.get("contrast", None) + + # Sort the kwargs alphabetically + parameter_names = sorted(pydeseq2_kwargs.keys()) + parameter_str = "" + for parameter_name in parameter_names: + if parameter_name not in IDENTIFYING_PARAMETERS: + continue + parameter_value = pydeseq2_kwargs[parameter_name] + if parameter_value is None: + parameter_str += f"-default_{parameter_name}" + elif isinstance(parameter_value, list): + parameter_str += f"-{parameter_name}-{'_'.join(parameter_value)}" + else: + parameter_str += f"-{parameter_name}-{parameter_value}" + + heterogeneity_postfix = ( + f"heterogeneity-{heterogeneity_method}" + if heterogeneity_method is not None + else "" + ) + heterogeneity_method_param_postfix = ( + f"-{heterogeneity_method_param}" + if heterogeneity_method_param is not None + else "" + ) + + experiment_id = ( + f"{full_dataset_name}-{design_factor_str}-{continuous_factor_str}" + f"{small_genes_postfix}{small_samples_postfix}{two_centers_postfix}" + f"{heterogeneity_postfix}{heterogeneity_method_param_postfix}{parameter_str}" + ) + experiment_id = experiment_id.strip("-_") # Remove trailing '-' and '_' + + return experiment_id + + +def get_ground_truth_dds_name( + reference_dds_ref_level: tuple[str, ...] | None = ("stage", "Advanced"), + refit_cooks: bool = False, + pooled: bool = True, +) -> str: + """ + Generate the ground truth dds name. + + Parameters + ---------- + reference_dds_ref_level : tuple or None + The reference level for the design factor. + refit_cooks : bool + If True, refit the genes with cooks outliers. + TODO this is now obsolete, we should remove it at the end + TODO (but for compatibility reasons we keep it for now). + pooled : bool + If True, we compute the pooled dds, and then restrict it to the + center to get the local dds. If False, we only compute the local dds + from the center data only. + If not pooled, we add the suffix '-center' to the name. 
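+        For instance, ``get_ground_truth_dds_name(("stage", "Advanced"),
+        pooled=False)`` returns ``"ground_truth_dds-stage_Advanced-center"``.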
+def get_ground_truth_dds_name(
+    reference_dds_ref_level: tuple[str, ...] | None = ("stage", "Advanced"),
+    refit_cooks: bool = False,
+    pooled: bool = True,
+) -> str:
+    """
+    Generate the ground truth dds name.
+
+    Parameters
+    ----------
+    reference_dds_ref_level : tuple or None
+        The reference level for the design factor.
+    refit_cooks : bool
+        If True, refit the genes with Cooks outliers.
+        TODO: this flag is now obsolete; it is kept for compatibility
+        and should eventually be removed.
+    pooled : bool
+        If True, we compute the pooled dds and then restrict it to the
+        center to get the local dds. If False, we compute the local dds
+        from the center data only, and add the suffix '-center' to the name.
+
+    Returns
+    -------
+    ground_truth_dds_name : str
+        The ground truth dds name
+    """
+    ref_level_str = (
+        "_".join(reference_dds_ref_level)
+        if reference_dds_ref_level is not None
+        else "None"
+    )
+    ground_truth_dds_name = f"ground_truth_dds-{ref_level_str}"
+    if refit_cooks:
+        ground_truth_dds_name += "-refit_cooks"
+    if not pooled:
+        ground_truth_dds_name += "-center"
+    return ground_truth_dds_name
+
+
+def get_n_centers(
+    processed_data_path: str | Path,
+    dataset_name: TCGADatasetNames,
+    small_samples: bool = False,
+    small_genes: bool = False,
+    only_two_centers: bool = False,
+    design_factors: str | list[str] = "stage",
+    continuous_factors: list[str] | None = None,
+    heterogeneity_method: str | None = None,
+    heterogeneity_method_param: float | None = None,
+    **pydeseq2_kwargs: Any,
+) -> int:
+    """
+    Get the number of centers in the dataset.
+
+    To do so, we open the file containing the metadata and count the number
+    of unique center ids.
+
+    Parameters
+    ----------
+    processed_data_path : str or Path
+        The path to the processed data.
+
+    dataset_name : TCGADatasetNames
+        The name of the dataset to use.
+
+    small_samples : bool
+        Whether to use a small number of samples. Default is False.
+
+    small_genes : bool
+        Whether to use a small number of genes. Default is False.
+
+    only_two_centers : bool
+        Whether to use only two centers. Default is False.
+
+    design_factors : str or list[str]
+        The design factors to use. Default is "stage".
+
+    continuous_factors : list[str] or None
+        The continuous factors to use. Default is None.
+
+    heterogeneity_method : str or None
+        The method used to define the heterogeneity
+        of the center attribution.
+
+    heterogeneity_method_param : float or None
+        The parameter of the heterogeneity method.
+
+    **pydeseq2_kwargs : Any
+        Additional arguments to pass to pydeseq2 and fed-pydeseq2.
+        For example, the contrast.
+
+    Returns
+    -------
+    n_centers : int
+        The number of centers in the dataset
+
+    """
+    # Get the number of centers
+    processed_data_path = Path(processed_data_path)
+    experiment_id = get_experiment_id(
+        dataset_name=dataset_name,
+        small_samples=small_samples,
+        small_genes=small_genes,
+        only_two_centers=only_two_centers,
+        design_factors=design_factors,
+        continuous_factors=continuous_factors,
+        heterogeneity_method=heterogeneity_method,
+        heterogeneity_method_param=heterogeneity_method_param,
+        **pydeseq2_kwargs,
+    )
+    n_centers = len(
+        np.unique(
+            pd.read_csv(
+                processed_data_path
+                / "pooled_data"
+                / "tcga"
+                / experiment_id
+                / "metadata.csv"
+            )["center_id"]
+        )
+    )
+    return n_centers
+
+
+def get_n_centers_from_subfolders(centers_path: str | Path) -> int:
+    """
+    Get the number of centers from a folder.
+
+    Parameters
+    ----------
+    centers_path : str or Path
+        The path to the folder which must contain
+        center_{i} subfolders
+
+    Returns
+    -------
+    int
+        The number of centers
+    """
+    centers_path = Path(centers_path)
+    n_centers = len([x for x in centers_path.iterdir() if x.name.startswith("center_")])
+    # Check that the subfolders are named center_0, ..., center_{n_centers - 1}
+    assert all((centers_path / f"center_{i}").exists() for i in range(n_centers))
+    return n_centers
+
+
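+# Expected on-disk layout (hypothetical paths, for illustration only):
+#   centers_path/
+#       center_0/
+#       center_1/
+#       center_2/
+# get_n_centers_from_subfolders(centers_path) -> 3
+
+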
+def get_valid_centers_from_subfolders_file(
+    centers_path: str | Path, filename: str, pkl: bool = False
+) -> tuple[int, list[int]]:
+    """
+    Get the number of centers and the list of valid centers from a folder.
+
+    Parameters
+    ----------
+    centers_path : str or Path
+        The path to the folder which must contain
+        center_{i} subfolders
+
+    filename : str
+        The name of the file to check for in the center_{i} subfolders.
+        If the file is not found in a center_{i} subfolder, the center is
+        not considered valid (in practice, this means that a DESeq2 analysis
+        could not be run on the center, for lack of samples in all
+        design factor levels).
+
+    pkl : bool
+        If True, the file is assumed to be a pickle file, and the file is loaded
+        with pickle.load() to check if it is None. If it is None, the center is
+        not considered valid.
+
+    Returns
+    -------
+    n_centers : int
+        The number of centers.
+    valid_centers : list[int]
+        The indices of the valid centers.
+    """
+    centers_path = Path(centers_path)
+    n_centers = get_n_centers_from_subfolders(centers_path)
+    valid_centers = []
+    for i in range(n_centers):
+        if (centers_path / f"center_{i}" / filename).exists():
+            if pkl:
+                with open(centers_path / f"center_{i}" / filename, "rb") as f:
+                    data = pickle.load(f)
+                if data is not None:
+                    valid_centers.append(i)
+            else:
+                valid_centers.append(i)
+    return n_centers, valid_centers
+
+
+def mix_centers(
+    metadata: pd.DataFrame,
+    heterogeneity_method: str | None = None,
+    heterogeneity_method_param: float | None = None,
+):
+    """
+    Mix the centers in the metadata.
+
+    The mixing is controlled by the heterogeneity method and its parameter.
+
+    Parameters
+    ----------
+    metadata : pd.DataFrame
+        The metadata whose centers should be mixed.
+    heterogeneity_method : str or None
+        The method used to generate heterogeneity in the data. The only option
+        supported is 'binomial'.
+    heterogeneity_method_param : float or None
+        The parameter for the heterogeneity method. Should be between 0 and 1. If
+        the value is 0, the centers are not mixed. If the value is 1, the centers
+        are completely mixed.
+
+    Returns
+    -------
+    metadata : pd.DataFrame
+        The metadata with the centers mixed.
+    """
+    if heterogeneity_method is None:
+        logger.info("No heterogeneity method specified. Keeping the centers as is.")
+        return metadata
+    assert (
+        heterogeneity_method_param is not None
+    ), "The heterogeneity method parameter must be specified."
+    assert (
+        0 <= heterogeneity_method_param <= 1
+    ), "The heterogeneity method parameter must be between 0 and 1."
+    np.random.seed(42)
+    assert set(metadata.center_id.unique()) == {
+        0,
+        1,
+    }, "The metadata must contain only two centers."
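+    # With the 'binomial' method (a sketch of the logic implemented below),
+    # each sample of center 0 is reassigned to center 1 with probability p / 2,
+    # and each sample of center 1 is reassigned to center 0 with probability
+    # p / 2, where p = heterogeneity_method_param. Hence p = 0 leaves the
+    # centers untouched and p = 1 reshuffles every sample uniformly at random.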
+    if heterogeneity_method == "binomial":
+        assert heterogeneity_method_param is not None
+        mask_center_0 = metadata.center_id == 0
+        mask_center_1 = metadata.center_id == 1
+        random_binomial_0 = (
+            np.random.binomial(1, heterogeneity_method_param / 2.0, mask_center_0.sum())
+        ).astype(int)
+        random_binomial_1 = (
+            np.random.binomial(
+                1, 1.0 - heterogeneity_method_param / 2.0, mask_center_1.sum()
+            )
+        ).astype(int)
+        metadata["new_center_id"] = np.zeros(len(metadata), dtype=int)
+        metadata.loc[mask_center_0, "new_center_id"] = random_binomial_0
+        metadata.loc[mask_center_1, "new_center_id"] = random_binomial_1
+        cross_table = pd.crosstab(metadata["center_id"], metadata["new_center_id"])
+        logger.info("Mixing centers results")
+        logger.info(cross_table)
+        metadata.drop(columns=["center_id"], inplace=True)
+        metadata.rename(columns={"new_center_id": "center_id"}, inplace=True)
+    else:
+        raise ValueError(
+            f"Unknown heterogeneity method: {heterogeneity_method}"
+        )
+    return metadata
diff --git a/poetry.lock b/poetry.lock
new file mode 100644
index 0000000..9a9f28b
--- /dev/null
+++ b/poetry.lock
@@ -0,0 +1,1493 @@
+# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
+
+[[package]]
+name = "anndata"
+version = "0.10.8"
+description = "Annotated data."
+optional = false
+python-versions = ">=3.9"
+files = [
+    {file = "anndata-0.10.8-py3-none-any.whl", hash = "sha256:1b24934dc2674eaf3072cb7010e187aa2b2f4f0e4cf0a32ffeab5ffebe3b1415"},
+    {file = "anndata-0.10.8.tar.gz", hash = "sha256:b728a33225eeaaefddf6bed546d935c0f06881c9166621b24de3b492b2f406bb"},
+]
+
+[package.dependencies]
+array-api-compat = ">1.4,<1.5 || >1.5"
+exceptiongroup = {version = "*", markers = "python_version < \"3.11\""}
+h5py = ">=3.1"
+natsort = "*"
+numpy = ">=1.23"
+packaging = ">=20.0"
+pandas = ">=1.4,<2.1.0rc0 || >2.1.0rc0,<2.1.2 || >2.1.2"
+scipy = ">1.8"
+
+[package.extras]
+dev = ["pytest-xdist", "setuptools-scm"]
+doc = ["awkward (>=2.0.7)", "ipython", "myst-parser", "nbsphinx", "readthedocs-sphinx-search", "scanpydoc[theme,typehints] (>=0.13.4)", "sphinx (>=4.4)", "sphinx-autodoc-typehints (>=1.11.0)", "sphinx-book-theme (>=1.1.0)", "sphinx-copybutton", "sphinx-design (>=0.5.0)", "sphinx-issues", "sphinxext-opengraph", "zarr"]
+gpu = ["cupy"]
+test = ["awkward (>=2.3)", "boltons", "dask[array,distributed] (>=2022.09.2)", "httpx", "joblib", "loompy (>=3.0.5)", "matplotlib", "openpyxl", "pyarrow", "pytest (>=8.2)", "pytest-cov (>=2.10)", "pytest-memray", "pytest-mock", "scanpy", "scikit-learn", "zarr (<3.0.0a0)"]
+
+[[package]]
+name = "array-api-compat"
+version = "1.9.1"
+description = "A wrapper around NumPy and other array libraries to make them compatible with the Array API standard"
+optional = false
+python-versions = ">=3.9"
+files = [
+    {file = "array_api_compat-1.9.1-py3-none-any.whl", hash = "sha256:41a2703a662832d21619359ddddc5c0449876871f6c01e108c335f2a9432df94"},
+    {file = "array_api_compat-1.9.1.tar.gz", hash = "sha256:17bab828c93c79a5bb8b867145b71fcb889686607c5672b060aef437e0359ea8"},
+]
+
+[package.extras]
+cupy = ["cupy"]
+dask = ["dask"]
+jax = ["jax"]
+numpy = ["numpy"]
+pytorch = ["pytorch"]
+sparse = ["sparse (>=0.15.1)"]
+
+[[package]]
+name = "black"
+version = "24.10.0"
+description = "The uncompromising code formatter."
+optional = false +python-versions = ">=3.9" +files = [ + {file = "black-24.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e6668650ea4b685440857138e5fe40cde4d652633b1bdffc62933d0db4ed9812"}, + {file = "black-24.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1c536fcf674217e87b8cc3657b81809d3c085d7bf3ef262ead700da345bfa6ea"}, + {file = "black-24.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:649fff99a20bd06c6f727d2a27f401331dc0cc861fb69cde910fe95b01b5928f"}, + {file = "black-24.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:fe4d6476887de70546212c99ac9bd803d90b42fc4767f058a0baa895013fbb3e"}, + {file = "black-24.10.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5a2221696a8224e335c28816a9d331a6c2ae15a2ee34ec857dcf3e45dbfa99ad"}, + {file = "black-24.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f9da3333530dbcecc1be13e69c250ed8dfa67f43c4005fb537bb426e19200d50"}, + {file = "black-24.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4007b1393d902b48b36958a216c20c4482f601569d19ed1df294a496eb366392"}, + {file = "black-24.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:394d4ddc64782e51153eadcaaca95144ac4c35e27ef9b0a42e121ae7e57a9175"}, + {file = "black-24.10.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b5e39e0fae001df40f95bd8cc36b9165c5e2ea88900167bddf258bacef9bbdc3"}, + {file = "black-24.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d37d422772111794b26757c5b55a3eade028aa3fde43121ab7b673d050949d65"}, + {file = "black-24.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:14b3502784f09ce2443830e3133dacf2c0110d45191ed470ecb04d0f5f6fcb0f"}, + {file = "black-24.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:30d2c30dc5139211dda799758559d1b049f7f14c580c409d6ad925b74a4208a8"}, + {file = "black-24.10.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1cbacacb19e922a1d75ef2b6ccaefcd6e93a2c05ede32f06a21386a04cedb981"}, + {file = "black-24.10.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1f93102e0c5bb3907451063e08b9876dbeac810e7da5a8bfb7aeb5a9ef89066b"}, + {file = "black-24.10.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ddacb691cdcdf77b96f549cf9591701d8db36b2f19519373d60d31746068dbf2"}, + {file = "black-24.10.0-cp313-cp313-win_amd64.whl", hash = "sha256:680359d932801c76d2e9c9068d05c6b107f2584b2a5b88831c83962eb9984c1b"}, + {file = "black-24.10.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:17374989640fbca88b6a448129cd1745c5eb8d9547b464f281b251dd00155ccd"}, + {file = "black-24.10.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:63f626344343083322233f175aaf372d326de8436f5928c042639a4afbbf1d3f"}, + {file = "black-24.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ccfa1d0cb6200857f1923b602f978386a3a2758a65b52e0950299ea014be6800"}, + {file = "black-24.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:2cd9c95431d94adc56600710f8813ee27eea544dd118d45896bb734e9d7a0dc7"}, + {file = "black-24.10.0-py3-none-any.whl", hash = "sha256:3bb2b7a1f7b685f85b11fed1ef10f8a9148bceb49853e47a294a3dd963c1dd7d"}, + {file = "black-24.10.0.tar.gz", hash = "sha256:846ea64c97afe3bc677b761787993be4991810ecc7a4a937816dd6bddedc4875"}, +] + +[package.dependencies] +click = ">=8.0.0" +mypy-extensions = ">=0.4.3" +packaging = ">=22.0" +pathspec = ">=0.9.0" +platformdirs = ">=2" +tomli = {version = ">=1.1.0", 
markers = "python_version < \"3.11\""} +typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""} + +[package.extras] +colorama = ["colorama (>=0.4.3)"] +d = ["aiohttp (>=3.10)"] +jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] +uvloop = ["uvloop (>=0.15.2)"] + +[[package]] +name = "cfgv" +version = "3.4.0" +description = "Validate configuration and produce human readable error messages." +optional = false +python-versions = ">=3.8" +files = [ + {file = "cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9"}, + {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"}, +] + +[[package]] +name = "click" +version = "8.1.7" +description = "Composable command line interface toolkit" +optional = false +python-versions = ">=3.7" +files = [ + {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, + {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + +[[package]] +name = "contourpy" +version = "1.3.1" +description = "Python library for calculating contours of 2D quadrilateral grids" +optional = false +python-versions = ">=3.10" +files = [ + {file = "contourpy-1.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a045f341a77b77e1c5de31e74e966537bba9f3c4099b35bf4c2e3939dd54cdab"}, + {file = "contourpy-1.3.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:500360b77259914f7805af7462e41f9cb7ca92ad38e9f94d6c8641b089338124"}, + {file = "contourpy-1.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2f926efda994cdf3c8d3fdb40b9962f86edbc4457e739277b961eced3d0b4c1"}, + {file = "contourpy-1.3.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:adce39d67c0edf383647a3a007de0a45fd1b08dedaa5318404f1a73059c2512b"}, + {file = "contourpy-1.3.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:abbb49fb7dac584e5abc6636b7b2a7227111c4f771005853e7d25176daaf8453"}, + {file = "contourpy-1.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0cffcbede75c059f535725c1680dfb17b6ba8753f0c74b14e6a9c68c29d7ea3"}, + {file = "contourpy-1.3.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ab29962927945d89d9b293eabd0d59aea28d887d4f3be6c22deaefbb938a7277"}, + {file = "contourpy-1.3.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:974d8145f8ca354498005b5b981165b74a195abfae9a8129df3e56771961d595"}, + {file = "contourpy-1.3.1-cp310-cp310-win32.whl", hash = "sha256:ac4578ac281983f63b400f7fe6c101bedc10651650eef012be1ccffcbacf3697"}, + {file = "contourpy-1.3.1-cp310-cp310-win_amd64.whl", hash = "sha256:174e758c66bbc1c8576992cec9599ce8b6672b741b5d336b5c74e35ac382b18e"}, + {file = "contourpy-1.3.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:3e8b974d8db2c5610fb4e76307e265de0edb655ae8169e8b21f41807ccbeec4b"}, + {file = "contourpy-1.3.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:20914c8c973f41456337652a6eeca26d2148aa96dd7ac323b74516988bea89fc"}, + {file = "contourpy-1.3.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:19d40d37c1c3a4961b4619dd9d77b12124a453cc3d02bb31a07d58ef684d3d86"}, + {file = "contourpy-1.3.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:113231fe3825ebf6f15eaa8bc1f5b0ddc19d42b733345eae0934cb291beb88b6"}, + {file = "contourpy-1.3.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4dbbc03a40f916a8420e420d63e96a1258d3d1b58cbdfd8d1f07b49fcbd38e85"}, + {file = "contourpy-1.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a04ecd68acbd77fa2d39723ceca4c3197cb2969633836ced1bea14e219d077c"}, + {file = "contourpy-1.3.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c414fc1ed8ee1dbd5da626cf3710c6013d3d27456651d156711fa24f24bd1291"}, + {file = "contourpy-1.3.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:31c1b55c1f34f80557d3830d3dd93ba722ce7e33a0b472cba0ec3b6535684d8f"}, + {file = "contourpy-1.3.1-cp311-cp311-win32.whl", hash = "sha256:f611e628ef06670df83fce17805c344710ca5cde01edfdc72751311da8585375"}, + {file = "contourpy-1.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:b2bdca22a27e35f16794cf585832e542123296b4687f9fd96822db6bae17bfc9"}, + {file = "contourpy-1.3.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:0ffa84be8e0bd33410b17189f7164c3589c229ce5db85798076a3fa136d0e509"}, + {file = "contourpy-1.3.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:805617228ba7e2cbbfb6c503858e626ab528ac2a32a04a2fe88ffaf6b02c32bc"}, + {file = "contourpy-1.3.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ade08d343436a94e633db932e7e8407fe7de8083967962b46bdfc1b0ced39454"}, + {file = "contourpy-1.3.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:47734d7073fb4590b4a40122b35917cd77be5722d80683b249dac1de266aac80"}, + {file = "contourpy-1.3.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2ba94a401342fc0f8b948e57d977557fbf4d515f03c67682dd5c6191cb2d16ec"}, + {file = "contourpy-1.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:efa874e87e4a647fd2e4f514d5e91c7d493697127beb95e77d2f7561f6905bd9"}, + {file = "contourpy-1.3.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1bf98051f1045b15c87868dbaea84f92408337d4f81d0e449ee41920ea121d3b"}, + {file = "contourpy-1.3.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:61332c87493b00091423e747ea78200659dc09bdf7fd69edd5e98cef5d3e9a8d"}, + {file = "contourpy-1.3.1-cp312-cp312-win32.whl", hash = "sha256:e914a8cb05ce5c809dd0fe350cfbb4e881bde5e2a38dc04e3afe1b3e58bd158e"}, + {file = "contourpy-1.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:08d9d449a61cf53033612cb368f3a1b26cd7835d9b8cd326647efe43bca7568d"}, + {file = "contourpy-1.3.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a761d9ccfc5e2ecd1bf05534eda382aa14c3e4f9205ba5b1684ecfe400716ef2"}, + {file = "contourpy-1.3.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:523a8ee12edfa36f6d2a49407f705a6ef4c5098de4f498619787e272de93f2d5"}, + {file = "contourpy-1.3.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ece6df05e2c41bd46776fbc712e0996f7c94e0d0543af1656956d150c4ca7c81"}, + {file = 
"contourpy-1.3.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:573abb30e0e05bf31ed067d2f82500ecfdaec15627a59d63ea2d95714790f5c2"}, + {file = "contourpy-1.3.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a9fa36448e6a3a1a9a2ba23c02012c43ed88905ec80163f2ffe2421c7192a5d7"}, + {file = "contourpy-1.3.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ea9924d28fc5586bf0b42d15f590b10c224117e74409dd7a0be3b62b74a501c"}, + {file = "contourpy-1.3.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5b75aa69cb4d6f137b36f7eb2ace9280cfb60c55dc5f61c731fdf6f037f958a3"}, + {file = "contourpy-1.3.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:041b640d4ec01922083645a94bb3b2e777e6b626788f4095cf21abbe266413c1"}, + {file = "contourpy-1.3.1-cp313-cp313-win32.whl", hash = "sha256:36987a15e8ace5f58d4d5da9dca82d498c2bbb28dff6e5d04fbfcc35a9cb3a82"}, + {file = "contourpy-1.3.1-cp313-cp313-win_amd64.whl", hash = "sha256:a7895f46d47671fa7ceec40f31fae721da51ad34bdca0bee83e38870b1f47ffd"}, + {file = "contourpy-1.3.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:9ddeb796389dadcd884c7eb07bd14ef12408aaae358f0e2ae24114d797eede30"}, + {file = "contourpy-1.3.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:19c1555a6801c2f084c7ddc1c6e11f02eb6a6016ca1318dd5452ba3f613a1751"}, + {file = "contourpy-1.3.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:841ad858cff65c2c04bf93875e384ccb82b654574a6d7f30453a04f04af71342"}, + {file = "contourpy-1.3.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4318af1c925fb9a4fb190559ef3eec206845f63e80fb603d47f2d6d67683901c"}, + {file = "contourpy-1.3.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:14c102b0eab282427b662cb590f2e9340a9d91a1c297f48729431f2dcd16e14f"}, + {file = "contourpy-1.3.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05e806338bfeaa006acbdeba0ad681a10be63b26e1b17317bfac3c5d98f36cda"}, + {file = "contourpy-1.3.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:4d76d5993a34ef3df5181ba3c92fabb93f1eaa5729504fb03423fcd9f3177242"}, + {file = "contourpy-1.3.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:89785bb2a1980c1bd87f0cb1517a71cde374776a5f150936b82580ae6ead44a1"}, + {file = "contourpy-1.3.1-cp313-cp313t-win32.whl", hash = "sha256:8eb96e79b9f3dcadbad2a3891672f81cdcab7f95b27f28f1c67d75f045b6b4f1"}, + {file = "contourpy-1.3.1-cp313-cp313t-win_amd64.whl", hash = "sha256:287ccc248c9e0d0566934e7d606201abd74761b5703d804ff3df8935f523d546"}, + {file = "contourpy-1.3.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:b457d6430833cee8e4b8e9b6f07aa1c161e5e0d52e118dc102c8f9bd7dd060d6"}, + {file = "contourpy-1.3.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cb76c1a154b83991a3cbbf0dfeb26ec2833ad56f95540b442c73950af2013750"}, + {file = "contourpy-1.3.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:44a29502ca9c7b5ba389e620d44f2fbe792b1fb5734e8b931ad307071ec58c53"}, + {file = "contourpy-1.3.1.tar.gz", hash = "sha256:dfd97abd83335045a913e3bcc4a09c0ceadbe66580cf573fe961f4a825efa699"}, +] + +[package.dependencies] +numpy = ">=1.23" + +[package.extras] +bokeh = ["bokeh", "selenium"] +docs = ["furo", "sphinx (>=7.2)", "sphinx-copybutton"] +mypy = ["contourpy[bokeh,docs]", "docutils-stubs", "mypy (==1.11.1)", "types-Pillow"] +test = ["Pillow", "contourpy[test-no-images]", "matplotlib"] +test-no-images = 
["pytest", "pytest-cov", "pytest-rerunfailures", "pytest-xdist", "wurlitzer"] + +[[package]] +name = "cycler" +version = "0.12.1" +description = "Composable style cycles" +optional = false +python-versions = ">=3.8" +files = [ + {file = "cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30"}, + {file = "cycler-0.12.1.tar.gz", hash = "sha256:88bb128f02ba341da8ef447245a9e138fae777f6a23943da4540077d3601eb1c"}, +] + +[package.extras] +docs = ["ipython", "matplotlib", "numpydoc", "sphinx"] +tests = ["pytest", "pytest-cov", "pytest-xdist"] + +[[package]] +name = "distlib" +version = "0.3.9" +description = "Distribution utilities" +optional = false +python-versions = "*" +files = [ + {file = "distlib-0.3.9-py2.py3-none-any.whl", hash = "sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87"}, + {file = "distlib-0.3.9.tar.gz", hash = "sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403"}, +] + +[[package]] +name = "exceptiongroup" +version = "1.2.2" +description = "Backport of PEP 654 (exception groups)" +optional = false +python-versions = ">=3.7" +files = [ + {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, + {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, +] + +[package.extras] +test = ["pytest (>=6)"] + +[[package]] +name = "filelock" +version = "3.16.1" +description = "A platform independent file lock." +optional = false +python-versions = ">=3.8" +files = [ + {file = "filelock-3.16.1-py3-none-any.whl", hash = "sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0"}, + {file = "filelock-3.16.1.tar.gz", hash = "sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435"}, +] + +[package.extras] +docs = ["furo (>=2024.8.6)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2.4.1)"] +testing = ["covdefaults (>=2.3)", "coverage (>=7.6.1)", "diff-cover (>=9.2)", "pytest (>=8.3.3)", "pytest-asyncio (>=0.24)", "pytest-cov (>=5)", "pytest-mock (>=3.14)", "pytest-timeout (>=2.3.1)", "virtualenv (>=20.26.4)"] +typing = ["typing-extensions (>=4.12.2)"] + +[[package]] +name = "fonttools" +version = "4.55.1" +description = "Tools to manipulate font files" +optional = false +python-versions = ">=3.8" +files = [ + {file = "fonttools-4.55.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:c17a6f9814f83772cd6d9c9009928e1afa4ab66210a31ced721556651075a9a0"}, + {file = "fonttools-4.55.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c4d14eecc814826a01db87a40af3407c892ba49996bc6e49961e386cd78b537c"}, + {file = "fonttools-4.55.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8589f9a15dc005592b94ecdc45b4dfae9bbe9e73542e89af5a5e776e745db83b"}, + {file = "fonttools-4.55.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfee95bd9395bcd9e6c78955387554335109b6a613db71ef006020b42f761c58"}, + {file = "fonttools-4.55.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:34fa2ecc0bf1923d1a51bf2216a006de2c3c0db02c6aa1470ea50b62b8619bd5"}, + {file = "fonttools-4.55.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:9c1c48483148bfb1b9ad951133ceea957faa004f6cb475b67e7bc75d482b48f8"}, + {file = "fonttools-4.55.1-cp310-cp310-win32.whl", hash = "sha256:3e2fc388ca7d023b3c45badd71016fd4185f93e51a22cfe4bd65378af7fba759"}, + {file = 
"fonttools-4.55.1-cp310-cp310-win_amd64.whl", hash = "sha256:c4c36c71f69d2b3ee30394b0986e5f8b2c461e7eff48dde49b08a90ded9fcdbd"}, + {file = "fonttools-4.55.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5daab3a55d460577f45bb8f5a8eca01fa6cde43ef2ab943b527991f54b735c41"}, + {file = "fonttools-4.55.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:acf1e80cf96c2fbc79e46f669d8713a9a79faaebcc68e31a9fbe600cf8027992"}, + {file = "fonttools-4.55.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e88a0329f7f88a210f09f79c088fb64f8032fc3ab65e2390a40b7d3a11773026"}, + {file = "fonttools-4.55.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:03105b42259a8a94b2f0cbf1bee45f7a8a34e7b26c946a8fb89b4967e44091a8"}, + {file = "fonttools-4.55.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9af3577e821649879ab5774ad0e060af34816af556c77c6d3820345d12bf415e"}, + {file = "fonttools-4.55.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:34bd5de3d0ad085359b79a96575cd6bd1bc2976320ef24a2aa152ead36dbf656"}, + {file = "fonttools-4.55.1-cp311-cp311-win32.whl", hash = "sha256:5da92c4b637f0155a41f345fa81143c8e17425260fcb21521cb2ad4d2cea2a95"}, + {file = "fonttools-4.55.1-cp311-cp311-win_amd64.whl", hash = "sha256:f70234253d15f844e6da1178f019a931f03181463ce0c7b19648b8c370527b07"}, + {file = "fonttools-4.55.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:9c372e527d58ba64b695f15f8014e97bc8826cf64d3380fc89b4196edd3c0fa8"}, + {file = "fonttools-4.55.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:845a967d3bef3245ba81fb5582dc731f6c2c8417fa211f1068c56893504bc000"}, + {file = "fonttools-4.55.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f03be82bcd4ba4418adf10e6165743f824bb09d6594c2743d7f93ea50968805b"}, + {file = "fonttools-4.55.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c42e935cf146f826f556d977660dac88f2fa3fb2efa27d5636c0b89a60c16edf"}, + {file = "fonttools-4.55.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:96328bf91e05621d8e40d9f854af7a262cb0e8313e9b38e7f3a7f3c4c0caaa8b"}, + {file = "fonttools-4.55.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:291acec4d774e8cd2d8472d88c04643a77a3324a15247951bd6cfc969799b69e"}, + {file = "fonttools-4.55.1-cp312-cp312-win32.whl", hash = "sha256:6d768d6632809aec1c3fa8f195b173386d85602334701a6894a601a4d3c80368"}, + {file = "fonttools-4.55.1-cp312-cp312-win_amd64.whl", hash = "sha256:2a3850afdb0be1f79a1e95340a2059226511675c5b68098d4e49bfbeb48a8aab"}, + {file = "fonttools-4.55.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:0c88d427eaf8bd8497b9051f56e0f5f9fb96a311aa7c72cda35e03e18d59cd16"}, + {file = "fonttools-4.55.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f062c95a725a79fd908fe8407b6ad63e230e1c7d6dece2d5d6ecaf843d6927f6"}, + {file = "fonttools-4.55.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f298c5324c45cad073475146bf560f4110ce2dc2488ff12231a343ec489f77bc"}, + {file = "fonttools-4.55.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f06dbb71344ffd85a6cb7e27970a178952f0bdd8d319ed938e64ba4bcc41700"}, + {file = "fonttools-4.55.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4c46b3525166976f5855b1f039b02433dc51eb635fb54d6a111e0c5d6e6cdc4c"}, + {file = "fonttools-4.55.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = 
"sha256:af46f52a21e086a2f89b87bd941c9f0f91e5f769e1a5eb3b37c912228814d3e5"}, + {file = "fonttools-4.55.1-cp313-cp313-win32.whl", hash = "sha256:cd7f36335c5725a3fd724cc667c10c3f5254e779bdc5bffefebb33cf5a75ecb1"}, + {file = "fonttools-4.55.1-cp313-cp313-win_amd64.whl", hash = "sha256:5d6394897710ccac7f74df48492d7f02b9586ff0588c66a2c218844e90534b22"}, + {file = "fonttools-4.55.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:52c4f4b383c56e1a4fe8dab1b63c2269ba9eab0695d2d8e033fa037e61e6f1ef"}, + {file = "fonttools-4.55.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d83892dafdbd62b56545c77b6bd4fa49eef6ec1d6b95e042ee2c930503d1831e"}, + {file = "fonttools-4.55.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:604d5bf16f811fcaaaec2dde139f7ce958462487565edcd54b6fadacb2942083"}, + {file = "fonttools-4.55.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a3324b92feb5fd084923a8e89a8248afd5b9f9d81ab9517d7b07cc84403bd448"}, + {file = "fonttools-4.55.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:30f8b1ca9b919c04850678d026fc330c19acaa9e3b282fcacc09a5eb3c8d20c3"}, + {file = "fonttools-4.55.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:1835c98df2cf28c86a66d234895c87df7b9325fd079a8019c5053a389ff55d23"}, + {file = "fonttools-4.55.1-cp38-cp38-win32.whl", hash = "sha256:9f202703720a7cc0049f2ed1a2047925e264384eb5cc4d34f80200d7b17f1b6a"}, + {file = "fonttools-4.55.1-cp38-cp38-win_amd64.whl", hash = "sha256:2efff20aed0338d37c2ff58766bd67f4b9607ded61cf3d6baf1b3e25ea74e119"}, + {file = "fonttools-4.55.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:3032d9bf010c395e6eca2851666cafb1f4ecde85d420188555e928ad0144326e"}, + {file = "fonttools-4.55.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0794055588c30ffe25426048e8a7c0a5271942727cd61fc939391e37f4d580d5"}, + {file = "fonttools-4.55.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:13ba980e3ffd3206b8c63a365f90dc10eeec27da946d5ee5373c3a325a46d77c"}, + {file = "fonttools-4.55.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d7063babd7434a17a5e355e87de9b2306c85a5c19c7da0794be15c58aab0c39"}, + {file = "fonttools-4.55.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:ed84c15144015a58ef550dd6312884c9fb31a2dbc31a6467bcdafd63be7db476"}, + {file = "fonttools-4.55.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:e89419d88b0bbfdb55209e03a17afa2d20db3c2fa0d785543c9d0875668195d5"}, + {file = "fonttools-4.55.1-cp39-cp39-win32.whl", hash = "sha256:6eb781e401b93cda99356bc043ababead2a5096550984d8a4ecf3d5c9f859dc2"}, + {file = "fonttools-4.55.1-cp39-cp39-win_amd64.whl", hash = "sha256:db1031acf04523c5a51c3e1ae19c21a1c32bc5f820a477dd4659a02f9cb82002"}, + {file = "fonttools-4.55.1-py3-none-any.whl", hash = "sha256:4bcfb11f90f48b48c366dd638d773a52fca0d1b9e056dc01df766bf5835baa08"}, + {file = "fonttools-4.55.1.tar.gz", hash = "sha256:85bb2e985718b0df96afc659abfe194c171726054314b019dbbfed31581673c7"}, +] + +[package.extras] +all = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "fs (>=2.2.0,<3)", "lxml (>=4.0)", "lz4 (>=1.7.4.2)", "matplotlib", "munkres", "pycairo", "scipy", "skia-pathops (>=0.5.0)", "sympy", "uharfbuzz (>=0.23.0)", "unicodedata2 (>=15.1.0)", "xattr", "zopfli (>=0.1.4)"] +graphite = ["lz4 (>=1.7.4.2)"] +interpolatable = ["munkres", "pycairo", "scipy"] +lxml = ["lxml (>=4.0)"] +pathops = ["skia-pathops (>=0.5.0)"] +plot = ["matplotlib"] +repacker = ["uharfbuzz (>=0.23.0)"] +symfont = ["sympy"] +type1 = ["xattr"] +ufo = 
["fs (>=2.2.0,<3)"] +unicode = ["unicodedata2 (>=15.1.0)"] +woff = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "zopfli (>=0.1.4)"] + +[[package]] +name = "gitdb" +version = "4.0.11" +description = "Git Object Database" +optional = false +python-versions = ">=3.7" +files = [ + {file = "gitdb-4.0.11-py3-none-any.whl", hash = "sha256:81a3407ddd2ee8df444cbacea00e2d038e40150acfa3001696fe0dcf1d3adfa4"}, + {file = "gitdb-4.0.11.tar.gz", hash = "sha256:bf5421126136d6d0af55bc1e7c1af1c397a34f5b7bd79e776cd3e89785c2b04b"}, +] + +[package.dependencies] +smmap = ">=3.0.1,<6" + +[[package]] +name = "gitpython" +version = "3.1.43" +description = "GitPython is a Python library used to interact with Git repositories" +optional = false +python-versions = ">=3.7" +files = [ + {file = "GitPython-3.1.43-py3-none-any.whl", hash = "sha256:eec7ec56b92aad751f9912a73404bc02ba212a23adb2c7098ee668417051a1ff"}, + {file = "GitPython-3.1.43.tar.gz", hash = "sha256:35f314a9f878467f5453cc1fee295c3e18e52f1b99f10f6cf5b1682e968a9e7c"}, +] + +[package.dependencies] +gitdb = ">=4.0.1,<5" + +[package.extras] +doc = ["sphinx (==4.3.2)", "sphinx-autodoc-typehints", "sphinx-rtd-theme", "sphinxcontrib-applehelp (>=1.0.2,<=1.0.4)", "sphinxcontrib-devhelp (==1.0.2)", "sphinxcontrib-htmlhelp (>=2.0.0,<=2.0.1)", "sphinxcontrib-qthelp (==1.0.3)", "sphinxcontrib-serializinghtml (==1.1.5)"] +test = ["coverage[toml]", "ddt (>=1.1.1,!=1.4.3)", "mock", "mypy", "pre-commit", "pytest (>=7.3.1)", "pytest-cov", "pytest-instafail", "pytest-mock", "pytest-sugar", "typing-extensions"] + +[[package]] +name = "h5py" +version = "3.12.1" +description = "Read and write HDF5 files from Python" +optional = false +python-versions = ">=3.9" +files = [ + {file = "h5py-3.12.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2f0f1a382cbf494679c07b4371f90c70391dedb027d517ac94fa2c05299dacda"}, + {file = "h5py-3.12.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cb65f619dfbdd15e662423e8d257780f9a66677eae5b4b3fc9dca70b5fd2d2a3"}, + {file = "h5py-3.12.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3b15d8dbd912c97541312c0e07438864d27dbca857c5ad634de68110c6beb1c2"}, + {file = "h5py-3.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:59685fe40d8c1fbbee088c88cd4da415a2f8bee5c270337dc5a1c4aa634e3307"}, + {file = "h5py-3.12.1-cp310-cp310-win_amd64.whl", hash = "sha256:577d618d6b6dea3da07d13cc903ef9634cde5596b13e832476dd861aaf651f3e"}, + {file = "h5py-3.12.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ccd9006d92232727d23f784795191bfd02294a4f2ba68708825cb1da39511a93"}, + {file = "h5py-3.12.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ad8a76557880aed5234cfe7279805f4ab5ce16b17954606cca90d578d3e713ef"}, + {file = "h5py-3.12.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1473348139b885393125126258ae2d70753ef7e9cec8e7848434f385ae72069e"}, + {file = "h5py-3.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:018a4597f35092ae3fb28ee851fdc756d2b88c96336b8480e124ce1ac6fb9166"}, + {file = "h5py-3.12.1-cp311-cp311-win_amd64.whl", hash = "sha256:3fdf95092d60e8130ba6ae0ef7a9bd4ade8edbe3569c13ebbaf39baefffc5ba4"}, + {file = "h5py-3.12.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:06a903a4e4e9e3ebbc8b548959c3c2552ca2d70dac14fcfa650d9261c66939ed"}, + {file = "h5py-3.12.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7b3b8f3b48717e46c6a790e3128d39c61ab595ae0a7237f06dfad6a3b51d5351"}, + {file = 
"h5py-3.12.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:050a4f2c9126054515169c49cb900949814987f0c7ae74c341b0c9f9b5056834"}, + {file = "h5py-3.12.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c4b41d1019322a5afc5082864dfd6359f8935ecd37c11ac0029be78c5d112c9"}, + {file = "h5py-3.12.1-cp312-cp312-win_amd64.whl", hash = "sha256:e4d51919110a030913201422fb07987db4338eba5ec8c5a15d6fab8e03d443fc"}, + {file = "h5py-3.12.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:513171e90ed92236fc2ca363ce7a2fc6f2827375efcbb0cc7fbdd7fe11fecafc"}, + {file = "h5py-3.12.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:59400f88343b79655a242068a9c900001a34b63e3afb040bd7cdf717e440f653"}, + {file = "h5py-3.12.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d3e465aee0ec353949f0f46bf6c6f9790a2006af896cee7c178a8c3e5090aa32"}, + {file = "h5py-3.12.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba51c0c5e029bb5420a343586ff79d56e7455d496d18a30309616fdbeed1068f"}, + {file = "h5py-3.12.1-cp313-cp313-win_amd64.whl", hash = "sha256:52ab036c6c97055b85b2a242cb540ff9590bacfda0c03dd0cf0661b311f522f8"}, + {file = "h5py-3.12.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d2b8dd64f127d8b324f5d2cd1c0fd6f68af69084e9e47d27efeb9e28e685af3e"}, + {file = "h5py-3.12.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4532c7e97fbef3d029735db8b6f5bf01222d9ece41e309b20d63cfaae2fb5c4d"}, + {file = "h5py-3.12.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6fdf6d7936fa824acfa27305fe2d9f39968e539d831c5bae0e0d83ed521ad1ac"}, + {file = "h5py-3.12.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:84342bffd1f82d4f036433e7039e241a243531a1d3acd7341b35ae58cdab05bf"}, + {file = "h5py-3.12.1-cp39-cp39-win_amd64.whl", hash = "sha256:62be1fc0ef195891949b2c627ec06bc8e837ff62d5b911b6e42e38e0f20a897d"}, + {file = "h5py-3.12.1.tar.gz", hash = "sha256:326d70b53d31baa61f00b8aa5f95c2fcb9621a3ee8365d770c551a13dbbcbfdf"}, +] + +[package.dependencies] +numpy = ">=1.19.3" + +[[package]] +name = "identify" +version = "2.6.3" +description = "File identification library for Python" +optional = false +python-versions = ">=3.9" +files = [ + {file = "identify-2.6.3-py2.py3-none-any.whl", hash = "sha256:9edba65473324c2ea9684b1f944fe3191db3345e50b6d04571d10ed164f8d7bd"}, + {file = "identify-2.6.3.tar.gz", hash = "sha256:62f5dae9b5fef52c84cc188514e9ea4f3f636b1d8799ab5ebc475471f9e47a02"}, +] + +[package.extras] +license = ["ukkonen"] + +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.7" +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + +[[package]] +name = "joblib" +version = "1.4.2" +description = "Lightweight pipelining with Python functions" +optional = false +python-versions = ">=3.8" +files = [ + {file = "joblib-1.4.2-py3-none-any.whl", hash = "sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6"}, + {file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"}, +] + +[[package]] +name = "kiwisolver" +version = "1.4.7" +description = "A fast implementation of the Cassowary constraint solver" +optional 
= false +python-versions = ">=3.8" +files = [ + {file = "kiwisolver-1.4.7-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:8a9c83f75223d5e48b0bc9cb1bf2776cf01563e00ade8775ffe13b0b6e1af3a6"}, + {file = "kiwisolver-1.4.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:58370b1ffbd35407444d57057b57da5d6549d2d854fa30249771775c63b5fe17"}, + {file = "kiwisolver-1.4.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:aa0abdf853e09aff551db11fce173e2177d00786c688203f52c87ad7fcd91ef9"}, + {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:8d53103597a252fb3ab8b5845af04c7a26d5e7ea8122303dd7a021176a87e8b9"}, + {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:88f17c5ffa8e9462fb79f62746428dd57b46eb931698e42e990ad63103f35e6c"}, + {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88a9ca9c710d598fd75ee5de59d5bda2684d9db36a9f50b6125eaea3969c2599"}, + {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f4d742cb7af1c28303a51b7a27aaee540e71bb8e24f68c736f6f2ffc82f2bf05"}, + {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e28c7fea2196bf4c2f8d46a0415c77a1c480cc0724722f23d7410ffe9842c407"}, + {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:e968b84db54f9d42046cf154e02911e39c0435c9801681e3fc9ce8a3c4130278"}, + {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:0c18ec74c0472de033e1bebb2911c3c310eef5649133dd0bedf2a169a1b269e5"}, + {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:8f0ea6da6d393d8b2e187e6a5e3fb81f5862010a40c3945e2c6d12ae45cfb2ad"}, + {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:f106407dda69ae456dd1227966bf445b157ccc80ba0dff3802bb63f30b74e895"}, + {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:84ec80df401cfee1457063732d90022f93951944b5b58975d34ab56bb150dfb3"}, + {file = "kiwisolver-1.4.7-cp310-cp310-win32.whl", hash = "sha256:71bb308552200fb2c195e35ef05de12f0c878c07fc91c270eb3d6e41698c3bcc"}, + {file = "kiwisolver-1.4.7-cp310-cp310-win_amd64.whl", hash = "sha256:44756f9fd339de0fb6ee4f8c1696cfd19b2422e0d70b4cefc1cc7f1f64045a8c"}, + {file = "kiwisolver-1.4.7-cp310-cp310-win_arm64.whl", hash = "sha256:78a42513018c41c2ffd262eb676442315cbfe3c44eed82385c2ed043bc63210a"}, + {file = "kiwisolver-1.4.7-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:d2b0e12a42fb4e72d509fc994713d099cbb15ebf1103545e8a45f14da2dfca54"}, + {file = "kiwisolver-1.4.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2a8781ac3edc42ea4b90bc23e7d37b665d89423818e26eb6df90698aa2287c95"}, + {file = "kiwisolver-1.4.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:46707a10836894b559e04b0fd143e343945c97fd170d69a2d26d640b4e297935"}, + {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ef97b8df011141c9b0f6caf23b29379f87dd13183c978a30a3c546d2c47314cb"}, + {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ab58c12a2cd0fc769089e6d38466c46d7f76aced0a1f54c77652446733d2d02"}, + {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:803b8e1459341c1bb56d1c5c010406d5edec8a0713a0945851290a7930679b51"}, + {file = 
"kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f9a9e8a507420fe35992ee9ecb302dab68550dedc0da9e2880dd88071c5fb052"}, + {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18077b53dc3bb490e330669a99920c5e6a496889ae8c63b58fbc57c3d7f33a18"}, + {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6af936f79086a89b3680a280c47ea90b4df7047b5bdf3aa5c524bbedddb9e545"}, + {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:3abc5b19d24af4b77d1598a585b8a719beb8569a71568b66f4ebe1fb0449460b"}, + {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:933d4de052939d90afbe6e9d5273ae05fb836cc86c15b686edd4b3560cc0ee36"}, + {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:65e720d2ab2b53f1f72fb5da5fb477455905ce2c88aaa671ff0a447c2c80e8e3"}, + {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:3bf1ed55088f214ba6427484c59553123fdd9b218a42bbc8c6496d6754b1e523"}, + {file = "kiwisolver-1.4.7-cp311-cp311-win32.whl", hash = "sha256:4c00336b9dd5ad96d0a558fd18a8b6f711b7449acce4c157e7343ba92dd0cf3d"}, + {file = "kiwisolver-1.4.7-cp311-cp311-win_amd64.whl", hash = "sha256:929e294c1ac1e9f615c62a4e4313ca1823ba37326c164ec720a803287c4c499b"}, + {file = "kiwisolver-1.4.7-cp311-cp311-win_arm64.whl", hash = "sha256:e33e8fbd440c917106b237ef1a2f1449dfbb9b6f6e1ce17c94cd6a1e0d438376"}, + {file = "kiwisolver-1.4.7-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:5360cc32706dab3931f738d3079652d20982511f7c0ac5711483e6eab08efff2"}, + {file = "kiwisolver-1.4.7-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:942216596dc64ddb25adb215c3c783215b23626f8d84e8eff8d6d45c3f29f75a"}, + {file = "kiwisolver-1.4.7-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:48b571ecd8bae15702e4f22d3ff6a0f13e54d3d00cd25216d5e7f658242065ee"}, + {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ad42ba922c67c5f219097b28fae965e10045ddf145d2928bfac2eb2e17673640"}, + {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:612a10bdae23404a72941a0fc8fa2660c6ea1217c4ce0dbcab8a8f6543ea9e7f"}, + {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9e838bba3a3bac0fe06d849d29772eb1afb9745a59710762e4ba3f4cb8424483"}, + {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:22f499f6157236c19f4bbbd472fa55b063db77a16cd74d49afe28992dff8c258"}, + {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:693902d433cf585133699972b6d7c42a8b9f8f826ebcaf0132ff55200afc599e"}, + {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4e77f2126c3e0b0d055f44513ed349038ac180371ed9b52fe96a32aa071a5107"}, + {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:657a05857bda581c3656bfc3b20e353c232e9193eb167766ad2dc58b56504948"}, + {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:4bfa75a048c056a411f9705856abfc872558e33c055d80af6a380e3658766038"}, + {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:34ea1de54beef1c104422d210c47c7d2a4999bdecf42c7b5718fbe59a4cac383"}, + {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:90da3b5f694b85231cf93586dad5e90e2d71b9428f9aad96952c99055582f520"}, + {file = "kiwisolver-1.4.7-cp312-cp312-win32.whl", hash = "sha256:18e0cca3e008e17fe9b164b55735a325140a5a35faad8de92dd80265cd5eb80b"}, + {file = "kiwisolver-1.4.7-cp312-cp312-win_amd64.whl", hash = "sha256:58cb20602b18f86f83a5c87d3ee1c766a79c0d452f8def86d925e6c60fbf7bfb"}, + {file = "kiwisolver-1.4.7-cp312-cp312-win_arm64.whl", hash = "sha256:f5a8b53bdc0b3961f8b6125e198617c40aeed638b387913bf1ce78afb1b0be2a"}, + {file = "kiwisolver-1.4.7-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:2e6039dcbe79a8e0f044f1c39db1986a1b8071051efba3ee4d74f5b365f5226e"}, + {file = "kiwisolver-1.4.7-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a1ecf0ac1c518487d9d23b1cd7139a6a65bc460cd101ab01f1be82ecf09794b6"}, + {file = "kiwisolver-1.4.7-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7ab9ccab2b5bd5702ab0803676a580fffa2aa178c2badc5557a84cc943fcf750"}, + {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f816dd2277f8d63d79f9c8473a79fe54047bc0467754962840782c575522224d"}, + {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf8bcc23ceb5a1b624572a1623b9f79d2c3b337c8c455405ef231933a10da379"}, + {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dea0bf229319828467d7fca8c7c189780aa9ff679c94539eed7532ebe33ed37c"}, + {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c06a4c7cf15ec739ce0e5971b26c93638730090add60e183530d70848ebdd34"}, + {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:913983ad2deb14e66d83c28b632fd35ba2b825031f2fa4ca29675e665dfecbe1"}, + {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5337ec7809bcd0f424c6b705ecf97941c46279cf5ed92311782c7c9c2026f07f"}, + {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:4c26ed10c4f6fa6ddb329a5120ba3b6db349ca192ae211e882970bfc9d91420b"}, + {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c619b101e6de2222c1fcb0531e1b17bbffbe54294bfba43ea0d411d428618c27"}, + {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:073a36c8273647592ea332e816e75ef8da5c303236ec0167196793eb1e34657a"}, + {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:3ce6b2b0231bda412463e152fc18335ba32faf4e8c23a754ad50ffa70e4091ee"}, + {file = "kiwisolver-1.4.7-cp313-cp313-win32.whl", hash = "sha256:f4c9aee212bc89d4e13f58be11a56cc8036cabad119259d12ace14b34476fd07"}, + {file = "kiwisolver-1.4.7-cp313-cp313-win_amd64.whl", hash = "sha256:8a3ec5aa8e38fc4c8af308917ce12c536f1c88452ce554027e55b22cbbfbff76"}, + {file = "kiwisolver-1.4.7-cp313-cp313-win_arm64.whl", hash = "sha256:76c8094ac20ec259471ac53e774623eb62e6e1f56cd8690c67ce6ce4fcb05650"}, + {file = "kiwisolver-1.4.7-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5d5abf8f8ec1f4e22882273c423e16cae834c36856cac348cfbfa68e01c40f3a"}, + {file = "kiwisolver-1.4.7-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:aeb3531b196ef6f11776c21674dba836aeea9d5bd1cf630f869e3d90b16cfade"}, + {file = "kiwisolver-1.4.7-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b7d755065e4e866a8086c9bdada157133ff466476a2ad7861828e17b6026e22c"}, + {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:08471d4d86cbaec61f86b217dd938a83d85e03785f51121e791a6e6689a3be95"}, + {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7bbfcb7165ce3d54a3dfbe731e470f65739c4c1f85bb1018ee912bae139e263b"}, + {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5d34eb8494bea691a1a450141ebb5385e4b69d38bb8403b5146ad279f4b30fa3"}, + {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9242795d174daa40105c1d86aba618e8eab7bf96ba8c3ee614da8302a9f95503"}, + {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a0f64a48bb81af7450e641e3fe0b0394d7381e342805479178b3d335d60ca7cf"}, + {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:8e045731a5416357638d1700927529e2b8ab304811671f665b225f8bf8d8f933"}, + {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:4322872d5772cae7369f8351da1edf255a604ea7087fe295411397d0cfd9655e"}, + {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:e1631290ee9271dffe3062d2634c3ecac02c83890ada077d225e081aca8aab89"}, + {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:edcfc407e4eb17e037bca59be0e85a2031a2ac87e4fed26d3e9df88b4165f92d"}, + {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:4d05d81ecb47d11e7f8932bd8b61b720bf0b41199358f3f5e36d38e28f0532c5"}, + {file = "kiwisolver-1.4.7-cp38-cp38-win32.whl", hash = "sha256:b38ac83d5f04b15e515fd86f312479d950d05ce2368d5413d46c088dda7de90a"}, + {file = "kiwisolver-1.4.7-cp38-cp38-win_amd64.whl", hash = "sha256:d83db7cde68459fc803052a55ace60bea2bae361fc3b7a6d5da07e11954e4b09"}, + {file = "kiwisolver-1.4.7-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:3f9362ecfca44c863569d3d3c033dbe8ba452ff8eed6f6b5806382741a1334bd"}, + {file = "kiwisolver-1.4.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e8df2eb9b2bac43ef8b082e06f750350fbbaf2887534a5be97f6cf07b19d9583"}, + {file = "kiwisolver-1.4.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f32d6edbc638cde7652bd690c3e728b25332acbadd7cad670cc4a02558d9c417"}, + {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:e2e6c39bd7b9372b0be21456caab138e8e69cc0fc1190a9dfa92bd45a1e6e904"}, + {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:dda56c24d869b1193fcc763f1284b9126550eaf84b88bbc7256e15028f19188a"}, + {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79849239c39b5e1fd906556c474d9b0439ea6792b637511f3fe3a41158d89ca8"}, + {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5e3bc157fed2a4c02ec468de4ecd12a6e22818d4f09cde2c31ee3226ffbefab2"}, + {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3da53da805b71e41053dc670f9a820d1157aae77b6b944e08024d17bcd51ef88"}, + {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:8705f17dfeb43139a692298cb6637ee2e59c0194538153e83e9ee0c75c2eddde"}, + {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:82a5c2f4b87c26bb1a0ef3d16b5c4753434633b83d365cc0ddf2770c93829e3c"}, + {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:ce8be0466f4c0d585cdb6c1e2ed07232221df101a4c6f28821d2aa754ca2d9e2"}, + {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_s390x.whl", hash 
= "sha256:409afdfe1e2e90e6ee7fc896f3df9a7fec8e793e58bfa0d052c8a82f99c37abb"}, + {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:5b9c3f4ee0b9a439d2415012bd1b1cc2df59e4d6a9939f4d669241d30b414327"}, + {file = "kiwisolver-1.4.7-cp39-cp39-win32.whl", hash = "sha256:a79ae34384df2b615eefca647a2873842ac3b596418032bef9a7283675962644"}, + {file = "kiwisolver-1.4.7-cp39-cp39-win_amd64.whl", hash = "sha256:cf0438b42121a66a3a667de17e779330fc0f20b0d97d59d2f2121e182b0505e4"}, + {file = "kiwisolver-1.4.7-cp39-cp39-win_arm64.whl", hash = "sha256:764202cc7e70f767dab49e8df52c7455e8de0df5d858fa801a11aa0d882ccf3f"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:94252291e3fe68001b1dd747b4c0b3be12582839b95ad4d1b641924d68fd4643"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:5b7dfa3b546da08a9f622bb6becdb14b3e24aaa30adba66749d38f3cc7ea9706"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bd3de6481f4ed8b734da5df134cd5a6a64fe32124fe83dde1e5b5f29fe30b1e6"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a91b5f9f1205845d488c928e8570dcb62b893372f63b8b6e98b863ebd2368ff2"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40fa14dbd66b8b8f470d5fc79c089a66185619d31645f9b0773b88b19f7223c4"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:eb542fe7933aa09d8d8f9d9097ef37532a7df6497819d16efe4359890a2f417a"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:bfa1acfa0c54932d5607e19a2c24646fb4c1ae2694437789129cf099789a3b00"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:eee3ea935c3d227d49b4eb85660ff631556841f6e567f0f7bda972df6c2c9935"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:f3160309af4396e0ed04db259c3ccbfdc3621b5559b5453075e5de555e1f3a1b"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a17f6a29cf8935e587cc8a4dbfc8368c55edc645283db0ce9801016f83526c2d"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:10849fb2c1ecbfae45a693c070e0320a91b35dd4bcf58172c023b994283a124d"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:ac542bf38a8a4be2dc6b15248d36315ccc65f0743f7b1a76688ffb6b5129a5c2"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:8b01aac285f91ca889c800042c35ad3b239e704b150cfd3382adfc9dcc780e39"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:48be928f59a1f5c8207154f935334d374e79f2b5d212826307d072595ad76a2e"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f37cfe618a117e50d8c240555331160d73d0411422b59b5ee217843d7b693608"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:599b5c873c63a1f6ed7eead644a8a380cfbdf5db91dcb6f85707aaab213b1674"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:801fa7802e5cfabe3ab0c81a34c323a319b097dfb5004be950482d882f3d7225"}, + {file = 
"kiwisolver-1.4.7-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:0c6c43471bc764fad4bc99c5c2d6d16a676b1abf844ca7c8702bdae92df01ee0"}, + {file = "kiwisolver-1.4.7.tar.gz", hash = "sha256:9893ff81bd7107f7b685d3017cc6583daadb4fc26e4a888350df530e41980a60"}, +] + +[[package]] +name = "loguru" +version = "0.7.2" +description = "Python logging made (stupidly) simple" +optional = false +python-versions = ">=3.5" +files = [ + {file = "loguru-0.7.2-py3-none-any.whl", hash = "sha256:003d71e3d3ed35f0f8984898359d65b79e5b21943f78af86aa5491210429b8eb"}, + {file = "loguru-0.7.2.tar.gz", hash = "sha256:e671a53522515f34fd406340ee968cb9ecafbc4b36c679da03c18fd8d0bd51ac"}, +] + +[package.dependencies] +colorama = {version = ">=0.3.4", markers = "sys_platform == \"win32\""} +win32-setctime = {version = ">=1.0.0", markers = "sys_platform == \"win32\""} + +[package.extras] +dev = ["Sphinx (==7.2.5)", "colorama (==0.4.5)", "colorama (==0.4.6)", "exceptiongroup (==1.1.3)", "freezegun (==1.1.0)", "freezegun (==1.2.2)", "mypy (==v0.910)", "mypy (==v0.971)", "mypy (==v1.4.1)", "mypy (==v1.5.1)", "pre-commit (==3.4.0)", "pytest (==6.1.2)", "pytest (==7.4.0)", "pytest-cov (==2.12.1)", "pytest-cov (==4.1.0)", "pytest-mypy-plugins (==1.9.3)", "pytest-mypy-plugins (==3.0.0)", "sphinx-autobuild (==2021.3.14)", "sphinx-rtd-theme (==1.3.0)", "tox (==3.27.1)", "tox (==4.11.0)"] + +[[package]] +name = "matplotlib" +version = "3.9.3" +description = "Python plotting package" +optional = false +python-versions = ">=3.9" +files = [ + {file = "matplotlib-3.9.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:41b016e3be4e740b66c79a031a0a6e145728dbc248142e751e8dab4f3188ca1d"}, + {file = "matplotlib-3.9.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8e0143975fc2a6d7136c97e19c637321288371e8f09cff2564ecd73e865ea0b9"}, + {file = "matplotlib-3.9.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9f459c8ee2c086455744723628264e43c884be0c7d7b45d84b8cd981310b4815"}, + {file = "matplotlib-3.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:687df7ceff57b8f070d02b4db66f75566370e7ae182a0782b6d3d21b0d6917dc"}, + {file = "matplotlib-3.9.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:edd14cf733fdc4f6e6fe3f705af97676a7e52859bf0044aa2c84e55be739241c"}, + {file = "matplotlib-3.9.3-cp310-cp310-win_amd64.whl", hash = "sha256:1c40c244221a1adbb1256692b1133c6fb89418df27bf759a31a333e7912a4010"}, + {file = "matplotlib-3.9.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:cf2a60daf6cecff6828bc608df00dbc794380e7234d2411c0ec612811f01969d"}, + {file = "matplotlib-3.9.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:213d6dc25ce686516208d8a3e91120c6a4fdae4a3e06b8505ced5b716b50cc04"}, + {file = "matplotlib-3.9.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c52f48eb75fcc119a4fdb68ba83eb5f71656999420375df7c94cc68e0e14686e"}, + {file = "matplotlib-3.9.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d3c93796b44fa111049b88a24105e947f03c01966b5c0cc782e2ee3887b790a3"}, + {file = "matplotlib-3.9.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:cd1077b9a09b16d8c3c7075a8add5ffbfe6a69156a57e290c800ed4d435bef1d"}, + {file = "matplotlib-3.9.3-cp311-cp311-win_amd64.whl", hash = "sha256:c96eeeb8c68b662c7747f91a385688d4b449687d29b691eff7068a4602fe6dc4"}, + {file = "matplotlib-3.9.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:0a361bd5583bf0bcc08841df3c10269617ee2a36b99ac39d455a767da908bbbc"}, + {file = 
"matplotlib-3.9.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e14485bb1b83eeb3d55b6878f9560240981e7bbc7a8d4e1e8c38b9bd6ec8d2de"}, + {file = "matplotlib-3.9.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a8d279f78844aad213c4935c18f8292a9432d51af2d88bca99072c903948045"}, + {file = "matplotlib-3.9.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b6c12514329ac0d03128cf1dcceb335f4fbf7c11da98bca68dca8dcb983153a9"}, + {file = "matplotlib-3.9.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6e9de2b390d253a508dd497e9b5579f3a851f208763ed67fdca5dc0c3ea6849c"}, + {file = "matplotlib-3.9.3-cp312-cp312-win_amd64.whl", hash = "sha256:d796272408f8567ff7eaa00eb2856b3a00524490e47ad505b0b4ca6bb8a7411f"}, + {file = "matplotlib-3.9.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:203d18df84f5288973b2d56de63d4678cc748250026ca9e1ad8f8a0fd8a75d83"}, + {file = "matplotlib-3.9.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:b651b0d3642991259109dc0351fc33ad44c624801367bb8307be9bfc35e427ad"}, + {file = "matplotlib-3.9.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:66d7b171fecf96940ce069923a08ba3df33ef542de82c2ff4fe8caa8346fa95a"}, + {file = "matplotlib-3.9.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6be0ba61f6ff2e6b68e4270fb63b6813c9e7dec3d15fc3a93f47480444fd72f0"}, + {file = "matplotlib-3.9.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9d6b2e8856dec3a6db1ae51aec85c82223e834b228c1d3228aede87eee2b34f9"}, + {file = "matplotlib-3.9.3-cp313-cp313-win_amd64.whl", hash = "sha256:90a85a004fefed9e583597478420bf904bb1a065b0b0ee5b9d8d31b04b0f3f70"}, + {file = "matplotlib-3.9.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3119b2f16de7f7b9212ba76d8fe6a0e9f90b27a1e04683cd89833a991682f639"}, + {file = "matplotlib-3.9.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:87ad73763d93add1b6c1f9fcd33af662fd62ed70e620c52fcb79f3ac427cf3a6"}, + {file = "matplotlib-3.9.3-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:026bdf3137ab6022c866efa4813b6bbeddc2ed4c9e7e02f0e323a7bca380dfa0"}, + {file = "matplotlib-3.9.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:760a5e89ebbb172989e8273024a1024b0f084510b9105261b3b00c15e9c9f006"}, + {file = "matplotlib-3.9.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:a42b9dc42de2cfe357efa27d9c50c7833fc5ab9b2eb7252ccd5d5f836a84e1e4"}, + {file = "matplotlib-3.9.3-cp313-cp313t-win_amd64.whl", hash = "sha256:e0fcb7da73fbf67b5f4bdaa57d85bb585a4e913d4a10f3e15b32baea56a67f0a"}, + {file = "matplotlib-3.9.3-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:031b7f5b8e595cc07def77ec5b58464e9bb67dc5760be5d6f26d9da24892481d"}, + {file = "matplotlib-3.9.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9fa6e193c14d6944e0685cdb527cb6b38b0e4a518043e7212f214113af7391da"}, + {file = "matplotlib-3.9.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e6eefae6effa0c35bbbc18c25ee6e0b1da44d2359c3cd526eb0c9e703cf055d"}, + {file = "matplotlib-3.9.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10d3e5c7a99bd28afb957e1ae661323b0800d75b419f24d041ed1cc5d844a764"}, + {file = "matplotlib-3.9.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:816a966d5d376bf24c92af8f379e78e67278833e4c7cbc9fa41872eec629a060"}, + {file = "matplotlib-3.9.3-cp39-cp39-win_amd64.whl", hash = 
"sha256:3fb0b37c896172899a4a93d9442ffdc6f870165f59e05ce2e07c6fded1c15749"}, + {file = "matplotlib-3.9.3-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:5f2a4ea08e6876206d511365b0bc234edc813d90b930be72c3011bbd7898796f"}, + {file = "matplotlib-3.9.3-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:9b081dac96ab19c54fd8558fac17c9d2c9cb5cc4656e7ed3261ddc927ba3e2c5"}, + {file = "matplotlib-3.9.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0a0a63cb8404d1d1f94968ef35738900038137dab8af836b6c21bb6f03d75465"}, + {file = "matplotlib-3.9.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:896774766fd6be4571a43bc2fcbcb1dcca0807e53cab4a5bf88c4aa861a08e12"}, + {file = "matplotlib-3.9.3.tar.gz", hash = "sha256:cd5dbbc8e25cad5f706845c4d100e2c8b34691b412b93717ce38d8ae803bcfa5"}, +] + +[package.dependencies] +contourpy = ">=1.0.1" +cycler = ">=0.10" +fonttools = ">=4.22.0" +kiwisolver = ">=1.3.1" +numpy = ">=1.23" +packaging = ">=20.0" +pillow = ">=8" +pyparsing = ">=2.3.1" +python-dateutil = ">=2.7" + +[package.extras] +dev = ["meson-python (>=0.13.1)", "numpy (>=1.25)", "pybind11 (>=2.6,!=2.13.3)", "setuptools (>=64)", "setuptools_scm (>=7)"] + +[[package]] +name = "mypy" +version = "1.13.0" +description = "Optional static typing for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "mypy-1.13.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6607e0f1dd1fb7f0aca14d936d13fd19eba5e17e1cd2a14f808fa5f8f6d8f60a"}, + {file = "mypy-1.13.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8a21be69bd26fa81b1f80a61ee7ab05b076c674d9b18fb56239d72e21d9f4c80"}, + {file = "mypy-1.13.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7b2353a44d2179846a096e25691d54d59904559f4232519d420d64da6828a3a7"}, + {file = "mypy-1.13.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0730d1c6a2739d4511dc4253f8274cdd140c55c32dfb0a4cf8b7a43f40abfa6f"}, + {file = "mypy-1.13.0-cp310-cp310-win_amd64.whl", hash = "sha256:c5fc54dbb712ff5e5a0fca797e6e0aa25726c7e72c6a5850cfd2adbc1eb0a372"}, + {file = "mypy-1.13.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:581665e6f3a8a9078f28d5502f4c334c0c8d802ef55ea0e7276a6e409bc0d82d"}, + {file = "mypy-1.13.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3ddb5b9bf82e05cc9a627e84707b528e5c7caaa1c55c69e175abb15a761cec2d"}, + {file = "mypy-1.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:20c7ee0bc0d5a9595c46f38beb04201f2620065a93755704e141fcac9f59db2b"}, + {file = "mypy-1.13.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3790ded76f0b34bc9c8ba4def8f919dd6a46db0f5a6610fb994fe8efdd447f73"}, + {file = "mypy-1.13.0-cp311-cp311-win_amd64.whl", hash = "sha256:51f869f4b6b538229c1d1bcc1dd7d119817206e2bc54e8e374b3dfa202defcca"}, + {file = "mypy-1.13.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:5c7051a3461ae84dfb5dd15eff5094640c61c5f22257c8b766794e6dd85e72d5"}, + {file = "mypy-1.13.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:39bb21c69a5d6342f4ce526e4584bc5c197fd20a60d14a8624d8743fffb9472e"}, + {file = "mypy-1.13.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:164f28cb9d6367439031f4c81e84d3ccaa1e19232d9d05d37cb0bd880d3f93c2"}, + {file = "mypy-1.13.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a4c1bfcdbce96ff5d96fc9b08e3831acb30dc44ab02671eca5953eadad07d6d0"}, + {file = "mypy-1.13.0-cp312-cp312-win_amd64.whl", hash = 
"sha256:a0affb3a79a256b4183ba09811e3577c5163ed06685e4d4b46429a271ba174d2"}, + {file = "mypy-1.13.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a7b44178c9760ce1a43f544e595d35ed61ac2c3de306599fa59b38a6048e1aa7"}, + {file = "mypy-1.13.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5d5092efb8516d08440e36626f0153b5006d4088c1d663d88bf79625af3d1d62"}, + {file = "mypy-1.13.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:de2904956dac40ced10931ac967ae63c5089bd498542194b436eb097a9f77bc8"}, + {file = "mypy-1.13.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:7bfd8836970d33c2105562650656b6846149374dc8ed77d98424b40b09340ba7"}, + {file = "mypy-1.13.0-cp313-cp313-win_amd64.whl", hash = "sha256:9f73dba9ec77acb86457a8fc04b5239822df0c14a082564737833d2963677dbc"}, + {file = "mypy-1.13.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:100fac22ce82925f676a734af0db922ecfea991e1d7ec0ceb1e115ebe501301a"}, + {file = "mypy-1.13.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7bcb0bb7f42a978bb323a7c88f1081d1b5dee77ca86f4100735a6f541299d8fb"}, + {file = "mypy-1.13.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bde31fc887c213e223bbfc34328070996061b0833b0a4cfec53745ed61f3519b"}, + {file = "mypy-1.13.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:07de989f89786f62b937851295ed62e51774722e5444a27cecca993fc3f9cd74"}, + {file = "mypy-1.13.0-cp38-cp38-win_amd64.whl", hash = "sha256:4bde84334fbe19bad704b3f5b78c4abd35ff1026f8ba72b29de70dda0916beb6"}, + {file = "mypy-1.13.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0246bcb1b5de7f08f2826451abd947bf656945209b140d16ed317f65a17dc7dc"}, + {file = "mypy-1.13.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7f5b7deae912cf8b77e990b9280f170381fdfbddf61b4ef80927edd813163732"}, + {file = "mypy-1.13.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7029881ec6ffb8bc233a4fa364736789582c738217b133f1b55967115288a2bc"}, + {file = "mypy-1.13.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:3e38b980e5681f28f033f3be86b099a247b13c491f14bb8b1e1e134d23bb599d"}, + {file = "mypy-1.13.0-cp39-cp39-win_amd64.whl", hash = "sha256:a6789be98a2017c912ae6ccb77ea553bbaf13d27605d2ca20a76dfbced631b24"}, + {file = "mypy-1.13.0-py3-none-any.whl", hash = "sha256:9c250883f9fd81d212e0952c92dbfcc96fc237f4b7c92f56ac81fd48460b3e5a"}, + {file = "mypy-1.13.0.tar.gz", hash = "sha256:0291a61b6fbf3e6673e3405cfcc0e7650bebc7939659fdca2702958038bd835e"}, +] + +[package.dependencies] +mypy-extensions = ">=1.0.0" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typing-extensions = ">=4.6.0" + +[package.extras] +dmypy = ["psutil (>=4.0)"] +faster-cache = ["orjson"] +install-types = ["pip"] +mypyc = ["setuptools (>=50)"] +reports = ["lxml"] + +[[package]] +name = "mypy-extensions" +version = "1.0.0" +description = "Type system extensions for programs checked with the mypy type checker." +optional = false +python-versions = ">=3.5" +files = [ + {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, + {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, +] + +[[package]] +name = "natsort" +version = "8.4.0" +description = "Simple yet flexible natural sorting in Python." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "natsort-8.4.0-py3-none-any.whl", hash = "sha256:4732914fb471f56b5cce04d7bae6f164a592c7712e1c85f9ef585e197299521c"}, + {file = "natsort-8.4.0.tar.gz", hash = "sha256:45312c4a0e5507593da193dedd04abb1469253b601ecaf63445ad80f0a1ea581"}, +] + +[package.extras] +fast = ["fastnumbers (>=2.0.0)"] +icu = ["PyICU (>=1.0.0)"] + +[[package]] +name = "nodeenv" +version = "1.9.1" +description = "Node.js virtual environment builder" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9"}, + {file = "nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f"}, +] + +[[package]] +name = "numpy" +version = "1.26.4" +description = "Fundamental package for array computing in Python" +optional = false +python-versions = ">=3.9" +files = [ + {file = "numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0"}, + {file = "numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a"}, + {file = "numpy-1.26.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d209d8969599b27ad20994c8e41936ee0964e6da07478d6c35016bc386b66ad4"}, + {file = "numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f"}, + {file = "numpy-1.26.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:62b8e4b1e28009ef2846b4c7852046736bab361f7aeadeb6a5b89ebec3c7055a"}, + {file = "numpy-1.26.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a4abb4f9001ad2858e7ac189089c42178fcce737e4169dc61321660f1a96c7d2"}, + {file = "numpy-1.26.4-cp310-cp310-win32.whl", hash = "sha256:bfe25acf8b437eb2a8b2d49d443800a5f18508cd811fea3181723922a8a82b07"}, + {file = "numpy-1.26.4-cp310-cp310-win_amd64.whl", hash = "sha256:b97fe8060236edf3662adfc2c633f56a08ae30560c56310562cb4f95500022d5"}, + {file = "numpy-1.26.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c66707fabe114439db9068ee468c26bbdf909cac0fb58686a42a24de1760c71"}, + {file = "numpy-1.26.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef"}, + {file = "numpy-1.26.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ab55401287bfec946ced39700c053796e7cc0e3acbef09993a9ad2adba6ca6e"}, + {file = "numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:666dbfb6ec68962c033a450943ded891bed2d54e6755e35e5835d63f4f6931d5"}, + {file = "numpy-1.26.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:96ff0b2ad353d8f990b63294c8986f1ec3cb19d749234014f4e7eb0112ceba5a"}, + {file = "numpy-1.26.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:60dedbb91afcbfdc9bc0b1f3f402804070deed7392c23eb7a7f07fa857868e8a"}, + {file = "numpy-1.26.4-cp311-cp311-win32.whl", hash = "sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20"}, + {file = "numpy-1.26.4-cp311-cp311-win_amd64.whl", hash = "sha256:cd25bcecc4974d09257ffcd1f098ee778f7834c3ad767fe5db785be9a4aa9cb2"}, + {file = "numpy-1.26.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b3ce300f3644fb06443ee2222c2201dd3a89ea6040541412b8fa189341847218"}, + {file = 
"numpy-1.26.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b"}, + {file = "numpy-1.26.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9fad7dcb1aac3c7f0584a5a8133e3a43eeb2fe127f47e3632d43d677c66c102b"}, + {file = "numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:675d61ffbfa78604709862923189bad94014bef562cc35cf61d3a07bba02a7ed"}, + {file = "numpy-1.26.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ab47dbe5cc8210f55aa58e4805fe224dac469cde56b9f731a4c098b91917159a"}, + {file = "numpy-1.26.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1dda2e7b4ec9dd512f84935c5f126c8bd8b9f2fc001e9f54af255e8c5f16b0e0"}, + {file = "numpy-1.26.4-cp312-cp312-win32.whl", hash = "sha256:50193e430acfc1346175fcbdaa28ffec49947a06918b7b92130744e81e640110"}, + {file = "numpy-1.26.4-cp312-cp312-win_amd64.whl", hash = "sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818"}, + {file = "numpy-1.26.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7349ab0fa0c429c82442a27a9673fc802ffdb7c7775fad780226cb234965e53c"}, + {file = "numpy-1.26.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:52b8b60467cd7dd1e9ed082188b4e6bb35aa5cdd01777621a1658910745b90be"}, + {file = "numpy-1.26.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d5241e0a80d808d70546c697135da2c613f30e28251ff8307eb72ba696945764"}, + {file = "numpy-1.26.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3"}, + {file = "numpy-1.26.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:679b0076f67ecc0138fd2ede3a8fd196dddc2ad3254069bcb9faf9a79b1cebcd"}, + {file = "numpy-1.26.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:47711010ad8555514b434df65f7d7b076bb8261df1ca9bb78f53d3b2db02e95c"}, + {file = "numpy-1.26.4-cp39-cp39-win32.whl", hash = "sha256:a354325ee03388678242a4d7ebcd08b5c727033fcff3b2f536aea978e15ee9e6"}, + {file = "numpy-1.26.4-cp39-cp39-win_amd64.whl", hash = "sha256:3373d5d70a5fe74a2c1bb6d2cfd9609ecf686d47a2d7b1d37a8f3b6bf6003aea"}, + {file = "numpy-1.26.4-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:afedb719a9dcfc7eaf2287b839d8198e06dcd4cb5d276a3df279231138e83d30"}, + {file = "numpy-1.26.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95a7476c59002f2f6c590b9b7b998306fba6a5aa646b1e22ddfeaf8f78c3a29c"}, + {file = "numpy-1.26.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7e50d0a0cc3189f9cb0aeb3a6a6af18c16f59f004b866cd2be1c14b36134a4a0"}, + {file = "numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"}, +] + +[[package]] +name = "packaging" +version = "24.2" +description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, + {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, +] + +[[package]] +name = "pandas" +version = "2.2.2" +description = "Powerful data structures for data analysis, time series, and statistics" +optional = false +python-versions = ">=3.9" +files = [ + {file = "pandas-2.2.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:90c6fca2acf139569e74e8781709dccb6fe25940488755716d1d354d6bc58bce"}, + {file = 
"pandas-2.2.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c7adfc142dac335d8c1e0dcbd37eb8617eac386596eb9e1a1b77791cf2498238"}, + {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4abfe0be0d7221be4f12552995e58723c7422c80a659da13ca382697de830c08"}, + {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8635c16bf3d99040fdf3ca3db669a7250ddf49c55dc4aa8fe0ae0fa8d6dcc1f0"}, + {file = "pandas-2.2.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:40ae1dffb3967a52203105a077415a86044a2bea011b5f321c6aa64b379a3f51"}, + {file = "pandas-2.2.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8e5a0b00e1e56a842f922e7fae8ae4077aee4af0acb5ae3622bd4b4c30aedf99"}, + {file = "pandas-2.2.2-cp310-cp310-win_amd64.whl", hash = "sha256:ddf818e4e6c7c6f4f7c8a12709696d193976b591cc7dc50588d3d1a6b5dc8772"}, + {file = "pandas-2.2.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:696039430f7a562b74fa45f540aca068ea85fa34c244d0deee539cb6d70aa288"}, + {file = "pandas-2.2.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8e90497254aacacbc4ea6ae5e7a8cd75629d6ad2b30025a4a8b09aa4faf55151"}, + {file = "pandas-2.2.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58b84b91b0b9f4bafac2a0ac55002280c094dfc6402402332c0913a59654ab2b"}, + {file = "pandas-2.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d2123dc9ad6a814bcdea0f099885276b31b24f7edf40f6cdbc0912672e22eee"}, + {file = "pandas-2.2.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:2925720037f06e89af896c70bca73459d7e6a4be96f9de79e2d440bd499fe0db"}, + {file = "pandas-2.2.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0cace394b6ea70c01ca1595f839cf193df35d1575986e484ad35c4aeae7266c1"}, + {file = "pandas-2.2.2-cp311-cp311-win_amd64.whl", hash = "sha256:873d13d177501a28b2756375d59816c365e42ed8417b41665f346289adc68d24"}, + {file = "pandas-2.2.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:9dfde2a0ddef507a631dc9dc4af6a9489d5e2e740e226ad426a05cabfbd7c8ef"}, + {file = "pandas-2.2.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e9b79011ff7a0f4b1d6da6a61aa1aa604fb312d6647de5bad20013682d1429ce"}, + {file = "pandas-2.2.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1cb51fe389360f3b5a4d57dbd2848a5f033350336ca3b340d1c53a1fad33bcad"}, + {file = "pandas-2.2.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eee3a87076c0756de40b05c5e9a6069c035ba43e8dd71c379e68cab2c20f16ad"}, + {file = "pandas-2.2.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:3e374f59e440d4ab45ca2fffde54b81ac3834cf5ae2cdfa69c90bc03bde04d76"}, + {file = "pandas-2.2.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:43498c0bdb43d55cb162cdc8c06fac328ccb5d2eabe3cadeb3529ae6f0517c32"}, + {file = "pandas-2.2.2-cp312-cp312-win_amd64.whl", hash = "sha256:d187d355ecec3629624fccb01d104da7d7f391db0311145817525281e2804d23"}, + {file = "pandas-2.2.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0ca6377b8fca51815f382bd0b697a0814c8bda55115678cbc94c30aacbb6eff2"}, + {file = "pandas-2.2.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9057e6aa78a584bc93a13f0a9bf7e753a5e9770a30b4d758b8d5f2a62a9433cd"}, + {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:001910ad31abc7bf06f49dcc903755d2f7f3a9186c0c040b827e522e9cef0863"}, + {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:66b479b0bd07204e37583c191535505410daa8df638fd8e75ae1b383851fe921"}, + {file = "pandas-2.2.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a77e9d1c386196879aa5eb712e77461aaee433e54c68cf253053a73b7e49c33a"}, + {file = "pandas-2.2.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:92fd6b027924a7e178ac202cfbe25e53368db90d56872d20ffae94b96c7acc57"}, + {file = "pandas-2.2.2-cp39-cp39-win_amd64.whl", hash = "sha256:640cef9aa381b60e296db324337a554aeeb883ead99dc8f6c18e81a93942f5f4"}, + {file = "pandas-2.2.2.tar.gz", hash = "sha256:9e79019aba43cb4fda9e4d983f8e88ca0373adbb697ae9c6c43093218de28b54"}, +] + +[package.dependencies] +numpy = [ + {version = ">=1.22.4", markers = "python_version < \"3.11\""}, + {version = ">=1.23.2", markers = "python_version == \"3.11\""}, + {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, +] +python-dateutil = ">=2.8.2" +pytz = ">=2020.1" +tzdata = ">=2022.7" + +[package.extras] +all = ["PyQt5 (>=5.15.9)", "SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)", "beautifulsoup4 (>=4.11.2)", "bottleneck (>=1.3.6)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=2022.12.0)", "fsspec (>=2022.11.0)", "gcsfs (>=2022.11.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.9.2)", "matplotlib (>=3.6.3)", "numba (>=0.56.4)", "numexpr (>=2.8.4)", "odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "pandas-gbq (>=0.19.0)", "psycopg2 (>=2.9.6)", "pyarrow (>=10.0.1)", "pymysql (>=1.0.2)", "pyreadstat (>=1.2.0)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "qtpy (>=2.3.0)", "s3fs (>=2022.11.0)", "scipy (>=1.10.0)", "tables (>=3.8.0)", "tabulate (>=0.9.0)", "xarray (>=2022.12.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)", "zstandard (>=0.19.0)"] +aws = ["s3fs (>=2022.11.0)"] +clipboard = ["PyQt5 (>=5.15.9)", "qtpy (>=2.3.0)"] +compression = ["zstandard (>=0.19.0)"] +computation = ["scipy (>=1.10.0)", "xarray (>=2022.12.0)"] +consortium-standard = ["dataframe-api-compat (>=0.1.7)"] +excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)"] +feather = ["pyarrow (>=10.0.1)"] +fss = ["fsspec (>=2022.11.0)"] +gcp = ["gcsfs (>=2022.11.0)", "pandas-gbq (>=0.19.0)"] +hdf5 = ["tables (>=3.8.0)"] +html = ["beautifulsoup4 (>=4.11.2)", "html5lib (>=1.1)", "lxml (>=4.9.2)"] +mysql = ["SQLAlchemy (>=2.0.0)", "pymysql (>=1.0.2)"] +output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.9.0)"] +parquet = ["pyarrow (>=10.0.1)"] +performance = ["bottleneck (>=1.3.6)", "numba (>=0.56.4)", "numexpr (>=2.8.4)"] +plot = ["matplotlib (>=3.6.3)"] +postgresql = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "psycopg2 (>=2.9.6)"] +pyarrow = ["pyarrow (>=10.0.1)"] +spss = ["pyreadstat (>=1.2.0)"] +sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)"] +test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] +xml = ["lxml (>=4.9.2)"] + +[[package]] +name = "pandas-stubs" +version = "2.2.3.241126" +description = "Type annotations for pandas" +optional = false +python-versions = ">=3.10" +files = [ + {file = "pandas_stubs-2.2.3.241126-py3-none-any.whl", hash = "sha256:74aa79c167af374fe97068acc90776c0ebec5266a6e5c69fe11e9c2cf51f2267"}, + {file = "pandas_stubs-2.2.3.241126.tar.gz", hash = "sha256:cf819383c6d9ae7d4dabf34cd47e1e45525bb2f312e6ad2939c2c204cb708acd"}, +] + +[package.dependencies] +numpy = ">=1.23.5" 
+types-pytz = ">=2022.1.1" + +[[package]] +name = "pathspec" +version = "0.12.1" +description = "Utility library for gitignore style pattern matching of file paths." +optional = false +python-versions = ">=3.8" +files = [ + {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"}, + {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, +] + +[[package]] +name = "pillow" +version = "11.0.0" +description = "Python Imaging Library (Fork)" +optional = false +python-versions = ">=3.9" +files = [ + {file = "pillow-11.0.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:6619654954dc4936fcff82db8eb6401d3159ec6be81e33c6000dfd76ae189947"}, + {file = "pillow-11.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b3c5ac4bed7519088103d9450a1107f76308ecf91d6dabc8a33a2fcfb18d0fba"}, + {file = "pillow-11.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a65149d8ada1055029fcb665452b2814fe7d7082fcb0c5bed6db851cb69b2086"}, + {file = "pillow-11.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:88a58d8ac0cc0e7f3a014509f0455248a76629ca9b604eca7dc5927cc593c5e9"}, + {file = "pillow-11.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:c26845094b1af3c91852745ae78e3ea47abf3dbcd1cf962f16b9a5fbe3ee8488"}, + {file = "pillow-11.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:1a61b54f87ab5786b8479f81c4b11f4d61702830354520837f8cc791ebba0f5f"}, + {file = "pillow-11.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:674629ff60030d144b7bca2b8330225a9b11c482ed408813924619c6f302fdbb"}, + {file = "pillow-11.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:598b4e238f13276e0008299bd2482003f48158e2b11826862b1eb2ad7c768b97"}, + {file = "pillow-11.0.0-cp310-cp310-win32.whl", hash = "sha256:9a0f748eaa434a41fccf8e1ee7a3eed68af1b690e75328fd7a60af123c193b50"}, + {file = "pillow-11.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:a5629742881bcbc1f42e840af185fd4d83a5edeb96475a575f4da50d6ede337c"}, + {file = "pillow-11.0.0-cp310-cp310-win_arm64.whl", hash = "sha256:ee217c198f2e41f184f3869f3e485557296d505b5195c513b2bfe0062dc537f1"}, + {file = "pillow-11.0.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:1c1d72714f429a521d8d2d018badc42414c3077eb187a59579f28e4270b4b0fc"}, + {file = "pillow-11.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:499c3a1b0d6fc8213519e193796eb1a86a1be4b1877d678b30f83fd979811d1a"}, + {file = "pillow-11.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c8b2351c85d855293a299038e1f89db92a2f35e8d2f783489c6f0b2b5f3fe8a3"}, + {file = "pillow-11.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f4dba50cfa56f910241eb7f883c20f1e7b1d8f7d91c750cd0b318bad443f4d5"}, + {file = "pillow-11.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:5ddbfd761ee00c12ee1be86c9c0683ecf5bb14c9772ddbd782085779a63dd55b"}, + {file = "pillow-11.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:45c566eb10b8967d71bf1ab8e4a525e5a93519e29ea071459ce517f6b903d7fa"}, + {file = "pillow-11.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b4fd7bd29610a83a8c9b564d457cf5bd92b4e11e79a4ee4716a63c959699b306"}, + {file = "pillow-11.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:cb929ca942d0ec4fac404cbf520ee6cac37bf35be479b970c4ffadf2b6a1cad9"}, + {file = "pillow-11.0.0-cp311-cp311-win32.whl", hash = 
"sha256:006bcdd307cc47ba43e924099a038cbf9591062e6c50e570819743f5607404f5"}, + {file = "pillow-11.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:52a2d8323a465f84faaba5236567d212c3668f2ab53e1c74c15583cf507a0291"}, + {file = "pillow-11.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:16095692a253047fe3ec028e951fa4221a1f3ed3d80c397e83541a3037ff67c9"}, + {file = "pillow-11.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d2c0a187a92a1cb5ef2c8ed5412dd8d4334272617f532d4ad4de31e0495bd923"}, + {file = "pillow-11.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:084a07ef0821cfe4858fe86652fffac8e187b6ae677e9906e192aafcc1b69903"}, + {file = "pillow-11.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8069c5179902dcdce0be9bfc8235347fdbac249d23bd90514b7a47a72d9fecf4"}, + {file = "pillow-11.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f02541ef64077f22bf4924f225c0fd1248c168f86e4b7abdedd87d6ebaceab0f"}, + {file = "pillow-11.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:fcb4621042ac4b7865c179bb972ed0da0218a076dc1820ffc48b1d74c1e37fe9"}, + {file = "pillow-11.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:00177a63030d612148e659b55ba99527803288cea7c75fb05766ab7981a8c1b7"}, + {file = "pillow-11.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8853a3bf12afddfdf15f57c4b02d7ded92c7a75a5d7331d19f4f9572a89c17e6"}, + {file = "pillow-11.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3107c66e43bda25359d5ef446f59c497de2b5ed4c7fdba0894f8d6cf3822dafc"}, + {file = "pillow-11.0.0-cp312-cp312-win32.whl", hash = "sha256:86510e3f5eca0ab87429dd77fafc04693195eec7fd6a137c389c3eeb4cfb77c6"}, + {file = "pillow-11.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:8ec4a89295cd6cd4d1058a5e6aec6bf51e0eaaf9714774e1bfac7cfc9051db47"}, + {file = "pillow-11.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:27a7860107500d813fcd203b4ea19b04babe79448268403172782754870dac25"}, + {file = "pillow-11.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:bcd1fb5bb7b07f64c15618c89efcc2cfa3e95f0e3bcdbaf4642509de1942a699"}, + {file = "pillow-11.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0e038b0745997c7dcaae350d35859c9715c71e92ffb7e0f4a8e8a16732150f38"}, + {file = "pillow-11.0.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ae08bd8ffc41aebf578c2af2f9d8749d91f448b3bfd41d7d9ff573d74f2a6b2"}, + {file = "pillow-11.0.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d69bfd8ec3219ae71bcde1f942b728903cad25fafe3100ba2258b973bd2bc1b2"}, + {file = "pillow-11.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:61b887f9ddba63ddf62fd02a3ba7add935d053b6dd7d58998c630e6dbade8527"}, + {file = "pillow-11.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:c6a660307ca9d4867caa8d9ca2c2658ab685de83792d1876274991adec7b93fa"}, + {file = "pillow-11.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:73e3a0200cdda995c7e43dd47436c1548f87a30bb27fb871f352a22ab8dcf45f"}, + {file = "pillow-11.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fba162b8872d30fea8c52b258a542c5dfd7b235fb5cb352240c8d63b414013eb"}, + {file = "pillow-11.0.0-cp313-cp313-win32.whl", hash = "sha256:f1b82c27e89fffc6da125d5eb0ca6e68017faf5efc078128cfaa42cf5cb38798"}, + {file = "pillow-11.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:8ba470552b48e5835f1d23ecb936bb7f71d206f9dfeee64245f30c3270b994de"}, + {file = "pillow-11.0.0-cp313-cp313-win_arm64.whl", hash = 
"sha256:846e193e103b41e984ac921b335df59195356ce3f71dcfd155aa79c603873b84"}, + {file = "pillow-11.0.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:4ad70c4214f67d7466bea6a08061eba35c01b1b89eaa098040a35272a8efb22b"}, + {file = "pillow-11.0.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:6ec0d5af64f2e3d64a165f490d96368bb5dea8b8f9ad04487f9ab60dc4bb6003"}, + {file = "pillow-11.0.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c809a70e43c7977c4a42aefd62f0131823ebf7dd73556fa5d5950f5b354087e2"}, + {file = "pillow-11.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:4b60c9520f7207aaf2e1d94de026682fc227806c6e1f55bba7606d1c94dd623a"}, + {file = "pillow-11.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:1e2688958a840c822279fda0086fec1fdab2f95bf2b717b66871c4ad9859d7e8"}, + {file = "pillow-11.0.0-cp313-cp313t-win32.whl", hash = "sha256:607bbe123c74e272e381a8d1957083a9463401f7bd01287f50521ecb05a313f8"}, + {file = "pillow-11.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:5c39ed17edea3bc69c743a8dd3e9853b7509625c2462532e62baa0732163a904"}, + {file = "pillow-11.0.0-cp313-cp313t-win_arm64.whl", hash = "sha256:75acbbeb05b86bc53cbe7b7e6fe00fbcf82ad7c684b3ad82e3d711da9ba287d3"}, + {file = "pillow-11.0.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:2e46773dc9f35a1dd28bd6981332fd7f27bec001a918a72a79b4133cf5291dba"}, + {file = "pillow-11.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2679d2258b7f1192b378e2893a8a0a0ca472234d4c2c0e6bdd3380e8dfa21b6a"}, + {file = "pillow-11.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eda2616eb2313cbb3eebbe51f19362eb434b18e3bb599466a1ffa76a033fb916"}, + {file = "pillow-11.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:20ec184af98a121fb2da42642dea8a29ec80fc3efbaefb86d8fdd2606619045d"}, + {file = "pillow-11.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:8594f42df584e5b4bb9281799698403f7af489fba84c34d53d1c4bfb71b7c4e7"}, + {file = "pillow-11.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:c12b5ae868897c7338519c03049a806af85b9b8c237b7d675b8c5e089e4a618e"}, + {file = "pillow-11.0.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:70fbbdacd1d271b77b7721fe3cdd2d537bbbd75d29e6300c672ec6bb38d9672f"}, + {file = "pillow-11.0.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:5178952973e588b3f1360868847334e9e3bf49d19e169bbbdfaf8398002419ae"}, + {file = "pillow-11.0.0-cp39-cp39-win32.whl", hash = "sha256:8c676b587da5673d3c75bd67dd2a8cdfeb282ca38a30f37950511766b26858c4"}, + {file = "pillow-11.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:94f3e1780abb45062287b4614a5bc0874519c86a777d4a7ad34978e86428b8dd"}, + {file = "pillow-11.0.0-cp39-cp39-win_arm64.whl", hash = "sha256:290f2cc809f9da7d6d622550bbf4c1e57518212da51b6a30fe8e0a270a5b78bd"}, + {file = "pillow-11.0.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:1187739620f2b365de756ce086fdb3604573337cc28a0d3ac4a01ab6b2d2a6d2"}, + {file = "pillow-11.0.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:fbbcb7b57dc9c794843e3d1258c0fbf0f48656d46ffe9e09b63bbd6e8cd5d0a2"}, + {file = "pillow-11.0.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d203af30149ae339ad1b4f710d9844ed8796e97fda23ffbc4cc472968a47d0b"}, + {file = "pillow-11.0.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21a0d3b115009ebb8ac3d2ebec5c2982cc693da935f4ab7bb5c8ebe2f47d36f2"}, + {file = 
"pillow-11.0.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:73853108f56df97baf2bb8b522f3578221e56f646ba345a372c78326710d3830"}, + {file = "pillow-11.0.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:e58876c91f97b0952eb766123bfef372792ab3f4e3e1f1a2267834c2ab131734"}, + {file = "pillow-11.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:224aaa38177597bb179f3ec87eeefcce8e4f85e608025e9cfac60de237ba6316"}, + {file = "pillow-11.0.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:5bd2d3bdb846d757055910f0a59792d33b555800813c3b39ada1829c372ccb06"}, + {file = "pillow-11.0.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:375b8dd15a1f5d2feafff536d47e22f69625c1aa92f12b339ec0b2ca40263273"}, + {file = "pillow-11.0.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:daffdf51ee5db69a82dd127eabecce20729e21f7a3680cf7cbb23f0829189790"}, + {file = "pillow-11.0.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7326a1787e3c7b0429659e0a944725e1b03eeaa10edd945a86dead1913383944"}, + {file = "pillow-11.0.0.tar.gz", hash = "sha256:72bacbaf24ac003fea9bff9837d1eedb6088758d41e100c1552930151f677739"}, +] + +[package.extras] +docs = ["furo", "olefile", "sphinx (>=8.1)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinxext-opengraph"] +fpx = ["olefile"] +mic = ["olefile"] +tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"] +typing = ["typing-extensions"] +xmp = ["defusedxml"] + +[[package]] +name = "platformdirs" +version = "4.3.6" +description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." +optional = false +python-versions = ">=3.8" +files = [ + {file = "platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb"}, + {file = "platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907"}, +] + +[package.extras] +docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2.4)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.2)", "pytest-cov (>=5)", "pytest-mock (>=3.14)"] +type = ["mypy (>=1.11.2)"] + +[[package]] +name = "pluggy" +version = "1.5.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, + {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + +[[package]] +name = "pre-commit" +version = "3.8.0" +description = "A framework for managing and maintaining multi-language pre-commit hooks." 
+optional = false +python-versions = ">=3.9" +files = [ + {file = "pre_commit-3.8.0-py2.py3-none-any.whl", hash = "sha256:9a90a53bf82fdd8778d58085faf8d83df56e40dfe18f45b19446e26bf1b3a63f"}, + {file = "pre_commit-3.8.0.tar.gz", hash = "sha256:8bb6494d4a20423842e198980c9ecf9f96607a07ea29549e180eef9ae80fe7af"}, +] + +[package.dependencies] +cfgv = ">=2.0.0" +identify = ">=1.0.0" +nodeenv = ">=0.11.1" +pyyaml = ">=5.1" +virtualenv = ">=20.10.0" + +[[package]] +name = "pyarrow" +version = "15.0.2" +description = "Python library for Apache Arrow" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pyarrow-15.0.2-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:88b340f0a1d05b5ccc3d2d986279045655b1fe8e41aba6ca44ea28da0d1455d8"}, + {file = "pyarrow-15.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:eaa8f96cecf32da508e6c7f69bb8401f03745c050c1dd42ec2596f2e98deecac"}, + {file = "pyarrow-15.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23c6753ed4f6adb8461e7c383e418391b8d8453c5d67e17f416c3a5d5709afbd"}, + {file = "pyarrow-15.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f639c059035011db8c0497e541a8a45d98a58dbe34dc8fadd0ef128f2cee46e5"}, + {file = "pyarrow-15.0.2-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:290e36a59a0993e9a5224ed2fb3e53375770f07379a0ea03ee2fce2e6d30b423"}, + {file = "pyarrow-15.0.2-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:06c2bb2a98bc792f040bef31ad3e9be6a63d0cb39189227c08a7d955db96816e"}, + {file = "pyarrow-15.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:f7a197f3670606a960ddc12adbe8075cea5f707ad7bf0dffa09637fdbb89f76c"}, + {file = "pyarrow-15.0.2-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:5f8bc839ea36b1f99984c78e06e7a06054693dc2af8920f6fb416b5bca9944e4"}, + {file = "pyarrow-15.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f5e81dfb4e519baa6b4c80410421528c214427e77ca0ea9461eb4097c328fa33"}, + {file = "pyarrow-15.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3a4f240852b302a7af4646c8bfe9950c4691a419847001178662a98915fd7ee7"}, + {file = "pyarrow-15.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e7d9cfb5a1e648e172428c7a42b744610956f3b70f524aa3a6c02a448ba853e"}, + {file = "pyarrow-15.0.2-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:2d4f905209de70c0eb5b2de6763104d5a9a37430f137678edfb9a675bac9cd98"}, + {file = "pyarrow-15.0.2-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:90adb99e8ce5f36fbecbbc422e7dcbcbed07d985eed6062e459e23f9e71fd197"}, + {file = "pyarrow-15.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:b116e7fd7889294cbd24eb90cd9bdd3850be3738d61297855a71ac3b8124ee38"}, + {file = "pyarrow-15.0.2-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:25335e6f1f07fdaa026a61c758ee7d19ce824a866b27bba744348fa73bb5a440"}, + {file = "pyarrow-15.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:90f19e976d9c3d8e73c80be84ddbe2f830b6304e4c576349d9360e335cd627fc"}, + {file = "pyarrow-15.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a22366249bf5fd40ddacc4f03cd3160f2d7c247692945afb1899bab8a140ddfb"}, + {file = "pyarrow-15.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c2a335198f886b07e4b5ea16d08ee06557e07db54a8400cc0d03c7f6a22f785f"}, + {file = "pyarrow-15.0.2-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:3e6d459c0c22f0b9c810a3917a1de3ee704b021a5fb8b3bacf968eece6df098f"}, + {file = 
"pyarrow-15.0.2-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:033b7cad32198754d93465dcfb71d0ba7cb7cd5c9afd7052cab7214676eec38b"}, + {file = "pyarrow-15.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:29850d050379d6e8b5a693098f4de7fd6a2bea4365bfd073d7c57c57b95041ee"}, + {file = "pyarrow-15.0.2-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:7167107d7fb6dcadb375b4b691b7e316f4368f39f6f45405a05535d7ad5e5058"}, + {file = "pyarrow-15.0.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e85241b44cc3d365ef950432a1b3bd44ac54626f37b2e3a0cc89c20e45dfd8bf"}, + {file = "pyarrow-15.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:248723e4ed3255fcd73edcecc209744d58a9ca852e4cf3d2577811b6d4b59818"}, + {file = "pyarrow-15.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ff3bdfe6f1b81ca5b73b70a8d482d37a766433823e0c21e22d1d7dde76ca33f"}, + {file = "pyarrow-15.0.2-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:f3d77463dee7e9f284ef42d341689b459a63ff2e75cee2b9302058d0d98fe142"}, + {file = "pyarrow-15.0.2-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:8c1faf2482fb89766e79745670cbca04e7018497d85be9242d5350cba21357e1"}, + {file = "pyarrow-15.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:28f3016958a8e45a1069303a4a4f6a7d4910643fc08adb1e2e4a7ff056272ad3"}, + {file = "pyarrow-15.0.2-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:89722cb64286ab3d4daf168386f6968c126057b8c7ec3ef96302e81d8cdb8ae4"}, + {file = "pyarrow-15.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:cd0ba387705044b3ac77b1b317165c0498299b08261d8122c96051024f953cd5"}, + {file = "pyarrow-15.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad2459bf1f22b6a5cdcc27ebfd99307d5526b62d217b984b9f5c974651398832"}, + {file = "pyarrow-15.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58922e4bfece8b02abf7159f1f53a8f4d9f8e08f2d988109126c17c3bb261f22"}, + {file = "pyarrow-15.0.2-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:adccc81d3dc0478ea0b498807b39a8d41628fa9210729b2f718b78cb997c7c91"}, + {file = "pyarrow-15.0.2-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:8bd2baa5fe531571847983f36a30ddbf65261ef23e496862ece83bdceb70420d"}, + {file = "pyarrow-15.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:6669799a1d4ca9da9c7e06ef48368320f5856f36f9a4dd31a11839dda3f6cc8c"}, + {file = "pyarrow-15.0.2.tar.gz", hash = "sha256:9c9bc803cb3b7bfacc1e96ffbfd923601065d9d3f911179d81e72d99fd74a3d9"}, +] + +[package.dependencies] +numpy = ">=1.16.6,<2" + +[[package]] +name = "pydeseq2" +version = "0.4.9" +description = "A python implementation of DESeq2." 
+optional = false +python-versions = ">=3.9.0" +files = [ + {file = "pydeseq2-0.4.9-py3-none-any.whl", hash = "sha256:7f112fe1dfd3cef1c19e1ead67379d348f2517ad0594fac0fcbae847d0d62020"}, + {file = "pydeseq2-0.4.9.tar.gz", hash = "sha256:0375207775953f43f84ed4279fcb9f11a430d79d038e158bbe74acbc58326d31"}, +] + +[package.dependencies] +anndata = ">=0.8.0" +matplotlib = ">=3.6.2" +numpy = ">=1.23.0" +pandas = ">=1.4.0" +scikit-learn = ">=1.1.0" +scipy = ">=1.11.0" + +[package.extras] +dev = ["coverage", "mypy", "numpydoc", "pandas-stubs", "pre-commit (>=2.13.0)", "pytest (>=6.2.4)"] + +[[package]] +name = "pyparsing" +version = "3.2.0" +description = "pyparsing module - Classes and methods to define and execute parsing grammars" +optional = false +python-versions = ">=3.9" +files = [ + {file = "pyparsing-3.2.0-py3-none-any.whl", hash = "sha256:93d9577b88da0bbea8cc8334ee8b918ed014968fd2ec383e868fb8afb1ccef84"}, + {file = "pyparsing-3.2.0.tar.gz", hash = "sha256:cbf74e27246d595d9a74b186b810f6fbb86726dbf3b9532efb343f6d7294fe9c"}, +] + +[package.extras] +diagrams = ["jinja2", "railroad-diagrams"] + +[[package]] +name = "pytest" +version = "8.3.4" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pytest-8.3.4-py3-none-any.whl", hash = "sha256:50e16d954148559c9a74109af1eaf0c945ba2d8f30f0a3d3335edde19788b6f6"}, + {file = "pytest-8.3.4.tar.gz", hash = "sha256:965370d062bce11e73868e0335abac31b4d3de0e82f4007408d242b4f8610761"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=1.5,<2" +tomli = {version = ">=1", markers = "python_version < \"3.11\""} + +[package.extras] +dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +description = "Extensions to the standard Python datetime module" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +files = [ + {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, + {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, +] + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "pytz" +version = "2024.2" +description = "World timezone definitions, modern and historical" +optional = false +python-versions = "*" +files = [ + {file = "pytz-2024.2-py2.py3-none-any.whl", hash = "sha256:31c7c1817eb7fae7ca4b8c7ee50c72f93aa2dd863de768e1ef4245d426aa0725"}, + {file = "pytz-2024.2.tar.gz", hash = "sha256:2aa355083c50a0f93fa581709deac0c9ad65cca8a9e9beac660adcbd493c798a"}, +] + +[[package]] +name = "pyyaml" +version = "6.0.2" +description = "YAML parser and emitter for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"}, + {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"}, + {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8824b5a04a04a047e72eea5cec3bc266db09e35de6bdfe34c9436ac5ee27d237"}, + {file = 
"PyYAML-6.0.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c36280e6fb8385e520936c3cb3b8042851904eba0e58d277dca80a5cfed590b"}, + {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec031d5d2feb36d1d1a24380e4db6d43695f3748343d99434e6f5f9156aaa2ed"}, + {file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:936d68689298c36b53b29f23c6dbb74de12b4ac12ca6cfe0e047bedceea56180"}, + {file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:23502f431948090f597378482b4812b0caae32c22213aecf3b55325e049a6c68"}, + {file = "PyYAML-6.0.2-cp310-cp310-win32.whl", hash = "sha256:2e99c6826ffa974fe6e27cdb5ed0021786b03fc98e5ee3c5bfe1fd5015f42b99"}, + {file = "PyYAML-6.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:a4d3091415f010369ae4ed1fc6b79def9416358877534caf6a0fdd2146c87a3e"}, + {file = "PyYAML-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774"}, + {file = "PyYAML-6.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee"}, + {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c"}, + {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317"}, + {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85"}, + {file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4"}, + {file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e"}, + {file = "PyYAML-6.0.2-cp311-cp311-win32.whl", hash = "sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5"}, + {file = "PyYAML-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44"}, + {file = "PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab"}, + {file = "PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725"}, + {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5"}, + {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425"}, + {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476"}, + {file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48"}, + {file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b"}, + {file = "PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4"}, + {file = "PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = 
"sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8"}, + {file = "PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba"}, + {file = "PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1"}, + {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133"}, + {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484"}, + {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5"}, + {file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc"}, + {file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652"}, + {file = "PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183"}, + {file = "PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563"}, + {file = "PyYAML-6.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:24471b829b3bf607e04e88d79542a9d48bb037c2267d7927a874e6c205ca7e9a"}, + {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7fded462629cfa4b685c5416b949ebad6cec74af5e2d42905d41e257e0869f5"}, + {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d84a1718ee396f54f3a086ea0a66d8e552b2ab2017ef8b420e92edbc841c352d"}, + {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9056c1ecd25795207ad294bcf39f2db3d845767be0ea6e6a34d856f006006083"}, + {file = "PyYAML-6.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:82d09873e40955485746739bcb8b4586983670466c23382c19cffecbf1fd8706"}, + {file = "PyYAML-6.0.2-cp38-cp38-win32.whl", hash = "sha256:43fa96a3ca0d6b1812e01ced1044a003533c47f6ee8aca31724f78e93ccc089a"}, + {file = "PyYAML-6.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:01179a4a8559ab5de078078f37e5c1a30d76bb88519906844fd7bdea1b7729ff"}, + {file = "PyYAML-6.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:688ba32a1cffef67fd2e9398a2efebaea461578b0923624778664cc1c914db5d"}, + {file = "PyYAML-6.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a8786accb172bd8afb8be14490a16625cbc387036876ab6ba70912730faf8e1f"}, + {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8e03406cac8513435335dbab54c0d385e4a49e4945d2909a581c83647ca0290"}, + {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f753120cb8181e736c57ef7636e83f31b9c0d1722c516f7e86cf15b7aa57ff12"}, + {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b1fdb9dc17f5a7677423d508ab4f243a726dea51fa5e70992e59a7411c89d19"}, + {file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0b69e4ce7a131fe56b7e4d770c67429700908fc0752af059838b1cfb41960e4e"}, + {file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a9f8c2e67970f13b16084e04f134610fd1d374bf477b17ec1599185cf611d725"}, + {file = 
"PyYAML-6.0.2-cp39-cp39-win32.whl", hash = "sha256:6395c297d42274772abc367baaa79683958044e5d3835486c16da75d2a694631"}, + {file = "PyYAML-6.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:39693e1f8320ae4f43943590b49779ffb98acb81f788220ea932a6b6c51004d8"}, + {file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"}, +] + +[[package]] +name = "ruff" +version = "0.2.2" +description = "An extremely fast Python linter and code formatter, written in Rust." +optional = false +python-versions = ">=3.7" +files = [ + {file = "ruff-0.2.2-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:0a9efb032855ffb3c21f6405751d5e147b0c6b631e3ca3f6b20f917572b97eb6"}, + {file = "ruff-0.2.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:d450b7fbff85913f866a5384d8912710936e2b96da74541c82c1b458472ddb39"}, + {file = "ruff-0.2.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ecd46e3106850a5c26aee114e562c329f9a1fbe9e4821b008c4404f64ff9ce73"}, + {file = "ruff-0.2.2-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5e22676a5b875bd72acd3d11d5fa9075d3a5f53b877fe7b4793e4673499318ba"}, + {file = "ruff-0.2.2-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1695700d1e25a99d28f7a1636d85bafcc5030bba9d0578c0781ba1790dbcf51c"}, + {file = "ruff-0.2.2-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:b0c232af3d0bd8f521806223723456ffebf8e323bd1e4e82b0befb20ba18388e"}, + {file = "ruff-0.2.2-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f63d96494eeec2fc70d909393bcd76c69f35334cdbd9e20d089fb3f0640216ca"}, + {file = "ruff-0.2.2-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6a61ea0ff048e06de273b2e45bd72629f470f5da8f71daf09fe481278b175001"}, + {file = "ruff-0.2.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e1439c8f407e4f356470e54cdecdca1bd5439a0673792dbe34a2b0a551a2fe3"}, + {file = "ruff-0.2.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:940de32dc8853eba0f67f7198b3e79bc6ba95c2edbfdfac2144c8235114d6726"}, + {file = "ruff-0.2.2-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:0c126da55c38dd917621552ab430213bdb3273bb10ddb67bc4b761989210eb6e"}, + {file = "ruff-0.2.2-py3-none-musllinux_1_2_i686.whl", hash = "sha256:3b65494f7e4bed2e74110dac1f0d17dc8e1f42faaa784e7c58a98e335ec83d7e"}, + {file = "ruff-0.2.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:1ec49be4fe6ddac0503833f3ed8930528e26d1e60ad35c2446da372d16651ce9"}, + {file = "ruff-0.2.2-py3-none-win32.whl", hash = "sha256:d920499b576f6c68295bc04e7b17b6544d9d05f196bb3aac4358792ef6f34325"}, + {file = "ruff-0.2.2-py3-none-win_amd64.whl", hash = "sha256:cc9a91ae137d687f43a44c900e5d95e9617cb37d4c989e462980ba27039d239d"}, + {file = "ruff-0.2.2-py3-none-win_arm64.whl", hash = "sha256:c9d15fc41e6054bfc7200478720570078f0b41c9ae4f010bcc16bd6f4d1aacdd"}, + {file = "ruff-0.2.2.tar.gz", hash = "sha256:e62ed7f36b3068a30ba39193a14274cd706bc486fad521276458022f7bccb31d"}, +] + +[[package]] +name = "scikit-learn" +version = "1.5.2" +description = "A set of python modules for machine learning and data mining" +optional = false +python-versions = ">=3.9" +files = [ + {file = "scikit_learn-1.5.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:299406827fb9a4f862626d0fe6c122f5f87f8910b86fe5daa4c32dcd742139b6"}, + {file = "scikit_learn-1.5.2-cp310-cp310-macosx_12_0_arm64.whl", hash = 
"sha256:2d4cad1119c77930b235579ad0dc25e65c917e756fe80cab96aa3b9428bd3fb0"}, + {file = "scikit_learn-1.5.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c412ccc2ad9bf3755915e3908e677b367ebc8d010acbb3f182814524f2e5540"}, + {file = "scikit_learn-1.5.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a686885a4b3818d9e62904d91b57fa757fc2bed3e465c8b177be652f4dd37c8"}, + {file = "scikit_learn-1.5.2-cp310-cp310-win_amd64.whl", hash = "sha256:c15b1ca23d7c5f33cc2cb0a0d6aaacf893792271cddff0edbd6a40e8319bc113"}, + {file = "scikit_learn-1.5.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:03b6158efa3faaf1feea3faa884c840ebd61b6484167c711548fce208ea09445"}, + {file = "scikit_learn-1.5.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:1ff45e26928d3b4eb767a8f14a9a6efbf1cbff7c05d1fb0f95f211a89fd4f5de"}, + {file = "scikit_learn-1.5.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f763897fe92d0e903aa4847b0aec0e68cadfff77e8a0687cabd946c89d17e675"}, + {file = "scikit_learn-1.5.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8b0ccd4a902836493e026c03256e8b206656f91fbcc4fde28c57a5b752561f1"}, + {file = "scikit_learn-1.5.2-cp311-cp311-win_amd64.whl", hash = "sha256:6c16d84a0d45e4894832b3c4d0bf73050939e21b99b01b6fd59cbb0cf39163b6"}, + {file = "scikit_learn-1.5.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f932a02c3f4956dfb981391ab24bda1dbd90fe3d628e4b42caef3e041c67707a"}, + {file = "scikit_learn-1.5.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:3b923d119d65b7bd555c73be5423bf06c0105678ce7e1f558cb4b40b0a5502b1"}, + {file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f60021ec1574e56632be2a36b946f8143bf4e5e6af4a06d85281adc22938e0dd"}, + {file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:394397841449853c2290a32050382edaec3da89e35b3e03d6cc966aebc6a8ae6"}, + {file = "scikit_learn-1.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:57cc1786cfd6bd118220a92ede80270132aa353647684efa385a74244a41e3b1"}, + {file = "scikit_learn-1.5.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e9a702e2de732bbb20d3bad29ebd77fc05a6b427dc49964300340e4c9328b3f5"}, + {file = "scikit_learn-1.5.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:b0768ad641981f5d3a198430a1d31c3e044ed2e8a6f22166b4d546a5116d7908"}, + {file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:178ddd0a5cb0044464fc1bfc4cca5b1833bfc7bb022d70b05db8530da4bb3dd3"}, + {file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7284ade780084d94505632241bf78c44ab3b6f1e8ccab3d2af58e0e950f9c12"}, + {file = "scikit_learn-1.5.2-cp313-cp313-win_amd64.whl", hash = "sha256:b7b0f9a0b1040830d38c39b91b3a44e1b643f4b36e36567b80b7c6bd2202a27f"}, + {file = "scikit_learn-1.5.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:757c7d514ddb00ae249832fe87100d9c73c6ea91423802872d9e74970a0e40b9"}, + {file = "scikit_learn-1.5.2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:52788f48b5d8bca5c0736c175fa6bdaab2ef00a8f536cda698db61bd89c551c1"}, + {file = "scikit_learn-1.5.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:643964678f4b5fbdc95cbf8aec638acc7aa70f5f79ee2cdad1eec3df4ba6ead8"}, + {file = "scikit_learn-1.5.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:ca64b3089a6d9b9363cd3546f8978229dcbb737aceb2c12144ee3f70f95684b7"}, + {file = "scikit_learn-1.5.2-cp39-cp39-win_amd64.whl", hash = "sha256:3bed4909ba187aca80580fe2ef370d9180dcf18e621a27c4cf2ef10d279a7efe"}, + {file = "scikit_learn-1.5.2.tar.gz", hash = "sha256:b4237ed7b3fdd0a4882792e68ef2545d5baa50aca3bb45aa7df468138ad8f94d"}, +] + +[package.dependencies] +joblib = ">=1.2.0" +numpy = ">=1.19.5" +scipy = ">=1.6.0" +threadpoolctl = ">=3.1.0" + +[package.extras] +benchmark = ["matplotlib (>=3.3.4)", "memory_profiler (>=0.57.0)", "pandas (>=1.1.5)"] +build = ["cython (>=3.0.10)", "meson-python (>=0.16.0)", "numpy (>=1.19.5)", "scipy (>=1.6.0)"] +docs = ["Pillow (>=7.1.2)", "matplotlib (>=3.3.4)", "memory_profiler (>=0.57.0)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "polars (>=0.20.30)", "pooch (>=1.6.0)", "pydata-sphinx-theme (>=0.15.3)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)", "sphinx (>=7.3.7)", "sphinx-copybutton (>=0.5.2)", "sphinx-design (>=0.5.0)", "sphinx-design (>=0.6.0)", "sphinx-gallery (>=0.16.0)", "sphinx-prompt (>=1.4.0)", "sphinx-remove-toctrees (>=1.0.0.post1)", "sphinxcontrib-sass (>=0.3.4)", "sphinxext-opengraph (>=0.9.1)"] +examples = ["matplotlib (>=3.3.4)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)"] +install = ["joblib (>=1.2.0)", "numpy (>=1.19.5)", "scipy (>=1.6.0)", "threadpoolctl (>=3.1.0)"] +maintenance = ["conda-lock (==2.5.6)"] +tests = ["black (>=24.3.0)", "matplotlib (>=3.3.4)", "mypy (>=1.9)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "polars (>=0.20.30)", "pooch (>=1.6.0)", "pyamg (>=4.0.0)", "pyarrow (>=12.0.0)", "pytest (>=7.1.2)", "pytest-cov (>=2.9.0)", "ruff (>=0.2.1)", "scikit-image (>=0.17.2)"] + +[[package]] +name = "scipy" +version = "1.14.1" +description = "Fundamental algorithms for scientific computing in Python" +optional = false +python-versions = ">=3.10" +files = [ + {file = "scipy-1.14.1-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:b28d2ca4add7ac16ae8bb6632a3c86e4b9e4d52d3e34267f6e1b0c1f8d87e389"}, + {file = "scipy-1.14.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:d0d2821003174de06b69e58cef2316a6622b60ee613121199cb2852a873f8cf3"}, + {file = "scipy-1.14.1-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:8bddf15838ba768bb5f5083c1ea012d64c9a444e16192762bd858f1e126196d0"}, + {file = "scipy-1.14.1-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:97c5dddd5932bd2a1a31c927ba5e1463a53b87ca96b5c9bdf5dfd6096e27efc3"}, + {file = "scipy-1.14.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2ff0a7e01e422c15739ecd64432743cf7aae2b03f3084288f399affcefe5222d"}, + {file = "scipy-1.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8e32dced201274bf96899e6491d9ba3e9a5f6b336708656466ad0522d8528f69"}, + {file = "scipy-1.14.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8426251ad1e4ad903a4514712d2fa8fdd5382c978010d1c6f5f37ef286a713ad"}, + {file = "scipy-1.14.1-cp310-cp310-win_amd64.whl", hash = "sha256:a49f6ed96f83966f576b33a44257d869756df6cf1ef4934f59dd58b25e0327e5"}, + {file = "scipy-1.14.1-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:2da0469a4ef0ecd3693761acbdc20f2fdeafb69e6819cc081308cc978153c675"}, + {file = "scipy-1.14.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:c0ee987efa6737242745f347835da2cc5bb9f1b42996a4d97d5c7ff7928cb6f2"}, + {file = "scipy-1.14.1-cp311-cp311-macosx_14_0_arm64.whl", hash = 
"sha256:3a1b111fac6baec1c1d92f27e76511c9e7218f1695d61b59e05e0fe04dc59617"}, + {file = "scipy-1.14.1-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:8475230e55549ab3f207bff11ebfc91c805dc3463ef62eda3ccf593254524ce8"}, + {file = "scipy-1.14.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:278266012eb69f4a720827bdd2dc54b2271c97d84255b2faaa8f161a158c3b37"}, + {file = "scipy-1.14.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fef8c87f8abfb884dac04e97824b61299880c43f4ce675dd2cbeadd3c9b466d2"}, + {file = "scipy-1.14.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b05d43735bb2f07d689f56f7b474788a13ed8adc484a85aa65c0fd931cf9ccd2"}, + {file = "scipy-1.14.1-cp311-cp311-win_amd64.whl", hash = "sha256:716e389b694c4bb564b4fc0c51bc84d381735e0d39d3f26ec1af2556ec6aad94"}, + {file = "scipy-1.14.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:631f07b3734d34aced009aaf6fedfd0eb3498a97e581c3b1e5f14a04164a456d"}, + {file = "scipy-1.14.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:af29a935803cc707ab2ed7791c44288a682f9c8107bc00f0eccc4f92c08d6e07"}, + {file = "scipy-1.14.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:2843f2d527d9eebec9a43e6b406fb7266f3af25a751aa91d62ff416f54170bc5"}, + {file = "scipy-1.14.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:eb58ca0abd96911932f688528977858681a59d61a7ce908ffd355957f7025cfc"}, + {file = "scipy-1.14.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:30ac8812c1d2aab7131a79ba62933a2a76f582d5dbbc695192453dae67ad6310"}, + {file = "scipy-1.14.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f9ea80f2e65bdaa0b7627fb00cbeb2daf163caa015e59b7516395fe3bd1e066"}, + {file = "scipy-1.14.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:edaf02b82cd7639db00dbff629995ef185c8df4c3ffa71a5562a595765a06ce1"}, + {file = "scipy-1.14.1-cp312-cp312-win_amd64.whl", hash = "sha256:2ff38e22128e6c03ff73b6bb0f85f897d2362f8c052e3b8ad00532198fbdae3f"}, + {file = "scipy-1.14.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1729560c906963fc8389f6aac023739ff3983e727b1a4d87696b7bf108316a79"}, + {file = "scipy-1.14.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:4079b90df244709e675cdc8b93bfd8a395d59af40b72e339c2287c91860deb8e"}, + {file = "scipy-1.14.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:e0cf28db0f24a38b2a0ca33a85a54852586e43cf6fd876365c86e0657cfe7d73"}, + {file = "scipy-1.14.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:0c2f95de3b04e26f5f3ad5bb05e74ba7f68b837133a4492414b3afd79dfe540e"}, + {file = "scipy-1.14.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b99722ea48b7ea25e8e015e8341ae74624f72e5f21fc2abd45f3a93266de4c5d"}, + {file = "scipy-1.14.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5149e3fd2d686e42144a093b206aef01932a0059c2a33ddfa67f5f035bdfe13e"}, + {file = "scipy-1.14.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e4f5a7c49323533f9103d4dacf4e4f07078f360743dec7f7596949149efeec06"}, + {file = "scipy-1.14.1-cp313-cp313-win_amd64.whl", hash = "sha256:baff393942b550823bfce952bb62270ee17504d02a1801d7fd0719534dfb9c84"}, + {file = "scipy-1.14.1.tar.gz", hash = "sha256:5a275584e726026a5699459aa72f828a610821006228e841b94275c4a7c08417"}, +] + +[package.dependencies] +numpy = ">=1.23.5,<2.3" + +[package.extras] +dev = ["cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy (==1.10.0)", "pycodestyle", "pydevtool", "rich-click", "ruff (>=0.0.292)", 
"types-psutil", "typing_extensions"] +doc = ["jupyterlite-pyodide-kernel", "jupyterlite-sphinx (>=0.13.1)", "jupytext", "matplotlib (>=3.5)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (>=0.15.2)", "sphinx (>=5.0.0,<=7.3.7)", "sphinx-design (>=0.4.0)"] +test = ["Cython", "array-api-strict (>=2.0)", "asv", "gmpy2", "hypothesis (>=6.30)", "meson", "mpmath", "ninja", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] + +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] + +[[package]] +name = "smmap" +version = "5.0.1" +description = "A pure Python implementation of a sliding window memory map manager" +optional = false +python-versions = ">=3.7" +files = [ + {file = "smmap-5.0.1-py3-none-any.whl", hash = "sha256:e6d8668fa5f93e706934a62d7b4db19c8d9eb8cf2adbb75ef1b675aa332b69da"}, + {file = "smmap-5.0.1.tar.gz", hash = "sha256:dceeb6c0028fdb6734471eb07c0cd2aae706ccaecab45965ee83f11c8d3b1f62"}, +] + +[[package]] +name = "threadpoolctl" +version = "3.5.0" +description = "threadpoolctl" +optional = false +python-versions = ">=3.8" +files = [ + {file = "threadpoolctl-3.5.0-py3-none-any.whl", hash = "sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467"}, + {file = "threadpoolctl-3.5.0.tar.gz", hash = "sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107"}, +] + +[[package]] +name = "tomli" +version = "2.2.1" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.8" +files = [ + {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, + {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"}, + {file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a"}, + {file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee"}, + {file = "tomli-2.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e"}, + {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4"}, + {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106"}, + {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8"}, + {file = "tomli-2.2.1-cp311-cp311-win32.whl", hash = "sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff"}, + {file = "tomli-2.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b"}, + {file = "tomli-2.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea"}, + 
{file = "tomli-2.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8"}, + {file = "tomli-2.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192"}, + {file = "tomli-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222"}, + {file = "tomli-2.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77"}, + {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6"}, + {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd"}, + {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e"}, + {file = "tomli-2.2.1-cp312-cp312-win32.whl", hash = "sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98"}, + {file = "tomli-2.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4"}, + {file = "tomli-2.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7"}, + {file = "tomli-2.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c"}, + {file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13"}, + {file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281"}, + {file = "tomli-2.2.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272"}, + {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140"}, + {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2"}, + {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744"}, + {file = "tomli-2.2.1-cp313-cp313-win32.whl", hash = "sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec"}, + {file = "tomli-2.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69"}, + {file = "tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc"}, + {file = "tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff"}, +] + +[[package]] +name = "types-pytz" +version = "2024.2.0.20241003" +description = "Typing stubs for pytz" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-pytz-2024.2.0.20241003.tar.gz", hash = "sha256:575dc38f385a922a212bac00a7d6d2e16e141132a3c955078f4a4fd13ed6cb44"}, + {file = "types_pytz-2024.2.0.20241003-py3-none-any.whl", hash = 
"sha256:3e22df1336c0c6ad1d29163c8fda82736909eb977281cb823c57f8bae07118b7"}, +] + +[[package]] +name = "typing-extensions" +version = "4.12.2" +description = "Backported and Experimental Type Hints for Python 3.8+" +optional = false +python-versions = ">=3.8" +files = [ + {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, + {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, +] + +[[package]] +name = "tzdata" +version = "2024.2" +description = "Provider of IANA time zone data" +optional = false +python-versions = ">=2" +files = [ + {file = "tzdata-2024.2-py2.py3-none-any.whl", hash = "sha256:a48093786cdcde33cad18c2555e8532f34422074448fbc874186f0abd79565cd"}, + {file = "tzdata-2024.2.tar.gz", hash = "sha256:7d85cc416e9382e69095b7bdf4afd9e3880418a2413feec7069d533d6b4e31cc"}, +] + +[[package]] +name = "virtualenv" +version = "20.28.0" +description = "Virtual Python Environment builder" +optional = false +python-versions = ">=3.8" +files = [ + {file = "virtualenv-20.28.0-py3-none-any.whl", hash = "sha256:23eae1b4516ecd610481eda647f3a7c09aea295055337331bb4e6892ecce47b0"}, + {file = "virtualenv-20.28.0.tar.gz", hash = "sha256:2c9c3262bb8e7b87ea801d715fae4495e6032450c71d2309be9550e7364049aa"}, +] + +[package.dependencies] +distlib = ">=0.3.7,<1" +filelock = ">=3.12.2,<4" +platformdirs = ">=3.9.1,<5" + +[package.extras] +docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] +test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"] + +[[package]] +name = "win32-setctime" +version = "1.1.0" +description = "A small Python utility to set file creation time on Windows" +optional = false +python-versions = ">=3.5" +files = [ + {file = "win32_setctime-1.1.0-py3-none-any.whl", hash = "sha256:231db239e959c2fe7eb1d7dc129f11172354f98361c4fa2d6d2d7e278baa8aad"}, + {file = "win32_setctime-1.1.0.tar.gz", hash = "sha256:15cf5750465118d6929ae4de4eb46e8edae9a5634350c01ba582df868e932cb2"}, +] + +[package.extras] +dev = ["black (>=19.3b0)", "pytest (>=4.6.2)"] + +[metadata] +lock-version = "2.0" +python-versions = ">=3.10,<3.13" +content-hash = "62b2c6456e33a45ebdc15be990837e5cb7243b3938876557de16c16d3bba74dc" diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..0e5d4c6 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,92 @@ +[tool.poetry] +name = "fedpydeseq2_datasets" +version = "0.1.0" +description = "This package contains utilities to process TCGA data for fedpydeseq2." 
+authors = ["Boris MUZELLEC "] +readme = "README.md" +packages = [{include = "fedpydeseq2_datasets", from = "./"}] +include = ["fedpydeseq2_datasets/assets", "fedpydeseq2_datasets/download_data"] + + +[build-system] +requires = ["poetry-core>=1.0.0", "setuptools>=65.6.3"] +build-backend = "poetry.core.masonry.api" + +[tool.poetry.dependencies] +python = ">=3.10,<3.13" +numpy = "1.26.4" +pandas = "2.2.2" +pyarrow = "15.0.2" +gitpython = "3.1.43" +anndata = "0.10.8" +pydeseq2 = "0.4.9" +loguru = "0.7.2" +pyyaml = ">=5.1" + + + +[tool.poetry.group.linting] +optional = true + +[tool.poetry.group.linting.dependencies] +ruff = "^0.2.2" +pre-commit = "^3.6.2" +mypy = "^1.8.0" +black = "^24.2.0" +pandas-stubs = "^2.2.0.240218" + +[tool.poetry.group.testing] +optional = true + +[tool.poetry.group.testing.dependencies] +pytest = "^8.0.1" + +[tool.poetry.scripts] +fedpydeseq2-download-data = "fedpydeseq2_datasets.download_data.download_data:main" + +[tool.black] +line-length = 88 + +[tool.ruff] +target-version = "py311" +line-length = 88 +lint.select = [ + "F", # Errors detected by Pyflakes + "E", # Error detected by Pycodestyle + "W", # Warning detected by Pycodestyle + "I", # isort + "D", # pydocstyle + "B", # flake8-bugbear + "TID", # flake8-tidy-imports + "C4", # flake8-comprehensions + "BLE", # flake8-blind-except + "UP", # pyupstage + "RUF100", # Report unused noqa directives + "D401", # Start docstrgins with an imperative verb + "D415", # End docstrings with a period + "D417", # Missing argument descriptions in the docstring +] + +lint.ignore = [ + # Missing docstring in public package + "D104", + # Missing docstring in public module + "D100", + # Missing docstring in __init__ + "D107", + # We don’t want a blank line before a class docstring + "D203", + # We want docstrings to start immediately after the opening triple quote + "D213", +] + +[tool.ruff.lint.isort] +force-single-line = true + +[tool.ruff.lint.pydocstyle] +convention = "numpy" + +[tool.ruff.lint.per-file-ignores] +"tests/*" = ["D"] +# Ignore unused imports in __init__.py files +"*/__init__.py" = ["F401", "I"] diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..ab59313 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,28 @@ +import json +from pathlib import Path + +import pytest + + +@pytest.fixture(scope="session") +def raw_data_path(): + """Fixture to get the path to the raw data.""" + default_paths = Path(__file__).parent / "paths_default.json" + specified_paths = Path(__file__).parent / "paths.json" + if specified_paths.exists(): + with open(specified_paths) as f: + raw_data_path = json.load(f)["raw_data"] + else: + with open(default_paths) as f: + raw_data_path = json.load(f)["raw_data"] + if raw_data_path.startswith("/"): + raw_data_path = Path(raw_data_path) + else: + raw_data_path = Path(__file__).parent / raw_data_path + print("Test raw data path") + return raw_data_path + + +@pytest.fixture(scope="session") +def tmp_processed_data_path(tmpdir_factory): + return Path(tmpdir_factory.mktemp("processed")) diff --git a/tests/paths_default.json b/tests/paths_default.json new file mode 100644 index 0000000..afe8fc7 --- /dev/null +++ b/tests/paths_default.json @@ -0,0 +1 @@ +{"raw_data": "../../fed-pydeseq2-datasets/data/raw"} diff --git a/tests/test_create_reference_dds.py b/tests/test_create_reference_dds.py new file mode 100644 index 0000000..8a0d9d6 --- /dev/null +++ 
b/tests/test_create_reference_dds.py @@ -0,0 +1,89 @@ +import pytest + +from fedpydeseq2_datasets.create_reference_dds import setup_tcga_ground_truth_dds +from fedpydeseq2_datasets.process_and_split_data import setup_tcga_dataset + + +@pytest.mark.parametrize( + "dataset_name", + [ + "TCGA-LUAD", + ], +) +@pytest.mark.usefixtures("raw_data_path", "tmp_processed_data_path") +def test_tcga_preprocessing_and_pooled_pydeseq2_all_indications_small( + raw_data_path, tmp_processed_data_path, dataset_name +): + """Build all the quantities needed by the other tests. + + Builds the pooled ground truth DESeq2 dataset. + """ + + setup_tcga_dataset( + raw_data_path, + tmp_processed_data_path, + dataset_name=dataset_name, + small_samples=True, + small_genes=True, + only_two_centers=False, + design_factors="stage", + continuous_factors=None, + refit_cooks=True, + force=True, + ) + print("Setting up TCGA ground truth DESeq2 datasets...") + setup_tcga_ground_truth_dds( + tmp_processed_data_path, + dataset_name=dataset_name, + small_samples=True, + small_genes=True, + only_two_centers=False, + design_factors="stage", + continuous_factors=None, + reference_dds_ref_level=("stage", "Advanced"), + refit_cooks=True, + force=True, + ) + + +@pytest.mark.parametrize( + "dataset_name", + [ + "TCGA-LUAD", + ], +) +@pytest.mark.usefixtures("raw_data_path", "tmp_processed_data_path") +def test_tcga_preprocessing_and_per_center_pydeseq2_all_indications_small( + raw_data_path, tmp_processed_data_path, dataset_name +): + """Build all the quantities needed by the other tests. + + Builds the per-center ground truth DESeq2 datasets. + """ + + setup_tcga_dataset( + raw_data_path, + tmp_processed_data_path, + dataset_name=dataset_name, + small_samples=True, + small_genes=True, + only_two_centers=False, + design_factors="stage", + continuous_factors=None, + refit_cooks=True, + force=True, + ) + print("Setting up TCGA ground truth DESeq2 datasets...") + setup_tcga_ground_truth_dds( + tmp_processed_data_path, + dataset_name=dataset_name, + small_samples=True, + small_genes=True, + only_two_centers=False, + design_factors="stage", + continuous_factors=None, + reference_dds_ref_level=("stage", "Advanced"), + refit_cooks=True, + force=True, + pooled=False, + ) diff --git a/tests/test_process_and_split_data.py b/tests/test_process_and_split_data.py new file mode 100644 index 0000000..c7fa13d --- /dev/null +++ b/tests/test_process_and_split_data.py @@ -0,0 +1,31 @@ +import pytest + +from fedpydeseq2_datasets.process_and_split_data import setup_tcga_dataset + + +@pytest.mark.parametrize( + "dataset_name", + [ + "TCGA-LUAD", + ], +) +@pytest.mark.usefixtures("raw_data_path", "tmp_processed_data_path") +def test_tcga_preprocessing_all_indications_small( + raw_data_path, tmp_processed_data_path, dataset_name +): + """Run the TCGA preprocessing and per-center split on a small TCGA-LUAD subset. + + No ground truth DESeq2 datasets are built here. 
+ """ + + setup_tcga_dataset( + raw_data_path, + tmp_processed_data_path, + dataset_name=dataset_name, + small_samples=True, + small_genes=True, + only_two_centers=False, + design_factors="stage", + continuous_factors=None, + force=True, + ) diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..05bc89a --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,48 @@ +import numpy as np +import pandas as pd +import pytest + +from fedpydeseq2_datasets.utils import mix_centers + + +@pytest.mark.parametrize( + "heterogeneity_method, heterogeneity_method_param", + [ + ("binomial", 0.0), + ("binomial", 0.2), + ("binomial", 0.5), + ("binomial", 0.8), + ("binomial", 1), + ], +) +def test_mix_centers(heterogeneity_method, heterogeneity_method_param): + # Create a sample metadata DataFrame + np.random.seed(0) + metadata = pd.DataFrame( + { + "SampleID": [f"sample_{i}" for i in range(1000)], + "center_id": np.random.choice([0, 1], size=1000), + } + ) + old_metadata = metadata.copy() + # Call the mix_centers function + mix_centers(metadata, heterogeneity_method, heterogeneity_method_param) + + cross_table = pd.crosstab(metadata["center_id"], old_metadata["center_id"]) + + # Expected proportions + expected_cross_table = np.zeros((2, 2)) + center_0_count = sum(old_metadata["center_id"] == 0) + center_1_count = sum(old_metadata["center_id"] == 1) + + # Calculate the expected values based on the binomial distribution + expected_cross_table[0, 0] = center_0_count * (1 - heterogeneity_method_param / 2.0) + expected_cross_table[0, 1] = center_1_count * (heterogeneity_method_param / 2.0) + expected_cross_table[1, 0] = center_0_count * (heterogeneity_method_param / 2.0) + expected_cross_table[1, 1] = center_1_count * (1 - heterogeneity_method_param / 2.0) + + # Allow some tolerance due to randomness + tolerance = 0.1 * (center_0_count + center_1_count) + + # Check that the observed values are close to the expected values + assert np.allclose(cross_table.values, expected_cross_table, atol=tolerance)