From d21ed44934471304b0f4aec0be93fcb74b7aed34 Mon Sep 17 00:00:00 2001 From: Jaeyoung Chun Date: Mon, 25 Oct 2021 10:47:50 -0400 Subject: [PATCH] Upgrade to Cell Ranger 6.1.1 --- Dockerfile | 1 - Dockerfile.cromwell | 1 - LICENSE | 21 +++++ README.md | 200 +++++++++++++++++++++++++++----------------- config.sh | 4 +- 5 files changed, 144 insertions(+), 83 deletions(-) create mode 100644 LICENSE diff --git a/Dockerfile b/Dockerfile index 9bbccdb..5c82b7e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,7 +10,6 @@ ENV PATH /opt/cellranger-${CELLRANGER_VERSION}:$PATH RUN yum group install -y "Development Tools" \ && yum install -y which -# https://support.10xgenomics.com/single-cell-vdj/software/downloads/6.0/ # cell ranger binaries RUN curl -o cellranger-${CELLRANGER_VERSION}.tar.gz ${DOWNLOAD_URL} \ && tar xzf cellranger-${CELLRANGER_VERSION}.tar.gz \ diff --git a/Dockerfile.cromwell b/Dockerfile.cromwell index b9b9b40..7b83be8 100644 --- a/Dockerfile.cromwell +++ b/Dockerfile.cromwell @@ -10,7 +10,6 @@ ENV PATH /opt/cellranger-${CELLRANGER_VERSION}:$PATH RUN yum group install -y "Development Tools" \ && yum install -y which -# https://support.10xgenomics.com/single-cell-vdj/software/downloads/6.0/ # cell ranger binaries RUN curl -o cellranger-${CELLRANGER_VERSION}.tar.gz ${DOWNLOAD_URL} \ && tar xzf cellranger-${CELLRANGER_VERSION}.tar.gz \ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..1ed509a --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019 - present Jaeyoung Chun + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above 
copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index f4f5e02..b1e44e3 100644 --- a/README.md +++ b/README.md @@ -1,20 +1,24 @@ # docker-cellranger -Dockerized Cell Ranger v6.0.2 +Dockerized Cell Ranger v6.1.1 -- GEX: https://support.10xgenomics.com/single-cell-gene-expression/software/downloads/6.0/ -- VDJ: https://support.10xgenomics.com/single-cell-vdj/software/downloads/6.0/ +- GEX: https://support.10xgenomics.com/single-cell-gene-expression/software/downloads/6.1/ +- VDJ: https://support.10xgenomics.com/single-cell-vdj/software/downloads/6.1/ Reference dataset included: - Human reference dataset (GRCh38/Ensembl/10x) required for Cell Ranger V(D)J - Mouse reference dataset (GRCm38/Ensembl/10x) required for Cell Ranger V(D)J +## License + +The code is available to everyone under the standard [MIT license](./LICENSE). However, the code internally uses 10x software, so please make sure that you read and agree to the [10x End User Software License Agreement](https://www.10xgenomics.com/end-user-software-license-agreement). + ## Build Container Image SCING (Single-Cell pIpeliNe Garden; pronounced as "sing" /siŋ/) is required for smooth and uninteruppted build process (e.g. CI/CD). For setup, please refer to [this page](https://github.com/hisplan/scing). All the instructions below is given under the assumption that you have already configured SCING in your environment. 
-[SCING](https://github.com/hisplan/scing) installation is required +[SCING](https://github.com/hisplan/scing) installation is required. ```bash conda activate scing @@ -34,7 +38,7 @@ conda activate scing ```bash $ cellranger --help -cellranger cellranger-6.0.2 +cellranger cellranger-6.1.1 Process 10x Genomics Gene Expression, Feature Barcode, and Immune Profiling data USAGE: @@ -45,33 +49,39 @@ FLAGS: -V, --version Prints version information SUBCOMMANDS: - count Count gene expression (targeted or whole-transcriptome) and/or feature barcode reads from a - single sample and GEM well - multi Analyze multiplexed data or combined gene expression/immune profiling/feature barcode data - vdj Assembles single-cell VDJ receptor sequences from 10x Immune Profiling libraries + count Count gene expression (targeted or whole-transcriptome) and/or feature + barcode reads from a single sample and GEM well + multi Analyze multiplexed data or combined gene expression/immune + profiling/feature barcode data + vdj Assembles single-cell VDJ receptor sequences from 10x Immune Profiling + libraries aggr Aggregate data from multiple Cell Ranger runs reanalyze Re-run secondary analysis (dimensionality reduction, clustering, etc) - targeted-compare Analyze targeted enrichment performance by comparing a targeted sample to its cognate parent - WTA sample (used as input for targeted gene expression) - targeted-depth Estimate targeted read depth values (mean reads per cell) for a specified input parent WTA - sample and a target panel CSV file + targeted-compare Analyze targeted enrichment performance by comparing a targeted sample + to its cognate parent WTA sample (used as input for targeted gene + expression) + targeted-depth Estimate targeted read depth values (mean reads per cell) for a + specified input parent WTA sample and a target panel CSV file mkvdjref Prepare a reference for use with CellRanger VDJ - mkfastq Run Illumina demultiplexer on sample sheets that contain 10x-specific 
sample index sets + mkfastq Run Illumina demultiplexer on sample sheets that contain 10x-specific + sample index sets testrun Execute the 'count' pipeline on a small test dataset mat2csv Convert a gene count matrix to CSV format - mkref Prepare a reference for use with 10x analysis software. Requires a GTF and FASTA + mkref Prepare a reference for use with 10x analysis software. Requires a GTF + and FASTA mkgtf Filter a GTF file by attribute prior to creating a 10x reference upload Upload analysis logs to 10x Genomics support sitecheck Collect linux system configuration information help Prints this message or the help of the given subcommand(s) ``` -## GEX +### GEX ```bash $ cellranger count --help cellranger-count -Count gene expression (targeted or whole-transcriptome) and/or feature barcode reads from a single sample and GEM well +Count gene expression (targeted or whole-transcriptome) and/or feature barcode reads from a single +sample and GEM well USAGE: cellranger count [FLAGS] [OPTIONS] --id --transcriptome @@ -80,11 +90,12 @@ FLAGS: --no-bam Do not generate a bam file --nosecondary Disable secondary analysis, e.g. clustering. Optional --include-introns Include intronic reads in count - --no-libraries Proceed with processing using a --feature-ref but no Feature Barcode libraries - specified with the 'libraries' flag - --no-target-umi-filter Turn off the target UMI filtering subpipeline. Only applies when --target-panel is - used - --dry Do not execute the pipeline. Generate a pipeline invocation (.mro) file and stop + --no-libraries Proceed with processing using a --feature-ref but no Feature + Barcode libraries specified with the 'libraries' flag + --no-target-umi-filter Turn off the target UMI filtering subpipeline. Only applies when + --target-panel is used + --dry Do not execute the pipeline. 
Generate a pipeline invocation (.mro) + file and stop --disable-ui Do not serve the web UI --noexit Keep web UI running after pipestance completes or fails --nopreflight Skip preflight checks @@ -95,43 +106,54 @@ OPTIONS: --description Sample description to embed in output files [default: ] --transcriptome Path of folder containing 10x-compatible transcriptome reference --fastqs ... Path to input FASTQ data - --project Name of the project folder within a mkfastq or bcl2fastq-generated folder to pick - FASTQs from + --project Name of the project folder within a mkfastq or bcl2fastq-generated + folder from which to pick FASTQs --sample ... Prefix of the filenames of FASTQs to select --lanes ... Only use FASTQs from selected lanes --libraries CSV file declaring input library data sources - --feature-ref Feature reference CSV file, declaring Feature Barcode constructs and associated - barcodes + --feature-ref Feature reference CSV file, declaring Feature Barcode constructs + and associated barcodes --target-panel The target panel CSV file declaring the target panel used, if any - --expect-cells Expected number of recovered cells - --force-cells Force pipeline to use this number of cells, bypassing cell detection. [MINIMUM: 10] + --expect-cells Expected number of recovered cells, used as input to cell calling + algorithm. [default: 3000] + --force-cells Force pipeline to use this number of cells, bypassing cell calling + algorithm. [MINIMUM: 10] --r1-length Hard trim the input Read 1 to this length before analysis --r2-length Hard trim the input Read 2 to this length before analysis - --chemistry Assay configuration. NOTE: by default the assay configuration is detected - automatically, which is the recommened mode. You usually will not need to specify a - chemistry. 
Options are: 'auto' for autodetection, 'threeprime' for Single Cell 3', - 'fiveprime' for Single Cell 5', 'SC3Pv1' or 'SC3Pv2' or 'SC3Pv3' for Single Cell 3' - v1/v2/v3, 'SC3Pv3LT' for Single Cell 3' v3 LT, 'SC5P-PE' or 'SC5P-R2' for Single Cell - 5', paired-end/R2-only, 'SC-FB' for Single Cell Antibody-only 3' v2 or 5' [default: - auto] - --jobmode Job manager to use. Valid options: local (default), sge, lsf, slurm or a .template - file. Search for help on "Cluster Mode" at support.10xgenomics.com for more details on - configuring the pipeline to use a compute cluster [default: local] - --localcores Set max cores the pipeline may request at one time. Only applies to local jobs - --localmem Set max GB the pipeline may request at one time. Only applies to local jobs - --localvmem Set max virtual address space in GB for the pipeline. Only applies to local jobs - --mempercore Reserve enough threads for each job to ensure enough memory will be available, - assuming each core on your cluster has at least this much memory available. Only - applies in cluster jobmodes - --maxjobs Set max jobs submitted to cluster at one time. Only applies in cluster jobmodes - --jobinterval Set delay between submitting jobs to cluster, in ms. Only applies in cluster jobmodes - --overrides The path to a JSON file that specifies stage-level overrides for cores and memory. - Finer-grained than --localcores, --mempercore and --localmem. Consult the 10x support - website for an example override file + --chemistry Assay configuration. NOTE: by default the assay configuration is + detected automatically, which is the recommened mode. You usually + will not need to specify a chemistry. 
Options are: 'auto' for + autodetection, 'threeprime' for Single Cell 3', 'fiveprime' for + Single Cell 5', 'SC3Pv1' or 'SC3Pv2' or 'SC3Pv3' for Single Cell + 3' v1/v2/v3, 'SC3Pv3LT' for Single Cell 3' v3 LT, 'SC3Pv3HT' for + Single Cell 3' v3 HT, 'SC5P-PE' or 'SC5P-R2' for Single Cell 5', + paired-end/R2-only, 'SC-FB' for Single Cell Antibody-only 3' v2 or + 5' [default: auto] + --jobmode Job manager to use. Valid options: local (default), sge, lsf, + slurm or path to a .template file. Search for help on "Cluster + Mode" at support.10xgenomics.com for more details on configuring + the pipeline to use a compute cluster [default: local] + --localcores Set max cores the pipeline may request at one time. Only applies + to local jobs + --localmem Set max GB the pipeline may request at one time. Only applies to + local jobs + --localvmem Set max virtual address space in GB for the pipeline. Only applies + to local jobs + --mempercore Reserve enough threads for each job to ensure enough memory will + be available, assuming each core on your cluster has at least this + much memory available. Only applies to cluster jobmodes + --maxjobs Set max jobs submitted to cluster at one time. Only applies to + cluster jobmodes + --jobinterval Set delay between submitting jobs to cluster, in ms. Only applies + to cluster jobmodes + --overrides The path to a JSON file that specifies stage-level overrides for + cores and memory. Finer-grained than --localcores, --mempercore + and --localmem. Consult https://support.10xgenomics.com/ for an + example override file --uiport Serve web UI at http://localhost:PORT ``` -## V(D)J +### V(D)J ```bash $ cellranger vdj --help @@ -143,7 +165,8 @@ USAGE: FLAGS: --denovo Run in reference-free mode (do not use annotations) - --dry Do not execute the pipeline. Generate a pipeline invocation (.mro) file and stop + --dry Do not execute the pipeline. 
Generate a pipeline invocation (.mro) file and + stop --disable-ui Do not serve the web UI --noexit Keep web UI running after pipestance completes or fails --nopreflight Skip preflight checks @@ -152,36 +175,55 @@ FLAGS: OPTIONS: --id A unique run id and output folder name [a-zA-Z0-9_-]+ --description Sample description to embed in output files [default: ] - --reference Path of folder containing 10x-compatible VDJ reference. Optional if - '--denovo' is specified + --reference + Path of folder containing 10x-compatible VDJ reference. Optional if '--denovo' is + specified + --fastqs ... Path to input FASTQ data - --project Name of the project folder within a mkfastq or bcl2fastq-generated folder - to pick FASTQs from + --project + Name of the project folder within a mkfastq or bcl2fastq-generated folder from which to + pick FASTQs + --sample ... Prefix of the filenames of FASTQs to select --lanes ... Only use FASTQs from selected lanes - --chain Chain type to display metrics for: 'TR' for T cell receptors, 'IG' for B - cell receptors, or 'auto' to autodetect [default: auto] - --inner-enrichment-primers If inner enrichment primers other than those provided in the 10x kits are - used, they need to be specified here as a textfile with one primer per - line. Disable secondary analysis, e.g. clustering - --jobmode Job manager to use. Valid options: local (default), sge, lsf, slurm or a - .template file. Search for help on "Cluster Mode" at - support.10xgenomics.com for more details on configuring the pipeline to use - a compute cluster [default: local] - --localcores Set max cores the pipeline may request at one time. Only applies to local - jobs - --localmem Set max GB the pipeline may request at one time. Only applies to local jobs - --localvmem Set max virtual address space in GB for the pipeline. 
Only applies to local - jobs - --mempercore Reserve enough threads for each job to ensure enough memory will be - available, assuming each core on your cluster has at least this much memory - available. Only applies in cluster jobmodes - --maxjobs Set max jobs submitted to cluster at one time. Only applies in cluster - jobmodes - --jobinterval Set delay between submitting jobs to cluster, in ms. Only applies in - cluster jobmodes - --overrides The path to a JSON file that specifies stage-level overrides for cores and - memory. Finer-grained than --localcores, --mempercore and --localmem. - Consult the 10x support website for an example override file + --chain + Chain type to display metrics for: 'TR' for T cell receptors, 'IG' for B cell receptors, + or 'auto' to autodetect [default: auto] + + --inner-enrichment-primers + If inner enrichment primers other than those provided in the 10x kits are used, they + need to be specified here as a textfile with one primer per line. Disable secondary + analysis, e.g. clustering + + --jobmode + Job manager to use. Valid options: local (default), sge, lsf, slurm or path to a + .template file. Search for help on "Cluster Mode" at support.10xgenomics.com for more + details on configuring the pipeline to use a compute cluster [default: local] + + --localcores + Set max cores the pipeline may request at one time. Only applies to local jobs + + --localmem + Set max GB the pipeline may request at one time. Only applies to local jobs + + --localvmem + Set max virtual address space in GB for the pipeline. Only applies to local jobs + + --mempercore + Reserve enough threads for each job to ensure enough memory will be available, assuming + each core on your cluster has at least this much memory available. Only applies to + cluster jobmodes + + --maxjobs + Set max jobs submitted to cluster at one time. Only applies to cluster jobmodes + + --jobinterval + Set delay between submitting jobs to cluster, in ms. 
Only applies to cluster jobmodes + + --overrides + The path to a JSON file that specifies stage-level overrides for cores and memory. + Finer-grained than --localcores, --mempercore and --localmem. Consult + https://support.10xgenomics.com/ for an example override file + --uiport Serve web UI at http://localhost:PORT ``` \ No newline at end of file diff --git a/config.sh b/config.sh index 0617f38..eb0707e 100644 --- a/config.sh +++ b/config.sh @@ -1,6 +1,6 @@ -version="6.0.2" +version="6.1.1" vdj_ref_version="5.0.0" -site_url="https://support.10xgenomics.com/single-cell-vdj/software/downloads/6.0/" +site_url="https://support.10xgenomics.com/single-cell-vdj/software/downloads/6.1/" download_url="" # docker related