From fd5c8f3a769626e521bef36b8193c902c8ef927c Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Wed, 25 Jan 2023 09:09:03 -0500 Subject: [PATCH 01/11] start work on cbioportal script support in Dockerfile --- Dockerfile | 47 +++++++++++++++++++++++++++++++++++++++++++++-- README.md | 3 +++ 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 86ea867e..07a8d12e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,22 +1,65 @@ +FROM ghcr.io/bento-platform/bento_base_image:python-debian-2023.01.17 AS vcf2maf-dep + +WORKDIR /tmp/vcf2maf + +ENV VCF2MAF_VERSION=1.6.21 + +# TODO: I don't like /opt as a home for these +RUN apt-get update -y && \ + apt-get install -y unzip && \ + echo "https://github.com/mskcc/vcf2maf/archive/refs/tags/v${VCF2MAF_VERSION}.zip" && \ + curl -L "https://github.com/mskcc/vcf2maf/archive/refs/tags/v${VCF2MAF_VERSION}.zip" -o vcf2maf.zip && \ + unzip vcf2maf.zip && \ + mv "vcf2maf-${VCF2MAF_VERSION}" vcf2maf && \ + mkdir -p /opt/data && \ + cp vcf2maf/*.pl /opt && \ + cp -r vcf2maf/data /opt/data && \ + rm -rf vcf2maf + FROM ghcr.io/bento-platform/bento_base_image:python-debian-2023.01.17 AS base-deps # Install system packages for HTSLib + SAMtools + curl and jq for workflows # OpenJDK is for running WOMtool/Cromwell +# Perl/cpanminus are for cBioPortal scripts / caches / utilities RUN apt-get update -y && \ - apt-get install -y samtools tabix bcftools curl jq openjdk-17-jre && \ + apt-get install -y \ + samtools \ + tabix \ + bcftools \ + curl \ + jq \ + openjdk-17-jre \ + perl \ + libdbi-perl \ + libperl-dev \ + cpanminus \ + && \ rm -rf /var/lib/apt/lists/* # Boostrap dependencies for setting up and running the Python application RUN pip install --no-cache-dir poetry==1.3.2 gunicorn==20.1.0 "pysam>=0.20.0,<0.21.0" -WORKDIR / +# Install Cromwell ENV CROMWELL_VERSION=84 +WORKDIR / RUN curl -L \ https://github.com/broadinstitute/cromwell/releases/download/${CROMWELL_VERSION}/cromwell-${CROMWELL_VERSION}.jar \ -o cromwell.jar +# Install Ensembl-VEP +ENV VEP_ENSEMBL_RELEASE_VERSION=104.3 +WORKDIR / +RUN git clone --depth 1 -b "release/${VEP_ENSEMBL_RELEASE_VERSION}" https://github.com/Ensembl/ensembl-vep.git && \ + cpanm --installdeps --with-recommends --notest --cpanfile ensembl-vep/cpanfile . && \ + cd ensembl-vep && \ + # Build vep in /ensembl-vep + perl INSTALL.pl -a a --NO_TEST --NO_UPDATE + FROM base-deps AS build-install +# Copy VCF2MAF +COPY --from=vcf2maf-dep /opt /opt + # Backwards-compatible with old BentoV2 container layout RUN mkdir -p /wes/tmp && mkdir -p /data WORKDIR /wes diff --git a/README.md b/README.md index f7a7838b..c56f5101 100644 --- a/README.md +++ b/README.md @@ -126,6 +126,9 @@ CHORD_URL=http://127.0.0.1:5000/ # checks for TLS certificate validity! CHORD_DEBUG=False +# SSL Configuration - whether to validate certificates +BENTO_VALIDATE_SSL=True + # Celery configuration CELERY_RESULT_BACKEND=redis:// CELERY_BROKER_URL=redis:// From c11a2768b709865ed7bcf37ea573947df4a7c1ac Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Wed, 25 Jan 2023 09:31:03 -0500 Subject: [PATCH 02/11] add missing dockerfile deps for ensembl-vep --- Dockerfile | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/Dockerfile b/Dockerfile index 07a8d12e..ac52d57c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,19 +20,23 @@ FROM ghcr.io/bento-platform/bento_base_image:python-debian-2023.01.17 AS base-de # Install system packages for HTSLib + SAMtools + curl and jq for workflows # OpenJDK is for running WOMtool/Cromwell -# Perl/cpanminus are for cBioPortal scripts / caches / utilities +# Perl/libdbi-perl/lib*-dev/cpanminus/unzip are for cBioPortal scripts / caches / utilities RUN apt-get update -y && \ apt-get install -y \ - samtools \ - tabix \ - bcftools \ - curl \ - jq \ - openjdk-17-jre \ - perl \ - libdbi-perl \ - libperl-dev \ - cpanminus \ + samtools \ + tabix \ + bcftools \ + curl \ + jq \ + openjdk-17-jre \ + perl \ + libdbi-perl \ + libperl-dev \ + cpanminus \ + unzip \ + libbz2-dev \ + liblzma-dev \ + zlib1g-dev \ && \ rm -rf /var/lib/apt/lists/* From 66d4c4de2cdf76bd765b87c1c1d466c22c26a4ea Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Wed, 25 Jan 2023 09:33:21 -0500 Subject: [PATCH 03/11] chore: update minor deps --- poetry.lock | 58 ++++++++++++++++++++++++++--------------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/poetry.lock b/poetry.lock index b83a9c3d..2c9982d7 100644 --- a/poetry.lock +++ b/poetry.lock @@ -274,18 +274,18 @@ files = [ [[package]] name = "boto3" -version = "1.26.52" +version = "1.26.56" description = "The AWS SDK for Python" category = "main" optional = false python-versions = ">= 3.7" files = [ - {file = "boto3-1.26.52-py3-none-any.whl", hash = "sha256:319ddb274f8f83b035b88a3b127c465bf6fe3e3fc2d668869b489e992c47ca77"}, - {file = "boto3-1.26.52.tar.gz", hash = "sha256:0b1f82d4565ed875c7975ac0be5665e8d948613c01bcb0e49df6d4f5af670cc8"}, + {file = "boto3-1.26.56-py3-none-any.whl", hash = "sha256:72214a08f337d29a1300d7861872f60ea41016b2a8ad8094fab20d783c8cf1ae"}, + {file = "boto3-1.26.56.tar.gz", hash = "sha256:0ff8667fbfda8390cab2718a4d129374a6ddd6fd1913f79777fd4498f93c84f1"}, ] [package.dependencies] -botocore = ">=1.29.52,<1.30.0" +botocore = ">=1.29.56,<1.30.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.6.0,<0.7.0" @@ -294,14 +294,14 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.29.52" +version = "1.29.56" description = "Low-level, data-driven core of boto 3." category = "main" optional = false python-versions = ">= 3.7" files = [ - {file = "botocore-1.29.52-py3-none-any.whl", hash = "sha256:de55b6333fb13c66da9055972d7e4efff5dcc5a087478a2b70e99d888b29a24c"}, - {file = "botocore-1.29.52.tar.gz", hash = "sha256:a0b89a33305cfa6251c6e1142deb7567e216e37e25363159f45fb81dc5b474e5"}, + {file = "botocore-1.29.56-py3-none-any.whl", hash = "sha256:669ed3a256c4352f8f8a77a24b4d623ab7acc966d843b460d7ce2261a9813a79"}, + {file = "botocore-1.29.56.tar.gz", hash = "sha256:ca4d6403d745218270a20d9ca3ca9a33e3ad2fabb59a96ed8d6e1a824b274c86"}, ] [package.dependencies] @@ -314,14 +314,14 @@ crt = ["awscrt (==0.15.3)"] [[package]] name = "cachetools" -version = "5.2.1" +version = "5.3.0" description = "Extensible memoizing collections and decorators" category = "dev" optional = false python-versions = "~=3.7" files = [ - {file = "cachetools-5.2.1-py3-none-any.whl", hash = "sha256:8462eebf3a6c15d25430a8c27c56ac61340b2ecf60c9ce57afc2b97e450e47da"}, - {file = "cachetools-5.2.1.tar.gz", hash = "sha256:5991bc0e08a1319bb618d3195ca5b6bc76646a49c21d55962977197b301cc1fe"}, + {file = "cachetools-5.3.0-py3-none-any.whl", hash = "sha256:429e1a1e845c008ea6c85aa35d4b98b65d6a9763eeef3e37e92728a12d1de9d4"}, + {file = "cachetools-5.3.0.tar.gz", hash = "sha256:13dfddc7b8df938c21a940dfa6557ce6e94a2f1cdfa58eb90c805721d58f2c14"}, ] [[package]] @@ -580,30 +580,30 @@ toml = ["tomli"] [[package]] name = "debugpy" -version = "1.6.5" +version = "1.6.6" description = "An implementation of the Debug Adapter Protocol for Python" category = "dev" optional = false python-versions = ">=3.7" files = [ - {file = "debugpy-1.6.5-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:696165f021a6a17da08163eaae84f3faf5d8be68fb78cd78488dd347e625279c"}, - {file = "debugpy-1.6.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17039e392d6f38388a68bd02c5f823b32a92142a851e96ba3ec52aeb1ce9d900"}, - {file = "debugpy-1.6.5-cp310-cp310-win32.whl", hash = "sha256:62a06eb78378292ba6c427d861246574dc8b84471904973797b29dd33c7c2495"}, - {file = "debugpy-1.6.5-cp310-cp310-win_amd64.whl", hash = "sha256:9984fc00ab372c97f63786c400107f54224663ea293daab7b365a5b821d26309"}, - {file = "debugpy-1.6.5-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:048368f121c08b00bbded161e8583817af5055982d2722450a69efe2051621c2"}, - {file = "debugpy-1.6.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:74e4eca42055759032e3f1909d1374ba1d729143e0c2729bb8cb5e8b5807c458"}, - {file = "debugpy-1.6.5-cp37-cp37m-win32.whl", hash = "sha256:0f9afcc8cad6424695f3356dc9a7406d5b18e37ee2e73f34792881a44b02cc50"}, - {file = "debugpy-1.6.5-cp37-cp37m-win_amd64.whl", hash = "sha256:b5a74ecebe5253344501d9b23f74459c46428b30437fa9254cfb8cb129943242"}, - {file = "debugpy-1.6.5-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:9e809ef787802c808995e5b6ade714a25fa187f892b41a412d418a15a9c4a432"}, - {file = "debugpy-1.6.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:947c686e8adb46726f3d5f19854f6aebf66c2edb91225643c7f44b40b064a235"}, - {file = "debugpy-1.6.5-cp38-cp38-win32.whl", hash = "sha256:377391341c4b86f403d93e467da8e2d05c22b683f08f9af3e16d980165b06b90"}, - {file = "debugpy-1.6.5-cp38-cp38-win_amd64.whl", hash = "sha256:286ae0c2def18ee0dc8a61fa76d51039ca8c11485b6ed3ef83e3efe8a23926ae"}, - {file = "debugpy-1.6.5-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:500dd4a9ff818f5c52dddb4a608c7de5371c2d7d905c505eb745556c579a9f11"}, - {file = "debugpy-1.6.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f3fab217fe7e2acb2d90732af1a871947def4e2b6654945ba1ebd94bd0bea26"}, - {file = "debugpy-1.6.5-cp39-cp39-win32.whl", hash = "sha256:15bc5febe0edc79726517b1f8d57d7ac7c784567b5ba804aab8b1c9d07a57018"}, - {file = "debugpy-1.6.5-cp39-cp39-win_amd64.whl", hash = "sha256:7e84d9e4420122384cb2cc762a00b4e17cbf998022890f89b195ce178f78ff47"}, - {file = "debugpy-1.6.5-py2.py3-none-any.whl", hash = "sha256:8116e40a1cd0593bd2aba01d4d560ee08f018da8e8fbd4cbd24ff09b5f0e41ef"}, - {file = "debugpy-1.6.5.zip", hash = "sha256:5e55e6c79e215239dd0794ee0bf655412b934735a58e9d705e5c544f596f1603"}, + {file = "debugpy-1.6.6-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:0ea1011e94416e90fb3598cc3ef5e08b0a4dd6ce6b9b33ccd436c1dffc8cd664"}, + {file = "debugpy-1.6.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dff595686178b0e75580c24d316aa45a8f4d56e2418063865c114eef651a982e"}, + {file = "debugpy-1.6.6-cp310-cp310-win32.whl", hash = "sha256:87755e173fcf2ec45f584bb9d61aa7686bb665d861b81faa366d59808bbd3494"}, + {file = "debugpy-1.6.6-cp310-cp310-win_amd64.whl", hash = "sha256:72687b62a54d9d9e3fb85e7a37ea67f0e803aaa31be700e61d2f3742a5683917"}, + {file = "debugpy-1.6.6-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:78739f77c58048ec006e2b3eb2e0cd5a06d5f48c915e2fc7911a337354508110"}, + {file = "debugpy-1.6.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23c29e40e39ad7d869d408ded414f6d46d82f8a93b5857ac3ac1e915893139ca"}, + {file = "debugpy-1.6.6-cp37-cp37m-win32.whl", hash = "sha256:7aa7e103610e5867d19a7d069e02e72eb2b3045b124d051cfd1538f1d8832d1b"}, + {file = "debugpy-1.6.6-cp37-cp37m-win_amd64.whl", hash = "sha256:f6383c29e796203a0bba74a250615ad262c4279d398e89d895a69d3069498305"}, + {file = "debugpy-1.6.6-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:23363e6d2a04d726bbc1400bd4e9898d54419b36b2cdf7020e3e215e1dcd0f8e"}, + {file = "debugpy-1.6.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b5d1b13d7c7bf5d7cf700e33c0b8ddb7baf030fcf502f76fc061ddd9405d16c"}, + {file = "debugpy-1.6.6-cp38-cp38-win32.whl", hash = "sha256:70ab53918fd907a3ade01909b3ed783287ede362c80c75f41e79596d5ccacd32"}, + {file = "debugpy-1.6.6-cp38-cp38-win_amd64.whl", hash = "sha256:c05349890804d846eca32ce0623ab66c06f8800db881af7a876dc073ac1c2225"}, + {file = "debugpy-1.6.6-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:11a0f3a106f69901e4a9a5683ce943a7a5605696024134b522aa1bfda25b5fec"}, + {file = "debugpy-1.6.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a771739902b1ae22a120dbbb6bd91b2cae6696c0e318b5007c5348519a4211c6"}, + {file = "debugpy-1.6.6-cp39-cp39-win32.whl", hash = "sha256:549ae0cb2d34fc09d1675f9b01942499751d174381b6082279cf19cdb3c47cbe"}, + {file = "debugpy-1.6.6-cp39-cp39-win_amd64.whl", hash = "sha256:de4a045fbf388e120bb6ec66501458d3134f4729faed26ff95de52a754abddb1"}, + {file = "debugpy-1.6.6-py2.py3-none-any.whl", hash = "sha256:be596b44448aac14eb3614248c91586e2bc1728e020e82ef3197189aae556115"}, + {file = "debugpy-1.6.6.zip", hash = "sha256:b9c2130e1c632540fbf9c2c88341493797ddf58016e7cba02e311de9b0a96b67"}, ] [[package]] From f84a9dd2bcc8a6926d089a7cce2ddd893ab4b7ac Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Thu, 9 Feb 2023 16:36:56 -0500 Subject: [PATCH 04/11] chore: more work on cbioportal-supporting dockerfile --- Dockerfile | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/Dockerfile b/Dockerfile index ac52d57c..89289135 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,12 +1,12 @@ -FROM ghcr.io/bento-platform/bento_base_image:python-debian-2023.01.17 AS vcf2maf-dep +FROM --platform=$BUILDPLATFORM debian:bullseye-slim AS downloaded-deps -WORKDIR /tmp/vcf2maf +# Install VCF2MAF +# TODO: I don't like /opt as a home for these +WORKDIR /tmp/vcf2maf ENV VCF2MAF_VERSION=1.6.21 - -# TODO: I don't like /opt as a home for these RUN apt-get update -y && \ - apt-get install -y unzip && \ + apt-get install -y curl git unzip && \ echo "https://github.com/mskcc/vcf2maf/archive/refs/tags/v${VCF2MAF_VERSION}.zip" && \ curl -L "https://github.com/mskcc/vcf2maf/archive/refs/tags/v${VCF2MAF_VERSION}.zip" -o vcf2maf.zip && \ unzip vcf2maf.zip && \ @@ -16,7 +16,21 @@ RUN apt-get update -y && \ cp -r vcf2maf/data /opt/data && \ rm -rf vcf2maf -FROM ghcr.io/bento-platform/bento_base_image:python-debian-2023.01.17 AS base-deps +# Install Cromwell +ENV CROMWELL_VERSION=84 +WORKDIR / +RUN curl -L \ + https://github.com/broadinstitute/cromwell/releases/download/${CROMWELL_VERSION}/cromwell-${CROMWELL_VERSION}.jar \ + -o cromwell.jar + +# Clone (but don't install yet) Ensembl-VEP +ENV VEP_ENSEMBL_RELEASE_VERSION=104.3 +RUN git clone --depth 1 -b "release/${VEP_ENSEMBL_RELEASE_VERSION}" https://github.com/Ensembl/ensembl-vep.git + +FROM ghcr.io/bento-platform/bento_base_image:python-debian-2023.02.09 AS base-deps + +# Copy Ensembl-VEP from downloaded-deps +COPY --from=downloaded-deps /ensembl-vep /ensembl-vep # Install system packages for HTSLib + SAMtools + curl and jq for workflows # OpenJDK is for running WOMtool/Cromwell @@ -43,12 +57,12 @@ RUN apt-get update -y && \ # Boostrap dependencies for setting up and running the Python application RUN pip install --no-cache-dir poetry==1.3.2 gunicorn==20.1.0 "pysam>=0.20.0,<0.21.0" -# Install Cromwell -ENV CROMWELL_VERSION=84 +# Install Ensembl-VEP from cloned source WORKDIR / -RUN curl -L \ - https://github.com/broadinstitute/cromwell/releases/download/${CROMWELL_VERSION}/cromwell-${CROMWELL_VERSION}.jar \ - -o cromwell.jar +RUN cpanm --installdeps --with-recommends --notest --cpanfile ensembl-vep/cpanfile . && \ + cd ensembl-vep && \ + # Build vep in /ensembl-vep + perl INSTALL.pl -a a --NO_TEST --NO_UPDATE # Install Ensembl-VEP ENV VEP_ENSEMBL_RELEASE_VERSION=104.3 @@ -62,7 +76,10 @@ RUN git clone --depth 1 -b "release/${VEP_ENSEMBL_RELEASE_VERSION}" https://gith FROM base-deps AS build-install # Copy VCF2MAF -COPY --from=vcf2maf-dep /opt /opt +COPY --from=downloaded-deps /opt /opt + +# Copy Cromwell +COPY --from=downloaded-deps /cromwell.jar /cromwell.jar # Backwards-compatible with old BentoV2 container layout RUN mkdir -p /wes/tmp && mkdir -p /data From bb848d51357173b36b747449f6b7734af27282a5 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Tue, 14 Feb 2023 15:30:52 -0500 Subject: [PATCH 05/11] chore: specify wdl + version when running cromwell cli --- .idea/bento_wes.iml | 2 +- .idea/misc.xml | 2 +- bento_wes/backends/cromwell_local.py | 2 ++ 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.idea/bento_wes.iml b/.idea/bento_wes.iml index 3ca05ee3..73db25a0 100644 --- a/.idea/bento_wes.iml +++ b/.idea/bento_wes.iml @@ -4,7 +4,7 @@ - + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index a1287b9b..47180224 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/bento_wes/backends/cromwell_local.py b/bento_wes/backends/cromwell_local.py index cfdcb2d8..2cec8f7e 100644 --- a/bento_wes/backends/cromwell_local.py +++ b/bento_wes/backends/cromwell_local.py @@ -75,5 +75,7 @@ def _get_command(self, workflow_path: str, params_path: str, run_dir: str) -> Co "--options", options_file, "--workflow-root", run_dir, "--metadata-output", run_dir + "/_job_metadata_output.json", + "--type", "WDL", + "--type-version", "1.0", workflow_path, )) From 44e8a13068636ed13d7b2aa0a83f57bd976fd827 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Tue, 14 Feb 2023 15:31:02 -0500 Subject: [PATCH 06/11] chore: bump version to 0.8.3 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6c944e12..647bc738 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "bento_wes" -version = "0.8.2" +version = "0.8.3" description = "Workflow execution service for the Bento platform." authors = [ "David Lougheed ", From 6dac8859ec84184988dcdbd68989c906e0393dd3 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Tue, 14 Feb 2023 15:36:53 -0500 Subject: [PATCH 07/11] revert --- bento_wes/backends/cromwell_local.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/bento_wes/backends/cromwell_local.py b/bento_wes/backends/cromwell_local.py index 2cec8f7e..cfdcb2d8 100644 --- a/bento_wes/backends/cromwell_local.py +++ b/bento_wes/backends/cromwell_local.py @@ -75,7 +75,5 @@ def _get_command(self, workflow_path: str, params_path: str, run_dir: str) -> Co "--options", options_file, "--workflow-root", run_dir, "--metadata-output", run_dir + "/_job_metadata_output.json", - "--type", "WDL", - "--type-version", "1.0", workflow_path, )) From 07cc37db7022abe8636b147572bbda8f4f7494f0 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Tue, 14 Feb 2023 15:41:55 -0500 Subject: [PATCH 08/11] chore: update lock dependencies --- poetry.lock | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/poetry.lock b/poetry.lock index 2d340f44..ff18b6c6 100644 --- a/poetry.lock +++ b/poetry.lock @@ -263,18 +263,18 @@ files = [ [[package]] name = "boto3" -version = "1.26.69" +version = "1.26.71" description = "The AWS SDK for Python" category = "main" optional = false python-versions = ">= 3.7" files = [ - {file = "boto3-1.26.69-py3-none-any.whl", hash = "sha256:9a0a29179957cb26fa8c3c1fddf66b18efaeaf633e08db5fb53815ffb0421419"}, - {file = "boto3-1.26.69.tar.gz", hash = "sha256:eb8cde24a4c5755c35126e8cd460e6b51c63d04292419e7e95721232720c7e5b"}, + {file = "boto3-1.26.71-py3-none-any.whl", hash = "sha256:5a9d19cdd8dcec679c483408f208027e01ab2087cbc66787790036087b6737de"}, + {file = "boto3-1.26.71.tar.gz", hash = "sha256:6c4845243d1896019646d649f1f0ff4042cedcc5db3ecfba3dc2d611ea11cd08"}, ] [package.dependencies] -botocore = ">=1.29.69,<1.30.0" +botocore = ">=1.29.71,<1.30.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.6.0,<0.7.0" @@ -283,14 +283,14 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.29.69" +version = "1.29.71" description = "Low-level, data-driven core of boto 3." category = "main" optional = false python-versions = ">= 3.7" files = [ - {file = "botocore-1.29.69-py3-none-any.whl", hash = "sha256:2a4ab8bcb3177daa425019e125c09996b9a6a1a62bb0baaaeeb86ffd552719cc"}, - {file = "botocore-1.29.69.tar.gz", hash = "sha256:7e1bebca013544fbc298cb58603bfccd5f71b49c720a5c33c07cf5dfc8145a1f"}, + {file = "botocore-1.29.71-py3-none-any.whl", hash = "sha256:40406466f5c416b1f54bfbfc11aef90d783103f7ea77a1992dcaf1768ab04e12"}, + {file = "botocore-1.29.71.tar.gz", hash = "sha256:783e7fa97bb5bf3759e4b333b8da2bcaffdb54828ea1d759b55329cc39003b98"}, ] [package.dependencies] @@ -1911,14 +1911,14 @@ test = ["websockets"] [[package]] name = "werkzeug" -version = "2.2.2" +version = "2.2.3" description = "The comprehensive WSGI web application library." category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "Werkzeug-2.2.2-py3-none-any.whl", hash = "sha256:f979ab81f58d7318e064e99c4506445d60135ac5cd2e177a2de0089bfd4c9bd5"}, - {file = "Werkzeug-2.2.2.tar.gz", hash = "sha256:7ea2d48322cc7c0f8b3a215ed73eabd7b5d75d0b50e31ab006286ccff9e00b8f"}, + {file = "Werkzeug-2.2.3-py3-none-any.whl", hash = "sha256:56433961bc1f12533306c624f3be5e744389ac61d722175d543e1751285da612"}, + {file = "Werkzeug-2.2.3.tar.gz", hash = "sha256:2e1ccc9417d4da358b9de6f174e3ac094391ea1d4fbef2d667865d819dfd0afe"}, ] [package.dependencies] From ca24fcfc5d23f23fdcc4758024afb30166b5c8e1 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Tue, 23 Jan 2024 11:11:13 -0500 Subject: [PATCH 09/11] chore: separate build stage for ensembl-vep --- Dockerfile | 62 ++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 41 insertions(+), 21 deletions(-) diff --git a/Dockerfile b/Dockerfile index d412ab60..ca2de350 100644 --- a/Dockerfile +++ b/Dockerfile @@ -29,26 +29,16 @@ RUN curl -L \ ENV VEP_ENSEMBL_RELEASE_VERSION=111.0 RUN git clone --depth 1 -b "release/${VEP_ENSEMBL_RELEASE_VERSION}" https://github.com/Ensembl/ensembl-vep.git -FROM ghcr.io/bento-platform/bento_base_image:python-debian-2024.01.01 AS base-deps +FROM ghcr.io/bento-platform/bento_base_image:python-debian-2024.01.01 AS ensembl-vep SHELL ["/bin/bash", "-c"] WORKDIR / -# Copy Ensembl-VEP from downloaded-deps -COPY --from=downloaded-deps /ensembl-vep /ensembl-vep - -# Install system packages for HTSLib + SAMtools + curl and jq for workflows -# OpenJDK is for running WOMtool/Cromwell # Perl/libdbi-perl/lib*-dev/cpanminus/unzip are for cBioPortal scripts / caches / utilities RUN apt-get update -y && \ apt-get install -y \ - samtools \ - tabix \ - bcftools \ curl \ - jq \ - openjdk-17-jre \ perl \ libdbi-perl \ libperl-dev \ @@ -60,25 +50,44 @@ RUN apt-get update -y && \ && \ rm -rf /var/lib/apt/lists/* -# Then, install dependencies for running the Python server + Python workflow dependencies -COPY container.requirements.txt . -RUN pip install --no-cache-dir -r /container.requirements.txt && \ - rm /container.requirements.txt +# Copy Ensembl-VEP from downloaded-deps +COPY --from=downloaded-deps /ensembl-vep /ensembl-vep # Install Ensembl-VEP from cloned source WORKDIR / RUN cpanm --installdeps --with-recommends --notest --cpanfile ensembl-vep/cpanfile . && \ cd ensembl-vep && \ # Build vep in /ensembl-vep - perl INSTALL.pl -a a --NO_TEST --NO_UPDATE + perl INSTALL.pl -a a --NO_TEST --NO_UPDATE --SPECIES 'homo_sapiens' -FROM base-deps AS build-install +RUN ls -l /ensembl-vep -# Copy VCF2MAF -COPY --from=downloaded-deps /opt /opt +FROM ghcr.io/bento-platform/bento_base_image:python-debian-2024.01.01 AS base-deps -# Copy Cromwell -COPY --from=downloaded-deps /cromwell.jar /cromwell.jar +SHELL ["/bin/bash", "-c"] + +WORKDIR / + +# Install system packages for HTSLib + SAMtools + curl and jq for workflows +# OpenJDK is for running WOMtool/Cromwell + +RUN apt-get update -y && \ + apt-get install -y \ + samtools \ + tabix \ + bcftools \ + curl \ + jq \ + openjdk-17-jre \ + && \ + rm -rf /var/lib/apt/lists/* + +# Then, install dependencies for running the Python server + Python workflow dependencies +COPY container.requirements.txt . +RUN pip install --no-cache-dir -r /container.requirements.txt && \ + rm /container.requirements.txt + +FROM base-deps AS build-install # Backwards-compatible with old BentoV2 container layout RUN mkdir -p /wes/tmp && mkdir -p /data @@ -104,5 +113,16 @@ COPY README.md . # Install the module itself, locally (similar to `pip install -e .`) RUN poetry install --without dev +# Copy from other stages last, since it means the stages can be built in parallel + +# - Copy VCF2MAF +COPY --from=downloaded-deps /opt /opt + +# - Copy Cromwell +COPY --from=downloaded-deps /cromwell.jar /cromwell.jar + +# - Copy Ensembl-VEP +COPY --from=ensembl-vep /ensembl-vep /ensembl-vep + ENTRYPOINT [ "bash", "./entrypoint.bash" ] CMD [ "bash", "./run.bash" ] From 9f357eaf40e3752c87e08a5848ee857060b780e1 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Mon, 26 Feb 2024 14:33:25 -0500 Subject: [PATCH 10/11] set up vep from prebuilt docker image --- Dockerfile | 75 +++++++++++++++++++++++++++---------------------- entrypoint.bash | 3 ++ 2 files changed, 44 insertions(+), 34 deletions(-) diff --git a/Dockerfile b/Dockerfile index cce6a32f..9d2d5e64 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,7 +8,7 @@ SHELL ["/bin/bash", "-c"] WORKDIR /tmp/vcf2maf ENV VCF2MAF_VERSION=1.6.21 RUN apt-get update -y && \ - apt-get install -y curl git unzip && \ + apt-get install -y curl git unzip wget && \ echo "https://github.com/mskcc/vcf2maf/archive/refs/tags/v${VCF2MAF_VERSION}.zip" && \ curl -L "https://github.com/mskcc/vcf2maf/archive/refs/tags/v${VCF2MAF_VERSION}.zip" -o vcf2maf.zip && \ unzip vcf2maf.zip && \ @@ -27,40 +27,21 @@ RUN curl -L \ # Clone (but don't install yet) Ensembl-VEP ENV VEP_ENSEMBL_RELEASE_VERSION=111.0 -RUN git clone --depth 1 -b "release/${VEP_ENSEMBL_RELEASE_VERSION}" https://github.com/Ensembl/ensembl-vep.git +RUN git clone --depth 1 -b "release/${VEP_ENSEMBL_RELEASE_VERSION}" https://github.com/Ensembl/ensembl-vep.git && \ + chmod u+x ensembl-vep/*.pl + +# Clone ensembl-variation git repository +WORKDIR /ensembl-vep/ +RUN git clone --depth 1 https://github.com/Ensembl/ensembl-variation.git && \ + mkdir var_c_code && \ + cp ensembl-variation/C_code/*.c ensembl-variation/C_code/Makefile var_c_code/ +RUN git clone --depth 1 https://github.com/bioperl/bioperl-ext.git +RUN curl -L https://github.com/Ensembl/ensembl-xs/archive/2.3.2.zip -o ensembl-xs.zip && \ + unzip -q ensembl-xs.zip && \ + mv ensembl-xs-2.3.2 ensembl-xs && \ + rm -rf ensembl-xs.zip -FROM ghcr.io/bento-platform/bento_base_image:python-debian-2024.01.01 AS ensembl-vep - -SHELL ["/bin/bash", "-c"] - -WORKDIR / - -# Perl/libdbi-perl/lib*-dev/cpanminus/unzip are for cBioPortal scripts / caches / utilities -RUN apt-get update -y && \ - apt-get install -y \ - curl \ - perl \ - libdbi-perl \ - libperl-dev \ - cpanminus \ - unzip \ - libbz2-dev \ - liblzma-dev \ - zlib1g-dev \ - && \ - rm -rf /var/lib/apt/lists/* - -# Copy Ensembl-VEP from downloaded-deps -COPY --from=downloaded-deps /ensembl-vep /ensembl-vep - -# Install Ensembl-VEP from cloned source WORKDIR / -RUN cpanm --installdeps --with-recommends --notest --cpanfile ensembl-vep/cpanfile . && \ - cd ensembl-vep && \ - # Build vep in /ensembl-vep - perl INSTALL.pl -a a --NO_TEST --NO_UPDATE --SPECIES 'homo_sapiens' - -RUN ls -l /ensembl-vep FROM ghcr.io/bento-platform/bento_base_image:python-debian-2024.02.01 AS base-deps @@ -82,6 +63,32 @@ RUN apt-get update -y && \ && \ rm -rf /var/lib/apt/lists/* +# Install system packages for VEP +# Perl/libdbi-perl/lib*-dev/cpanminus/unzip are for cBioPortal scripts / caches / utilities +RUN apt-get update -y && \ + apt-get install -y \ + curl \ + perl \ + libdbd-mysql-perl \ + libdbi-perl \ + libjson-perl \ + libperl-dev \ + cpanminus \ + unzip \ + libbz2-dev \ + liblzma-dev \ + zlib1g-dev \ + && \ + rm -rf /var/lib/apt/lists/* + +# Install Perl packages for VEP +RUN curl "https://raw.githubusercontent.com/Ensembl/ensembl/release/111/cpanfile" -o "ensembl_cpanfile" && \ + curl "https://raw.githubusercontent.com/Ensembl/ensembl-vep/release/111/cpanfile" -o "ensembl_vep_cpanfile" && \ + cpanm --installdeps --with-recommends --notest --cpanfile ensembl_cpanfile . && \ + cpanm --installdeps --with-recommends --notest --cpanfile ensembl_vep_cpanfile . && \ + rm ensembl_cpanfile ensembl_vep_cpanfile && \ + rm -rf /root/.cpanm + # Then, install dependencies for running the Python server + Python workflow dependencies COPY container.requirements.txt . RUN pip install --no-cache-dir -r /container.requirements.txt && \ @@ -122,7 +129,7 @@ COPY --from=downloaded-deps /opt /opt COPY --from=downloaded-deps /cromwell.jar /cromwell.jar # - Copy Ensembl-VEP -COPY --from=ensembl-vep /ensembl-vep /ensembl-vep +COPY --from=ensemblorg/ensembl-vep:release_111.0 /opt/vep /opt/vep ENTRYPOINT [ "bash", "./entrypoint.bash" ] CMD [ "bash", "./run.bash" ] diff --git a/entrypoint.bash b/entrypoint.bash index 503064e8..46e07225 100644 --- a/entrypoint.bash +++ b/entrypoint.bash @@ -12,5 +12,8 @@ chmod -R o-rwx /wes/tmp # Remove all access from others for /wes/tmp # Configure git from entrypoint, since we've overwritten the base image entrypoint gosu bento_user /bin/bash -c '/set_gitconfig.bash' +# Set up PATH for VEP +export PATH="/opt/vep/src/ensembl-vep:/opt/vep/src/var_c_code:${PATH}" + # Drop into bento_user from root and execute the CMD specified for the image exec gosu bento_user "$@" From 37fa32f399f9693cb06244bddb7d7fe40853cf35 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Mon, 26 Feb 2024 15:45:23 -0500 Subject: [PATCH 11/11] chore(vep): faster build getting lwp from libwww-perl --- Dockerfile | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/Dockerfile b/Dockerfile index 9d2d5e64..ad578703 100644 --- a/Dockerfile +++ b/Dockerfile @@ -72,6 +72,7 @@ RUN apt-get update -y && \ libdbd-mysql-perl \ libdbi-perl \ libjson-perl \ + libwww-perl \ libperl-dev \ cpanminus \ unzip \ @@ -81,14 +82,6 @@ RUN apt-get update -y && \ && \ rm -rf /var/lib/apt/lists/* -# Install Perl packages for VEP -RUN curl "https://raw.githubusercontent.com/Ensembl/ensembl/release/111/cpanfile" -o "ensembl_cpanfile" && \ - curl "https://raw.githubusercontent.com/Ensembl/ensembl-vep/release/111/cpanfile" -o "ensembl_vep_cpanfile" && \ - cpanm --installdeps --with-recommends --notest --cpanfile ensembl_cpanfile . && \ - cpanm --installdeps --with-recommends --notest --cpanfile ensembl_vep_cpanfile . && \ - rm ensembl_cpanfile ensembl_vep_cpanfile && \ - rm -rf /root/.cpanm - # Then, install dependencies for running the Python server + Python workflow dependencies COPY container.requirements.txt . RUN pip install --no-cache-dir -r /container.requirements.txt && \ @@ -129,6 +122,7 @@ COPY --from=downloaded-deps /opt /opt COPY --from=downloaded-deps /cromwell.jar /cromwell.jar # - Copy Ensembl-VEP +COPY --from=ensemblorg/ensembl-vep:release_111.0 /usr/share/perl/5.34.0/CPAN /opt/vep COPY --from=ensemblorg/ensembl-vep:release_111.0 /opt/vep /opt/vep ENTRYPOINT [ "bash", "./entrypoint.bash" ]