Skip to content

Commit

Permalink
Merge pull request #31 from bento-platform/cbioportal
Browse files Browse the repository at this point in the history
feat: add Ensembl-VEP and VCF2MAF for cBio workflows
  • Loading branch information
davidlougheed authored Mar 1, 2024
2 parents 4fd02cf + 37fa32f commit c0a5481
Show file tree
Hide file tree
Showing 3 changed files with 95 additions and 10 deletions.
99 changes: 89 additions & 10 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,3 +1,48 @@
# Stage "downloaded-deps": fetches third-party tools (VCF2MAF, Cromwell, Ensembl sources) so that
# later stages can COPY --from this stage instead of downloading them themselves.
# --platform=$BUILDPLATFORM makes this stage run on the builder's native platform rather than
# on the target platform.
# NOTE(review): that is only safe while everything copied out of this stage is
# architecture-independent (scripts, data files, a JAR) - confirm before adding compiled artifacts.
FROM --platform=$BUILDPLATFORM debian:bullseye-slim AS downloaded-deps

# Run the RUN instructions of this stage with bash instead of the default /bin/sh.
SHELL ["/bin/bash", "-c"]

# Install VCF2MAF
# TODO: I don't like /opt as a home for these
# The release archive is unpacked in a scratch directory; only the Perl scripts and the bundled
# data directory are kept under /opt.
# curl runs with -f (--fail) so that an HTTP error (bad version, missing tag) stops the build at
# the download instead of saving the error page as vcf2maf.zip.
# NOTE(review): /opt/data already exists when `cp -r vcf2maf/data /opt/data` runs, so the files
# land in /opt/data/data rather than /opt/data - confirm this nesting is what the workflows expect.

WORKDIR /tmp/vcf2maf
ENV VCF2MAF_VERSION=1.6.21
RUN apt-get update -y && \
    apt-get install -y curl git unzip wget && \
    echo "https://github.com/mskcc/vcf2maf/archive/refs/tags/v${VCF2MAF_VERSION}.zip" && \
    curl -fL "https://github.com/mskcc/vcf2maf/archive/refs/tags/v${VCF2MAF_VERSION}.zip" -o vcf2maf.zip && \
    unzip vcf2maf.zip && \
    mv "vcf2maf-${VCF2MAF_VERSION}" vcf2maf && \
    mkdir -p /opt/data && \
    cp vcf2maf/*.pl /opt && \
    cp -r vcf2maf/data /opt/data && \
    rm -rf vcf2maf vcf2maf.zip

# Install Cromwell
# -f (--fail) makes curl exit non-zero on an HTTP error instead of writing the error page to
# cromwell.jar, so a wrong version or missing release asset fails the build here rather than
# producing an image with an unusable JAR.
ENV CROMWELL_VERSION=86
WORKDIR /
RUN curl -fL \
    "https://github.com/broadinstitute/cromwell/releases/download/${CROMWELL_VERSION}/cromwell-${CROMWELL_VERSION}.jar" \
    -o cromwell.jar

# Fetch the Ensembl-VEP sources for the pinned release (shallow clone only; nothing is installed
# at this point) and make the top-level Perl scripts executable for their owner.
ENV VEP_ENSEMBL_RELEASE_VERSION=111.0
RUN git clone \
        --branch "release/${VEP_ENSEMBL_RELEASE_VERSION}" \
        --depth 1 \
        https://github.com/Ensembl/ensembl-vep.git \
    && chmod u+x ensembl-vep/*.pl

# Fetch VEP's companion sources into the ensembl-vep checkout: ensembl-variation (its C sources
# and Makefile are gathered into var_c_code/), bioperl-ext, and ensembl-xs 2.3.2.
# NOTE(review): ensembl-variation and bioperl-ext are cloned from their default branches with no
# tag or commit pinned, so a rebuild can pick up different sources.
# NOTE(review): the COPY --from=downloaded-deps instructions visible in this change only take
# /opt and /cromwell.jar - confirm that /ensembl-vep is still consumed somewhere.
WORKDIR /ensembl-vep/
RUN git clone --depth 1 https://github.com/Ensembl/ensembl-variation.git && \
    mkdir var_c_code && \
    cp ensembl-variation/C_code/*.c ensembl-variation/C_code/Makefile var_c_code/
RUN git clone --depth 1 https://github.com/bioperl/bioperl-ext.git
# -f (--fail) stops the build on an HTTP error instead of handing an error page to unzip.
RUN curl -fL https://github.com/Ensembl/ensembl-xs/archive/2.3.2.zip -o ensembl-xs.zip && \
    unzip -q ensembl-xs.zip && \
    mv ensembl-xs-2.3.2 ensembl-xs && \
    rm -rf ensembl-xs.zip

WORKDIR /

# Stage "base-deps": the Bento Python/Debian base image plus the system and Python packages
# installed below; the build-install stage starts from it via `FROM base-deps`.
FROM ghcr.io/bento-platform/bento_base_image:python-debian-2024.02.01 AS base-deps

# Run the RUN instructions of this stage with bash instead of the default /bin/sh.
SHELL ["/bin/bash", "-c"]
Expand All @@ -6,20 +51,42 @@ WORKDIR /

# System packages used by the workflows: HTSLib tooling (SAMtools, tabix, BCFtools), curl and jq.
# OpenJDK supplies the Java runtime for WOMtool/Cromwell.
# The apt package lists are removed in the same layer so they do not stay in the image.

RUN apt-get update --yes \
    && apt-get install --yes \
        bcftools \
        curl \
        jq \
        openjdk-17-jre \
        samtools \
        tabix \
    && rm -rf /var/lib/apt/lists/*

# System packages for VEP.
# Perl, libdbi-perl, the lib*-dev headers, cpanminus and unzip serve the cBioPortal
# scripts / caches / utilities.
RUN apt-get update --yes \
    && apt-get install --yes \
        cpanminus \
        curl \
        libbz2-dev \
        libdbd-mysql-perl \
        libdbi-perl \
        libjson-perl \
        liblzma-dev \
        libperl-dev \
        libwww-perl \
        perl \
        unzip \
        zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*

# Then, install dependencies for running the Python server + Python workflow dependencies
# The requirements file is only needed for the install, so it is deleted in the same layer.
# System packages are installed by the dedicated apt-get steps earlier in this stage, so this step
# only handles the Python side.
COPY container.requirements.txt .
RUN pip install --no-cache-dir -r /container.requirements.txt && \
    rm /container.requirements.txt

# NOTE(review): this repeats the Cromwell download already performed in the downloaded-deps
# stage, and /cromwell.jar is written again later by `COPY --from=downloaded-deps /cromwell.jar`.
# It looks like the pre-change copy of that step - confirm and drop it if so.
WORKDIR /
ENV CROMWELL_VERSION=86
RUN curl -L \
https://github.com/broadinstitute/cromwell/releases/download/${CROMWELL_VERSION}/cromwell-${CROMWELL_VERSION}.jar \
-o cromwell.jar

# Stage "build-install": starts from base-deps and installs the service itself on top of it.
FROM base-deps AS build-install

# Backwards-compatible with old BentoV2 container layout
Expand All @@ -46,5 +113,17 @@ COPY README.md .
# Install the module itself, locally (similar to `pip install -e .`)
# `--without dev` leaves out the "dev" dependency group.
RUN poetry install --without dev

# Copy from other stages last, since it means the stages can be built in parallel

# - Copy VCF2MAF
#   Brings over everything the downloaded-deps stage placed under /opt (the VCF2MAF Perl scripts
#   and its data directory).
COPY --from=downloaded-deps /opt /opt

# - Copy Cromwell
COPY --from=downloaded-deps /cromwell.jar /cromwell.jar

# - Copy Ensembl-VEP
#   Both COPYs pull directly from the published ensemblorg/ensembl-vep image and both write into
#   /opt/vep, so their order matters: the second is laid over the first.
#   NOTE(review): the first COPY places the contents of the image's Perl 5.34.0 CPAN directory
#   directly into /opt/vep rather than under a Perl library path - confirm this is intended.
#   NOTE(review): the image tag (release_111.0) is hard-coded here separately from
#   VEP_ENSEMBL_RELEASE_VERSION in the downloaded-deps stage; keep the two in sync.
COPY --from=ensemblorg/ensembl-vep:release_111.0 /usr/share/perl/5.34.0/CPAN /opt/vep
COPY --from=ensemblorg/ensembl-vep:release_111.0 /opt/vep /opt/vep

# entrypoint.bash runs first and receives the CMD (`bash ./run.bash` by default) as its arguments.
ENTRYPOINT [ "bash", "./entrypoint.bash" ]
CMD [ "bash", "./run.bash" ]
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,9 @@ BENTO_URL=http://127.0.0.1:5000/
# checks for TLS certificate validity!
BENTO_DEBUG=False

# SSL Configuration - whether to validate certificates
BENTO_VALIDATE_SSL=True

# Celery configuration
CELERY_RESULT_BACKEND=redis://
CELERY_BROKER_URL=redis://
Expand Down
3 changes: 3 additions & 0 deletions entrypoint.bash
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,8 @@ chmod -R o-rwx /wes/tmp # Remove all access from others for /wes/tmp
# Configure git from entrypoint, since we've overwritten the base image entrypoint
# (the helper script is run as bento_user via gosu)
gosu bento_user /bin/bash -c '/set_gitconfig.bash'

# Set up PATH for VEP
# Prepends the ensembl-vep and var_c_code directories under /opt/vep/src; exported so that
# commands started from this script see the updated PATH.
export PATH="/opt/vep/src/ensembl-vep:/opt/vep/src/var_c_code:${PATH}"

# Drop into bento_user from root and execute the CMD specified for the image
# (`exec` replaces this shell, so no wrapper shell is left running in front of the command)
exec gosu bento_user "$@"

0 comments on commit c0a5481

Please sign in to comment.