Skip to content

Commit

Permalink
Dockerfile to build smaller Images (#567)
Browse files Browse the repository at this point in the history
  • Loading branch information
yuhongsun96 authored Oct 12, 2023
1 parent 4196403 commit dbf59d2
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 36 deletions.
57 changes: 28 additions & 29 deletions backend/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,53 +1,51 @@
# Base image: Python 3.11.4 on the slim Debian bookworm variant.
# Later steps hard-code /usr/local/lib/python3.11/site-packages, so changing the
# Python minor version here requires updating those paths as well.
FROM python:3.11.4-slim-bookworm

RUN apt-get update \
&& apt-get install -y git cmake pkg-config libprotobuf-c-dev protobuf-compiler \
# Install system dependencies
RUN apt-get update && \
apt-get install -y git cmake pkg-config libprotobuf-c-dev protobuf-compiler \
libprotobuf-dev libgoogle-perftools-dev libpq-dev build-essential cron curl \
supervisor zip \
&& rm -rf /var/lib/apt/lists/*

COPY ./requirements/default.txt /tmp/requirements.txt
RUN pip install --no-cache-dir --upgrade -r /tmp/requirements.txt
supervisor zip ca-certificates gnupg && \
rm -rf /var/lib/apt/lists/* && \
apt-get clean

# Install Python dependencies
# Remove py, which is pulled in by retry; py is not needed and has a known CVE
RUN pip uninstall -y py

RUN playwright install chromium
RUN playwright install-deps chromium
# Copy only the requirements file before installing, so this layer depends on
# default.txt and not on the application source that is copied in later.
COPY ./requirements/default.txt /tmp/requirements.txt
# One layer: install the requirements without keeping pip's download cache,
# uninstall the `py` package, then install Chromium for Playwright together
# with the system packages Chromium depends on.
RUN pip install --no-cache-dir --upgrade -r /tmp/requirements.txt && \
pip uninstall -y py && \
playwright install chromium && \
playwright install-deps chromium

# install nodejs and replace nodejs packaged with playwright (18.17.0) with the one installed below
# install nodejs and replace nodejs packaged with playwright (18.17.0) with the one installed below
# based on the instructions found here:
# https://nodejs.org/en/download/package-manager#debian-and-ubuntu-based-linux-distributions
# this is temporarily needed until playwright updates their packaged node version to
# 20.5.1+
RUN apt-get update
RUN apt-get install -y ca-certificates curl gnupg
RUN mkdir -p /etc/apt/keyrings
RUN curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg
RUN echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_20.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list
RUN apt-get update
RUN apt-get install nodejs -y
# replace nodejs packaged with playwright (18.17.0) with the one installed above
RUN cp /usr/bin/node /usr/local/lib/python3.11/site-packages/playwright/driver/node
# remove nodejs (except for the binary we moved into playwright)
RUN apt-get remove -y nodejs
# One layer: register the NodeSource apt repository (its key is dearmored into
# /etc/apt/keyrings and referenced through signed-by), install nodejs from the
# node_20.x channel, copy /usr/bin/node over the node binary in Playwright's
# driver directory, and then remove the nodejs package again.
# NOTE(review): the `curl | gpg` pipe runs without pipefail, so curl's exit
# status is ignored by the shell - confirm whether a pipefail SHELL is wanted.
RUN mkdir -p /etc/apt/keyrings && \
curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg && \
echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_20.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list && \
apt-get update && \
apt-get install -y nodejs && \
cp /usr/bin/node /usr/local/lib/python3.11/site-packages/playwright/driver/node && \
apt-get remove -y nodejs

# Cleanup for CVEs and size reduction
RUN apt-get remove -y linux-libc-dev \
&& apt-get autoremove -y \
&& rm -rf /var/lib/apt/lists/*

# Remove tornado test key to placate vulnerability scanners
# More details can be found here:
# https://github.com/tornadoweb/tornado/issues/3107
RUN rm /usr/local/lib/python3.11/site-packages/tornado/test/test.key
# One layer: remove linux-libc-dev, autoremove packages that are no longer
# needed, delete the apt package lists, and delete the test key that ships
# inside the installed tornado package.
# NOTE(review): plain `rm` fails the build if test.key is absent (for example
# after a tornado layout change) - confirm whether `rm -f` would be preferable.
RUN apt-get remove -y linux-libc-dev && \
apt-get autoremove -y && \
rm -rf /var/lib/apt/lists/* && \
rm /usr/local/lib/python3.11/site-packages/tornado/test/test.key

# Set up application files
# /app becomes the working directory; the danswer package, the alembic
# directory with alembic.ini, and the supervisor configuration are copied in.
WORKDIR /app
COPY ./danswer /app/danswer
COPY ./alembic /app/alembic
COPY ./alembic.ini /app/alembic.ini
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf

# Create Vespa app zip
# zip runs from inside app_config so that the archive holds that directory's
# contents with relative paths; the result is written to
# /app/danswer/vespa-app.zip and the working directory is then reset to /app.
WORKDIR /app/danswer/datastores/vespa/app_config
RUN zip -r /app/danswer/vespa-app.zip .
WORKDIR /app
Expand All @@ -57,5 +55,6 @@ COPY ./scripts/migrate_vespa_to_acl.py /app/migrate_vespa_to_acl.py

# Add /app to Python's module search path so packages copied under /app can be
# imported. Written in key=value form; the space-separated ENV form is legacy.
ENV PYTHONPATH=/app

# By default this container does nothing, it is used by api server and background which specify their own CMD
# Default command: keeps the container running without doing any work.
# The API server and background containers use this image and specify their own CMD.
CMD ["tail", "-f", "/dev/null"]
7 changes: 0 additions & 7 deletions backend/danswer/background/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
from danswer.db.models import IndexAttempt
from danswer.db.models import IndexingStatus
from danswer.search.search_utils import warm_up_models
from danswer.utils.acl import set_acl_for_vespa_nonblocking
from danswer.utils.logger import IndexAttemptSingleton
from danswer.utils.logger import setup_logger

Expand Down Expand Up @@ -449,12 +448,6 @@ def update_loop(delay: int = 10, num_workers: int = NUM_INDEXING_WORKERS) -> Non
# This ensures that bad states get cleaned up
mark_all_in_progress_cc_pairs_failed(db_session)

# TODO: remove this once everyone is migrated to ACL
# does nothing if this has been successfully run before
# NOTE: is done in another thread, to not block indexing runs from
# getting kicked off
set_acl_for_vespa_nonblocking(should_check_if_already_done=True)

while True:
start = time.time()
start_time_utc = datetime.utcfromtimestamp(start).strftime("%Y-%m-%d %H:%M:%S")
Expand Down

1 comment on commit dbf59d2

@vercel
Copy link

@vercel vercel bot commented on dbf59d2 Oct 12, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.