-
Notifications
You must be signed in to change notification settings - Fork 1
/
Dockerfile
84 lines (71 loc) · 2.69 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# Use an official and specific version tag if possible, instead of 'latest'
FROM runpod/pytorch:2.0.1-py3.10-cuda11.8.0-devel-ubuntu22.04
# Environment variables
ENV PYTHONUNBUFFERED=1
ARG MODEL=mistralai/Mistral-7B-Instruct-v0.1
ENV MODEL=$MODEL
# Modes can be: pod, serverless, both
# pod is for GPU pod on runpod, serverless is for serverless deployment on runpod
# both is mainly for serverless deployment on runpod, but it also starts up a jupyter notebook and openssh
# for debugging if you set the minimum active worker to one.
ARG MODE_TO_RUN=pod
ENV MODE_TO_RUN=$MODE_TO_RUN
ARG CONCURRENCY_MODIFIER=1
ENV CONCURRENCY_MODIFIER=$CONCURRENCY_MODIFIER
ARG MAX_MODEL_LEN=25000
ENV MAX_MODEL_LEN=$MAX_MODEL_LEN
# Set up the working directory
WORKDIR /app
# Install dependencies in a single RUN command to reduce layers
# Clean up in the same layer to reduce image size
RUN apt-get update --yes --quiet && \
DEBIAN_FRONTEND=noninteractive apt-get install --yes --quiet --no-install-recommends \
software-properties-common \
gpg-agent \
build-essential \
apt-utils \
ca-certificates \
curl && \
add-apt-repository --yes ppa:deadsnakes/ppa && \
apt-get update --yes --quiet && \
DEBIAN_FRONTEND=noninteractive apt-get install --yes --quiet --no-install-recommends \
python3.11 \
python3.11-dev \
python3.11-distutils \
python3.11-venv \
python3.11-lib2to3 \
python3.11-gdbm \
python3.11-tk && \
rm -rf /var/lib/apt/lists/* && \
update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1 && \
update-alternatives --auto python3
# Install pip manually
RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \
python3.11 get-pip.py && \
rm get-pip.py
# Create and activate a Python virtual environment
RUN python3 -m venv /app/venv
ENV PATH="/app/venv/bin:$PATH"
# Install Python packages
# Install Python packages in a single RUN instruction
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir \
OhMyRunPod \
asyncio \
requests \
runpod==1.6.2 \
langchain==0.0.259 \
"openllm[vllm]"
COPY requirements.txt ./
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r requirements.txt
# Remove Runpod's copy of start.sh and replace it with our own
RUN rm ../start.sh
COPY . .
RUN chmod +x start.sh
RUN python /app/preload.py
# Validation to make sure VLLM models are downloaded.
RUN ls -d /root/bentoml/models/*/
# Ex. if you want to override the model_arg: docker build --build-arg MODEL_ARG=mistralai/Mistral-7B-Instruct-v0.2 -t your_image_name .
# depot build -t justinwlin/serverlessllm:1.0 . --push --platform linux/amd64
CMD ["/app/start.sh"]