Commit

Tweaks
pierre.delaunay committed Oct 4, 2023
1 parent 4d3c06a commit 9fcd803
Showing 3 changed files with 25 additions and 15 deletions.
4 changes: 3 additions & 1 deletion docs/examples/llm/client.py
@@ -1,3 +1,5 @@
+import subprocess
+
 import openai


@@ -34,7 +36,7 @@ def get_job_comment(name="inference_server.sh"):
 # profit
 completion = openai.Completion.create(
     model=server['model'],
-    prompt=args.prompt
+    prompt="What is the square root of 25 ?"
 )

 print(completion)
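The `server` dictionary used above is built from the comment string that `inference_server.sh` attaches to the Slurm job (`model=...|host=...|port=...|shared=y`). A minimal sketch of that parsing step, with a hypothetical helper name (the actual `get_job_comment` in `client.py` does the `squeue` lookup as well):

```python
# Hypothetical helper (not part of the commit): parse the comment string that
# inference_server.sh attaches to the job via scontrol, i.e. a string of the
# form "model=<MODEL>|host=<HOST>|port=<PORT>|shared=y".
def parse_job_comment(comment: str) -> dict:
    return dict(field.split("=", 1) for field in comment.split("|"))

# Example with made-up values:
server = parse_job_comment("model=llama2|host=cn-a001|port=8000|shared=y")
print(server["model"], server["host"], server["port"])
```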
22 changes: 13 additions & 9 deletions docs/examples/llm/inference_server.sh
@@ -16,21 +16,22 @@
 #SBATCH --ntasks-per-node=1
 #SBATCH --mem=32G

-usage() {
-  echo "Usage: $0 [-m] [-p]
+function usage() {
+  echo "Usage: $0 [-m] [-p]"
   echo "  -h              Display this help message."
   echo "  -m MODEL        Specify a file to process."
   echo "  -p PATH         Specify a directory to work in."
   echo "  -e ENV          Specify the conda environment to use."
   echo "  ARGUMENT        Any additional argument you want to process."
   exit 1
 }

 MODEL=""
-PATH=""
+MODEL_PATH=""
 ENV="./env"


-while getopts ":hf:d:" opt; do
+while getopts ":hm:p:e:" opt; do
 case $opt in
   h)
     usage
@@ -39,7 +40,7 @@ while getopts ":hf:d:" opt; do
     MODEL="$OPTARG"
     ;;
   p)
-    PATH="$OPTARG"
+    MODEL_PATH="$OPTARG"
     ;;
   e)
     ENV="$OPTARG"
@@ -55,22 +56,25 @@ while getopts ":hf:d:" opt; do
   esac
 done

+echo "model: $MODEL"
+echo " path: $MODEL_PATH"
+echo "  env: $ENV"

 export MILA_WEIGHTS="/network/weights/"
 cd $SLURM_TMPDIR

 #
 # Fix problem with conda saying it is not "init properly"
 #
 CONDA_EXEC="$(which conda)"
 CONDA_BASE=$(dirname $CONDA_EXEC)
+CONDA_ENVS="$CONDA_BASE/../envs"
 source $CONDA_BASE/../etc/profile.d/conda.sh

 #
 # Create a new environment
 #
-if [ ! -d "$ENV" ]; then
+if [ ! -d "$ENV" ] && [ "$ENV" != "base" ] && [ ! -d "$CONDA_ENVS/$ENV" ]; then
     conda create --prefix $ENV python=3.9 -y
 fi
 conda activate $ENV

@@ -85,12 +89,12 @@ NAME="$WEIGHTS/$MODEL"

 #
 scontrol update job $SLURM_JOB_ID comment="model=$MODEL|host=$HOST|port=$PORT|shared=y"

 #
 # Launch Server
 #
 python -m vllm.entrypoints.openai.api_server \
     --host $HOST \
     --port $PORT \
-    --model "$MODEL" \
+    --model "$MODEL_PATH" \
     --tensor-parallel-size $SLURM_NTASKS_PER_NODE \
     --served-model-name "$MODEL"
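The environment-creation guard in the script above only creates a fresh conda environment when `ENV` is not an existing prefix directory, not `base`, and not a named environment already present under `$CONDA_BASE/../envs`. An illustrative re-statement of that logic in Python (the function name is hypothetical, for clarity only):

```python
from pathlib import Path

def needs_create(env: str, conda_envs: str) -> bool:
    # Mirrors the shell guard:
    #   [ ! -d "$ENV" ] && [ "$ENV" != "base" ] && [ ! -d "$CONDA_ENVS/$ENV" ]
    return (
        not Path(env).is_dir()          # not an existing prefix directory
        and env != "base"               # not the base environment
        and not (Path(conda_envs) / env).is_dir()  # not an existing named env
    )
```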
14 changes: 9 additions & 5 deletions docs/examples/llm/vllm.rst
@@ -36,24 +36,28 @@ You can override the defaults by specifying arguments to sbatch.
 Client
 ------

-Becasue vLLM replicates OpenAI's API, the client side is quite straight forward.
-Own OpenAI's client can be reused.
+Because vLLM replicates OpenAI's API, the client side is quite straightforward and
+OpenAI's own client can be reused.

 .. warning::

    The server takes a while to set up; you might have to wait a few minutes
    before the server is ready for inference.

-   You can check the job log of the server.
-   Look for
+   You can check the job log of the server using ``tail -f slurm-<JOB-ID>.out`` to
+   see the log as it is written.
+
+   Look for ``Uvicorn running on http://... (Press CTRL+C to quit)``
+   to know when the server is ready to receive requests.


 .. note::

-   We use squeue to look for the inference server job to configure the
+   We use ``squeue`` to look for the inference server job to configure the
    url endpoint automatically.

+   Make sure your job name is unique!


 .. literalinclude:: client.py
    :language: python

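The readiness check described in the warning (watching the job log for the Uvicorn startup line) can also be done programmatically. A minimal sketch, with hypothetical function names and assuming the default Slurm log naming:

```python
# Sketch only: detect server readiness by scanning the Slurm job log for the
# Uvicorn startup line, rather than watching it manually with tail -f.
def server_ready(log_text: str) -> bool:
    return "Uvicorn running on" in log_text

def check_log(log_path: str) -> bool:
    # One-shot check; a real client would poll this in a loop with a sleep.
    try:
        with open(log_path) as f:
            return server_ready(f.read())
    except FileNotFoundError:
        return False
```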