Merge pull request #10 from VectorInstitute/develop
Develop
XkunW authored Aug 29, 2024
2 parents 156dfa5 + b426e7e commit f43d7bf
Showing 17 changed files with 171 additions and 113 deletions.
13 changes: 9 additions & 4 deletions README.md
@@ -15,10 +15,9 @@ vec-inf launch Meta-Llama-3.1-8B-Instruct
```
You should see an output like the following:

- <img width="450" alt="launch_img" src="https://github.com/user-attachments/assets/557eb421-47db-4810-bccd-c49c526b1b43">
+ <img width="400" alt="launch_img" src="https://github.com/user-attachments/assets/557eb421-47db-4810-bccd-c49c526b1b43">

- The model would be launched using the [default parameters](vec-inf/models/models.csv), you can override these values by providing additional options, use `--help` to see the full list.
- If you'd like to see the Slurm logs, they are located in the `.vec-inf-logs` folder in your home directory. The log folder path can be modified by using the `--log-dir` option.
+ The model is launched using the [default parameters](vec-inf/models/models.csv); you can override these values with additional options (use `--help` to see the full list). You can also launch your own custom model as long as its architecture is [supported by vLLM](https://docs.vllm.ai/en/stable/models/supported_models.html); in that case you need to specify all model-launch options yourself.
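For illustration, overriding a couple of defaults could look like the following. The `--max-model-len` and `--log-dir` flag names come from the CLI options elsewhere in this diff; the values are invented, and the command is built as a string and echoed rather than run, since an actual launch requires a Slurm cluster:

```shell
# Hypothetical override of two defaults; flag names are from the CLI options
# in this diff, values are illustrative. Echoed as a dry run for inspection.
launch_cmd='vec-inf launch Meta-Llama-3.1-8B-Instruct --max-model-len 8192 --log-dir ~/vec-inf-logs'
echo "$launch_cmd"
```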

You can check the inference server status by providing the Slurm job ID to the `status` command:
```bash
@@ -27,7 +26,7 @@ vec-inf status 13014393

You should see an output like the following:

- <img width="450" alt="status_img" src="https://github.com/user-attachments/assets/7385b9ca-9159-4ca9-bae2-7e26d80d9747">
+ <img width="400" alt="status_img" src="https://github.com/user-attachments/assets/7385b9ca-9159-4ca9-bae2-7e26d80d9747">

There are 5 possible states:

@@ -52,6 +51,12 @@ vec-inf list
```
<img width="1200" alt="list_img" src="https://github.com/user-attachments/assets/a4f0d896-989d-43bf-82a2-6a6e5d0d288f">

You can also view the default setup for a specific supported model by providing the model name, for example `Meta-Llama-3.1-70B-Instruct`:
```bash
vec-inf list Meta-Llama-3.1-70B-Instruct
```
<img width="400" alt="list_model_img" src="https://github.com/user-attachments/assets/5dec7a33-ba6b-490d-af47-4cf7341d0b42">

The `launch`, `list`, and `status` commands support `--json-mode`, which structures the command output as a JSON string.
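As a sketch of consuming `--json-mode` output downstream — the JSON literal below is a stand-in for real `vec-inf list --json-mode` output, not captured from the tool:

```shell
# Stand-in for `vec-inf list --json-mode` output; the real shape may differ.
models_json='["Meta-Llama-3.1-8B-Instruct","Meta-Llama-3.1-70B-Instruct"]'
# Parse it with Python's stdlib json module and pick the first model name.
first_model=$(printf '%s' "$models_json" | python3 -c 'import json, sys; print(json.load(sys.stdin)[0])')
echo "$first_model"
```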

## Send inference requests
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "vec-inf"
- version = "0.3.0"
+ version = "0.3.1"
description = "Efficient LLM inference on Slurm clusters using vLLM."
authors = ["Marshall Wang <[email protected]>"]
license = "MIT license"
41 changes: 32 additions & 9 deletions vec_inf/cli/_cli.py
@@ -1,7 +1,6 @@
import os

import click
- import pandas as pd
from rich.console import Console
from rich.columns import Columns
from rich.panel import Panel
@@ -27,12 +26,12 @@ def cli():
@click.option(
"--model-family",
type=str,
- help='The model family name according to the directories in `models`'
+ help='The model family'
)
@click.option(
"--model-variant",
type=str,
- help='The model variant according to the README in `models/model-family`'
+ help='The model variant'
)
@click.option(
"--max-model-len",
@@ -57,12 +56,12 @@ def cli():
@click.option(
"--qos",
type=str,
- help='Quality of service, default to m3'
+ help='Quality of service, default depends on the resource allocation suggested for the model'
)
@click.option(
"--time",
type=str,
- help='Time limit for job, this should comply with QoS, default to 4:00:00'
+ help='Time limit for job, should comply with QoS limits; defaults to the max walltime of the chosen QoS'
)
@click.option(
"--data-type",
@@ -77,7 +76,7 @@
@click.option(
"--log-dir",
type=str,
- help='Path to slurm log directory'
+ help='Path to slurm log directory, defaults to .vec-inf-logs in the home directory'
)
@click.option(
"--json-mode",
@@ -150,7 +149,7 @@ def launch(
@click.option(
"--log-dir",
type=str,
- help='Path to slurm log directory. This is required if it was set when launching the model'
+ help='Path to slurm log directory. This is required if --log-dir was set at model launch'
)
@click.option(
"--json-mode",
@@ -238,16 +237,40 @@ def shutdown(slurm_job_id: int) -> None:


@cli.command("list")
@click.argument(
"model-name",
required=False)
@click.option(
"--json-mode",
is_flag=True,
help='Output in JSON string',
)
- def list(json_mode: bool=False) -> None:
+ def list(model_name: str = None, json_mode: bool = False) -> None:
"""
- List all available models
+ List all available models, or show the default setup of a specific model
"""
    models_df = load_models_df()

    if model_name:
        if model_name not in models_df['model_name'].values:
            raise ValueError(f"Model name {model_name} not found in available models")

        excluded_keys = {'venv', 'log_dir', 'pipeline_parallelism'}
        model_row = models_df.loc[models_df['model_name'] == model_name]

        if json_mode:
            filtered_model_row = model_row.drop(columns=excluded_keys, errors='ignore')
            click.echo(filtered_model_row.to_json(orient='records'))
            return
        table = create_table(key_title="Model Config", value_title="Value")
        for _, row in model_row.iterrows():
            for key, value in row.items():
                if key not in excluded_keys:
                    table.add_row(key, str(value))
        CONSOLE.print(table)
        return

    if json_mode:
        click.echo(models_df['model_name'].to_json(orient='records'))
        return
28 changes: 23 additions & 5 deletions vec_inf/launch_server.sh
@@ -22,7 +22,7 @@ while [[ "$#" -gt 0 ]]; do
shift
done

- required_vars=(model_family model_variant partition qos walltime num_nodes num_gpus max_model_len vocab_size data_type virtual_env log_dir pipeline_parallelism)
+ required_vars=(model_family model_variant partition qos walltime num_nodes num_gpus max_model_len vocab_size)

for var in "${required_vars[@]}"; do
    if [ -z "${!var}" ]; then
@@ -40,10 +40,28 @@ export NUM_NODES=$num_nodes
export NUM_GPUS=$num_gpus
export VLLM_MAX_MODEL_LEN=$max_model_len
export VLLM_MAX_LOGPROBS=$vocab_size
- export VLLM_DATA_TYPE=$data_type
- export VENV_BASE=$virtual_env
- export LOG_DIR=$log_dir
- export PIPELINE_PARALLELISM=$pipeline_parallelism
# For custom models, the following are set to defaults if not specified
export VLLM_DATA_TYPE="auto"
export VENV_BASE="singularity"
export LOG_DIR="default"
# Pipeline parallelism is disabled by default; as an experimental feature, it can only be enabled for models that specify it in models.csv
export PIPELINE_PARALLELISM="false"

if [ -n "$data_type" ]; then
export VLLM_DATA_TYPE=$data_type
fi

if [ -n "$virtual_env" ]; then
export VENV_BASE=$virtual_env
fi

if [ -n "$log_dir" ]; then
export LOG_DIR=$log_dir
fi

if [ -n "$pipeline_parallelism" ]; then
export PIPELINE_PARALLELISM=$pipeline_parallelism
fi
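The default-then-override chain above could be collapsed with shell parameter expansion; a behavioral sketch (not part of this commit) of the same fallback logic:

```shell
# ${var:-default} expands to $var when set and non-empty, otherwise to the
# default -- the same effect as exporting a default then overriding it in an if.
data_type=""                                # simulate the option being unset
export VLLM_DATA_TYPE="${data_type:-auto}"  # falls back to "auto"
echo "$VLLM_DATA_TYPE"
```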

# ================================= Set default environment variables ======================================
# Slurm job configuration
12 changes: 0 additions & 12 deletions vec_inf/models/CodeLlama/README.md

This file was deleted.

10 changes: 0 additions & 10 deletions vec_inf/models/Llama-2/README.md

This file was deleted.

8 changes: 0 additions & 8 deletions vec_inf/models/Meta-Llama-3.1/README.md

This file was deleted.

8 changes: 0 additions & 8 deletions vec_inf/models/Meta-Llama-3/README.md

This file was deleted.

10 changes: 0 additions & 10 deletions vec_inf/models/Mistral/README.md

This file was deleted.

8 changes: 0 additions & 8 deletions vec_inf/models/Mixtral/README.md

This file was deleted.

6 changes: 0 additions & 6 deletions vec_inf/models/Phi-3/README.md

This file was deleted.

