From 7ab122d8affcdb0587922e98903b7a13d039460e Mon Sep 17 00:00:00 2001
From: Altan Orhon
Date: Wed, 20 Dec 2023 15:47:10 -0800
Subject: [PATCH] Updated runscript

---
 README.md   | 22 +++++++++++++++-------
 Singularity | 15 +++++++--------
 2 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/README.md b/README.md
index 61e445e..e012c60 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@ This container provides a convenient way to run [LLaVA](https://github.com/haoti
 To run LLaVA using [Apptainer](https://apptainer.org/docs/user/main/index.html), run the following command:
 
 ```bash
-apptainer run --nv oras://ghcr.io/uw-psych/llava-container:v0.0.1
+apptainer run --nv --writable-tmpfs oras://ghcr.io/uw-psych/llava-container/llava-container:0.0.1 llava-run
 ```
 
 You **must** pass the `--nv` flag to enable GPU support.
@@ -15,13 +15,17 @@ Depending on your intended use, you may also want to pass the `--bind` flag to m
 To specify a directory to use for the HuggingFace model cache, you can pass the `--env` flag to set the `HUGGINGFACE_HUB_CACHE` environment variable. For example:
 
 ```bash
-apptainer run --nv --env HUGGINGFACE_HUB_CACHE=/path/to/cache oras://ghcr.io/uw-psych/llava-container:v0.0.1
+apptainer run --nv --writable-tmpfs --env HUGGINGFACE_HUB_CACHE=/path/to/cache oras://ghcr.io/uw-psych/llava-container/llava-container:0.0.1
 ```
 
-By default, the container will run the script `llava-run.py` (from this repository) with the arguments provided.
-
 The `llava-run.py` script is a modification of `LLaVA/llava/eval/run_llava.py` that adds support for loading 4- and 8-bit models as found in `LLaVA/llava/serve/cli.py`.
 
+If you want to use a different command, you can pass it after the image name:
+
+```bash
+apptainer run --nv --writable-tmpfs --env HUGGINGFACE_HUB_CACHE=/path/to/cache oras://ghcr.io/uw-psych/llava-container/llava-container:0.0.1 python -m llava.serve.cli
+```
+
 ## Running LLaVA on Klone
 
 Here is a complete example of running LLaVA on the [Klone](https://uw-psych.github.io/compute_docs/docs/compute/slurm.html) SLURM cluster:
@@ -50,14 +54,18 @@ export APPTAINER_CACHEDIR="${APPTAINER_CACHEDIR:-/gscratch/scrubbed/${USER}/appt
 # Run LLaVA:
 apptainer run \
 	--nv \
-	--env HUGGINGFACE_HUB_CACHE=/gscratch/scrubbed/${USER}/hf-cache" \
-	oras://ghcr.io/uw-psych/llava-container:v0.0.1 \
+	--writable-tmpfs \
+	--env HUGGINGFACE_HUB_CACHE=/gscratch/scrubbed/${USER}/hf-cache \
+	oras://ghcr.io/uw-psych/llava-container/llava-container:0.0.1 \
+	llava-run \
 		--model-path liuhaotian/llava-v1.5-7b \
 		--image-file "https://llava-vl.github.io/static/images/view.jpg" \
 		--query "What's going on here?"
 # --nv: enable GPU support
+# --writable-tmpfs: ensure /tmp is writable
 # --env: set the HuggingFace cache directory
-# oras://ghcr.io/uw-psych/llava-container:v0.0.1: URL of the container image
+# oras://ghcr.io/uw-psych/llava-container/llava-container:0.0.1: The container
+# llava-run: the command to run in the container
 # --model-path: the name of the model to use
 # --image-file: the URL of the image to use
 # --query: what to ask the model
diff --git a/Singularity b/Singularity
index da995c9..ff67e18 100644
--- a/Singularity
+++ b/Singularity
@@ -32,15 +32,15 @@ From: mambaorg/micromamba:{{ MICROMAMBA_TAG }}
 	export HUGGINGFACE_HUB_CACHE="${HUGGINGFACE_HUB_CACHE:-}"
 
 %runscript
-	# Run the script "llava-run" with the arguments provided:
-	exec micromamba run -n base /opt/local/bin/llava-run "$@"
+	# Run the provided command with the micromamba base environment activated:
+	exec micromamba run --no-capture-output -n base "$@"
 
 %help
 	This container provides a convenient way to run
 	[LLaVA](https://github.com/haotian-liu/LLaVA).
 
-	To run LLaVA, use the following command:
-		apptainer run --nv llava-container.sif
+	To run LLaVA with the `llava-run` script, use the following command:
+		apptainer run --nv llava-container.sif llava-run [arguments]
 
 	You must pass the "--nv" flag to enable GPU support.
 
@@ -51,11 +51,10 @@ From: mambaorg/micromamba:{{ MICROMAMBA_TAG }}
 	To specify a directory to use for the HuggingFace model cache, use the
 	following command:
 		apptainer run --nv --env HUGGINGFACE_HUB_CACHE=/path/to/cache \
-			llava-container.sif
+			llava-container.sif llava-run
 
-	By default, the container will run the script "llava-run.py" (from this
-	repository) with the arguments provided. The following describes the usage
-	of this script:
+	This container includes a script called "llava-run" that runs LLaVA with the
+	arguments provided. The following describes the usage of this script:
 
 	llava-run [-h] [--model-path MODEL_PATH] [--model-base MODEL_BASE]
 		--image-file IMAGE_FILE --query QUERY [--conv-mode CONV_MODE]
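
Below the patch: a minimal, untested sketch of how the updated runscript could be exercised after applying it. It assumes Apptainer >= 1.2 (for `--build-arg` templating of `{{ MICROMAMBA_TAG }}`); the patch file name and the `latest` tag are illustrative values, not taken from the patch.

```bash
# Apply the patch and rebuild the container (patch file name is hypothetical):
git am 0001-Updated-runscript.patch
apptainer build --build-arg MICROMAMBA_TAG=latest llava-container.sif Singularity

# The new %runscript forwards its arguments to `micromamba run`, so the
# command to execute (here, llava-run) must come first:
apptainer run --nv --writable-tmpfs llava-container.sif \
    llava-run --model-path liuhaotian/llava-v1.5-7b \
    --image-file "https://llava-vl.github.io/static/images/view.jpg" \
    --query "What's going on here?"
```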