From b49ffd9cd5b67d9e385bd2289140c7c4dbd825b3 Mon Sep 17 00:00:00 2001
From: Zhanghao Wu <zhanghao.wu@outlook.com>
Date: Tue, 7 Mar 2023 14:04:47 -0800
Subject: [PATCH 1/8] Add skypilot examples

---
 README.md              | 11 +++++++++++
 flexgen/apps/task.yaml | 27 +++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)
 create mode 100644 flexgen/apps/task.yaml

diff --git a/README.md b/README.md
index 9655b2cb..ab92e231 100644
--- a/README.md
+++ b/README.md
@@ -66,9 +66,19 @@ python3 -m flexgen.apps.helm_run --description mmlu:model=text,subject=abstract_
 ```
 Note that only a subset of HELM scenarios is tested. See more tested scenarios [here](flexgen/apps/helm_passed_30b.sh).
 
+### Run FlexGen on any cloud with SkyPilot
+FlexGen benchmark can be launched with [SkyPilot](http://skypilot.co), a tool for launching ML jobs on any cloud.
+You can use a single command below to automatically launch the benchmark on any cloud with SkyPilot, after you setup your cloud account locally (check how to setup SkyPilot [here](https://skypilot.readthedocs.io/en/latest/getting-started/installation.html)).
+```
+sky launch -c flexgen flexgen/apps/task.yaml
+```
+Note that you can replace the run section with any FlexGen command. You can log into the cluster running the job with `ssh flexgen` and terminate the cluster with `sky down flexgen`.
+
 ### Data Wrangling
 You can run the examples in this paper, ['Can Foundation Models Wrangle Your Data?'](https://arxiv.org/abs/2205.09911), by following the instructions [here](flexgen/apps/data_wrangle).
 
+
+
 ## Performance Benchmark
 ### Generation Throughput (token/s)
 The corresponding effective batch sizes are in parentheses. Please see [here](benchmark/batch_size_table.md) for more details.
@@ -86,6 +96,7 @@ The corresponding effective batch sizes are in parentheses. Please see [here](be
 
 How to [reproduce](benchmark/flexgen).
 
+
 ## Roadmap
 We plan to work on the following features.
 
diff --git a/flexgen/apps/task.yaml b/flexgen/apps/task.yaml
new file mode 100644
index 00000000..dc4ed8fe
--- /dev/null
+++ b/flexgen/apps/task.yaml
@@ -0,0 +1,27 @@
+# benchmark.yaml
+# A SkyPilot job definition for benchmarking FlexGen.
+
+resources:
+  accelerators: T4:1
+  instance_type: n1-highmem-32
+
+setup: |
+  # Install Latest CUDA
+  wget -q https://developer.download.nvidia.com/compute/cuda/11.6.0/local_installers/cuda_11.6.0_510.39.01_linux.run
+  echo Installing CUDA 11.6.0
+  sudo sh cuda_11.6.0_510.39.01_linux.run --silent --toolkit
+  
+  conda create -y -n flexgen python=3.9
+  conda activate flexgen
+  pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu116
+  pip install crfm-helm==0.2.1
+
+  # Install flexgen
+  git clone https://github.com/FMInference/FlexGen.git || true
+  cd FlexGen
+  pip install -e .
+
+run: |
+  conda activate flexgen
+  # python3 -m flexgen.flex_opt --model facebook/opt-1.3b
+  python3 -m flexgen.apps.helm_run --description mmlu:model=text,subject=abstract_algebra,data_augmentation=canonical --pad-to-seq-len 512 --model facebook/opt-30b --percent 20 80 0 100 0 100 --gpu-batch-size 48 --num-gpu-batches 3 --max-eval-instance 100

From 5679f855b533b0976ff962b07b48d434bdae2153 Mon Sep 17 00:00:00 2001
From: Zhanghao Wu <zhanghao.wu@outlook.com>
Date: Tue, 7 Mar 2023 14:11:51 -0800
Subject: [PATCH 2/8] Add more description

---
 flexgen/apps/README.md | 6 ++++++
 flexgen/apps/task.yaml | 2 ++
 2 files changed, 8 insertions(+)

diff --git a/flexgen/apps/README.md b/flexgen/apps/README.md
index 2d44135d..841b6469 100644
--- a/flexgen/apps/README.md
+++ b/flexgen/apps/README.md
@@ -12,3 +12,9 @@ Run Massive Multitask Language Understanding (MMLU) scenario.
 ```
 python3 helm_run.py --description mmlu:model=text,subject=abstract_algebra,data_augmentation=canonical --pad-to-seq-len 512 --model facebook/opt-30b --percent 20 80 0 100 0 100 --gpu-batch-size 48 --num-gpu-batches 3 --max-eval-instance 100
 ```
+
+### Run on any cloud with SkyPilot
+Run FlexGen benchmark on any cloud with [SkyPilot](http://skypilot.co).
+```
+sky launch -c flexgen task.yaml
+```
diff --git a/flexgen/apps/task.yaml b/flexgen/apps/task.yaml
index dc4ed8fe..05d355a4 100644
--- a/flexgen/apps/task.yaml
+++ b/flexgen/apps/task.yaml
@@ -11,6 +11,7 @@ setup: |
   echo Installing CUDA 11.6.0
   sudo sh cuda_11.6.0_510.39.01_linux.run --silent --toolkit
   
+  # Create conda environment
   conda create -y -n flexgen python=3.9
   conda activate flexgen
   pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu116
@@ -22,6 +23,7 @@ setup: |
   pip install -e .
 
 run: |
+  # Run any FlexGen command
   conda activate flexgen
   # python3 -m flexgen.flex_opt --model facebook/opt-1.3b
   python3 -m flexgen.apps.helm_run --description mmlu:model=text,subject=abstract_algebra,data_augmentation=canonical --pad-to-seq-len 512 --model facebook/opt-30b --percent 20 80 0 100 0 100 --gpu-batch-size 48 --num-gpu-batches 3 --max-eval-instance 100

From 421e5ae7ae38a0a80fd2bdd23123c8f62bda52cd Mon Sep 17 00:00:00 2001
From: Zhanghao Wu <zhanghao.wu@outlook.com>
Date: Tue, 7 Mar 2023 14:35:32 -0800
Subject: [PATCH 3/8] Make the setup detached

---
 README.md              | 2 +-
 flexgen/apps/README.md | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index ab92e231..ed50e62a 100644
--- a/README.md
+++ b/README.md
@@ -70,7 +70,7 @@ Note that only a subset of HELM scenarios is tested. See more tested scenarios [
 FlexGen benchmark can be launched with [SkyPilot](http://skypilot.co), a tool for launching ML jobs on any cloud.
 You can use a single command below to automatically launch the benchmark on any cloud with SkyPilot, after you setup your cloud account locally (check how to setup SkyPilot [here](https://skypilot.readthedocs.io/en/latest/getting-started/installation.html)).
 ```
-sky launch -c flexgen flexgen/apps/task.yaml
+sky launch -c flexgen --detach-setup flexgen/apps/task.yaml
 ```
 Note that you can replace the run section with any FlexGen command. You can log into the cluster running the job with `ssh flexgen` and terminate the cluster with `sky down flexgen`.
 
diff --git a/flexgen/apps/README.md b/flexgen/apps/README.md
index 841b6469..e74621f3 100644
--- a/flexgen/apps/README.md
+++ b/flexgen/apps/README.md
@@ -16,5 +16,5 @@ python3 helm_run.py --description mmlu:model=text,subject=abstract_algebra,data_
 ### Run on any cloud with SkyPilot
 Run FlexGen benchmark on any cloud with [SkyPilot](http://skypilot.co).
 ```
-sky launch -c flexgen task.yaml
+sky launch -c flexgen --detach-setup task.yaml
 ```

From 8ea3cde087ae96a86fdb6320bfb586d9b4cdf8d5 Mon Sep 17 00:00:00 2001
From: Zhanghao Wu <zhanghao.wu@outlook.com>
Date: Tue, 7 Mar 2023 22:36:52 -0800
Subject: [PATCH 4/8] Add comment for other clouds

---
 flexgen/apps/task.yaml | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/flexgen/apps/task.yaml b/flexgen/apps/task.yaml
index 05d355a4..388ed3f3 100644
--- a/flexgen/apps/task.yaml
+++ b/flexgen/apps/task.yaml
@@ -1,9 +1,12 @@
 # benchmark.yaml
 # A SkyPilot job definition for benchmarking FlexGen.
 
+# Specify the resources required for this job.
 resources:
   accelerators: T4:1
-  instance_type: n1-highmem-32
+  instance_type: n1-highmem-32 # On GCP with 1 T4 GPU and more than 200GB of RAM.
+  # instance_type: g4dn.16xlarge # On AWS with 1 T4 GPU and more than 200GB of RAM.
+  # Azure does not support T4 GPUs with more than 200GB of RAM.
 
 setup: |
   # Install Latest CUDA
@@ -26,4 +29,7 @@ run: |
   # Run any FlexGen command
   conda activate flexgen
   # python3 -m flexgen.flex_opt --model facebook/opt-1.3b
-  python3 -m flexgen.apps.helm_run --description mmlu:model=text,subject=abstract_algebra,data_augmentation=canonical --pad-to-seq-len 512 --model facebook/opt-30b --percent 20 80 0 100 0 100 --gpu-batch-size 48 --num-gpu-batches 3 --max-eval-instance 100
+  python3 -m flexgen.apps.helm_run \
+          --description mmlu:model=text,subject=abstract_algebra,data_augmentation=canonical \
+          --pad-to-seq-len 512 --model facebook/opt-30b --percent 20 80 0 100 0 100 \
+          --gpu-batch-size 48 --num-gpu-batches 3 --max-eval-instance 100

From bef90a9ca176db2cf27b25a945268c04fcfeff7b Mon Sep 17 00:00:00 2001
From: Zhanghao Wu <zhanghao.wu@outlook.com>
Date: Wed, 8 Mar 2023 18:02:58 -0800
Subject: [PATCH 5/8] Update README.md

Co-authored-by: Zongheng Yang <zongheng.y@gmail.com>
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index ed50e62a..b45257fb 100644
--- a/README.md
+++ b/README.md
@@ -67,7 +67,7 @@ python3 -m flexgen.apps.helm_run --description mmlu:model=text,subject=abstract_
 Note that only a subset of HELM scenarios is tested. See more tested scenarios [here](flexgen/apps/helm_passed_30b.sh).
 
 ### Run FlexGen on any cloud with SkyPilot
-FlexGen benchmark can be launched with [SkyPilot](http://skypilot.co), a tool for launching ML jobs on any cloud.
+FlexGen benchmark can be launched with [SkyPilot](https://github.com/skypilot-org/skypilot), a tool for launching ML jobs on any cloud.
 You can use a single command below to automatically launch the benchmark on any cloud with SkyPilot, after you setup your cloud account locally (check how to setup SkyPilot [here](https://skypilot.readthedocs.io/en/latest/getting-started/installation.html)).
 ```
 sky launch -c flexgen --detach-setup flexgen/apps/task.yaml

From f9c227f6ae6a4b8951ac2b2ebf0be2cc1044dba0 Mon Sep 17 00:00:00 2001
From: Zhanghao Wu <zhanghao.wu@outlook.com>
Date: Wed, 8 Mar 2023 18:31:18 -0800
Subject: [PATCH 6/8] address comments

---
 README.md                                 | 17 ++++++++++++-----
 flexgen/apps/README.md                    | 13 +++++++++++--
 flexgen/apps/{task.yaml => skypilot.yaml} |  6 ++++--
 3 files changed, 27 insertions(+), 9 deletions(-)
 rename flexgen/apps/{task.yaml => skypilot.yaml} (84%)

diff --git a/README.md b/README.md
index ed50e62a..802df8d8 100644
--- a/README.md
+++ b/README.md
@@ -66,13 +66,20 @@ python3 -m flexgen.apps.helm_run --description mmlu:model=text,subject=abstract_
 ```
 Note that only a subset of HELM scenarios is tested. See more tested scenarios [here](flexgen/apps/helm_passed_30b.sh).
 
-### Run FlexGen on any cloud with SkyPilot
-FlexGen benchmark can be launched with [SkyPilot](http://skypilot.co), a tool for launching ML jobs on any cloud.
-You can use a single command below to automatically launch the benchmark on any cloud with SkyPilot, after you setup your cloud account locally (check how to setup SkyPilot [here](https://skypilot.readthedocs.io/en/latest/getting-started/installation.html)).
+### Run FlexGen on Any Cloud with SkyPilot
+FlexGen benchmark can be launched with [SkyPilot](https://github.com/skypilot-org/skypilot), a tool for launching ML jobs on any cloud.
+First, install SkyPilot and check you have some cloud credentials ([docs](https://skypilot.readthedocs.io/en/latest/getting-started/installation.html)):
+```bash
+pip install "skypilot[aws,gcp,azure,lambda]"  # pick your clouds
+sky check
 ```
-sky launch -c flexgen --detach-setup flexgen/apps/task.yaml
+You can now use a single command to automatically launch the benchmark on any cloud:
+```bash
+sky launch -c flexgen --detach-setup flexgen/apps/skypilot.yaml
 ```
-Note that you can replace the run section with any FlexGen command. You can log into the cluster running the job with `ssh flexgen` and terminate the cluster with `sky down flexgen`.
+You can then log into the cluster running the job with `ssh flexgen` for monitoring. Once the job has finished, you can terminate the cluster with `sky down flexgen`, or pass the `--down` flag to the command above to have the cluster terminate itself automatically.
+
+To run any other FlexGen command, you can edit [`flexgen/apps/skypilot.yaml`](./flexgen/apps/skypilot.yaml) and replace the `run` section.
 
 ### Data Wrangling
 You can run the examples in this paper, ['Can Foundation Models Wrangle Your Data?'](https://arxiv.org/abs/2205.09911), by following the instructions [here](flexgen/apps/data_wrangle).
diff --git a/flexgen/apps/README.md b/flexgen/apps/README.md
index e74621f3..5744da67 100644
--- a/flexgen/apps/README.md
+++ b/flexgen/apps/README.md
@@ -14,7 +14,16 @@ python3 helm_run.py --description mmlu:model=text,subject=abstract_algebra,data_
 ```
 
 ### Run on any cloud with SkyPilot
-Run FlexGen benchmark on any cloud with [SkyPilot](http://skypilot.co).
+FlexGen benchmark can be launched with [SkyPilot](https://github.com/skypilot-org/skypilot), a tool for launching ML jobs on any cloud.
+First, install SkyPilot and check you have some cloud credentials ([docs](https://skypilot.readthedocs.io/en/latest/getting-started/installation.html)):
+```bash
+pip install "skypilot[aws,gcp,azure,lambda]"  # pick your clouds
+sky check
 ```
-sky launch -c flexgen --detach-setup task.yaml
+You can now use a single command to automatically launch the benchmark on any cloud:
+```bash
+sky launch -c flexgen --detach-setup skypilot.yaml
 ```
+You can then log into the cluster running the job with `ssh flexgen` for monitoring. Once the job has finished, you can terminate the cluster with `sky down flexgen`, or pass the `--down` flag to the command above to have the cluster terminate itself automatically.
+
+To run any other FlexGen command, you can edit [`skypilot.yaml`](skypilot.yaml) and replace the `run` section.
diff --git a/flexgen/apps/task.yaml b/flexgen/apps/skypilot.yaml
similarity index 84%
rename from flexgen/apps/task.yaml
rename to flexgen/apps/skypilot.yaml
index 388ed3f3..d76607c1 100644
--- a/flexgen/apps/task.yaml
+++ b/flexgen/apps/skypilot.yaml
@@ -1,9 +1,11 @@
-# benchmark.yaml
 # A SkyPilot job definition for benchmarking FlexGen.
+# References:
+#   https://skypilot.readthedocs.io/en/latest/getting-started/quickstart.html
+#   https://skypilot.readthedocs.io/en/latest/reference/yaml-spec.html
 
 # Specify the resources required for this job.
 resources:
-  accelerators: T4:1
+  accelerators: T4:1  # Can replace with other GPU type and count, see `sky show-gpus`.
   instance_type: n1-highmem-32 # On GCP with 1 T4 GPU and more than 200GB of RAM.
   # instance_type: g4dn.16xlarge # On AWS with 1 T4 GPU and more than 200GB of RAM.
   # Azure does not support T4 GPUs with more than 200GB of RAM.

From 22340cf5dbe9d411ba152d082a4329596f8bdec1 Mon Sep 17 00:00:00 2001
From: Zhanghao Wu <zhanghao.wu@outlook.com>
Date: Wed, 8 Mar 2023 23:26:21 -0800
Subject: [PATCH 7/8] Adopt changes

---
 README.md              | 2 +-
 flexgen/apps/README.md | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 4e9a8450..f4a5d384 100644
--- a/README.md
+++ b/README.md
@@ -73,7 +73,7 @@ First, install SkyPilot and check you have some cloud credentials ([docs](https:
 pip install "skypilot[aws,gcp,azure,lambda]"  # pick your clouds
 sky check
 ```
-You can now use a single command to automatically launch the benchmark on any cloud:
+You can now launch the benchmark on any cloud with a single command; SkyPilot automatically finds a region with availability for the requested GPUs, trying the cheapest options first:
 ```bash
 sky launch -c flexgen --detach-setup flexgen/apps/skypilot.yaml
 ```
diff --git a/flexgen/apps/README.md b/flexgen/apps/README.md
index aebcb116..c2ca1879 100644
--- a/flexgen/apps/README.md
+++ b/flexgen/apps/README.md
@@ -33,7 +33,7 @@ First, install SkyPilot and check you have some cloud credentials ([docs](https:
 pip install "skypilot[aws,gcp,azure,lambda]"  # pick your clouds
 sky check
 ```
-You can now use a single command to automatically launch the benchmark on any cloud:
+You can now launch the benchmark on any cloud with a single command; SkyPilot automatically finds a region with availability for the requested GPUs, trying the cheapest options first:
 ```bash
 sky launch -c flexgen --detach-setup skypilot.yaml
 ```

From 5f64c9ad118ad7e6546a85b5bbd48e805d3aa5d7 Mon Sep 17 00:00:00 2001
From: Zhanghao Wu <zhanghao.wu@outlook.com>
Date: Tue, 14 Mar 2023 22:08:22 -0700
Subject: [PATCH 8/8] Change to memory specification instead

---
 flexgen/apps/skypilot.yaml | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/flexgen/apps/skypilot.yaml b/flexgen/apps/skypilot.yaml
index d76607c1..24630bbc 100644
--- a/flexgen/apps/skypilot.yaml
+++ b/flexgen/apps/skypilot.yaml
@@ -6,9 +6,7 @@
 # Specify the resources required for this job.
 resources:
   accelerators: T4:1  # Can replace with other GPU type and count, see `sky show-gpus`.
-  instance_type: n1-highmem-32 # On GCP with 1 T4 GPU and more than 200GB of RAM.
-  # instance_type: g4dn.16xlarge # On AWS with 1 T4 GPU and more than 200GB of RAM.
-  # Azure does not support T4 GPUs with more than 200GB of RAM.
+  memory: 200+  # requires at least 200GB of memory
 
 setup: |
   # Install Latest CUDA