From b49ffd9cd5b67d9e385bd2289140c7c4dbd825b3 Mon Sep 17 00:00:00 2001 From: Zhanghao Wu Date: Tue, 7 Mar 2023 14:04:47 -0800 Subject: [PATCH 1/8] Add skypilot examples --- README.md | 11 +++++++++++ flexgen/apps/task.yaml | 27 +++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 flexgen/apps/task.yaml diff --git a/README.md b/README.md index 9655b2cb..ab92e231 100644 --- a/README.md +++ b/README.md @@ -66,9 +66,19 @@ python3 -m flexgen.apps.helm_run --description mmlu:model=text,subject=abstract_ ``` Note that only a subset of HELM scenarios is tested. See more tested scenarios [here](flexgen/apps/helm_passed_30b.sh). +### Run FlexGen on any cloud with SkyPilot +FlexGen benchmark can be launched with [SkyPilot](http://skypilot.co), a tool for launching ML jobs on any cloud. +You can use a single command below to automatically launch the benchmark on any cloud with SkyPilot, after you setup your cloud account locally (check how to setup SkyPilot [here](https://skypilot.readthedocs.io/en/latest/getting-started/installation.html)). +``` +sky launch -c flexgen flexgen/apps/task.yaml +``` +Note that you can replace the run section with any FlexGen command. You can log into the cluster running the job with `ssh flexgen` and terminate the cluster with `sky down flexgen`. + ### Data Wrangling You can run the examples in this paper, ['Can Foundation Models Wrangle Your Data?'](https://arxiv.org/abs/2205.09911), by following the instructions [here](flexgen/apps/data_wrangle). + + ## Performance Benchmark ### Generation Throughput (token/s) The corresponding effective batch sizes are in parentheses. Please see [here](benchmark/batch_size_table.md) for more details. @@ -86,6 +96,7 @@ The corresponding effective batch sizes are in parentheses. Please see [here](be How to [reproduce](benchmark/flexgen). + ## Roadmap We plan to work on the following features. 
diff --git a/flexgen/apps/task.yaml b/flexgen/apps/task.yaml new file mode 100644 index 00000000..dc4ed8fe --- /dev/null +++ b/flexgen/apps/task.yaml @@ -0,0 +1,27 @@ +# benchmark.yaml +# A SkyPilot job definition for benchmarking FlexGen. + +resources: + accelerators: T4:1 + instance_type: n1-highmem-32 + +setup: | + # Install Latest CUDA + wget -q https://developer.download.nvidia.com/compute/cuda/11.6.0/local_installers/cuda_11.6.0_510.39.01_linux.run + echo Installing CUDA 11.6.0 + sudo sh cuda_11.6.0_510.39.01_linux.run --silent --toolkit + + conda create -y -n flexgen python=3.9 + conda activate flexgen + pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu116 + pip install crfm-helm==0.2.1 + + # Install flexgen + git clone https://github.com/FMInference/FlexGen.git || true + cd FlexGen + pip install -e . + +run: | + conda activate flexgen + # python3 -m flexgen.flex_opt --model facebook/opt-1.3b + python3 -m flexgen.apps.helm_run --description mmlu:model=text,subject=abstract_algebra,data_augmentation=canonical --pad-to-seq-len 512 --model facebook/opt-30b --percent 20 80 0 100 0 100 --gpu-batch-size 48 --num-gpu-batches 3 --max-eval-instance 100 From 5679f855b533b0976ff962b07b48d434bdae2153 Mon Sep 17 00:00:00 2001 From: Zhanghao Wu Date: Tue, 7 Mar 2023 14:11:51 -0800 Subject: [PATCH 2/8] Add more description --- flexgen/apps/README.md | 6 ++++++ flexgen/apps/task.yaml | 2 ++ 2 files changed, 8 insertions(+) diff --git a/flexgen/apps/README.md b/flexgen/apps/README.md index 2d44135d..841b6469 100644 --- a/flexgen/apps/README.md +++ b/flexgen/apps/README.md @@ -12,3 +12,9 @@ Run Massive Multitask Language Understanding (MMLU) scenario. 
``` python3 helm_run.py --description mmlu:model=text,subject=abstract_algebra,data_augmentation=canonical --pad-to-seq-len 512 --model facebook/opt-30b --percent 20 80 0 100 0 100 --gpu-batch-size 48 --num-gpu-batches 3 --max-eval-instance 100 ``` + +### Run on any cloud with SkyPilot +Run FlexGen benchmark on any cloud with [SkyPilot](http://skypilot.co). +``` +sky launch -c flexgen task.yaml +``` diff --git a/flexgen/apps/task.yaml b/flexgen/apps/task.yaml index dc4ed8fe..05d355a4 100644 --- a/flexgen/apps/task.yaml +++ b/flexgen/apps/task.yaml @@ -11,6 +11,7 @@ setup: | echo Installing CUDA 11.6.0 sudo sh cuda_11.6.0_510.39.01_linux.run --silent --toolkit + # Create conda environment conda create -y -n flexgen python=3.9 conda activate flexgen pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu116 @@ -22,6 +23,7 @@ setup: | pip install -e . run: | + # Run any FlexGen command conda activate flexgen # python3 -m flexgen.flex_opt --model facebook/opt-1.3b python3 -m flexgen.apps.helm_run --description mmlu:model=text,subject=abstract_algebra,data_augmentation=canonical --pad-to-seq-len 512 --model facebook/opt-30b --percent 20 80 0 100 0 100 --gpu-batch-size 48 --num-gpu-batches 3 --max-eval-instance 100 From 421e5ae7ae38a0a80fd2bdd23123c8f62bda52cd Mon Sep 17 00:00:00 2001 From: Zhanghao Wu Date: Tue, 7 Mar 2023 14:35:32 -0800 Subject: [PATCH 3/8] Make the setup detached --- README.md | 2 +- flexgen/apps/README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ab92e231..ed50e62a 100644 --- a/README.md +++ b/README.md @@ -70,7 +70,7 @@ Note that only a subset of HELM scenarios is tested. See more tested scenarios [ FlexGen benchmark can be launched with [SkyPilot](http://skypilot.co), a tool for launching ML jobs on any cloud. 
You can use a single command below to automatically launch the benchmark on any cloud with SkyPilot, after you setup your cloud account locally (check how to setup SkyPilot [here](https://skypilot.readthedocs.io/en/latest/getting-started/installation.html)). ``` -sky launch -c flexgen flexgen/apps/task.yaml +sky launch -c flexgen --detach-setup flexgen/apps/task.yaml ``` Note that you can replace the run section with any FlexGen command. You can log into the cluster running the job with `ssh flexgen` and terminate the cluster with `sky down flexgen`. diff --git a/flexgen/apps/README.md b/flexgen/apps/README.md index 841b6469..e74621f3 100644 --- a/flexgen/apps/README.md +++ b/flexgen/apps/README.md @@ -16,5 +16,5 @@ python3 helm_run.py --description mmlu:model=text,subject=abstract_algebra,data_ ### Run on any cloud with SkyPilot Run FlexGen benchmark on any cloud with [SkyPilot](http://skypilot.co). ``` -sky launch -c flexgen task.yaml +sky launch -c flexgen --detach-setup task.yaml ``` From 8ea3cde087ae96a86fdb6320bfb586d9b4cdf8d5 Mon Sep 17 00:00:00 2001 From: Zhanghao Wu Date: Tue, 7 Mar 2023 22:36:52 -0800 Subject: [PATCH 4/8] Add comment for other clouds --- flexgen/apps/task.yaml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/flexgen/apps/task.yaml b/flexgen/apps/task.yaml index 05d355a4..388ed3f3 100644 --- a/flexgen/apps/task.yaml +++ b/flexgen/apps/task.yaml @@ -1,9 +1,12 @@ # benchmark.yaml # A SkyPilot job definition for benchmarking FlexGen. +# Specify the resources required for this job. resources: accelerators: T4:1 - instance_type: n1-highmem-32 + instance_type: n1-highmem-32 # On GCP with 1 T4 GPU and more than 200GB of RAM. + # instance_type: g4dn.16xlarge # On AWS with 1 T4 GPU and more than 200GB of RAM. + # Azure does not support T4 GPUs with more than 200GB of RAM. 
setup: | # Install Latest CUDA @@ -26,4 +29,7 @@ run: | # Run any FlexGen command conda activate flexgen # python3 -m flexgen.flex_opt --model facebook/opt-1.3b - python3 -m flexgen.apps.helm_run --description mmlu:model=text,subject=abstract_algebra,data_augmentation=canonical --pad-to-seq-len 512 --model facebook/opt-30b --percent 20 80 0 100 0 100 --gpu-batch-size 48 --num-gpu-batches 3 --max-eval-instance 100 + python3 -m flexgen.apps.helm_run \ + --description mmlu:model=text,subject=abstract_algebra,data_augmentation=canonical \ + --pad-to-seq-len 512 --model facebook/opt-30b --percent 20 80 0 100 0 100 \ + --gpu-batch-size 48 --num-gpu-batches 3 --max-eval-instance 100 From bef90a9ca176db2cf27b25a945268c04fcfeff7b Mon Sep 17 00:00:00 2001 From: Zhanghao Wu Date: Wed, 8 Mar 2023 18:02:58 -0800 Subject: [PATCH 5/8] Update README.md Co-authored-by: Zongheng Yang --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ed50e62a..b45257fb 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ python3 -m flexgen.apps.helm_run --description mmlu:model=text,subject=abstract_ Note that only a subset of HELM scenarios is tested. See more tested scenarios [here](flexgen/apps/helm_passed_30b.sh). ### Run FlexGen on any cloud with SkyPilot -FlexGen benchmark can be launched with [SkyPilot](http://skypilot.co), a tool for launching ML jobs on any cloud. +FlexGen benchmark can be launched with [SkyPilot](https://github.com/skypilot-org/skypilot), a tool for launching ML jobs on any cloud. You can use a single command below to automatically launch the benchmark on any cloud with SkyPilot, after you setup your cloud account locally (check how to setup SkyPilot [here](https://skypilot.readthedocs.io/en/latest/getting-started/installation.html)). 
``` sky launch -c flexgen --detach-setup flexgen/apps/task.yaml From f9c227f6ae6a4b8951ac2b2ebf0be2cc1044dba0 Mon Sep 17 00:00:00 2001 From: Zhanghao Wu Date: Wed, 8 Mar 2023 18:31:18 -0800 Subject: [PATCH 6/8] address comments --- README.md | 17 ++++++++++++----- flexgen/apps/README.md | 13 +++++++++++-- flexgen/apps/{task.yaml => skypilot.yaml} | 6 ++++-- 3 files changed, 27 insertions(+), 9 deletions(-) rename flexgen/apps/{task.yaml => skypilot.yaml} (84%) diff --git a/README.md b/README.md index ed50e62a..802df8d8 100644 --- a/README.md +++ b/README.md @@ -66,13 +66,20 @@ python3 -m flexgen.apps.helm_run --description mmlu:model=text,subject=abstract_ ``` Note that only a subset of HELM scenarios is tested. See more tested scenarios [here](flexgen/apps/helm_passed_30b.sh). -### Run FlexGen on any cloud with SkyPilot -FlexGen benchmark can be launched with [SkyPilot](http://skypilot.co), a tool for launching ML jobs on any cloud. -You can use a single command below to automatically launch the benchmark on any cloud with SkyPilot, after you setup your cloud account locally (check how to setup SkyPilot [here](https://skypilot.readthedocs.io/en/latest/getting-started/installation.html)). +### Run FlexGen on Any Cloud with SkyPilot +FlexGen benchmark can be launched with [SkyPilot](https://github.com/skypilot-org/skypilot), a tool for launching ML jobs on any cloud. +First, install SkyPilot and check you have some cloud credentials ([docs](https://skypilot.readthedocs.io/en/latest/getting-started/installation.html)): +```bash +pip install "skypilot[aws,gcp,azure,lambda]" # pick your clouds +sky check ``` -sky launch -c flexgen --detach-setup flexgen/apps/task.yaml +You can now use a single command to automatically launch the benchmark on any cloud: +```bash +sky launch -c flexgen --detach-setup flexgen/apps/skypilot.yaml ``` -Note that you can replace the run section with any FlexGen command. 
You can log into the cluster running the job with `ssh flexgen` and terminate the cluster with `sky down flexgen`. +You can then log into the cluster running the job with `ssh flexgen` for monitoring. Once the job has finished, you can terminate the cluster with `sky down flexgen` or pass the `--down` flag to the command above to have the cluster terminate itself automatically. + +To run any other FlexGen command, you can edit [`flexgen/apps/skypilot.yaml`](./flexgen/apps/skypilot.yaml) and replace the `run` section. ### Data Wrangling You can run the examples in this paper, ['Can Foundation Models Wrangle Your Data?'](https://arxiv.org/abs/2205.09911), by following the instructions [here](flexgen/apps/data_wrangle). diff --git a/flexgen/apps/README.md b/flexgen/apps/README.md index e74621f3..5744da67 100644 --- a/flexgen/apps/README.md +++ b/flexgen/apps/README.md @@ -14,7 +14,16 @@ python3 helm_run.py --description mmlu:model=text,subject=abstract_algebra,data_ ``` ### Run on any cloud with SkyPilot -Run FlexGen benchmark on any cloud with [SkyPilot](http://skypilot.co). +FlexGen benchmark can be launched with [SkyPilot](https://github.com/skypilot-org/skypilot), a tool for launching ML jobs on any cloud. +First, install SkyPilot and check you have some cloud credentials ([docs](https://skypilot.readthedocs.io/en/latest/getting-started/installation.html)): +```bash +pip install "skypilot[aws,gcp,azure,lambda]" # pick your clouds +sky check ``` -sky launch -c flexgen --detach-setup task.yaml +You can now use a single command to automatically launch the benchmark on any cloud: +```bash +sky launch -c flexgen --detach-setup skypilot.yaml ``` +You can then log into the cluster running the job with `ssh flexgen` for monitoring. Once the job has finished, you can terminate the cluster with `sky down flexgen` or pass the `--down` flag to the command above to have the cluster terminate itself automatically. 
+ +To run any other FlexGen command, you can edit [`skypilot.yaml`](skypilot.yaml) and replace the `run` section. diff --git a/flexgen/apps/task.yaml b/flexgen/apps/skypilot.yaml similarity index 84% rename from flexgen/apps/task.yaml rename to flexgen/apps/skypilot.yaml index 388ed3f3..d76607c1 100644 --- a/flexgen/apps/task.yaml +++ b/flexgen/apps/skypilot.yaml @@ -1,9 +1,11 @@ -# benchmark.yaml # A SkyPilot job definition for benchmarking FlexGen. +# References: +# https://skypilot.readthedocs.io/en/latest/getting-started/quickstart.html +# https://skypilot.readthedocs.io/en/latest/reference/yaml-spec.html # Specify the resources required for this job. resources: - accelerators: T4:1 + accelerators: T4:1 # Can replace with other GPU type and count, see `sky show-gpus`. instance_type: n1-highmem-32 # On GCP with 1 T4 GPU and more than 200GB of RAM. # instance_type: g4dn.16xlarge # On AWS with 1 T4 GPU and more than 200GB of RAM. # Azure does not support T4 GPUs with more than 200GB of RAM. 
From 22340cf5dbe9d411ba152d082a4329596f8bdec1 Mon Sep 17 00:00:00 2001 From: Zhanghao Wu Date: Wed, 8 Mar 2023 23:26:21 -0800 Subject: [PATCH 7/8] Adopt changes --- README.md | 2 +- flexgen/apps/README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 4e9a8450..f4a5d384 100644 --- a/README.md +++ b/README.md @@ -73,7 +73,7 @@ First, install SkyPilot and check you have some cloud credentials ([docs](https: pip install "skypilot[aws,gcp,azure,lambda]" # pick your clouds sky check ``` -You can now use a single command to automatically launch the benchmark on any cloud: +You can now use a single command to launch the benchmark on any cloud, which automatically finds a region (in the cheapest-price order) with availability for the requested GPUs: ```bash sky launch -c flexgen --detach-setup flexgen/apps/skypilot.yaml ``` diff --git a/flexgen/apps/README.md b/flexgen/apps/README.md index aebcb116..c2ca1879 100644 --- a/flexgen/apps/README.md +++ b/flexgen/apps/README.md @@ -33,7 +33,7 @@ First, install SkyPilot and check you have some cloud credentials ([docs](https: pip install "skypilot[aws,gcp,azure,lambda]" # pick your clouds sky check ``` -You can now use a single command to automatically launch the benchmark on any cloud: +You can now use a single command to launch the benchmark on any cloud, which automatically finds a region (in the cheapest-price order) with availability for the requested GPUs: ```bash sky launch -c flexgen --detach-setup skypilot.yaml ``` From 5f64c9ad118ad7e6546a85b5bbd48e805d3aa5d7 Mon Sep 17 00:00:00 2001 From: Zhanghao Wu Date: Tue, 14 Mar 2023 22:08:22 -0700 Subject: [PATCH 8/8] Change to memory specification instead --- flexgen/apps/skypilot.yaml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/flexgen/apps/skypilot.yaml b/flexgen/apps/skypilot.yaml index d76607c1..24630bbc 100644 --- a/flexgen/apps/skypilot.yaml +++ b/flexgen/apps/skypilot.yaml @@ -6,9 +6,7 @@ # 
Specify the resources required for this job. resources: accelerators: T4:1 # Can replace with other GPU type and count, see `sky show-gpus`. - instance_type: n1-highmem-32 # On GCP with 1 T4 GPU and more than 200GB of RAM. - # instance_type: g4dn.16xlarge # On AWS with 1 T4 GPU and more than 200GB of RAM. - # Azure does not support T4 GPUs with more than 200GB of RAM. + memory: 200+ # requires at least 200GB of memory setup: | # Install Latest CUDA