Support Py-TXI (TGI and TEI) (#147)

huggingface · Mar 8, 2024 · fd32ad5
1 parent a3cd823
commit fd32ad5
Show file tree

Hide file tree

Showing 20 changed files with 391 additions and 249 deletions.
diff --git a/.github/workflows/test_cli_cpu_py_tgi.yaml b/.github/workflows/test_cli_cpu_py_txi.yaml
@@ -1,4 +1,4 @@
-name: CLI CPU Py-TGI Tests
+name: CLI CPU Py-TXI Tests
 
 on:
   workflow_dispatch:
@@ -12,7 +12,7 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  run_cli_cpu_py_tgi_tests:
+  run_cli_cpu_py_txi_tests:
     runs-on: ubuntu-latest
     steps:
       - name: Free disk space
@@ -35,10 +35,13 @@ jobs:
       - name: Install requirements
         run: |
           pip install --upgrade pip
-          pip install -e .[testing,py-tgi]
+          pip install -e .[testing,py-txi]
 
       - name: Pull TGI docker image
         run: docker pull ghcr.io/huggingface/text-generation-inference:latest
 
+      - name: Pull TEI docker image
+        run: docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-latest
+
       - name: Run tests
-        run: pytest -k "cli and cpu and py_tgi"
+        run: pytest -k "cli and cpu and py_txi"
diff --git a/examples/tei_bert.yaml b/examples/tei_bert.yaml
@@ -0,0 +1,36 @@
+defaults:
+  - backend: py-txi
+  - launcher: inline # default launcher
+  - benchmark: inference # default benchmark
+  - experiment # inheriting experiment schema
+  - _self_ # for hydra 1.1 compatibility
+  - override hydra/job_logging: colorlog # colorful logging
+  - override hydra/hydra_logging: colorlog # colorful logging
+
+experiment_name: tei_bert
+
+backend:
+  device: cpu
+  pooling: cls
+  model: bert-base-uncased
+
+benchmark:
+  input_shapes:
+    batch_size: 64
+    sequence_length: 128
+
+# hydra/cli specific settings
+hydra:
+  run:
+    # where to store run results
+    dir: runs/${experiment_name}
+  sweep:
+    # where to store sweep results
+    dir: sweeps/${experiment_name}
+  job:
+    # change working directory to the run directory
+    chdir: true
+    env_set:
+      # set environment variable OVERRIDE_BENCHMARKS to 1
+      # to not skip benchmarks that have been run before
+      OVERRIDE_BENCHMARKS: 1
diff --git a/examples/tgi_llama.yaml b/examples/tgi_llama.yaml
@@ -1,7 +1,7 @@
 defaults:
-  - backend: text-generation-inference # default backend
-  - benchmark: inference # default benchmark
+  - backend: py-txi
   - launcher: inline # default launcher
+  - benchmark: inference # default benchmark
   - experiment # inheriting experiment schema
   - _self_ # for hydra 1.1 compatibility
   - override hydra/job_logging: colorlog # colorful logging
@@ -10,18 +10,16 @@ defaults:
 experiment_name: tgi_llama
 
 backend:
-  device: cuda
+  device: cpu
   device_ids: 0,1
-  device_map: true
-  model: TheBloke/Llama-2-7B-AWQ
-  quantization_scheme: awq
-  sharded: false
+  no_weights: true
+  model: NousResearch/Nous-Hermes-llama-2-7b
 
 benchmark:
   input_shapes:
-    batch_size: 1
+    batch_size: 4
     sequence_length: 256
-  new_tokens: 1000
+  new_tokens: 100
 
 # hydra/cli specific settings
 hydra:

diff --git a/optimum_benchmark/backends/py_tgi/backend.py b/optimum_benchmark/backends/py_tgi/backend.py
diff --git a/optimum_benchmark/backends/py_tgi/config.py b/optimum_benchmark/backends/py_tgi/config.py
diff --git a/optimum_benchmark/backends/py_tgi/__init__.py b/optimum_benchmark/backends/py_txi/__init__.py