test-pytorch-xla-tpu-tgi-jetstream.yml
name: Optimum TPU / Test TGI on TPU / Jetstream Pytorch

on:
  push:
    branches: [ main, ci-ephemeral-tpu ]
  pull_request:
    branches: [ main ]
    paths:
      - "text-generation-inference/**"
  # This can be used to trigger the workflow from the web interface
  workflow_dispatch:

# Cancel any in-flight run for the same branch when a new one starts.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  do-the-job:
    name: Run TGI tests - Jetstream Pytorch
    runs-on:
      group: gcp-ct5lp-hightpu-8t
    container:
      image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla:r2.4.0_3.10_tpuvm
      options: --shm-size "16gb" --ipc host --privileged ${{ vars.V5_LITEPOD_8_ENV }}
    env:
      PJRT_DEVICE: TPU
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Build and test TGI server
        run: |
          make test_installs jetstream_requirements tgi_server
          # Install the freshly built text-generation-server wheel.
          find text-generation-inference/ -name "text_generation_server-*whl" -exec python -m pip install {} \;
          # Run the slow Jetstream PyTorch greedy-decoding tests, one model family at a time.
          JETSTREAM_PT=1 HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_TPU_CI }} \
            python -m pytest -sv text-generation-inference/tests -k "jetstream and greedy and Meta-Llama" --runslow
          JETSTREAM_PT=1 HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_TPU_CI }} \
            python -m pytest -sv text-generation-inference/tests -k "jetstream and greedy and gemma" --runslow
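
# For local debugging, the same sequence can be run by hand on a TPU host.
# A minimal sketch, assuming this workflow lives in the huggingface/optimum-tpu
# repository and that you have a Hugging Face token with access to the gated
# Meta-Llama and gemma checkpoints; <your-token> is a placeholder, and the
# combined -k filter (dropping the per-model clause) is a simplification:
#
#   git clone https://github.com/huggingface/optimum-tpu.git
#   cd optimum-tpu
#   make test_installs jetstream_requirements tgi_server
#   find text-generation-inference/ -name "text_generation_server-*whl" -exec python -m pip install {} \;
#   PJRT_DEVICE=TPU JETSTREAM_PT=1 HF_TOKEN=<your-token> \
#     python -m pytest -sv text-generation-inference/tests -k "jetstream and greedy" --runslow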