name: Optimum TPU / Test TGI on TPU / Integration Tests

on:
  push:
  pull_request:
    branches: [ main ]
    paths:
      - "text-generation-inference/**"
  workflow_dispatch:

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
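# Note: head_ref groups runs per pull request, so superseded PR runs are
# cancelled; direct pushes have no head_ref and fall back to the unique
# run_id, so runs on main are never cancelled.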

jobs:
  integration-tests:
    name: Run TGI Integration Tests
    runs-on:
      group: gcp-ct5lp-hightpu-8t

    env:
      PJRT_DEVICE: TPU
      HF_HUB_CACHE: /mnt/hf_cache/cache_huggingface
      HF_TOKEN: ${{ secrets.HF_TOKEN_OPTIMUM_TPU_CI }}
      V5_LITEPOD_8_ENV: ${{ vars.V5_LITEPOD_8_ENV }}
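      # PJRT_DEVICE selects the TPU backend for PyTorch/XLA, and HF_HUB_CACHE
      # points at the shared model cache mounted into the container below.
      # V5_LITEPOD_8_ENV is a repository variable assumed to expand to extra
      # `docker run` flags for the v5e litepod-8 runner (see its use below).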

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # - name: Install docker
      #   run: |
      #     apt-get update -y
      #     apt-get install -y docker.io
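
      # Assumption: the `tpu-tgi` Makefile target builds the
      # huggingface/optimum-tpu:latest image that the Debug step runs below.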
      - name: Build TGI Docker Image
        run: |
          make tpu-tgi

      # - name: Run integration tests
      #   run: |
      #     make tgi_docker_test

      - name: Debug
        run: |
          # Start the TGI container in the background (-d already detaches it,
          # so no trailing `&` is needed)
          docker run -d --name tgi-tests-gpt2 \
            -e LOG_LEVEL=info,text_generation_router,text_generation_launcher=debug \
            -e HF_HUB_ENABLE_HF_TRANSFER=0 \
            -e MAX_BATCH_SIZE=4 \
            -e JETSTREAM_PT_DISABLE=1 \
            -e SKIP_WARMUP=1 \
            -e MODEL_ID=openai-community/gpt2 \
            ${{ vars.V5_LITEPOD_8_ENV }} \
            -e HF_SEQUENCE_LENGTH=1024 \
            -v /mnt/hf_cache:/mnt/cache \
            --shm-size 16G \
            --privileged \
            --ipc host \
            --network host \
            huggingface/optimum-tpu:latest \
            --max-input-length 512 \
            --max-total-tokens 1024 \
            --max-batch-prefill-tokens 512 \
            --max-batch-total-tokens 1024
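          # --privileged, --ipc host and --network host are assumed to be what
          # gives the container access to the host TPU devices and lets it
          # serve directly on host port 80. The launcher limits match
          # HF_SEQUENCE_LENGTH=1024: up to 512 input tokens within a
          # 1024-token total budget per request.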

          # Give the container 10 seconds to start up
          sleep 10

          # Stream the container logs while the server is starting
          docker logs -f tgi-tests-gpt2 &
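
          # (Sketch, not in the original: rather than the fixed 5-minute sleep
          # below, readiness could be polled on TGI's /health endpoint, e.g.
          # `until curl -fs 0.0.0.0:80/health; do sleep 10; done`.)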
          # Wait for the TGI server to finish loading the model, then send a
          # test generation request
          sleep 300 &&
          curl --max-time 30 0.0.0.0:80/generate \
            -X POST \
            -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":20}}' \
            -H 'Content-Type: application/json'
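
      # Sketch (not in the original workflow): remove the container even if the
      # request above fails, so the self-hosted runner stays clean. The step
      # name and commands are assumptions.
      - name: Cleanup
        if: always()
        run: |
          docker rm -f tgi-tests-gpt2 || true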