name: Optimum TPU / Test TGI on TPU / Integration Tests

on:
  push:
  pull_request:
    branches: [ main ]
    paths:
      - "text-generation-inference/**"
  workflow_dispatch:

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
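# One run per branch/PR at a time: github.head_ref is set on pull requests,
# so a newer push to the same branch cancels the in-flight run; pushes to
# main fall back to the unique run_id and are never cancelled.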
jobs:
  integration-tests:
    name: Run TGI Integration Tests
    runs-on:
      group: gcp-ct5lp-hightpu-8t
    env:
      PJRT_DEVICE: TPU
      HF_HUB_CACHE: /mnt/hf_cache/cache_huggingface
      HF_TOKEN: ${{ secrets.HF_TOKEN_OPTIMUM_TPU_CI }}
      V5_LITEPOD_8_ENV: ${{ vars.V5_LITEPOD_8_ENV }}
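      # V5_LITEPOD_8_ENV is assumed to hold extra `docker run` flags for the
      # v5 litepod-8 runner (e.g. TPU topology `-e` variables); it is spliced
      # verbatim into the docker invocation in the Debug step below.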
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # - name: Install docker
      #   run: |
      #     apt-get update -y
      #     apt-get install -y docker.io

      - name: Install Python
        run: |
          apt-get update -y
          apt-get install -y python3 python3-pip
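      # apt-get runs without sudo here, so the self-hosted runner is assumed
      # to execute as root (e.g. inside a container on the TPU VM).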
      - name: Build TGI Docker Image
        run: |
          make tpu-tgi

      - name: Run integration tests
        run: |
          make tgi_docker_test
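      # The Makefile targets are assumed to follow the optimum-tpu layout:
      # `make tpu-tgi` builds the huggingface/optimum-tpu TGI image used by
      # the Debug step below, and `make tgi_docker_test` runs the pytest
      # docker integration suite against it, roughly:
      #   python3 -m pytest -sv text-generation-inference/integration-tests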
      # - name: Debug
      #   run: |
      #     # Start docker container in background
      #     docker run -d --name tgi-tests-gpt2 \
      #       -e LOG_LEVEL=info,text_generation_router,text_generation_launcher=debug \
      #       -e HF_HUB_ENABLE_HF_TRANSFER=0 \
      #       -e MAX_BATCH_SIZE=4 \
      #       -e JETSTREAM_PT_DISABLE=1 \
      #       -e SKIP_WARMUP=1 \
      #       -e MODEL_ID=openai-community/gpt2 \
      #       ${{ vars.V5_LITEPOD_8_ENV }} \
      #       -e HF_SEQUENCE_LENGTH=1024 \
      #       -v /mnt/hf_cache:/mnt/cache \
      #       --shm-size 16G \
      #       --privileged \
      #       --ipc host \
      #       --network host \
      #       huggingface/optimum-tpu:latest \
      #       --max-input-length 512 \
      #       --max-total-tokens 1024 \
      #       --max-batch-prefill-tokens 512 \
      #       --max-batch-total-tokens 1024 &
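      #     # Token budgets: a request may use up to 512 input tokens and
      #     # 1024 total (input + generated) tokens; the batch-level caps
      #     # match, so effectively one full-length sequence fits per batch.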
      #     # Sleep for 10 seconds to let the container start up
      #     sleep 10
      #     # Stream docker logs while the container is starting up
      #     docker logs -f tgi-tests-gpt2 &
      #     # Wait for the TGI server to finish loading, then send a test request
      #     sleep 300 &&
      #     curl --max-time 30 0.0.0.0:80/generate \
      #       -X POST \
      #       -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":20}}' \
      #       -H 'Content-Type: application/json'
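      #     # A healthy server is expected to answer with a JSON body roughly
      #     # like: {"generated_text":" Deep Learning is ..."}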