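# NOTE(review): the next two comment lines are banner text from the GitHub web
# UI that was captured when this file was copied; they are not workflow content.
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters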
# Integration-test workflow for Text Generation Inference (TGI) on TPU.
# Triggers: every push, pull requests targeting main that touch
# text-generation-inference/**, and manual dispatch.
name: Optimum TPU / Test TGI on TPU / Integration Tests

on:
  # No branch or path filter here: any push starts the workflow.
  push:
  pull_request:
    branches: [main]
    paths:
      - "text-generation-inference/**"
  workflow_dispatch:

# Runs are grouped per workflow and PR head ref; github.run_id is the fallback
# when head_ref is empty, so such runs each get their own group. A newer run
# in the same group cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  integration-tests:
    name: Run TGI Integration Tests
    # Selects a runner from the named runner group.
    runs-on:
      group: gcp-ct5lp-hightpu-8t
    # Job-wide environment, visible to every step below.
    env:
      PJRT_DEVICE: TPU
      HF_HUB_CACHE: /mnt/hf_cache/cache_huggingface
      HF_TOKEN: ${{ secrets.HF_TOKEN_OPTIMUM_TPU_CI }}
      V5_LITEPOD_8_ENV: ${{ vars.V5_LITEPOD_8_ENV }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # - name: Install docker
      #   run: |
      #     apt-get update -y
      #     apt-get install -y docker.io

      - name: Install Python
        run: |
          apt-get update -y
          apt-get install -y python3 python3-pip

      # NOTE(review): the tests run before the image build step below. If the
      # `tgi_docker_test` make target does not itself build the image, the
      # tests may exercise a stale or missing image - confirm against the
      # Makefile and swap the two steps if so.
      - name: Run integration tests
        run: |
          make tgi_docker_test

      - name: Build TGI Docker Image
        run: |
          make tpu-tgi

      # Disabled manual smoke test: starts the TGI container with gpt2 and
      # sends a single /generate request.
      # - name: Debug
      #   run: |
      #     # Start docker container in background
      #     docker run -d --name tgi-tests-gpt2 \
      #       -e LOG_LEVEL=info,text_generation_router,text_generation_launcher=debug \
      #       -e HF_HUB_ENABLE_HF_TRANSFER=0 \
      #       -e MAX_BATCH_SIZE=4 \
      #       -e JETSTREAM_PT_DISABLE=1 \
      #       -e SKIP_WARMUP=1 \
      #       -e MODEL_ID=openai-community/gpt2 \
      #       ${{ vars.V5_LITEPOD_8_ENV}} \
      #       -e HF_SEQUENCE_LENGTH=1024 \
      #       -v /mnt/hf_cache:/mnt/cache \
      #       --shm-size 16G \
      #       --privileged \
      #       --ipc host \
      #       --network host \
      #       huggingface/optimum-tpu:latest \
      #       --max-input-length 512 \
      #       --max-total-tokens 1024 \
      #       --max-batch-prefill-tokens 512 \
      #       --max-batch-total-tokens 1024 &
      #     # Sleep for 10 seconds to let the container start up
      #     sleep 10
      #     # Show docker logs while container is starting up
      #     docker logs -f tgi-tests-gpt2 &
      #     # for tgi server to start
      #     sleep 300 &&
      #     curl --max-time 30 0.0.0.0:80/generate \
      #       -X POST \
      #       -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":20}}' \
      #       -H 'Content-Type: application/json'