From a4e7d5ea66e23d63f6f451167569178ebe368453 Mon Sep 17 00:00:00 2001 From: Nina Cai Date: Thu, 14 Mar 2024 13:27:43 +0000 Subject: [PATCH 1/2] change gpu docker images project_id --- .github/workflows/UploadDockerImages.yml | 73 ++++++++++++------------ 1 file changed, 37 insertions(+), 36 deletions(-) diff --git a/.github/workflows/UploadDockerImages.yml b/.github/workflows/UploadDockerImages.yml index 45a56c20e..42f415890 100644 --- a/.github/workflows/UploadDockerImages.yml +++ b/.github/workflows/UploadDockerImages.yml @@ -18,45 +18,46 @@ name: Build Images on: + pull_request: schedule: # Run the job daily at 7PM PST (3AM UTC) - cron: '0 3 * * *' jobs: - tpu: - strategy: - fail-fast: false - matrix: - device-type: ["v4-8"] - runs-on: ["self-hosted", "tpu", "${{ matrix.device-type }}"] - steps: - - uses: actions/checkout@v3 - - name: build jax stable image - run : | - project=tpu-prod-env-multipod - local_image_name=maxtext_local_jax_stable - cloud_image_name=maxtext_jax_stable - bash docker_build_dependency_image.sh LOCAL_IMAGE_NAME=$local_image_name MODE=stable - docker build --build-arg BASEIMAGE=${local_image_name} -f ./maxtext_runner.Dockerfile -t ${local_image_name}_runner . - gcloud auth configure-docker --quiet - docker tag ${local_image_name}_runner gcr.io/$project/${cloud_image_name}:latest - docker push gcr.io/$project/${cloud_image_name}:latest - image_date=$(date +%Y-%m-%d) - docker tag ${local_image_name}_runner gcr.io/$project/${cloud_image_name}:${image_date} - docker push gcr.io/$project/${cloud_image_name}:${image_date} - - name: build jax nightly image - run : | - project=tpu-prod-env-multipod - local_image_name=maxtext_local_jax_nightly - cloud_image_name=maxtext_jax_nightly - bash docker_build_dependency_image.sh LOCAL_IMAGE_NAME=$local_image_name MODE=nightly - docker build --build-arg BASEIMAGE=${local_image_name} -f ./maxtext_runner.Dockerfile -t ${local_image_name}_runner . - gcloud auth configure-docker --quiet - docker tag ${local_image_name}_runner gcr.io/$project/${cloud_image_name}:latest - docker push gcr.io/$project/${cloud_image_name}:latest - image_date=$(date +%Y-%m-%d) - docker tag ${local_image_name}_runner gcr.io/$project/${cloud_image_name}:${image_date} - docker push gcr.io/$project/${cloud_image_name}:${image_date} + # tpu: + # strategy: + # fail-fast: false + # matrix: + # device-type: ["v4-8"] + # runs-on: ["self-hosted", "tpu", "${{ matrix.device-type }}"] + # steps: + # - uses: actions/checkout@v3 + # - name: build jax stable image + # run : | + # project=tpu-prod-env-multipod + # local_image_name=maxtext_local_jax_stable + # cloud_image_name=maxtext_jax_stable + # bash docker_build_dependency_image.sh LOCAL_IMAGE_NAME=$local_image_name MODE=stable + # docker build --build-arg BASEIMAGE=${local_image_name} -f ./maxtext_runner.Dockerfile -t ${local_image_name}_runner . + # gcloud auth configure-docker --quiet + # docker tag ${local_image_name}_runner gcr.io/$project/${cloud_image_name}:latest + # docker push gcr.io/$project/${cloud_image_name}:latest + # image_date=$(date +%Y-%m-%d) + # docker tag ${local_image_name}_runner gcr.io/$project/${cloud_image_name}:${image_date} + # docker push gcr.io/$project/${cloud_image_name}:${image_date} + # - name: build jax nightly image + # run : | + # project=tpu-prod-env-multipod + # local_image_name=maxtext_local_jax_nightly + # cloud_image_name=maxtext_jax_nightly + # bash docker_build_dependency_image.sh LOCAL_IMAGE_NAME=$local_image_name MODE=nightly + # docker build --build-arg BASEIMAGE=${local_image_name} -f ./maxtext_runner.Dockerfile -t ${local_image_name}_runner . + # gcloud auth configure-docker --quiet + # docker tag ${local_image_name}_runner gcr.io/$project/${cloud_image_name}:latest + # docker push gcr.io/$project/${cloud_image_name}:latest + # image_date=$(date +%Y-%m-%d) + # docker tag ${local_image_name}_runner gcr.io/$project/${cloud_image_name}:${image_date} + # docker push gcr.io/$project/${cloud_image_name}:${image_date} gpu: strategy: @@ -68,7 +69,7 @@ jobs: - uses: actions/checkout@v3 - name: build jax stable image run : | - project=tpu-prod-env-multipod + project=supercomputer-testing local_image_name=maxtext_gpu_local_jax_stable cloud_image_name=maxtext_gpu_jax_stable bash docker_build_dependency_image.sh LOCAL_IMAGE_NAME=$local_image_name MODE=stable DEVICE=gpu @@ -81,7 +82,7 @@ jobs: docker push gcr.io/$project/${cloud_image_name}:${image_date} - name: build jax nightly image run : | - project=tpu-prod-env-multipod + project=supercomputer-testing local_image_name=maxtext_gpu_local_jax_nightly cloud_image_name=maxtext_gpu_jax_nightly bash docker_build_dependency_image.sh LOCAL_IMAGE_NAME=$local_image_name MODE=nightly DEVICE=gpu From 11cffa649a6a87b873a7939aa8053d10c622c19c Mon Sep 17 00:00:00 2001 From: Nina Cai Date: Thu, 14 Mar 2024 15:13:17 +0000 Subject: [PATCH 2/2] add back tpu docker images --- .github/workflows/UploadDockerImages.yml | 69 ++++++++++++------------ 1 file changed, 34 insertions(+), 35 deletions(-) diff --git a/.github/workflows/UploadDockerImages.yml b/.github/workflows/UploadDockerImages.yml index 42f415890..ba1e7b769 100644 --- a/.github/workflows/UploadDockerImages.yml +++ b/.github/workflows/UploadDockerImages.yml @@ -18,46 +18,45 @@ name: Build Images on: - pull_request: schedule: # Run the job daily at 7PM PST (3AM UTC) - cron: '0 3 * * *' jobs: - # tpu: - # strategy: - # fail-fast: false - # matrix: - # device-type: ["v4-8"] - # runs-on: ["self-hosted", "tpu", "${{ matrix.device-type }}"] - # steps: - # - uses: actions/checkout@v3 - # - name: build jax stable image - # run : | - # project=tpu-prod-env-multipod - # local_image_name=maxtext_local_jax_stable - # cloud_image_name=maxtext_jax_stable - # bash docker_build_dependency_image.sh LOCAL_IMAGE_NAME=$local_image_name MODE=stable - # docker build --build-arg BASEIMAGE=${local_image_name} -f ./maxtext_runner.Dockerfile -t ${local_image_name}_runner . - # gcloud auth configure-docker --quiet - # docker tag ${local_image_name}_runner gcr.io/$project/${cloud_image_name}:latest - # docker push gcr.io/$project/${cloud_image_name}:latest - # image_date=$(date +%Y-%m-%d) - # docker tag ${local_image_name}_runner gcr.io/$project/${cloud_image_name}:${image_date} - # docker push gcr.io/$project/${cloud_image_name}:${image_date} - # - name: build jax nightly image - # run : | - # project=tpu-prod-env-multipod - # local_image_name=maxtext_local_jax_nightly - # cloud_image_name=maxtext_jax_nightly - # bash docker_build_dependency_image.sh LOCAL_IMAGE_NAME=$local_image_name MODE=nightly - # docker build --build-arg BASEIMAGE=${local_image_name} -f ./maxtext_runner.Dockerfile -t ${local_image_name}_runner . - # gcloud auth configure-docker --quiet - # docker tag ${local_image_name}_runner gcr.io/$project/${cloud_image_name}:latest - # docker push gcr.io/$project/${cloud_image_name}:latest - # image_date=$(date +%Y-%m-%d) - # docker tag ${local_image_name}_runner gcr.io/$project/${cloud_image_name}:${image_date} - # docker push gcr.io/$project/${cloud_image_name}:${image_date} + tpu: + strategy: + fail-fast: false + matrix: + device-type: ["v4-8"] + runs-on: ["self-hosted", "tpu", "${{ matrix.device-type }}"] + steps: + - uses: actions/checkout@v3 + - name: build jax stable image + run : | + project=tpu-prod-env-multipod + local_image_name=maxtext_local_jax_stable + cloud_image_name=maxtext_jax_stable + bash docker_build_dependency_image.sh LOCAL_IMAGE_NAME=$local_image_name MODE=stable + docker build --build-arg BASEIMAGE=${local_image_name} -f ./maxtext_runner.Dockerfile -t ${local_image_name}_runner . + gcloud auth configure-docker --quiet + docker tag ${local_image_name}_runner gcr.io/$project/${cloud_image_name}:latest + docker push gcr.io/$project/${cloud_image_name}:latest + image_date=$(date +%Y-%m-%d) + docker tag ${local_image_name}_runner gcr.io/$project/${cloud_image_name}:${image_date} + docker push gcr.io/$project/${cloud_image_name}:${image_date} + - name: build jax nightly image + run : | + project=tpu-prod-env-multipod + local_image_name=maxtext_local_jax_nightly + cloud_image_name=maxtext_jax_nightly + bash docker_build_dependency_image.sh LOCAL_IMAGE_NAME=$local_image_name MODE=nightly + docker build --build-arg BASEIMAGE=${local_image_name} -f ./maxtext_runner.Dockerfile -t ${local_image_name}_runner . + gcloud auth configure-docker --quiet + docker tag ${local_image_name}_runner gcr.io/$project/${cloud_image_name}:latest + docker push gcr.io/$project/${cloud_image_name}:latest + image_date=$(date +%Y-%m-%d) + docker tag ${local_image_name}_runner gcr.io/$project/${cloud_image_name}:${image_date} + docker push gcr.io/$project/${cloud_image_name}:${image_date} gpu: strategy: