From 9466a8f47315e3307a2991f5e55f0efbe53f5bfe Mon Sep 17 00:00:00 2001
From: zacharyburnett
Date: Wed, 25 Oct 2023 12:27:27 -0400
Subject: [PATCH] add data workflow to cache WebbPSF data

---
Reviewer note: the workflow text in this patch was edited after the commit
was exported, so the commit id on the first line and the blob ids on the
`index` lines no longer describe this content.

 .github/workflows/ci.yml   | 10 ++++-
 .github/workflows/data.yml | 89 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 98 insertions(+), 1 deletion(-)
 create mode 100644 .github/workflows/data.yml

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index c871d446..c6144fa1 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -27,13 +27,21 @@ jobs:
         - macos: py311-xdist
         - linux: py311-cov-xdist
           coverage: 'codecov'
+  # reusable workflow that reports the data paths and the WebbPSF cache key
+  data:
+    uses: ./.github/workflows/data.yml
   test_downstream:
     uses: OpenAstronomy/github-actions-workflows/.github/workflows/tox.yml@v1
+    needs: [ data ]
     with:
       setenv: |
-        CRDS_PATH: ${{ needs.crds.outputs.path }}
+        WEBBPSF_PATH: ${{ needs.data.outputs.webbpsf_path }}
+        CRDS_PATH: ${{ needs.data.outputs.path }}/crds_cache
         CRDS_CLIENT_RETRY_COUNT: 3
         CRDS_CLIENT_RETRY_DELAY_SECONDS: 20
+      # cache location and key as reported by the data job
+      cache-path: ${{ needs.data.outputs.webbpsf_path }}
+      cache-key: webbpsf-${{ needs.data.outputs.webbpsf_hash }}
       envs: |
         - linux: test-jwst-xdist
         - linux: test-romancal-xdist
diff --git a/.github/workflows/data.yml b/.github/workflows/data.yml
new file mode 100644
index 00000000..f67396df
--- /dev/null
+++ b/.github/workflows/data.yml
@@ -0,0 +1,89 @@
+# Download and cache the WebbPSF data, then report where the cached data
+# lives and which `webbpsf-` cache key is the most recent.
+on:
+  workflow_call:
+    outputs:
+      # root data directory (DATA_PATH)
+      path:
+        value: ${{ jobs.data.outputs.path }}
+      # WebbPSF data directory underneath DATA_PATH
+      webbpsf_path:
+        value: ${{ jobs.data.outputs.webbpsf_path }}
+      # hash part of the most recent `webbpsf-HASH` cache key
+      webbpsf_hash:
+        value: ${{ jobs.data.outputs.webbpsf_hash }}
+  workflow_dispatch:
+  schedule:
+    # 04:42 UTC every Wednesday
+    - cron: "42 4 * * 3"
+
+env:
+  DATA_PATH: /tmp/data
+
+jobs:
+  webbpsf-data:
+    # only refresh the data on the upstream repository, and only when scheduled,
+    # manually dispatched, or requested with the `update webbpsf data` PR label
+    if: (github.repository == 'spacetelescope/romancal' && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || contains(github.event.pull_request.labels.*.name, 'update webbpsf data')))
+    name: download and cache WebbPSF data
+    runs-on: ubuntu-latest
+    env:
+      WEBBPSF_DATA_URL: https://stsci.box.com/shared/static/qxpiaxsjwo15ml6m4pkhtk36c9jgj70k.gz
+    outputs:
+      path: ${{ steps.cache_path.outputs.path }}
+      hash: ${{ steps.data_hash.outputs.hash }}
+    steps:
+      - id: cache_path
+        run: echo "path=${{ env.DATA_PATH }}/webbpsf-data" >> "$GITHUB_OUTPUT"
+      # the archive is staged in a relative `tmp/` directory, separate from the
+      # absolute DATA_PATH that it is later extracted into
+      - run: mkdir -p tmp
+      - run: wget "${{ env.WEBBPSF_DATA_URL }}" -O tmp/webbpsf-data.tar.gz
+      # the archive checksum becomes the hash part of the cache key
+      - id: data_hash
+        run: echo "hash=$( shasum tmp/webbpsf-data.tar.gz | cut -d ' ' -f 1 )" >> "$GITHUB_OUTPUT"
+      - id: cache_check
+        uses: actions/cache@v3
+        with:
+          path: ${{ steps.cache_path.outputs.path }}
+          key: webbpsf-${{ steps.data_hash.outputs.hash }}
+      # extract only when no cache entry exists for this key
+      # NOTE(review): assumes the archive unpacks into `webbpsf-data/` - confirm
+      - if: ${{ steps.cache_check.outputs.cache-hit != 'true' }}
+        run: mkdir -p "${{ env.DATA_PATH }}"
+      - if: ${{ steps.cache_check.outputs.cache-hit != 'true' }}
+        run: tar -xzvf tmp/webbpsf-data.tar.gz -C "${{ env.DATA_PATH }}"
+  data:
+    needs: [ webbpsf-data ]
+    # run data job if webbpsf-data succeeds or is skipped. This allows
+    # this data job to always fetch the crds context even if the webbpsf data fetching
+    # was skipped (and an existing cache will be used for the webbpsf data).
+    if: always() && (needs.webbpsf-data.result == 'success' || needs.webbpsf-data.result == 'skipped')
+    name: retrieve latest data cache key
+    runs-on: ubuntu-latest
+    env:
+      # token read by the `gh` CLI
+      GH_TOKEN: ${{ github.token }}
+    outputs:
+      path: ${{ env.DATA_PATH }}
+      webbpsf_hash: ${{ steps.webbpsf_hash.outputs.hash }}
+      webbpsf_path: ${{ steps.webbpsf_path.outputs.path }}
+    steps:
+      - id: webbpsf_hash
+        run: |
+          # use actions/gh-actions-cache to allow filtering by key
+          gh extension install actions/gh-actions-cache
+
+          # first column of the first listed row; expected to be the newest key
+          RECENT=$(gh actions-cache list -R spacetelescope/romancal --key webbpsf- --sort created-at | cut -f 1 | head -n 1)
+          echo "RECENT=$RECENT"
+          # keys look like `webbpsf-HASH`; keep the field after the first dash
+          HASH=$(echo "$RECENT" | cut -d '-' -f 2)
+          echo "HASH=$HASH"
+          # fail with a visible message instead of publishing an empty hash
+          if [ -z "$HASH" ]; then
+            echo "::error::no cache with a 'webbpsf-' key was found"
+            exit 1
+          fi
+          echo "hash=$HASH" >> "$GITHUB_OUTPUT"
+      - id: webbpsf_path
+        run: echo "path=${{ env.DATA_PATH }}/webbpsf-data" >> "$GITHUB_OUTPUT"