diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml
index 0951ccf5..9084db73 100644
--- a/.github/workflows/pr.yml
+++ b/.github/workflows/pr.yml
@@ -1,31 +1,38 @@
 # This GitHub Actions workflow automates the process of
 # publishing dataset collections to a staging environment
-# and creating a pull request (PR) in the veda-config repository
-# with the dataset configuration.
 # It is triggered by a pull request to the main branch
 # that modifies any files within the ingestion-data/dataset-config/ directory
 # The workflow includes steps to
 # - publish the datasets,
-# - create a PR in veda-config repository,
 # - constantly updates the status of the workflow in the PR comment
 
-name: Publish collection to staging and create dataset config PR
+name: Publish collection to staging
 
 on:
   pull_request:
-    branches:
-      - main
+    # Default types omit `closed`; it is needed for publish-to-prod-on-pr-merge to ever run
+    types: [opened, synchronize, reopened, closed]
+    branches: ['main', 'feat/automated-staging-publish']
     paths:
       # Run the workflow only if files inside this path are updated
-      - ingestion-data/dataset-config/*
+      # - ingestion-data/staging/dataset-config/*
+      - ingestion-data/testing/dataset-config/*
+
+  push:
+    branches:
+      - main
+
+permissions:
+  pull-requests: write
+  contents: read
 
 jobs:
   dataset-publication-and-configuration:
-    permissions:
-      pull-requests: write
-      contents: read
+    if: ${{ github.event_name == 'pull_request' && (github.event.action == 'synchronize' || github.event.action == 'opened') }}
     runs-on: ubuntu-latest
-
+    environment: staging
+    outputs:
+      publishedCollections: ${{ steps.publish-collections.outputs.success_collections }}
     steps:
       - uses: actions/checkout@v4
 
@@ -38,9 +45,8 @@ jobs:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         run: |
           WORKFLOW_URL="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
-          body='### Workflow Status
-          **Starting workflow...** [View action run]($WORKFLOW_URL)
-          '
+          body="### Workflow Status
+          **Starting workflow...** [View action run]($WORKFLOW_URL)"
 
           # Get the PR number
           PR_NUMBER=${{ github.event.pull_request.number }}
@@ -61,21 +67,29 @@ jobs:
 
           echo "COMMENT_ID=$COMMENT_ID" >> $GITHUB_OUTPUT
 
-      # Find only the updated files (file that differ from base)
+      # Find only the newly added files
       # Only .json files
       # The files are outputted to GITHUB_OUTPUT, which can be used in subsequent steps
-      - name: Get updated files
+      - name: Get newly added files
         id: changed-files
-        uses: tj-actions/changed-files@v44
+        uses: tj-actions/changed-files@v45
         with:
           files: |
             **.json
 
+      - name: List all newly added files
+        env:
+          ADDED_FILES: ${{ steps.changed-files.outputs.added_files }}
+        run: |
+          for file in ${ADDED_FILES}; do
+            echo "$file was added"
+          done
+
       # Uses service client creds to get token
       # No username/password needed
       - name: Get auth token
         id: get-token
         run: |
           response=$(curl -X POST \
             ${{ vars.STAGING_COGNITO_DOMAIN }}/oauth2/token \
             -H "Content-Type: application/x-www-form-urlencoded" \
@@ -92,10 +106,10 @@ jobs:
       # Used by other steps
       # If none of the requests are successful, workflow fails
       # Updates the PR comment with status of collection publication
-      - name: Publish all updated collections
+      - name: Publish all newly added collections to staging
         id: publish-collections
         env:
-          ALL_CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }}
+          ADDED_FILES: ${{ steps.changed-files.outputs.added_files }}
           WORKFLOWS_URL: ${{ vars.STAGING_WORKFLOWS_URL }}
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           AUTH_TOKEN: ${{ steps.get-token.outputs.ACCESS_TOKEN }}
@@ -120,7 +134,9 @@ jobs:
           status_message='### Collection Publication Status
           '
 
-          for file in "${ALL_CHANGED_FILES[@]}"; do
+          # ADDED_FILES is a space-separated string, not a bash array: "${ADDED_FILES[@]}"
+          # would yield one word holding every path, so rely on word splitting instead
+          for file in ${ADDED_FILES}; do
             echo $file
             if [ -f "$file" ]; then
               dataset_config=$(jq '.' "$file")
@@ -143,7 +159,7 @@ jobs:
                 all_failed=false
               else
                 echo "$collection_id failed to publish ❌"
-                status_message+="- **$collection_id**: Failed to publish ❌
+                status_message+="- **$collection_id**: Failed to publish. Error code $status_code. ❌
           "
               fi
             else
@@ -168,115 +184,61 @@ jobs:
           $status_message"
           gh api -X PATCH -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID -f body="$UPDATED_BODY"
 
-      # Update PR comment
-      - name: Update PR comment for PR creation
-        if: success()
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          COMMENT_ID: ${{ steps.init-comment.outputs.COMMENT_ID }}
-        run: |
-          CURRENT_BODY=$(gh api -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID --jq '.body')
-          UPDATED_BODY="$CURRENT_BODY
-
-          **Creating a PR in veda-config...**"
-          gh api -X PATCH -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID -f body="$UPDATED_BODY"
-
       - name: Set up Python
         uses: actions/setup-python@v5
         with:
           python-version: '3.9'
           cache: 'pip'
 
-      # Creates a slim dataset mdx file for each collection based on the dataset config json
-      - name: Create dataset mdx for given collections
-        env:
-          PUBLISHED_COLLECTION_FILES: ${{ steps.publish-collections.outputs.success_collections }}
-        run: |
-          pip install -r scripts/requirements.txt
-          for file in "${PUBLISHED_COLLECTION_FILES[@]}"
-          do
-            python3 scripts/mdx.py "$file"
-          done
-
-      - name: Set up Git
-        run: |
-          git config --global user.name "github-actions[bot]"
-          git config --global user.email "github-actions[bot]@users.noreply.github.com"
-
-      - name: Clone `veda-config`
-        env:
-          VEDA_CONFIG_GH_TOKEN: ${{ secrets.VEDA_CONFIG_GH_TOKEN }}
-        run: git clone https://${{ env.VEDA_CONFIG_GH_TOKEN }}@github.com/${{ vars.VEDA_CONFIG_REPO_ORG }}/${{ vars.VEDA_CONFIG_REPO_NAME }}.git
-
-      # Creates a PR in veda-config with the following changes:
-      # 1. the mdx files for all published collections
-      # 2. updates the stac/raster urls in .env file
-      # This step needs a GH_TOKEN that has permissions to create a PR in veda-config
-      - name: Create PR with changes
-        id: create-pr
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          VEDA_CONFIG_GH_TOKEN: ${{ secrets.VEDA_CONFIG_GH_TOKEN }}
-          COMMENT_ID: ${{ steps.publish-collections.outputs.COMMENT_ID }}
-          PUBLISHED_COLLECTION_FILES: ${{ steps.publish-collections.outputs.success_collections }}
-        run: |
-          files_string=$(IFS=$'\n'; echo "${PUBLISHED_COLLECTION_FILES[*]}")
-          hash=$(echo -n "$files_string" | md5sum | cut -d ' ' -f 1)
-          NEW_BRANCH="add-dataset-$hash"
-          cd ${{ vars.VEDA_CONFIG_REPO_NAME }}
-          git fetch origin
-          if git ls-remote --exit-code --heads origin $NEW_BRANCH; then
-            git push origin --delete $NEW_BRANCH
-          fi
-          git checkout -b $NEW_BRANCH
-
-          # Update the env vars to staging based on env vars
-          sed -i "s|${{ vars.ENV_FROM }}|${{ vars.ENV_TO }}|g" .env
-          cp -r ../datasets/* datasets/
-          git add .
-          git commit -m "Add dataset(s)"
-          git push origin $NEW_BRANCH
-          PR_URL=$(GITHUB_TOKEN=$VEDA_CONFIG_GH_TOKEN gh pr create -H $NEW_BRANCH -B develop --title 'Add dataset [Automated workflow]' --body-file <(echo "Add datasets (Automatically created by Github action)"))
-
-          echo "PR_URL=$PR_URL" >> $GITHUB_OUTPUT
-          echo "PR creation succeeded"
-
-      # Updates the comment with a link to the above PR
-      - name: Update PR comment with PR creation result
-        if: success()
+      # If the workflow fails at any point, the PR comment will be updated
+      - name: Update PR comment on overall workflow failure
+        if: failure()
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           COMMENT_ID: ${{ steps.init-comment.outputs.COMMENT_ID }}
         run: |
-          PR_URL=${{ steps.create-pr.outputs.PR_URL }}
+          WORKFLOW_URL="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
           CURRENT_BODY=$(gh api -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID --jq '.body')
           UPDATED_BODY="$CURRENT_BODY
 
-          **A PR has been created with the dataset configuration: πŸ—ΊοΈ [PR link]($PR_URL)**"
+          ** ❌ The workflow run failed. [See logs here]($WORKFLOW_URL)**"
           gh api -X PATCH -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID -f body="$UPDATED_BODY"
 
-      - name: Update PR comment on PR creation failure
-        if: failure() && steps.create-pr.outcome == 'failure'
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          COMMENT_ID: ${{ steps.init-comment.outputs.COMMENT_ID }}
-        run: |
-          CURRENT_BODY=$(gh api -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID --jq '.body')
-          UPDATED_BODY="$CURRENT_BODY
+  create-mdx-files:
+    runs-on: ubuntu-latest
+    needs: dataset-publication-and-configuration
+    steps:
+      - name: Use output from dataset-publication-and-configuration
 
-          **Failed ❌ to create a PR with the dataset configuration. πŸ˜” **"
-          gh api -X PATCH -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID -f body="$UPDATED_BODY"
+        run: |
+          echo "The output from the previous step is: ${{ needs.dataset-publication-and-configuration.outputs.publishedCollections }}"
 
-      # If the workflow fails at any point, the PR comment will be updated
-      - name: Update PR comment on overall workflow failure
-        if: failure() && steps.create-pr.outcome != 'failure'
+      # Creates a slim dataset mdx file for each collection based on the dataset config json
+      - name: Create dataset mdx for given collections
         env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          COMMENT_ID: ${{ steps.init-comment.outputs.COMMENT_ID }}
+          PUBLISHED_COLLECTION_FILES: ${{ needs.dataset-publication-and-configuration.outputs.publishedCollections }}
+        run: echo "NO-OP step"
+        # run: |
+        #   pip install -r scripts/requirements.txt
+        #   for file in "${PUBLISHED_COLLECTION_FILES[@]}"
+        #   do
+        #     python3 scripts/mdx.py "$file"
+        #   done
+
+  open-veda-config-pr-notification:
+    runs-on: ubuntu-latest
+    needs: create-mdx-files
+    steps:
+      - name: Notify user to open a new PR
         run: |
-          WORKFLOW_URL="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
-          CURRENT_BODY=$(gh api -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID --jq '.body')
-          UPDATED_BODY="$CURRENT_BODY
+          echo "The action is complete. Please open a new Pull Request in veda-config for the changes."
 
-          ** ❌ The workflow run failed. [See logs here]($WORKFLOW_URL)**"
-          gh api -X PATCH -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID -f body="$UPDATED_BODY"
+  publish-to-prod-on-pr-merge:
+    if: ${{ github.event_name == 'pull_request' && github.event.action == 'closed' && github.event.pull_request.merged == true }}
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Publish to production on PR merge
+        run: echo "NO-OP. This step runs when a PR is merged."
diff --git a/ingestion-data/testing/dataset-config/test.json b/ingestion-data/testing/dataset-config/test.json
new file mode 100644
index 00000000..753d6341
--- /dev/null
+++ b/ingestion-data/testing/dataset-config/test.json
@@ -0,0 +1,31 @@
+{
+    "collection": "hls-swir-falsecolor-composite-TEST",
+    "title": "HLS SWIR FalseColor Composite",
+    "spatial_extent": {
+        "xmin": -156.75,
+        "ymin": 20.80,
+        "xmax": -156.55,
+        "ymax": 20.94
+    },
+    "temporal_extent": {
+        "startdate": "2023-08-08T00:00:00Z",
+        "enddate": "2023-08-08T23:59:59Z"
+    },
+    "data_type": "cog",
+    "license": "CC0-1.0",
+    "description": "HLS falsecolor composite imagery using Bands 12, 8A, and 4.",
+    "is_periodic": false,
+    "time_density": "day",
+    "sample_files": [
+        "s3://veda-data-store-staging/maui-fire/Lahaina_HLS_2023-08-08_SWIR_falsecolor_cog.tif",
+        "s3://veda-data-store-staging/maui-fire/Lahaina_HLS_2023-08-13_SWIR_falsecolor_cog.tif"
+    ],
+    "discovery_items": [
+        {
+            "discovery": "s3",
+            "prefix": "maui-fire/",
+            "bucket": "veda-data-store-staging",
+            "filename_regex": "(.*)SWIR_falsecolor(.*).tif$"
+        }
+    ]
+}