# Captured from the GitHub Actions run page
# Run: "Ingest Single Dataset: bpl_libraries #8"
# Workflow file for this run:

name: Ingest Single Dataset
# The run title shows which dataset was requested.
run-name: "Ingest Single Dataset: ${{ inputs.dataset }}"

# Manual trigger only: every run is started by hand with the inputs below.
on:
  workflow_dispatch:
    inputs:
      # The only input without which the ingest command has nothing to act on.
      dataset:
        description: 'Name of the dataset (required)'
        required: true
        default: dcp_mappluto
      # Checked by default; when true the ingest command receives --latest.
      latest:
        description: 'Tag this version as latest (optional)'
        type: boolean
        required: false
        default: true
      # Forwarded as --version <value> when non-empty.
      version:
        description: 'The version of the dataset (i.e. 22v2, 21C) if needed (optional)'
        required: false
      # Forwarded as --mode <value> when non-empty.
      mode:
        description: "Preprocessing 'mode', if applicable"
        required: false
      # Selects the 'dev-<branch>' container image tag instead of 'latest'.
      dev_image:
        description: 'Use dev image specific to this branch? (If exists)'
        type: boolean
        required: true
        default: false
      # Suffix only: 'a' resolves to the bucket 'de-dev-a'.
      dev_bucket:
        description: "dev bucket to use in place of recipes. If name of bucket is 'de-dev-a' simply put 'a'"
        type: string
        required: false
jobs:
  # Single job: runs the dcpy ingest lifecycle command for one dataset
  # inside the nycplanning/build-base container.
  dataloading:
    runs-on: ubuntu-22.04
    container:
      # Image tag is 'dev-<branch>' when dev_image is checked, else 'latest'.
      image: nycplanning/build-base:${{ inputs.dev_image && format('dev-{0}', github.head_ref || github.ref_name) || 'latest' }}
    defaults:
      run:
        shell: bash
    env:
      # Branch name: PR head ref when available, otherwise the pushed ref.
      BUILD_NAME: ${{ github.head_ref || github.ref_name }}
      # 'de-dev-<suffix>' when dev_bucket is given, else the default bucket.
      RECIPES_BUCKET: ${{ inputs.dev_bucket && format('de-dev-{0}', inputs.dev_bucket) || 'edm-recipes' }}
      # String 'true' exactly when a dev bucket was supplied.
      DEV_FLAG: ${{ inputs.dev_bucket && 'true' || 'false' }}
    steps:
      - uses: actions/checkout@v4

      - name: Load Secrets
        uses: 1password/load-secrets-action@v1
        with:
          # Export the resolved secrets as environment variables for later steps.
          export-env: true
        env:
          OP_SERVICE_ACCOUNT_TOKEN: ${{ secrets.OP_SERVICE_ACCOUNT_TOKEN }}
          AWS_S3_ENDPOINT: "op://Data Engineering/DO_keys/AWS_S3_ENDPOINT"
          AWS_SECRET_ACCESS_KEY: "op://Data Engineering/DO_keys/AWS_SECRET_ACCESS_KEY"
          AWS_ACCESS_KEY_ID: "op://Data Engineering/DO_keys/AWS_ACCESS_KEY_ID"

      - name: Finish container setup ...
        run: ./bash/docker_container_setup.sh

      - name: Archive dataset
        env:
          # User-supplied inputs are handed to the shell through environment
          # variables rather than being templated into the script text, so a
          # crafted input value cannot be executed as shell code.
          dataset: ${{ inputs.dataset }}
          # Each optional flag expands to an empty string when not requested.
          latest: ${{ inputs.latest && '--latest' || '' }}
          version: ${{ inputs.version && format('--version {0}', inputs.version) || '' }}
          mode: ${{ inputs.mode && format('--mode {0}', inputs.mode) || '' }}
        # $latest/$version/$mode are deliberately unquoted: each holds either
        # nothing or "<flag> <value>" and must word-split into separate args.
        run: python3 -m dcpy.cli lifecycle ingest "$dataset" $latest $version $mode