# Run newsapi_articles pipeline from newsapi_pipeline.py #71

name: Run newsapi_articles pipeline from newsapi_pipeline.py

# Triggers: a daily cron schedule plus manual dispatch.
'on':
  schedule:
    # Runs at 04:00 UTC every day
    - cron: '0 4 * * *'
  workflow_dispatch: null

# Workflow-level environment, visible to every step of every job.
# The double-underscore names look like SECTION__KEY configuration paths
# read by the pipeline script -- TODO confirm against newsapi_pipeline.py.
# Credentials and bucket locations come from repository secrets, never
# from literals in this file.
env:
  DESTINATION__FILESYSTEM__DATASET_NAME: newsapi
  NEWSAPI_PIPELINE__DESTINATION__SCHEMA_NAME: "ingest_newsapi_v1"
  DESTINATION__FILESYSTEM__BUCKET_URL: ${{ secrets.BUCKET_URL }}
  NEWSAPI__DESTINATION__ATHENA__QUERY_RESULT_BUCKET: ${{ secrets.ATHENA__QUERY_RESULT_BUCKET }}
  DESTINATION__FILESYSTEM__CREDENTIALS__AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
  DESTINATION__FILESYSTEM__CREDENTIALS__AWS_SECRET_ACCESS_KEY: ${{ secrets.DESTINATION__FILESYSTEM__CREDENTIALS__AWS_SECRET_ACCESS_KEY }}
  SOURCES__NEWSAPI_PIPELINE__API_KEY: ${{ secrets.SOURCES__NEWSAPI_PIPELINE__API_KEY }}

jobs:
  # Gate job: publishes `should_skip` so the pipeline job can be bypassed
  # when the skip-duplicate-actions check reports a duplicate run.
  maybe_skip:
    runs-on: ubuntu-latest
    outputs:
      should_skip: ${{ steps.skip_check.outputs.should_skip }}
    steps:
      - id: skip_check
        uses: fkirc/skip-duplicate-actions@v5
        with:
          # NOTE(review): exact skipping semantics of these inputs are
          # defined by the action -- confirm against its README.
          concurrent_skipping: always
          skip_after_successful_duplicate: 'false'
          do_not_skip: '[]'

  # Main job: installs dependencies with pipenv and runs the pipeline
  # script. Runs only when the gate job did not report should_skip == 'true'.
  run_pipeline:
    needs: maybe_skip
    if: needs.maybe_skip.outputs.should_skip != 'true'
    runs-on: ubuntu-latest
    steps:
      - name: Check out
        uses: actions/checkout@v3

      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          # Quoted so the version is always read as a string.
          python-version: '3.11.x'

      - name: Install pipenv
        run: pip install pipenv

      # Cache the pipenv virtualenv directory, keyed on the runner OS and
      # the hash of Pipfile.lock, so a changed lock file misses the cache.
      - name: Cache pipenv virtualenv
        uses: actions/cache@v3
        id: cache-pipenv
        with:
          path: ~/.local/share/virtualenvs
          key: ${{ runner.os }}-pipenv-${{ hashFiles('**/Pipfile.lock') }}

      # Skipped on an exact cache hit: the restored virtualenv is reused.
      - name: Install dependencies
        if: steps.cache-pipenv.outputs.cache-hit != 'true'
        run: pipenv install --deploy

      - name: Run pipeline script
        run: pipenv run python 'newsapi_pipeline.py'