From eada4b9ac9e3a72131cf2295ab61bd09d8217973 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Evgeni=20K=C3=B6pplin?= Date: Mon, 16 Dec 2024 21:11:57 +0100 Subject: [PATCH] add to actions --- .github/workflows/run_newsapi_articles_workflow.yml | 1 + newsapi_pipeline.py | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/run_newsapi_articles_workflow.yml b/.github/workflows/run_newsapi_articles_workflow.yml index 423048e..e158d14 100644 --- a/.github/workflows/run_newsapi_articles_workflow.yml +++ b/.github/workflows/run_newsapi_articles_workflow.yml @@ -5,6 +5,7 @@ name: Run newsapi_articles pipeline from newsapi_pipeline.py workflow_dispatch: null env: DESTINATION__FILESYSTEM__DATASET_NAME: newsapi + NEWSAPI_PIPELINE__DESTINATION__SCHEMA_NAME: "ingest_newsapi_v1" DESTINATION__FILESYSTEM__BUCKET_URL: ${{ secrets.BUCKET_URL }} NEWSAPI__DESTINATION__ATHENA__QUERY_RESULT_BUCKET: ${{ secrets.ATHENA__QUERY_RESULT_BUCKET }} NEWSAPI__DESTINATION__BUCKET_URL: ${{ secrets.NEWSAPI__DESTINATION__BUCKET_URL_PROD }} diff --git a/newsapi_pipeline.py b/newsapi_pipeline.py index 1276590..1950fd0 100644 --- a/newsapi_pipeline.py +++ b/newsapi_pipeline.py @@ -1,5 +1,6 @@ import argparse from datetime import datetime, timedelta +from pathlib import Path import dlt from loguru import logger # Import Loguru @@ -9,6 +10,8 @@ today = datetime.utcnow().date() before_yesterday = today - timedelta(days=2) +target_schema_name: str = dlt.config[f"{Path(__file__).stem}.destination.schema_name"] + # Define a resource for fetching articles from the US @dlt.resource(table_name="articles_us_en", write_disposition="append") @@ -139,7 +142,7 @@ def run_pipeline(destination="filesystem", full_refresh=False): pipeline = dlt.pipeline( pipeline_name="newsapi_articles", destination=destination, - dataset_name="newsapi_data", + dataset_name=target_schema_name, ) load_info = pipeline.run(