diff --git a/airflow/dags/ingest_population.py b/airflow/dags/ingest_population.py index 32bcfc7c7..895bff2e5 100644 --- a/airflow/dags/ingest_population.py +++ b/airflow/dags/ingest_population.py @@ -4,6 +4,8 @@ import pandas as pd from airflow.decorators import dag, task from include.container import Container +from include.pools import DBT_POOL +from include.utils import get_dbt_command_from_directory from pendulum import datetime URL = "https://www.insee.fr/fr/statistiques/fichier/3698339/base-pop-historiques-1876-2021.xlsx" @@ -42,8 +44,14 @@ def ingest(path_on_bucket) -> int | None: os.remove(tmp_localpath) return row_count + @task.bash(pool=DBT_POOL) + def dbt_build() -> str: + return get_dbt_command_from_directory(cmd="dbt build -s +insee+") + path_on_bucket = download() - ingest(path_on_bucket) + ingest_task = ingest(path_on_bucket) + + path_on_bucket >> ingest_task >> dbt_build() ingest_population() diff --git a/airflow/include/sql/sparte/models/majic/schema.yml b/airflow/include/sql/sparte/models/majic/schema.yml index ce8b60065..0e1f8fccb 100644 --- a/airflow/include/sql/sparte/models/majic/schema.yml +++ b/airflow/include/sql/sparte/models/majic/schema.yml @@ -34,7 +34,6 @@ models: - name: departement data_tests: - not_null - - unique - has_all_departements - relationships: to: ref('departement') @@ -44,7 +43,6 @@ models: - name: epci data_tests: - not_null - - unique - has_all_epcis - relationships: to: ref('epci') @@ -54,7 +52,6 @@ models: - name: region data_tests: - not_null - - unique - has_all_regions - relationships: to: ref('region') @@ -64,7 +61,6 @@ models: - name: scot data_tests: - not_null - - unique - has_all_scots - relationships: to: ref('scot')