Skip to content

Commit

Permalink
Merge pull request #794 from MTES-MCT/staging
Browse files Browse the repository at this point in the history
MEP v8.4
  • Loading branch information
alexisig authored Dec 16, 2024
2 parents 1cab8e4 + 1699e1d commit 9280981
Show file tree
Hide file tree
Showing 336 changed files with 6,586 additions and 2,835 deletions.
2 changes: 0 additions & 2 deletions .astro/config.yaml

This file was deleted.

1 change: 0 additions & 1 deletion .astro/dag_integrity_exceptions.txt

This file was deleted.

130 changes: 0 additions & 130 deletions .astro/test_dag_integrity_default.py

This file was deleted.

63 changes: 63 additions & 0 deletions .github/workflows/airflow_test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Runs the Airflow project's pytest suite (through the Astro CLI) on every
# push to a branch other than `staging` and `master`.
name: 'Pull Request'

on:
  push:
    branches-ignore:
      - 'staging'
      - 'master'

env:
  AIRFLOW__CORE__TEST_CONNECTION: Enabled
  AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL: 3

  AIRFLOW_S3_LOGIN: ${{ secrets.AIRFLOW_S3_LOGIN }}
  AIRFLOW_S3_PASSWORD: ${{ secrets.AIRFLOW_S3_PASSWORD }}
  AIRFLOW_S3_ENDPOINT: ${{ secrets.AIRFLOW_S3_ENDPOINT }}
  AIRFLOW_S3_REGION_NAME: ${{ secrets.AIRFLOW_S3_REGION_NAME }}

  DBT_DB_NAME: ${{ secrets.DBT_DB_NAME }}
  DBT_DB_USER: ${{ secrets.DBT_DB_USER }}
  DBT_DB_PASSWORD: ${{ secrets.DBT_DB_PASSWORD }}
  DBT_DB_HOST: ${{ secrets.DBT_DB_HOST }}
  DBT_DB_PORT: ${{ secrets.DBT_DB_PORT }}
  DBT_DB_SCHEMA: ${{ secrets.DBT_DB_SCHEMA }}

  # NOTE(review): the DEV_*, STAGING_* and PROD_* variables below all reuse the
  # DBT_DB_* secrets, so every environment points at the same database in CI.
  # This looks deliberate for the test run - confirm.
  DEV_DB_NAME: ${{ secrets.DBT_DB_NAME }}
  DEV_DB_USER: ${{ secrets.DBT_DB_USER }}
  DEV_DB_PASSWORD: ${{ secrets.DBT_DB_PASSWORD }}
  DEV_DB_HOST: ${{ secrets.DBT_DB_HOST }}
  DEV_DB_PORT: ${{ secrets.DBT_DB_PORT }}
  DEV_DB_SCHEMA: ${{ secrets.DBT_DB_SCHEMA }}

  STAGING_DB_NAME: ${{ secrets.DBT_DB_NAME }}
  STAGING_DB_USER: ${{ secrets.DBT_DB_USER }}
  STAGING_DB_PASSWORD: ${{ secrets.DBT_DB_PASSWORD }}
  STAGING_DB_HOST: ${{ secrets.DBT_DB_HOST }}
  STAGING_DB_PORT: ${{ secrets.DBT_DB_PORT }}
  STAGING_DB_SCHEMA: ${{ secrets.DBT_DB_SCHEMA }}

  PROD_DB_NAME: ${{ secrets.DBT_DB_NAME }}
  PROD_DB_USER: ${{ secrets.DBT_DB_USER }}
  PROD_DB_PASSWORD: ${{ secrets.DBT_DB_PASSWORD }}
  PROD_DB_HOST: ${{ secrets.DBT_DB_HOST }}
  PROD_DB_PORT: ${{ secrets.DBT_DB_PORT }}
  PROD_DB_SCHEMA: ${{ secrets.DBT_DB_SCHEMA }}

  GPU_SFTP_HOST: ${{ secrets.GPU_SFTP_HOST }}
  GPU_SFTP_USER: ${{ secrets.GPU_SFTP_USER }}
  GPU_SFTP_PASSWORD: ${{ secrets.GPU_SFTP_PASSWORD }}
  GPU_SFTP_PORT: ${{ secrets.GPU_SFTP_PORT }}

  MATTERMOST_WEBHOOK_URL: ${{ secrets.MATTERMOST_WEBHOOK_URL }}
  MATTERMOST_CHANNEL: ${{ secrets.MATTERMOST_CHANNEL }}


jobs:
  test-airflow:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      # Install the Astro CLI.
      - run: curl -sSL install.astronomer.io | sudo bash -s
      # Each `run` step starts in a fresh shell, so a separate `cd airflow`
      # step would not carry over; `working-directory` selects the project.
      - run: astro dev pytest
        working-directory: airflow
25 changes: 0 additions & 25 deletions .sqlfluff

This file was deleted.

4 changes: 2 additions & 2 deletions airflow/dags/diff_ocsge_download_page_to_mattermost.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import requests
from airflow.decorators import dag, task
from bs4 import BeautifulSoup
from include.container import Container
from include.domain.container import Container
from pendulum import datetime


Expand Down Expand Up @@ -56,7 +56,7 @@ def diff():
"```",
]
)
Container().mattermost().send(markdown_message)
Container().notification().send(message=markdown_message)

diff()

Expand Down
2 changes: 1 addition & 1 deletion airflow/dags/download_all_ocsge.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from airflow.operators.python import PythonOperator
from include.container import Container

with open("include/ocsge/sources.json", "r") as f:
with open("include/domain/data/ocsge/sources.json", "r") as f:
sources = json.load(f)


Expand Down
71 changes: 71 additions & 0 deletions airflow/dags/identify_changed_ocsge_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import json

import requests
from airflow.decorators import dag, task
from bs4 import BeautifulSoup
from include.domain.container import Container
from pendulum import datetime

# Load the catalogue of known OCS GE download URLs once, at DAG-parse time.
# JSON is UTF-8 by specification, so the encoding is stated explicitly rather
# than inherited from the worker's locale.
with open("include/domain/data/ocsge/sources.json", "r", encoding="utf-8") as f:
    sources = json.load(f)


def _find_missing_urls(known_sources: dict, current_urls: set) -> list[dict[str, str]]:
    """Return one record per known source URL that is absent from ``current_urls``.

    ``known_sources`` maps a departement to a mapping that holds, under the
    keys ``"difference"`` and ``"occupation_du_sol_et_zone_construite"``, a
    ``{years: url}`` mapping. Records keep the catalogue's order: for each
    departement, the "difference" entries come first.
    """
    missing_urls = []
    for departement, datasets in known_sources.items():
        for dataset_type in ("difference", "occupation_du_sol_et_zone_construite"):
            for years, source_url in datasets[dataset_type].items():
                if source_url not in current_urls:
                    missing_urls.append(
                        {
                            "departement": departement,
                            "years": years,
                            "type": dataset_type,
                            "url": source_url,
                        }
                    )
    return missing_urls


def _format_missing_urls_message(missing_urls: list[dict[str, str]]) -> str:
    """Render the notification text: a header, then one fenced block per missing URL."""
    parts = ["⚠️ Changement d'url des données OCS GE détecté\n"]
    for entry in missing_urls:
        parts.append(
            "```\n"
            f"Departement : {entry['departement']}\n"
            f"Années : {entry['years']}\n"
            f"Type : {entry['type']}\n"
            f"Url manquant : {entry['url']}\n"
            "```\n"
        )
    return "".join(parts)


@dag(
    start_date=datetime(2024, 1, 1),
    schedule="0 10 * * *",  # every day at 10:00
    catchup=False,
    doc_md=__doc__,  # module docstring, if the file defines one
    default_args={"owner": "Alexis Athlani", "retries": 3},
    tags=["OCS GE"],
)
def identify_changed_ocsge_data():
    """Daily check that every known OCS GE source URL is still on the IGN page."""

    @task.python
    def check_for_missing_urls():
        """Fetch the IGN download page and notify about source URLs no longer linked.

        Raises:
            requests.RequestException: if the page cannot be fetched or answers
                with an HTTP error status; Airflow then retries the task.
        """
        page_url = "https://geoservices.ign.fr/ocsge#telechargement"
        timeout_seconds = 30  # never let a stalled connection hang the worker

        response = requests.get(page_url, timeout=timeout_seconds)
        # An error page holds none of the expected links; without this check
        # every known URL would be reported as missing (false alert).
        response.raise_for_status()

        soup = BeautifulSoup(response.text, features="html.parser")
        # A set gives O(1) membership tests against the whole catalogue.
        current_urls = {link.get("href") for link in soup.select("a")}

        missing_urls = _find_missing_urls(sources, current_urls)
        if missing_urls:
            Container().notification().send(message=_format_missing_urls_message(missing_urls))

    check_for_missing_urls()


identify_changed_ocsge_data()
2 changes: 1 addition & 1 deletion airflow/dags/ingest_admin_express.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from include.container import Container
from pendulum import datetime

with open("include/admin_express/sources.json", "r") as f:
with open("include/domain/data/admin_express/sources.json", "r") as f:
sources = json.load(f)
zones = [source["name"] for source in sources]

Expand Down
Loading

0 comments on commit 9280981

Please sign in to comment.