From 8b75c0b78132f7ac2dd5ea435f657879692f209a Mon Sep 17 00:00:00 2001 From: David Gasquez Date: Tue, 2 Apr 2024 14:49:04 +0000 Subject: [PATCH] =?UTF-8?q?feat:=20=F0=9F=8E=A8=20clean=20models?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- datadex/assets/huggingface.py | 3 +- datadex/assets/spain.py | 39 ++++++++++++++++++- dbt/models/sources.yml | 8 ++++ dbt/models/spain_aemet_historical_weather.sql | 31 +++++++++++++++ .../spain_aemet_historical_weather_schema.yml | 9 +++++ 5 files changed, 86 insertions(+), 4 deletions(-) create mode 100644 dbt/models/spain_aemet_historical_weather.sql create mode 100644 dbt/models/spain_aemet_historical_weather_schema.yml diff --git a/datadex/assets/huggingface.py b/datadex/assets/huggingface.py index 301e115..8ad835e 100644 --- a/datadex/assets/huggingface.py +++ b/datadex/assets/huggingface.py @@ -25,8 +25,7 @@ def hf_asset(data: pd.DataFrame, hf: HuggingFaceResource) -> None: "threatened_animal_species", "country_year_indicators", "spain_ipc", - "spain_aemet_stations", - "spain_aemet_weather_data", + "spain_aemet_historical_weather", ] assets = [] diff --git a/datadex/assets/spain.py b/datadex/assets/spain.py index 208859a..3444512 100644 --- a/datadex/assets/spain.py +++ b/datadex/assets/spain.py @@ -74,13 +74,33 @@ def spain_ipc() -> pd.DataFrame: @asset(group_name="spain_open_data") -def spain_aemet_stations(aemet_api: AEMETAPI) -> pd.DataFrame: +def spain_aemet_stations_data(aemet_api: AEMETAPI) -> pd.DataFrame: """ Spain AEMET stations data. """ df = pd.DataFrame(aemet_api.get_all_stations()) + # Clean latitud and longitud + df["latitude_sign"] = df["latitud"].str[-1] + df["longitude_sign"] = df["longitud"].str[-1] + + df["latitud"] = pd.to_numeric( + df["latitud"].str[:-1].str.replace(",", "."), errors="coerce" + ) + df["longitud"] = pd.to_numeric( + df["longitud"].str[:-1].str.replace(",", "."), errors="coerce" + ) + + df["latitud"] = df["latitud"] * df["latitude_sign"].apply( + lambda x: 1 if x == "N" else -1 + ) + df["longitud"] = df["longitud"] * df["longitude_sign"].apply( + lambda x: 1 if x == "E" else -1 + ) + + df = df.drop(columns=["latitude_sign", "longitude_sign"]) + return df @@ -107,6 +127,21 @@ def spain_aemet_weather_data( df = pd.concat([df, mdf], ignore_index=True) - # df["fecha"] = pd.to_datetime(df["fecha"], format="%Y-%m-%d") + df["fecha"] = pd.to_datetime(df["fecha"], format="%Y-%m-%d") + + float_columns = [ + "prec", + "presMax", + "presMin", + "racha", + "sol", + "tmax", + "tmed", + "tmin", + "velmedia", + ] + + df[float_columns] = df[float_columns].apply(lambda x: x.str.replace(",", ".")) + df[float_columns] = df[float_columns].apply(pd.to_numeric, errors="coerce") return df diff --git a/dbt/models/sources.yml b/dbt/models/sources.yml index 0f5f849..083bb02 100644 --- a/dbt/models/sources.yml +++ b/dbt/models/sources.yml @@ -15,3 +15,11 @@ sources: meta: dagster: asset_key: ["world_bank_wdi"] + - name: spain_aemet_weather_data + meta: + dagster: + asset_key: ["spain_aemet_weather_data"] + - name: spain_aemet_stations_data + meta: + dagster: + asset_key: ["spain_aemet_stations_data"] diff --git a/dbt/models/spain_aemet_historical_weather.sql b/dbt/models/spain_aemet_historical_weather.sql new file mode 100644 index 0000000..87debd9 --- /dev/null +++ b/dbt/models/spain_aemet_historical_weather.sql @@ -0,0 +1,31 @@ +select + cast(w.fecha as date) as fecha, + w.indicativo, + w.nombre, + w.provincia, + s.latitud, + s.longitud, + w.altitud, + w.tmed, + w.prec, + w.tmin, + w.horatmin, + w.tmax, + w.horatmax, + w.dir, + w.velmedia, + w.racha, + w.horaracha, + w.presMax, + w.horaPresMax, + w.presMin, + w.horaPresMin, + w.hrMedia, + w.hrMax, + w.horaHrMax, + w.hrMin, + w.horaHrMin, + w.sol +from {{ source('main', 'spain_aemet_weather_data') }} as w +left join {{ source('main', 'spain_aemet_stations_data') }} as s + on w.indicativo = s.indicativo diff --git a/dbt/models/spain_aemet_historical_weather_schema.yml b/dbt/models/spain_aemet_historical_weather_schema.yml new file mode 100644 index 0000000..c3c6a0b --- /dev/null +++ b/dbt/models/spain_aemet_historical_weather_schema.yml @@ -0,0 +1,9 @@ +version: 2 + +models: + - name: spain_aemet_historical_weather + description: "Historical weather data for Spain. Cleaned and augmented with station metadata." + config: + meta: + dagster: + group: spain_open_data