Skip to content

Commit

Permalink
feat: 🎨 clean models
Browse files Browse the repository at this point in the history
  • Loading branch information
davidgasquez committed Apr 2, 2024
1 parent f890057 commit 8b75c0b
Show file tree
Hide file tree
Showing 5 changed files with 86 additions and 4 deletions.
3 changes: 1 addition & 2 deletions datadex/assets/huggingface.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@ def hf_asset(data: pd.DataFrame, hf: HuggingFaceResource) -> None:
"threatened_animal_species",
"country_year_indicators",
"spain_ipc",
"spain_aemet_stations",
"spain_aemet_weather_data",
"spain_aemet_historical_weather",
]

assets = []
Expand Down
39 changes: 37 additions & 2 deletions datadex/assets/spain.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,33 @@ def spain_ipc() -> pd.DataFrame:


@asset(group_name="spain_open_data")
def spain_aemet_stations(aemet_api: AEMETAPI) -> pd.DataFrame:
def spain_aemet_stations_data(aemet_api: AEMETAPI) -> pd.DataFrame:
    """
    Spain AEMET stations data.

    Loads every station returned by ``aemet_api.get_all_stations()`` into a
    DataFrame and converts the textual ``latitud`` / ``longitud`` columns to
    signed floats (north and east positive, south and west negative).

    Args:
        aemet_api: Resource exposing ``get_all_stations()``; its result is
            passed straight to ``pd.DataFrame``.

    Returns:
        The stations frame with numeric ``latitud`` and ``longitud``.
        Values that cannot be parsed, or whose trailing hemisphere letter is
        not one of the expected pair, become NaN.
    """
    df = pd.DataFrame(aemet_api.get_all_stations())

    df["latitud"] = _aemet_coordinate_to_degrees(df["latitud"], "N", "S")
    df["longitud"] = _aemet_coordinate_to_degrees(df["longitud"], "E", "W")

    return df


def _aemet_coordinate_to_degrees(raw, positive, negative):
    """Convert ``<number><hemisphere letter>`` strings to signed degrees."""
    text = raw.str.strip()

    # Only the two expected letters carry a sign. Anything else (missing
    # value, unexpected suffix) maps to NaN instead of being silently
    # treated as the negative hemisphere.
    hemisphere = text.str[-1].str.upper()
    sign = hemisphere.map({positive: 1.0, negative: -1.0}).astype(float)

    digits = text.str[:-1]
    decimal = pd.to_numeric(
        digits.str.replace(",", ".", regex=False), errors="coerce"
    )

    # NOTE(review): AEMET's station inventory appears to publish coordinates
    # as DDMMSS plus a hemisphere letter (e.g. "431825N") -- confirm against
    # the API docs. A bare six-digit value can never be valid decimal
    # degrees, so it is decoded as degrees/minutes/seconds; every other
    # value keeps the decimal-comma parsing.
    packed = digits.str.fullmatch(r"[0-9]{6}", na=False)
    degrees = decimal // 10000
    minutes = (decimal // 100) % 100
    seconds = decimal % 100
    from_dms = degrees + minutes / 60 + seconds / 3600

    return from_dms.where(packed, decimal) * sign


Expand All @@ -107,6 +127,21 @@ def spain_aemet_weather_data(

df = pd.concat([df, mdf], ignore_index=True)

# df["fecha"] = pd.to_datetime(df["fecha"], format="%Y-%m-%d")
df["fecha"] = pd.to_datetime(df["fecha"], format="%Y-%m-%d")

float_columns = [
"prec",
"presMax",
"presMin",
"racha",
"sol",
"tmax",
"tmed",
"tmin",
"velmedia",
]

df[float_columns] = df[float_columns].apply(lambda x: x.str.replace(",", "."))
df[float_columns] = df[float_columns].apply(pd.to_numeric, errors="coerce")

return df
8 changes: 8 additions & 0 deletions dbt/models/sources.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,11 @@ sources:
meta:
dagster:
asset_key: ["world_bank_wdi"]
- name: spain_aemet_weather_data
meta:
dagster:
asset_key: ["spain_aemet_weather_data"]
- name: spain_aemet_stations_data
meta:
dagster:
asset_key: ["spain_aemet_stations_data"]
31 changes: 31 additions & 0 deletions dbt/models/spain_aemet_historical_weather.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
-- One output row per row of the weather source, with the station coordinates
-- attached. latitud/longitud are read from the stations source; every other
-- column is read from the weather source. The left join keeps weather rows
-- that have no matching station (their coordinates come out as null).
with

weather as (
    select * from {{ source('main', 'spain_aemet_weather_data') }}
),

stations as (
    select * from {{ source('main', 'spain_aemet_stations_data') }}
)

select
    cast(weather.fecha as date) as fecha,
    weather.indicativo,
    weather.nombre,
    weather.provincia,
    stations.latitud,
    stations.longitud,
    weather.altitud,
    weather.tmed,
    weather.prec,
    weather.tmin,
    weather.horatmin,
    weather.tmax,
    weather.horatmax,
    weather.dir,
    weather.velmedia,
    weather.racha,
    weather.horaracha,
    weather.presMax,
    weather.horaPresMax,
    weather.presMin,
    weather.horaPresMin,
    weather.hrMedia,
    weather.hrMax,
    weather.horaHrMax,
    weather.hrMin,
    weather.horaHrMin,
    weather.sol
from weather
left join stations
    on weather.indicativo = stations.indicativo
9 changes: 9 additions & 0 deletions dbt/models/spain_aemet_historical_weather_schema.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# dbt properties for the spain_aemet_historical_weather model: sets its
# description and the group recorded under the dagster meta key.
version: 2

models:
- name: spain_aemet_historical_weather
description: "Historical weather data for Spain. Cleaned and augmented with station metadata."
config:
meta:
dagster:
# Same group name the Python assets in spain.py pass as group_name.
group: spain_open_data

0 comments on commit 8b75c0b

Please sign in to comment.