Skip to content

Commit

Permalink
refactor: 🔄 remove outdated assets and simplify data processing
Browse files Browse the repository at this point in the history
- Deleted outdated assets for OWID energy and CO2 data retrieval.
- Simplified World Bank WDI data processing by removing unnecessary pivot & renaming steps.
- Removed `slugify` dependency for column name cleaning.
  • Loading branch information
davidgasquez committed Oct 26, 2024
1 parent b120d89 commit d985543
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 83 deletions.
59 changes: 0 additions & 59 deletions datadex/assets/indicators.py

This file was deleted.

27 changes: 3 additions & 24 deletions datadex/indicators/assets.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import dagster as dg
import httpx
import polars as pl
-from slugify import slugify


@dg.asset()
Expand Down Expand Up @@ -49,32 +48,12 @@ def world_bank_wdi() -> pl.DataFrame:
df = pl.read_csv("/tmp/WDICSV.csv")

# Reshape the dataframe
-df = df.melt(
-id_vars=["Country Name", "Country Code", "Indicator Name", "Indicator Code"],
+df = df.unpivot(
+index=["Country Name", "Country Code", "Indicator Name", "Indicator Code"],
value_name="Indicator Value",
variable_name="Year",
)

-# Make one column per Indicator Name
-df = df.pivot(
-index=["Country Name", "Country Code", "Year"],
-values="Indicator Value",
-on="Indicator Value",
-)
-
-# Cast to floats
-df = df.select(
-[
-pl.col("Country Name"),
-pl.col("Country Code"),
-pl.col("Year").cast(pl.Int32),
-*[pl.col(col).cast(pl.Float32) for col in df.columns[3:]],
-]
-)
-
-# Clean column names
-df = df.rename(
-lambda column_name: slugify(column_name.replace("%", "percent"), separator="_")
-)
+df = df.with_columns(pl.col("Year").cast(pl.Int32))

return df

0 comments on commit d985543

Please sign in to comment.