Skip to content

Commit

Permalink
feat: 🔥 moar world indicators
Browse files Browse the repository at this point in the history
  • Loading branch information
davidgasquez committed Mar 28, 2024
1 parent 8ffedbc commit 5f2a3d9
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 1 deletion.
67 changes: 67 additions & 0 deletions datadex/assets/indicators.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,31 @@
import io
import zipfile

import pandas as pd
import requests
from dagster import asset


# Character mapping applied to the lowercased input in a single pass.
# None deletes the character; multi-character values spell out the symbol.
# No replacement text contains a mapped character, so one pass yields the
# same result as applying the substitutions one after another.
_COLUMN_NAME_TRANSLATION = str.maketrans(
    {
        " ": "_",
        "-": "_",
        "(": None,
        ")": None,
        ",": None,
        ":": None,
        "'": None,
        "$": "dollar",
        "%": "percent",
        "+": "plus",
    }
)


def sanitize_string(s: str) -> str:
    """
    Sanitize a string to be used as a column name in a pandas DataFrame.

    The string is lowercased, spaces and hyphens become underscores,
    parentheses, commas, colons and apostrophes are removed, and ``$``,
    ``%`` and ``+`` are spelled out as ``dollar``, ``percent`` and ``plus``.
    Any other character is left unchanged.

    Args:
        s: The raw name to sanitize.

    Returns:
        The sanitized name.
    """

    return s.lower().translate(_COLUMN_NAME_TRANSLATION)


@asset
def owid_energy_data() -> pd.DataFrame:
"""
Expand All @@ -22,3 +46,46 @@ def owid_co2_data() -> pd.DataFrame:
"https://raw.githubusercontent.com/owid/co2-data/master/owid-co2-data.csv"
)
return pd.read_csv(co2_owid_url)


@asset
def world_bank_wdi() -> pd.DataFrame:
    """
    World Development Indicators (WDI) is the World Bank's premier compilation of cross-country comparable data on development.
    Bulk data download is available at https://datatopics.worldbank.org/world-development-indicators/

    Downloads the bulk CSV archive, reads ``WDICSV.csv`` from it and reshapes
    the data so that each row is a (country name, country code, year)
    combination and each indicator name becomes its own column.

    Returns:
        The reshaped DataFrame, with every column name passed through
        ``sanitize_string``.

    Raises:
        requests.HTTPError: If the download responds with an error status.
        requests.Timeout: If the server does not respond within the timeout.
        KeyError: If the archive does not contain ``WDICSV.csv``.
    """

    url = "https://databankfiles.worldbank.org/public/ddpext_download/WDI_CSV.zip"

    # Bound the wait (connect, read) so a stalled connection cannot hang the
    # run, and fail on HTTP errors here rather than handing an error page to
    # zipfile, which would only report a confusing "not a zip file".
    response = requests.get(url, timeout=(10, 120))
    response.raise_for_status()

    # Read the CSV member straight from the in-memory archive: nothing is
    # written to a shared temporary directory, and both the member and the
    # archive are closed as soon as the data is loaded.
    with zipfile.ZipFile(io.BytesIO(response.content)) as archive:
        with archive.open("WDICSV.csv") as csv_file:
            df = pd.read_csv(csv_file)

    # Wide-by-year -> long: every non-identifier column header becomes a
    # value in the "Year" column.
    melted_data = pd.melt(
        df,
        id_vars=["Country Name", "Country Code", "Indicator Name", "Indicator Code"],
        var_name="Year",
        value_name="Indicator Value",
    )

    # Long -> one column per Indicator Name. pivot_table aggregates with its
    # default function (mean) if a country/year/indicator appears more than once.
    pivoted_data = melted_data.pivot_table(
        index=["Country Name", "Country Code", "Year"],
        columns="Indicator Name",
        values="Indicator Value",
    ).reset_index()

    # Clean column names
    pivoted_data.columns = [sanitize_string(col) for col in pivoted_data.columns]

    return pivoted_data
9 changes: 8 additions & 1 deletion dbt/models/country_year_indicators.sql
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ with
wind_share_elec,
wind_share_energy
from {{ source("main", "owid_energy_data") }}
where iso_code is not null
),

owid_co2_data as (
Expand Down Expand Up @@ -215,8 +216,14 @@ with
trade_co2,
trade_co2_share
from {{ source("main", "owid_co2_data") }}
where iso_code is not null
),

world_bank_wdi as (
select * from {{ source("main", "world_bank_wdi") }}
)

select e.*, c.*
select e.*, c.*, w.*
from owid_energy_data as e
join owid_co2_data as c on e.iso_code = c.iso_code and e.year = c.year
join world_bank_wdi as w on e.iso_code = w.country_code and e.year = w.year
4 changes: 4 additions & 0 deletions dbt/models/sources.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,7 @@ sources:
meta:
dagster:
asset_key: ["owid_energy_data"]
- name: world_bank_wdi
meta:
dagster:
asset_key: ["world_bank_wdi"]

0 comments on commit 5f2a3d9

Please sign in to comment.