Skip to content

Commit

Permalink
feat: ⚡️ update datonic assets
Browse files Browse the repository at this point in the history
  • Loading branch information
davidgasquez committed Jan 24, 2024
1 parent 1e320a8 commit f9bdcde
Show file tree
Hide file tree
Showing 14 changed files with 213 additions and 600 deletions.
5 changes: 2 additions & 3 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,11 @@
"quarto.quarto",
"redhat.vscode-yaml",
"tamasfe.even-better-toml",
"visualstudioexptteam.vscodeintellicode",
"Evidence.sqltools-duckdb-driver"
"visualstudioexptteam.vscodeintellicode"
]
}
},
"postCreateCommand": "pip install -e '.[dev]' && npm install [email protected] && exit 0",
"postCreateCommand": "pip install -e '.[dev]'",
"portsAttributes": {
"3000": {
"label": "Dagster"
Expand Down
5 changes: 4 additions & 1 deletion datadex/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@

resources = {
"dbt": dbt,
"io_manager": DuckDBPandasIOManager(database=DATA_DIR + "local.duckdb"),
"io_manager": DuckDBPandasIOManager(
database=DATA_DIR + "local.duckdb",
# connection_config={"pandas_analyze_sample": 0},
),
}

defs = Definitions(
Expand Down
79 changes: 73 additions & 6 deletions datadex/assets.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,86 @@
import io
import os

import pandas as pd
from dagster import asset
from frictionless import Package
import requests
from dagster import AssetExecutionContext, asset


@asset
def threatened_animal_species() -> pd.DataFrame:
p = Package(
"https://raw.githubusercontent.com/datonic/threatened-animal-species/main/datapackage.yaml"
def threatened_animal_species(context: AssetExecutionContext) -> pd.DataFrame:
    """
    Species records from the IUCN Red List API (v3), fetched page by page.

    The API token is read from the IUCNREDLIST_TOKEN environment variable.
    Pagination stops at the first page that returns an empty result or a
    non-200 status. The infra_rank, infra_name, population and
    main_common_name columns are dropped from the returned frame.

    Raises:
        RuntimeError: if the token is not set, or if no records were fetched.
    """
    API_ENDPOINT = "https://apiv3.iucnredlist.org/api/v3"
    TOKEN = os.getenv("IUCNREDLIST_TOKEN")
    if not TOKEN:
        # Fail early instead of sending the literal string "None" as a token.
        raise RuntimeError("IUCNREDLIST_TOKEN environment variable is not set")

    # NOTE(review): the IUCN v3 documentation example requests page 0 --
    # confirm that starting at 1 does not skip the first page of species.
    page = 1
    all_results = []

    while True:
        # Pass the token through `params` so it is URL-encoded, and bound the
        # wait so a stalled connection cannot hang the run forever.
        r = requests.get(
            f"{API_ENDPOINT}/species/page/{page}",
            params={"token": TOKEN},
            timeout=60,
        )
        context.log.info(f"Page {page} status code: {r.status_code}")
        if r.status_code != 200:
            context.log.warning(
                f"Stopping pagination at page {page}: "
                f"unexpected status code {r.status_code}"
            )
            break
        results = r.json()["result"]  # parse the body once per page
        if not results:
            break
        all_results.extend(results)
        page += 1

    if not all_results:
        # Without this guard, dropping columns from an empty frame fails with
        # an opaque KeyError that hides the real cause.
        raise RuntimeError("No species records were fetched from the IUCN API")

    return pd.DataFrame(all_results).drop(
        columns=["infra_rank", "infra_name", "population", "main_common_name"]
    )
return p.get_resource("threatened-species").to_pandas() # type: ignore


@asset
def owid_energy_data() -> pd.DataFrame:
    """
    Our World in Data energy dataset, loaded as-is from its CSV on GitHub.
    """
    return pd.read_csv(
        "https://raw.githubusercontent.com/owid/energy-data/master/owid-energy-data.csv"
    )


@asset
def owid_co2_data() -> pd.DataFrame:
    """
    Our World in Data CO2 dataset, loaded as-is from its CSV on GitHub.
    """
    return pd.read_csv(
        "https://raw.githubusercontent.com/owid/co2-data/master/owid-co2-data.csv"
    )


@asset
def co2_global_trend() -> pd.DataFrame:
    """
    NOAA/ESRL trends in atmospheric carbon dioxide, read from the published CSV.
    """
    # The first 24 rows of the file are skipped before parsing.
    return pd.read_csv(
        "https://gml.noaa.gov/webdata/ccgg/trends/co2/co2_trend_gl.csv",
        skiprows=24,
    )


@asset
def wikidata_asteroids() -> pd.DataFrame:
    """
    Wikidata asteroids data.

    Runs a SPARQL query against the Wikidata query service for asteroids with
    their discovery date and discoverer, newest discovery first, and parses
    the CSV response into a DataFrame.

    Raises:
        requests.HTTPError: if the query service returns an error status.
    """
    url = "https://query.wikidata.org/sparql"
    query = """
SELECT
?asteroidLabel
?discovered
?discovererLabel
WHERE {
?asteroid wdt:P31 wd:Q3863; # Retrieve instances of "asteroid"
wdt:P61 ?discoverer; # Retrieve discoverer of the asteroid
wdt:P575 ?discovered; # Retrieve discovered date of the asteroid
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
ORDER BY DESC(?discovered)
"""

    response = requests.get(
        url,
        headers={"Accept": "text/csv"},
        params={"query": query},
        timeout=120,  # bound the wait instead of hanging on a stalled query
    )
    # Surface throttling or query-timeout responses as errors rather than
    # parsing their error body as if it were CSV data.
    response.raise_for_status()

    return pd.read_csv(io.StringIO(response.content.decode("utf-8")))
4 changes: 1 addition & 3 deletions dbt/dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,4 @@ target-path: target

models:
datadex:
+materialized: external
+options:
codec: zstd
+materialized: table
18 changes: 18 additions & 0 deletions dbt/models/climate.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
-- Joins yearly solar electricity totals with the yearly average CO2 trend.
with
-- Solar electricity summed per year, restricted to rows that have an
-- iso_code and a solar_electricity value, from 2014 onward.
energy_data as (
select year, sum(solar_electricity) as solar_electricity
from {{ source("public", "owid_energy_data") }}
where iso_code is not null and solar_electricity is not null and year >= 2014
group by year
),
-- Average of the trend column per year.
co2_global_trend as (
select year, avg(trend) as co2_trend
from {{ source("public", "co2_global_trend") }}
group by year
)
-- One row per energy_data year; because of the left join, co2_trend is null
-- for years that have no matching row in co2_global_trend.
select
energy_data.year,
energy_data.solar_electricity,
co2_global_trend.co2_trend as co2_trend
from energy_data
left join co2_global_trend on energy_data.year = co2_global_trend.year
9 changes: 0 additions & 9 deletions dbt/models/climate/climate_co2_global_trend.sql

This file was deleted.

1 change: 0 additions & 1 deletion dbt/models/climate/climate_energy.sql

This file was deleted.

1 change: 0 additions & 1 deletion dbt/models/climate/climate_owid_co2_by_country.sql

This file was deleted.

1 change: 0 additions & 1 deletion dbt/models/climate/climate_theatened_animal_species.sql

This file was deleted.

21 changes: 8 additions & 13 deletions dbt/models/sources.yml
Original file line number Diff line number Diff line change
@@ -1,19 +1,6 @@
version: 2

sources:
- name: web
tables:
- name: energy
description: Key energy metrics maintained by Our World in Data
meta:
source: https://ourworldindata.org/energy
external_location: https://raw.githubusercontent.com/owid/energy-data/master/owid-energy-data.csv
- name: co2_global_trend
description: Trends in Atmospheric Carbon Dioxide
meta:
source: https://gml.noaa.gov
external_location: https://gml.noaa.gov/webdata/ccgg/trends/co2/co2_trend_gl.csv

- name: public
tables:
- name: threatened_animal_species
Expand All @@ -24,3 +11,11 @@ sources:
meta:
dagster:
asset_key: ["owid_co2_data"]
- name: owid_energy_data
meta:
dagster:
asset_key: ["owid_energy_data"]
- name: co2_global_trend
meta:
dagster:
asset_key: ["co2_global_trend"]
3 changes: 0 additions & 3 deletions portal/_quarto.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,6 @@ website:
- text: dbt Docs
icon: sun
file: dbt
- text: Dashboard
icon: clipboard-data
href: dashboard.html
- text: Knowledge Base
icon: book
href: notebooks
Expand Down
Loading

0 comments on commit f9bdcde

Please sign in to comment.