Skip to content

Commit

Permalink
refactor: ⚡️ add jobs
Browse files Browse the repository at this point in the history
  • Loading branch information
davidgasquez committed Jan 18, 2024
1 parent 41e94f4 commit c84ccd2
Show file tree
Hide file tree
Showing 10 changed files with 58 additions and 36 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -136,4 +136,8 @@ logs/
data/**
!data/datasets/.gitkeep

# Quarto
/.quarto/

# Environment
.env
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
.DEFAULT_GOAL := run

run:
dagster asset materialize --select \* -m datadex.dag
dagster job execute -j all_assets_job -m datadex

dev:
dagster dev -m datadex.dag
dagster dev -m datadex

preview:
quarto preview portal
Expand Down
26 changes: 26 additions & 0 deletions datadex/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import os

from dagster import Definitions, load_assets_from_modules
from dagster_dbt import DbtCliResource, load_assets_from_dbt_project
from dagster_duckdb_pandas import DuckDBPandasIOManager

from . import assets, jobs

# Absolute directory of this package; the dbt project and the data folder are
# addressed relative to it (one level up).
_PACKAGE_DIR = os.path.dirname(os.path.abspath(__file__))

DBT_PROJECT_DIR = f"{_PACKAGE_DIR}/../dbt/"
DATA_DIR = f"{_PACKAGE_DIR}/../data/"

# The dbt project directory is also used as the profiles directory.
dbt = DbtCliResource(project_dir=DBT_PROJECT_DIR, profiles_dir=DBT_PROJECT_DIR)

dbt_assets = load_assets_from_dbt_project(
    project_dir=DBT_PROJECT_DIR,
    profiles_dir=DBT_PROJECT_DIR,
)
python_assets = load_assets_from_modules(modules=[assets])

# Resources shared by every asset: the dbt CLI wrapper and a DuckDB-backed
# pandas IO manager writing to ``local.duckdb`` inside the data directory.
resources = {
    "dbt": dbt,
    "io_manager": DuckDBPandasIOManager(database=f"{DATA_DIR}local.duckdb"),
}

# Single entry point exposing the dbt assets, the Python assets, the shared
# resources and the job defined in ``jobs``.
defs = Definitions(
    assets=[*dbt_assets, *python_assets],
    resources=resources,
    jobs=[jobs.all_assets_job],
)
4 changes: 2 additions & 2 deletions datadex/assets.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@


@asset
def raw_threatened_animal_species() -> pd.DataFrame:
def threatened_animal_species() -> pd.DataFrame:
p = Package(
"https://raw.githubusercontent.com/datonic/threatened-animal-species/main/datapackage.yaml"
)
return p.get_resource("threatened-species").to_pandas() # type: ignore


@asset
def raw_owid_co2_data() -> pd.DataFrame:
def owid_co2_data() -> pd.DataFrame:
co2_owid_url = (
"https://raw.githubusercontent.com/owid/co2-data/master/owid-co2-data.csv"
)
Expand Down
26 changes: 0 additions & 26 deletions datadex/dag.py

This file was deleted.

10 changes: 10 additions & 0 deletions datadex/jobs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from dagster import AssetSelection, define_asset_job, load_assets_from_modules

from . import assets

# NOTE: this module previously rebound ``assets`` to the result of
# ``load_assets_from_modules(...)``. That shadowed the imported ``assets``
# module and the resulting list was never used, so the rebinding was removed.
# ``AssetSelection.all()`` is resolved against the assets registered on the
# code location, so the job does not need its own copy of them.

# Job that materializes every asset known to the code location.
all_assets_job = define_asset_job(
    name="all_assets_job",
    selection=AssetSelection.all(),
)
8 changes: 8 additions & 0 deletions datadex/resources.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from dagster import ConfigurableResource


class HuggingFaceResource(ConfigurableResource):
    """Placeholder Dagster resource for Hugging Face.

    No configuration fields are declared yet and ``login`` is unimplemented.
    """

    # TODO: presumably the token should come from the HUGGINGFACE_TOKEN
    # environment variable once this resource is implemented -- confirm.
    # token: str = EnvVar("HUGGINGFACE_TOKEN")

    def login(self):
        """Not implemented yet; always raises ``NotImplementedError``."""
        raise NotImplementedError
2 changes: 1 addition & 1 deletion dbt/models/climate/climate_owid_co2_by_country.sql
Original file line number Diff line number Diff line change
@@ -1 +1 @@
select country, iso_code, year, co2 from {{ source("public", "raw_owid_co2_data") }}
select country, iso_code, year, co2 from {{ source("public", "owid_co2_data") }}
2 changes: 1 addition & 1 deletion dbt/models/climate/climate_theatened_animal_species.sql
Original file line number Diff line number Diff line change
@@ -1 +1 @@
select * from {{ source("public", "raw_threatened_animal_species") }}
select * from {{ source("public", "threatened_animal_species") }}
8 changes: 4 additions & 4 deletions dbt/models/sources.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@ sources:

- name: public
tables:
- name: raw_threatened_animal_species
- name: threatened_animal_species
meta:
dagster:
asset_key: ["raw_threatened_animal_species"]
- name: raw_owid_co2_data
asset_key: ["threatened_animal_species"]
- name: owid_co2_data
meta:
dagster:
asset_key: ["raw_owid_co2_data"]
asset_key: ["owid_co2_data"]

0 comments on commit c84ccd2

Please sign in to comment.