-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
refactor: ♻️ Simplify HuggingFace dataset upload assets
This change replaces multiple hard-coded asset functions for uploading datasets to HuggingFace with a single `create_hf_asset` function that generates these assets dynamically based on a list of dataset names. This approach significantly reduces code duplication and enhances maintainability by allowing for easy addition or removal of datasets without modifying the core asset creation logic.
- Loading branch information
1 parent
dcba743
commit 268ff0e
Showing 1 changed file with 22 additions and 51 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,60 +1,31 @@ | ||
import pandas as pd | ||
from dagster import asset | ||
from dagster import asset, AssetIn | ||
|
||
from ..resources import HuggingFaceResource | ||
|
||
|
||
@asset()
def hf_co2_data(
    co2_global_trend: pd.DataFrame,
    hf: HuggingFaceResource,
) -> None:
    """
    Upload CO2 data to HuggingFace.

    Calls ``hf.upload_dataset`` with the ``co2_global_trend`` DataFrame and
    the name ``"co2_global_trend"``.
    """
    target_name = "co2_global_trend"
    hf.upload_dataset(co2_global_trend, target_name)
def create_hf_asset(dataset_name: str):
    """
    Build a Dagster asset that uploads one upstream dataset to HuggingFace.

    The generated asset is named ``huggingface_<dataset_name>`` and receives
    the upstream asset called ``dataset_name`` as its ``data`` input.

    Args:
        dataset_name: Name of the upstream asset to upload. The same string
            is passed to ``hf.upload_dataset`` as its second argument.

    Returns:
        The object produced by applying the ``@asset`` decorator to the
        inner upload function.
    """

    @asset(
        name=f"huggingface_{dataset_name}",
        ins={"data": AssetIn(dataset_name)},
        # Give each generated asset its own description; otherwise all of
        # them would fall back to the same generic docstring below.
        description=f"Upload {dataset_name} data to HuggingFace.",
    )
    def hf_asset(data: pd.DataFrame, hf: HuggingFaceResource) -> None:
        """
        Upload data to HuggingFace.
        """
        hf.upload_dataset(data, dataset_name)

    return hf_asset
|
||
@asset()
def hf_spain_energy_demand(
    spain_energy_demand: pd.DataFrame,
    hf: HuggingFaceResource,
) -> None:
    """
    Upload Spain energy demand data to HuggingFace.

    Calls ``hf.upload_dataset`` with the ``spain_energy_demand`` DataFrame
    and the name ``"spain_energy_demand"``.
    """
    target_name = "spain_energy_demand"
    hf.upload_dataset(spain_energy_demand, target_name)
|
||
# Names of the upstream assets to publish to HuggingFace. Each entry is
# handed to `create_hf_asset`, which uses it both as the input asset name
# and as the name passed to `hf.upload_dataset`.
datasets = [
    "co2_global_trend",
    "spain_energy_demand",
    "owid_energy_data",
    "owid_co2_data",
    "wikidata_asteroids",
    "threatened_animal_species",
    "climate",
]
|
||
@asset()
def hf_owid_energy_data(
    owid_energy_data: pd.DataFrame,
    hf: HuggingFaceResource,
) -> None:
    """
    Upload Our World in Data energy data to HuggingFace.

    Calls ``hf.upload_dataset`` with the ``owid_energy_data`` DataFrame and
    the name ``"owid_energy_data"``.
    """
    target_name = "owid_energy_data"
    hf.upload_dataset(owid_energy_data, target_name)
|
||
|
||
@asset()
def hf_owid_co2_data(
    owid_co2_data: pd.DataFrame,
    hf: HuggingFaceResource,
) -> None:
    """
    Upload Our World in Data CO2 data to HuggingFace.

    Calls ``hf.upload_dataset`` with the ``owid_co2_data`` DataFrame and the
    name ``"owid_co2_data"``.
    """
    target_name = "owid_co2_data"
    hf.upload_dataset(owid_co2_data, target_name)
|
||
|
||
@asset()
def hf_wikidata_asteroids(
    wikidata_asteroids: pd.DataFrame,
    hf: HuggingFaceResource,
) -> None:
    """
    Upload Wikidata asteroids data to HuggingFace.

    Calls ``hf.upload_dataset`` with the ``wikidata_asteroids`` DataFrame
    and the name ``"wikidata_asteroids"``.
    """
    target_name = "wikidata_asteroids"
    hf.upload_dataset(wikidata_asteroids, target_name)
|
||
|
||
@asset()
def hf_threatened_animal_species(
    threatened_animal_species: pd.DataFrame,
    hf: HuggingFaceResource,
) -> None:
    """
    Upload IUCN Red List threatened animal species data to HuggingFace.

    Calls ``hf.upload_dataset`` with the ``threatened_animal_species``
    DataFrame and the name ``"threatened_animal_species"``.
    """
    target_name = "threatened_animal_species"
    hf.upload_dataset(threatened_animal_species, target_name)
# Build one HuggingFace upload asset per entry in `datasets`. Using a
# comprehension avoids leaving loop temporaries bound at module level.
assets = [create_hf_asset(dataset) for dataset in datasets]