Skip to content

Commit

Permalink
refactor: ♻️ Simplify HuggingFace dataset upload assets
Browse files Browse the repository at this point in the history
This change replaces multiple hard-coded asset functions for uploading datasets to HuggingFace with a single `create_hf_asset` function that generates these assets dynamically based on a list of dataset names. This approach significantly reduces code duplication and enhances maintainability by allowing for easy addition or removal of datasets without modifying the core asset creation logic.
  • Loading branch information
davidgasquez committed Mar 25, 2024
1 parent dcba743 commit 268ff0e
Showing 1 changed file with 22 additions and 51 deletions.
73 changes: 22 additions & 51 deletions datadex/assets/huggingface.py
Original file line number Diff line number Diff line change
@@ -1,60 +1,31 @@
import pandas as pd
from dagster import asset
from dagster import asset, AssetIn

from ..resources import HuggingFaceResource


@asset()
def hf_co2_data(
    co2_global_trend: pd.DataFrame,
    hf: HuggingFaceResource,
) -> None:
    """
    Publish the CO2 data to HuggingFace.

    Passes the ``co2_global_trend`` DataFrame to ``hf.upload_dataset`` under
    the dataset name "co2_global_trend".
    """
    target_name = "co2_global_trend"
    hf.upload_dataset(co2_global_trend, target_name)
def create_hf_asset(dataset_name: str):
    """
    Build an asset that uploads one dataset to HuggingFace.

    The generated asset is named ``"huggingface_" + dataset_name`` and
    receives the upstream asset called ``dataset_name`` as its ``data`` input.

    Args:
        dataset_name: Name of the upstream asset; also forwarded to
            ``hf.upload_dataset`` as the dataset name.

    Returns:
        The object produced by decorating the inner upload function with
        ``@asset``.
    """
    asset_name = "huggingface_" + dataset_name
    # Map the generic ``data`` parameter onto the dataset-specific upstream.
    upstream_inputs = {"data": AssetIn(dataset_name)}

    @asset(name=asset_name, ins=upstream_inputs)
    def hf_asset(data: pd.DataFrame, hf: HuggingFaceResource) -> None:
        """
        Upload data to HuggingFace.
        """
        hf.upload_dataset(data, dataset_name)

    return hf_asset

@asset()
def hf_spain_energy_demand(
    spain_energy_demand: pd.DataFrame,
    hf: HuggingFaceResource,
) -> None:
    """
    Publish the Spain energy demand data to HuggingFace.

    Passes the ``spain_energy_demand`` DataFrame to ``hf.upload_dataset``
    under the dataset name "spain_energy_demand".
    """
    target_name = "spain_energy_demand"
    hf.upload_dataset(spain_energy_demand, target_name)

# Names of the upstream datasets; one HuggingFace upload asset is generated
# for each entry.
datasets = [
    "co2_global_trend",
    "spain_energy_demand",
    "owid_energy_data",
    "owid_co2_data",
    "wikidata_asteroids",
    "threatened_animal_species",
    "climate",
]

@asset()
def hf_owid_energy_data(
    owid_energy_data: pd.DataFrame,
    hf: HuggingFaceResource,
) -> None:
    """
    Publish the Our World in Data energy data to HuggingFace.

    Passes the ``owid_energy_data`` DataFrame to ``hf.upload_dataset`` under
    the dataset name "owid_energy_data".
    """
    target_name = "owid_energy_data"
    hf.upload_dataset(owid_energy_data, target_name)


@asset()
def hf_owid_co2_data(
    owid_co2_data: pd.DataFrame,
    hf: HuggingFaceResource,
) -> None:
    """
    Publish the Our World in Data CO2 data to HuggingFace.

    Passes the ``owid_co2_data`` DataFrame to ``hf.upload_dataset`` under the
    dataset name "owid_co2_data".
    """
    target_name = "owid_co2_data"
    hf.upload_dataset(owid_co2_data, target_name)


@asset()
def hf_wikidata_asteroids(
    wikidata_asteroids: pd.DataFrame,
    hf: HuggingFaceResource,
) -> None:
    """
    Publish the Wikidata asteroids data to HuggingFace.

    Passes the ``wikidata_asteroids`` DataFrame to ``hf.upload_dataset`` under
    the dataset name "wikidata_asteroids".
    """
    target_name = "wikidata_asteroids"
    hf.upload_dataset(wikidata_asteroids, target_name)


@asset()
def hf_threatened_animal_species(
    threatened_animal_species: pd.DataFrame,
    hf: HuggingFaceResource,
) -> None:
    """
    Publish the IUCN Red List threatened animal species data to HuggingFace.

    Passes the ``threatened_animal_species`` DataFrame to
    ``hf.upload_dataset`` under the dataset name "threatened_animal_species".
    """
    target_name = "threatened_animal_species"
    hf.upload_dataset(threatened_animal_species, target_name)
# Generate one HuggingFace upload asset per entry in ``datasets``.
# A comprehension is used instead of an append loop so that no temporary
# loop variables (in particular a stray reference to the last asset) are left
# behind as module-level names.
assets = [create_hf_asset(dataset) for dataset in datasets]

0 comments on commit 268ff0e

Please sign in to comment.