Skip to content

Commit

Permalink
feat: 🚧 force assets into one parquet file in HF
Browse files Browse the repository at this point in the history
  • Loading branch information
davidgasquez committed Apr 2, 2024
1 parent 6b1cdda commit b85692b
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 1 deletion.
4 changes: 4 additions & 0 deletions datadex/assets/spain.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ def spain_aemet_stations_data(aemet_api: AEMETAPI) -> pd.DataFrame:

df = df.drop(columns=["latitude_sign", "longitude_sign"])

+ df = df.convert_dtypes(dtype_backend="pyarrow")

return df


Expand Down Expand Up @@ -144,4 +146,6 @@ def spain_aemet_weather_data(
df[float_columns] = df[float_columns].apply(lambda x: x.str.replace(",", "."))
df[float_columns] = df[float_columns].apply(pd.to_numeric, errors="coerce")

+ df = df.convert_dtypes(dtype_backend="pyarrow")

return df
4 changes: 3 additions & 1 deletion datadex/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ def login(self):
def upload_dataset(self, dataset, name):
self.login()
dataset = Dataset.from_pandas(dataset, split=NamedSplit("main"))
- r = dataset.push_to_hub("davidgasquez/" + name)
+ r = dataset.push_to_hub(
+ "davidgasquez/" + name, max_shard_size="50000MB", num_shards=1
+ )
return r


Expand Down

0 comments on commit b85692b

Please sign in to comment.