diff --git a/utils/python/hsfs_utils.py b/utils/python/hsfs_utils.py index d8e17aae15..b42cb14ae2 100644 --- a/utils/python/hsfs_utils.py +++ b/utils/python/hsfs_utils.py @@ -298,6 +298,7 @@ def offline_fg_materialization( .option("includeHeaders", "true") .option("failOnDataLoss", "false") .load() + .limit(5000000) ) # update offsets @@ -326,9 +327,6 @@ def offline_fg_materialization( == str(entity.subject["id"]) ) - # limit the number of records ingested - df = df.limit(5000000) - # deserialize dataframe so that it can be properly saved deserialized_df = engine.get_instance()._deserialize_from_avro(entity, df)