diff --git a/utils/python/hsfs_utils.py b/utils/python/hsfs_utils.py index 5149da6f6c..2696419859 100644 --- a/utils/python/hsfs_utils.py +++ b/utils/python/hsfs_utils.py @@ -298,7 +298,6 @@ def offline_fg_materialization( .option("includeHeaders", "true") .option("failOnDataLoss", "false") .load() - .limit(5000000) ) # filter only the necassary entries @@ -315,6 +314,9 @@ def offline_fg_materialization( == str(entity.subject["id"]) ) + # limit the number of records ingested + df = df.limit(5000000) + # deserialize dataframe so that it can be properly saved deserialized_df = engine.get_instance()._deserialize_from_avro(entity, df)