From 243cfea089ccea20bf2d546f8eaa6783a8121e48 Mon Sep 17 00:00:00 2001 From: bubriks Date: Wed, 4 Dec 2024 15:26:21 +0200 Subject: [PATCH] move limit back --- utils/python/hsfs_utils.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/utils/python/hsfs_utils.py b/utils/python/hsfs_utils.py index d8e17aae15..b42cb14ae2 100644 --- a/utils/python/hsfs_utils.py +++ b/utils/python/hsfs_utils.py @@ -298,6 +298,7 @@ def offline_fg_materialization( .option("includeHeaders", "true") .option("failOnDataLoss", "false") .load() + .limit(5000000) ) # update offsets @@ -326,9 +327,6 @@ def offline_fg_materialization( == str(entity.subject["id"]) ) - # limit the number of records ingested - df = df.limit(5000000) - # deserialize dataframe so that it can be properly saved deserialized_df = engine.get_instance()._deserialize_from_avro(entity, df)