diff --git a/python/hsfs/builtin_transformations.py b/python/hsfs/builtin_transformations.py index 421a04cffe..9e2daa0d24 100644 --- a/python/hsfs/builtin_transformations.py +++ b/python/hsfs/builtin_transformations.py @@ -44,9 +44,9 @@ def robust_scaler(feature: pd.Series, statistics=feature_statistics) -> pd.Serie @udf(int) def label_encoder(feature: pd.Series, statistics=feature_statistics) -> pd.Series: - unique_data = [ - value for value in statistics.feature.extended_statistics["unique_values"] - ] + unique_data = sorted( + [value for value in statistics.feature.extended_statistics["unique_values"]] + ) value_to_index = {value: index for index, value in enumerate(unique_data)} return pd.Series( [value_to_index[data] if not pd.isna(data) else np.nan for data in feature]