Skip to content

Commit

Permalink
Sort the unique values in the built-in label encoder so the value-to-index mapping stays consistent
Browse files: browse the repository at this point in the history
  • Loading branch information
manu-sj committed Jun 11, 2024
1 parent eef2cb5 commit 50e944c
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions python/hsfs/builtin_transformations.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,9 @@ def robust_scaler(feature: pd.Series, statistics=feature_statistics) -> pd.Serie

@udf(int)
def label_encoder(feature: pd.Series, statistics=feature_statistics) -> pd.Series:
unique_data = [
value for value in statistics.feature.extended_statistics["unique_values"]
]
unique_data = sorted(
[value for value in statistics.feature.extended_statistics["unique_values"]]
)
value_to_index = {value: index for index, value in enumerate(unique_data)}
return pd.Series(
[value_to_index[data] if not pd.isna(data) else np.nan for data in feature]
Expand Down

0 comments on commit 50e944c

Please sign in to comment.