Skip to content

Commit

Permalink
Fixed imputers.
Browse files Browse the repository at this point in the history
  • Loading branch information
chukarsten committed Jul 27, 2022
1 parent 4e47f78 commit a3aa645
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 2 deletions.
2 changes: 1 addition & 1 deletion evalml/tests/component_tests/test_per_column_imputer.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ def test_transform_drop_all_nan_columns():
pd.DataFrame(
{
"all_nan": [np.nan, np.nan, np.nan],
"some_nan": [0.0, 1.0, 0.0],
"some_nan": [0, 1, 0],
"another_col": [0, 1, 2],
},
),
Expand Down
7 changes: 7 additions & 0 deletions evalml/tests/component_tests/test_simple_imputer.py
Original file line number Diff line number Diff line change
Expand Up @@ -531,6 +531,7 @@ def test_simple_imputer_ignores_natural_language(

if has_nan == "has_nan":
X_df.iloc[-1, :] = None
X_df.astype({"int col": "Int64"})
X_df.ww.init()
y = pd.Series([x for x in range(X_df.shape[1])])

Expand All @@ -551,10 +552,16 @@ def test_simple_imputer_ignores_natural_language(
if numeric_impute_strategy == "mean" and has_nan == "has_nan":
ans = X_df.mean()
ans["natural language col"] = pd.NA
X_df = X_df.astype(
{"int col": float},
) # Convert to float as the imputer will do this as we're requesting the mean
X_df.iloc[-1, :] = ans
elif numeric_impute_strategy == "median" and has_nan == "has_nan":
ans = X_df.median()
ans["natural language col"] = pd.NA
X_df = X_df.astype(
{"int col": float},
) # Convert to float as the imputer will do this as we're requesting the median
X_df.iloc[-1, :] = ans
elif numeric_impute_strategy == "constant" and has_nan == "has_nan":
X_df.iloc[-1, 0:2] = fill_value
Expand Down
2 changes: 1 addition & 1 deletion evalml/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1982,7 +1982,7 @@ def imputer_test_data():
),
"int col": [0, 1, 2, 0, 3] * 4,
"object col": ["b", "b", "a", "c", "d"] * 4,
"float col": [0.0, 1.0, 0.0, -2.0, 5.0] * 4,
"float col": [0.1, 1.0, 0.0, -2.0, 5.0] * 4,
"bool col": [True, False, False, True, True] * 4,
"categorical with nan": pd.Series(
[np.nan, "1", "0", "0", "3"] * 4,
Expand Down

0 comments on commit a3aa645

Please sign in to comment.