Woodwork 0.17.2 compatibility :( #3626
Changes from 23 commits
```diff
@@ -1,5 +1,6 @@
 """Component that imputes missing data according to a specified timeseries-specific imputation strategy."""
 import pandas as pd
+import woodwork as ww

 from evalml.pipelines.components.transformers import Transformer
 from evalml.utils import infer_feature_types
```
```diff
@@ -165,7 +166,10 @@ def transform(self, X, y=None):

         if self._interpolate_cols is not None:
             X_interpolate = X.ww[self._interpolate_cols]
-            imputed = X_interpolate.interpolate()
+            # TODO: Revert when pandas introduces Float64 dtype
+            imputed = X_interpolate.astype(
+                float,
+            ).interpolate()  # Cast to float because Int64 not handled
             imputed.bfill(inplace=True)  # Fill in the first value, if missing
             X_not_all_null[X_interpolate.columns] = imputed
```

> Review comment: Pandas is working on a Float64 datatype to go hand in hand with Int64 nullable integers and nullable booleans. When that becomes a thing, we can get rid of this, as Woodwork will probably infer Float64 like it does the other nullable types.

> Review comment: do we have an issue filed to track this?

> Review comment: The problem here is that pandas' interpolate won't run on the new nullable integer. They are tracking this and I commented on the relevant issue pandas-dev/pandas#40252
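The workaround discussed above can be seen in isolation. This is an illustrative sketch with made-up data, not the component's actual code: interpolating a nullable-integer column by first casting it to plain float, then back-filling any leading gap.

```python
import pandas as pd

# A column woodwork now types as nullable integer: pd.NA, not np.nan.
s = pd.Series([1, pd.NA, 3, pd.NA], dtype="Int64")

# Older pandas cannot interpolate the Int64 extension dtype directly
# (tracked in pandas-dev/pandas#40252), so cast to plain float first.
imputed = s.astype(float).interpolate()
imputed = imputed.bfill()  # fill a leading missing value, if any

print(imputed.tolist())  # [1.0, 2.0, 3.0, 3.0]
```

Note that the result is float64 rather than Int64, which is exactly why the PR drops the schema re-initialization later in `transform`.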
```diff
@@ -178,10 +182,9 @@ def transform(self, X, y=None):
             y_imputed = y.bfill()
             y_imputed.pad(inplace=True)
         elif self._impute_target == "interpolate":
-            y_imputed = y.interpolate()
+            # TODO: Revert when pandas introduces Float64 dtype
+            y_imputed = y.astype(float).interpolate()
             y_imputed.bfill(inplace=True)
-            y_imputed.ww.init(schema=y.ww.schema)

         X_not_all_null.ww.init(schema=X_schema)
+        y_imputed = ww.init_series(y_imputed)

         return X_not_all_null, y_imputed
```

> Review comment: Had to get rid of this because of the casting to float, and interpolation was trying to overwrite the new float dtype with the original Int64 dtype. We might need to add some testing for this...

> Review comment: Covered as part of
```diff
@@ -288,6 +288,7 @@ def transform(self, X, y=None):
         delayed_features = self._compute_delays(X_ww, y)
         rolling_means = self._compute_rolling_transforms(X_ww, y, original_features)
         features = ww.concat_columns([delayed_features, rolling_means])
+        features.ww.init()

         return features.ww.drop(original_features)

     def fit_transform(self, X, y=None):
```

> Review comment: What was happening here was that the delayed_features were half np.NaN and half pd.NA. Re-init'ing standardized the columns.

> Review comment: Can we reuse any part of the initial schema, or use what we know about the dtypes of these features here, to reduce the amount of type reinference this might introduce?
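The "half np.NaN and half pd.NA" mismatch matters because the two missing-value sentinels are not interchangeable. A minimal illustration with made-up series (not the featurizer's actual data):

```python
import numpy as np
import pandas as pd

# A float64 column marks missing values with np.nan...
a = pd.Series([1.0, np.nan])
# ...while a nullable Int64 column marks them with pd.NA.
b = pd.Series([1, pd.NA], dtype="Int64")

# Both register as missing, but the sentinels behave differently:
print(a.isna().tolist())  # [False, True]
print(b.isna().tolist())  # [False, True]
print(np.nan == np.nan)   # False — NaN never compares equal to itself
print(pd.NA | True)       # True — pd.NA follows three-valued logic
```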
```diff
@@ -4016,7 +4016,7 @@ def test_automl_baseline_pipeline_predictions_and_scores_time_series(problem_typ
     expected_predictions = pd.Series(expected_predictions, name="target_delay_1")

     preds = baseline.predict(X_validation, None, X_train, y_train)
-    pd.testing.assert_series_equal(expected_predictions, preds)
+    pd.testing.assert_series_equal(expected_predictions, preds, check_dtype=False)

     if is_classification(problem_type):
         pd.testing.assert_frame_equal(
             expected_predictions_proba,
```

> Review comment: The failure here was during a time series regression problem, where the predictions came out as integers rather than floats. Given that it's a time series regression problem, I would expect the predictions to be floats.

> Review comment: This worries me slightly - are there any scenarios where this would cause us issues down the road?

> Review comment: a little confused here - is
```diff
@@ -288,10 +288,17 @@ def test_datetime_featurizer_with_inconsistent_date_format():
     answer = pd.DataFrame(
         {
             "numerical": [0] * len(dates),
-            "date col_year": [2021.0] * 18 + [np.nan] * 2,
-            "date col_month": [9.0] * 18 + [np.nan] * 2,
+            "date col_year": [2021] * 18 + [pd.NA] * 2,
+            "date col_month": [9] * 18 + [pd.NA] * 2,
             "date col_day_of_week": expected_dow,
-            "date col_hour": [0.0] * 18 + [np.nan] * 2,
+            "date col_hour": [0] * 18 + [pd.NA] * 2,
         },
-    )
+    ).astype(
+        dtype={
+            "date col_year": "Int64",
+            "date col_month": "Int64",
+            "date col_day_of_week": "Int64",
+            "date col_hour": "Int64",
+        },
+    )
     pd.testing.assert_frame_equal(answer, expected)
```

> Review comment: The new pandas nullable types return
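The `.astype` call with a dtype dict in the test above follows a general pattern: a Python list mixing ints and `pd.NA` lands in a DataFrame as an object column, and an explicit cast turns it into the nullable `Int64` the featurizer now emits. A small sketch with hypothetical column names:

```python
import pandas as pd

# Mixed int/pd.NA lists produce object columns by default; casting with a
# dtype dict converts selected columns to the nullable Int64 dtype.
df = pd.DataFrame(
    {
        "date col_year": [2021, 2021, pd.NA],
        "date col_hour": [0, 0, pd.NA],
    },
).astype({"date col_year": "Int64", "date col_hour": "Int64"})

print(df.dtypes.tolist())  # [Int64Dtype(), Int64Dtype()]
```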
```diff
@@ -16,8 +16,7 @@ def test_drop_rows_transformer():
     X_expected = pd.DataFrame(
         {"a column": [3], "another col": [6]},
         index=[2],
-        dtype=np.float64,
-    )
+    ).astype("Int64")

     drop_rows_transformer = DropNaNRowsTransformer()
     drop_rows_transformer.fit(X)
     transformed_X, _ = drop_rows_transformer.transform(X)
```

> Review comment: Instead of returning the numpy version of nan, which is a float, woodwork inference returns the new
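The dtype change in this test can be reproduced outside the transformer. This is an illustrative sketch with made-up data, not `DropNaNRowsTransformer` itself: once NaN rows are dropped, the surviving integers no longer need to live in a float64 column.

```python
import numpy as np
import pandas as pd

X = pd.DataFrame({"a column": [1, 2, 3], "another col": [np.nan, np.nan, 6.0]})

# Dropping NaN rows used to leave float64 columns behind; with the
# nullable types, the remaining whole numbers can be expressed as Int64.
X_t = X.dropna().astype("Int64")

print(X_t.index.tolist())  # [2]
print(X_t.dtypes.tolist())  # [Int64Dtype(), Int64Dtype()]
```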
```diff
@@ -8,9 +8,11 @@
 from pandas.testing import assert_frame_equal
 from woodwork.logical_types import (
     Boolean,
+    BooleanNullable,
     Categorical,
     Double,
     Integer,
+    IntegerNullable,
     NaturalLanguage,
 )
```
```diff
@@ -88,14 +90,14 @@ def test_numeric_only_input(imputer_test_data):
     expected = pd.DataFrame(
         {
             "int col": [0, 1, 2, 0, 3] * 4,
-            "float col": [0.0, 1.0, 0.0, -2.0, 5.0] * 4,
+            "float col": [0.1, 1.0, 0.0, -2.0, 5.0] * 4,
             "int with nan": [0.5, 1.0, 0.0, 0.0, 1.0] * 4,
-            "float with nan": [0.0, 1.0, 0, -1.0, 0.0] * 4,
+            "float with nan": [0.3, 1.0, 0.15, -1.0, 0.0] * 4,
         },
     )
     assert_frame_equal(transformed, expected, check_dtype=False)

-    imputer = Imputer()
+    imputer = Imputer(numeric_impute_strategy="median")
     transformed = imputer.fit_transform(X, y)
     assert_frame_equal(transformed, expected, check_dtype=False)
```

> Review comment: Both these changes resulted from woodwork (or pandas) reading the column and interpreting it as actually an integer, despite the decimals included. I know we've played fast and loose with this before, as I see many changes to tests in old PRs, like the one to upgrade to woodwork 0.31.0, that throw a decimal place into a column of integers. Might need to file an issue against WW to change this behavior and add testing.

> Review comment: Filed this
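The expected fill values in this test line up with plain pandas arithmetic. A quick check, using the same "float with nan" values as the test data, shows why the test pins the median strategy (the two strategies give distinct fills here):

```python
import numpy as np
import pandas as pd

# Same observed values as the "float with nan" column above.
s = pd.Series([0.3, 1.0, np.nan, -1.0, 0.0])

print(s.median())  # 0.15   — the fill under the median strategy
print(s.mean())    # ~0.075 — the fill under the default mean strategy
```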
```diff
@@ -158,14 +160,14 @@ def test_categorical_and_numeric_input(imputer_test_data):
             ),
             "int col": [0, 1, 2, 0, 3] * 4,
             "object col": pd.Series(["b", "b", "a", "c", "d"] * 4, dtype="category"),
-            "float col": [0.0, 1.0, 0.0, -2.0, 5.0] * 4,
+            "float col": [0.1, 1.0, 0.0, -2.0, 5.0] * 4,
             "bool col": [True, False, False, True, True] * 4,
             "categorical with nan": pd.Series(
                 ["0", "1", "0", "0", "3"] * 4,
                 dtype="category",
             ),
             "int with nan": [0.5, 1.0, 0.0, 0.0, 1.0] * 4,
-            "float with nan": [0.0, 1.0, 0, -1.0, 0.0] * 4,
+            "float with nan": [0.3, 1.0, 0.075, -1.0, 0.0] * 4,
             "object with nan": pd.Series(
                 ["b", "b", "b", "c", "b"] * 4,
                 dtype="category",
```
```diff
@@ -313,7 +315,7 @@ def test_imputer_fill_value(imputer_test_data):
                 ["fill", "1", "0", "0", "3"] * 4,
                 dtype="category",
             ),
-            "float with nan": [0.0, 1.0, -1, -1.0, 0.0] * 4,
+            "float with nan": [0.3, 1.0, -1, -1.0, 0.0] * 4,
             "object with nan": pd.Series(
                 ["b", "b", "fill", "c", "fill"] * 4,
                 dtype="category",
```
```diff
@@ -512,7 +514,7 @@ def test_imputer_all_bool_return_original(data_type, make_data_type):
 def test_imputer_bool_dtype_object(data_type, make_data_type):
     X = pd.DataFrame([True, np.nan, False, np.nan, True] * 4)
     y = pd.Series([1, 0, 0, 1, 0] * 4)
-    X_expected_arr = pd.DataFrame([True, True, False, True, True] * 4, dtype="category")
+    X_expected_arr = pd.DataFrame([True, True, False, True, True] * 4, dtype="boolean")

     X = make_data_type(data_type, X)
     y = make_data_type(data_type, y)
     imputer = Imputer()
```

> Review comment: This is the new woodwork behavior we would expect - the returning of the new
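The switch from `"category"` to the nullable `"boolean"` dtype in this expectation can be sketched with plain pandas. This is an illustrative approximation of most-frequent boolean imputation, not the `Imputer` component's actual implementation:

```python
import numpy as np
import pandas as pd

# An object column of bools with missing values, imputed with the most
# frequent value and stored as the nullable "boolean" dtype that newer
# woodwork infers where it previously produced "category".
s = pd.Series([True, np.nan, False, np.nan, True] * 4)
filled = s.fillna(s.mode()[0]).astype("boolean")

print(filled.dtype)         # boolean
print(filled[:5].tolist())  # [True, True, False, True, True]
```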
```diff
@@ -537,7 +539,7 @@ def test_imputer_multitype_with_one_bool(data_type, make_data_type):
         {
             "bool with nan": pd.Series(
                 [True, False, False, False, False] * 4,
-                dtype="category",
+                dtype="boolean",
             ),
             "bool no nan": pd.Series(
                 [False, False, False, False, True] * 4,
```
```diff
@@ -563,7 +565,9 @@ def test_imputer_int_preserved():
         transformed,
         pd.DataFrame(pd.Series([1, 2, 11, 14 / 3])),
     )
-    assert {k: type(v) for k, v in transformed.ww.logical_types.items()} == {0: Double}
+    assert {k: type(v) for k, v in transformed.ww.logical_types.items()} == {
+        0: IntegerNullable,
+    }

     X = pd.DataFrame(pd.Series([1, 2, 3, np.nan]))
     imputer = Imputer(numeric_impute_strategy="mean")
```

> Review comment: This is just how the new woodwork inference manifests in terms of woodwork dtypes. Before, none types were numpy.nan but are now pd.NA and, depending on the other values in the feature, will be the woodwork dtype of IntegerNullable or BooleanNullable.
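Why the logical type can stay integer-valued after imputation is easiest to see at the pandas level. A hedged sketch with made-up data (not the `Imputer` internals): filling a nullable Int64 column with a whole-number statistic keeps the Int64 dtype, which is what woodwork then reads back as `IntegerNullable` rather than `Double`.

```python
import pandas as pd

s = pd.Series([1, 2, 3, pd.NA], dtype="Int64")

# Filling with a whole number keeps the nullable integer dtype.
filled = s.fillna(int(s.mean()))

print(filled.tolist())  # [1, 2, 3, 2]
print(filled.dtype)     # Int64
```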
```diff
@@ -573,7 +577,9 @@ def test_imputer_int_preserved():
         pd.DataFrame(pd.Series([1, 2, 3, 2])),
         check_dtype=False,
     )
-    assert {k: type(v) for k, v in transformed.ww.logical_types.items()} == {0: Double}
+    assert {k: type(v) for k, v in transformed.ww.logical_types.items()} == {
+        0: IntegerNullable,
+    }

     X = pd.DataFrame(pd.Series([1, 2, 3, 4], dtype="int"))
     imputer = Imputer(numeric_impute_strategy="mean")
```
```diff
@@ -595,9 +601,9 @@ def test_imputer_bool_preserved(test_case, null_type):
         ]
         X = pd.DataFrame(pd.Series([True, False, True, null_type] * 4))
         expected = pd.DataFrame(
-            pd.Series([True, False, True, True] * 4, dtype="category"),
+            pd.Series([True, False, True, True] * 4, dtype="boolean"),
         )
-        expected_ww_dtype = Categorical
+        expected_ww_dtype = BooleanNullable
         check_dtype = True
     elif test_case == "boolean_without_null":
         X = pd.DataFrame(pd.Series([True, False, True, False] * 4))
```
```diff
@@ -219,15 +219,18 @@ def test_fit_transform_drop_all_nan_columns():
         "another_col": {"impute_strategy": "most_frequent"},
     }
     transformer = PerColumnImputer(impute_strategies=strategies)
-    X_expected_arr = pd.DataFrame({"some_nan": [0, 1, 0], "another_col": [0, 1, 2]})
+    X_expected_arr = pd.DataFrame(
+        {"some_nan": [0, 1, 0], "another_col": [0, 1, 2]},
+    ).astype({"some_nan": "Int64"})

     X_t = transformer.fit_transform(X)
     assert_frame_equal(X_expected_arr, X_t, check_dtype=False)
     # Check that original dataframe remains unchanged
     assert_frame_equal(
         X,
         pd.DataFrame(
             {
                 "all_nan": [np.nan, np.nan, np.nan],
-                "some_nan": [0.0, 1.0, 0.0],
+                "some_nan": [0, 1, 0],
                 "another_col": [0, 1, 2],
             },
         ),
```

> Review comment: The imputer returns the new nullable boolean.
```diff
@@ -259,7 +262,7 @@ def test_transform_drop_all_nan_columns():
         pd.DataFrame(
             {
                 "all_nan": [np.nan, np.nan, np.nan],
-                "some_nan": [0.0, 1.0, 0.0],
+                "some_nan": [0, 1, 0],
                 "another_col": [0, 1, 2],
             },
         ),
```
```diff
@@ -347,8 +350,9 @@ def test_per_column_imputer_column_subset():
     )
     X_expected.ww.init(
         logical_types={
-            "all_nan_not_included": "double",
-            "column_with_nan_included": "double",
+            "all_nan_not_included": "Double",
+            "column_with_nan_not_included": "IntegerNullable",
+            "column_with_nan_included": "IntegerNullable",
         },
     )
     X.ww.init(
```
```diff
@@ -362,11 +366,10 @@ def test_per_column_imputer_column_subset():
             {
                 "all_nan_not_included": [np.nan, np.nan, np.nan],
                 "all_nan_included": [np.nan, np.nan, np.nan],
-                "column_with_nan_not_included": [np.nan, 1, 0],
-                # Because of https://github.com/alteryx/evalml/issues/2055
-                "column_with_nan_included": [0.0, 1.0, 0.0],
+                "column_with_nan_not_included": [pd.NA, 1, 0],
+                "column_with_nan_included": [0, 1, 0],
             },
-        ),
+        ).astype({"column_with_nan_not_included": "Int64"}),
     )
```
> Review comment: This is a temporary addition due to lack of nullable types support within numpy.

> Review comment: do we have an issue filed to resolve this?

> Review comment: If there's a workaround for this error, why do we start off by attempting to use numpy? Are there downsides to just using `y.unique()` in all cases instead?

> Review comment: This is tied to this: #3649
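The `y.unique()` suggestion above works because the pandas-level API understands the nullable scalar, while numpy does not. A small sketch with made-up data (the claim about numpy is hedged in the comment; the assertion only exercises the pandas path):

```python
import pandas as pd

y = pd.Series([1, 0, 0, pd.NA], dtype="Int64")

# numpy has no concept of pd.NA: converting the nullable column for
# np.unique goes through an object array, where sorting pd.NA can fail.
# The pandas-level unique() handles the nullable scalar natively:
uniques = y.unique()

print(pd.isna(uniques).tolist())  # [False, False, True]
```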