From 0baffcdacccf1672daeaf0b2d9e9a58a6cda8beb Mon Sep 17 00:00:00 2001 From: Loic Diridollou Date: Wed, 20 Nov 2024 18:14:43 -0500 Subject: [PATCH 1/4] GH1037 Remove na_sentinel from factorize methods --- pandas-stubs/core/arrays/base.pyi | 5 +++-- tests/test_pandas.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas-stubs/core/arrays/base.pyi b/pandas-stubs/core/arrays/base.pyi index 9a632700..62fe63bc 100644 --- a/pandas-stubs/core/arrays/base.pyi +++ b/pandas-stubs/core/arrays/base.pyi @@ -42,8 +42,9 @@ class ExtensionArray: def shift(self, periods: int = ..., fill_value: object = ...) -> Self: ... def unique(self): ... def searchsorted(self, value, side: str = ..., sorter=...): ... - # TODO: remove keyword-only when pandas removed na_sentinel - def factorize(self, *, use_na_sentinel: bool = ...) -> tuple[np.ndarray, Self]: ... + def factorize( + self, sort: bool = ..., use_na_sentinel: bool = ... + ) -> tuple[np.ndarray, Self]: ... def repeat(self, repeats, axis=...): ... def take( self, diff --git a/tests/test_pandas.py b/tests/test_pandas.py index cafa4fba..7411ac81 100644 --- a/tests/test_pandas.py +++ b/tests/test_pandas.py @@ -790,7 +790,7 @@ def test_lreshape() -> None: def test_factorize() -> None: - codes, uniques = pd.factorize(np.array(["b", "b", "a", "c", "b"])) + codes, uniques = pd.factorize(np.array(["b", "b", "a", "c", "b"]), sort=False) check(assert_type(codes, np.ndarray), np.ndarray) check(assert_type(uniques, np.ndarray), np.ndarray) From dd7724c815c33e0f29178889409e0818430fbf26 Mon Sep 17 00:00:00 2001 From: Loic Diridollou Date: Thu, 21 Nov 2024 17:32:17 -0500 Subject: [PATCH 2/4] GH1037 PR feedback --- pandas-stubs/core/algorithms.pyi | 4 ---- pandas-stubs/core/arrays/base.pyi | 4 +--- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/pandas-stubs/core/algorithms.pyi b/pandas-stubs/core/algorithms.pyi index 83d99b7c..49f922a1 100644 --- a/pandas-stubs/core/algorithms.pyi +++ b/pandas-stubs/core/algorithms.pyi @@ -52,8 +52,6 @@ def factorize( def factorize( values: Index | Series, sort: bool = ..., - # Not actually positional-only, used to handle deprecations in 1.5.0 - *, use_na_sentinel: bool = ..., size_hint: int | None = ..., ) -> tuple[np.ndarray, Index]: ... @@ -61,8 +59,6 @@ def factorize( def factorize( values: Categorical, sort: bool = ..., - # Not actually positional-only, used to handle deprecations in 1.5.0 - *, use_na_sentinel: bool = ..., size_hint: int | None = ..., ) -> tuple[np.ndarray, Categorical]: ... diff --git a/pandas-stubs/core/arrays/base.pyi b/pandas-stubs/core/arrays/base.pyi index 62fe63bc..d60fba4d 100644 --- a/pandas-stubs/core/arrays/base.pyi +++ b/pandas-stubs/core/arrays/base.pyi @@ -42,9 +42,7 @@ class ExtensionArray: def shift(self, periods: int = ..., fill_value: object = ...) -> Self: ... def unique(self): ... def searchsorted(self, value, side: str = ..., sorter=...): ... - def factorize( - self, sort: bool = ..., use_na_sentinel: bool = ... - ) -> tuple[np.ndarray, Self]: ... + def factorize(self, use_na_sentinel: bool = ...) -> tuple[np.ndarray, Self]: ... def repeat(self, repeats, axis=...): ... def take( self, From 11dc0793b5ad2f66ba4a6ee236a42e7901f487f4 Mon Sep 17 00:00:00 2001 From: Loic Diridollou Date: Thu, 21 Nov 2024 17:33:08 -0500 Subject: [PATCH 3/4] GH1037 PR feedback --- tests/test_pandas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_pandas.py b/tests/test_pandas.py index 7411ac81..cafa4fba 100644 --- a/tests/test_pandas.py +++ b/tests/test_pandas.py @@ -790,7 +790,7 @@ def test_lreshape() -> None: def test_factorize() -> None: - codes, uniques = pd.factorize(np.array(["b", "b", "a", "c", "b"]), sort=False) + codes, uniques = pd.factorize(np.array(["b", "b", "a", "c", "b"])) check(assert_type(codes, np.ndarray), np.ndarray) check(assert_type(uniques, np.ndarray), np.ndarray) From baabae1f6cf8001c9c7ce5c449de04ae69119d09 Mon Sep 17 00:00:00 2001 From: Loic Diridollou Date: Thu, 21 Nov 2024 18:15:31 -0500 Subject: [PATCH 4/4] GH1037 PR feedback --- pandas-stubs/core/base.pyi | 2 +- tests/test_indexes.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/pandas-stubs/core/base.pyi b/pandas-stubs/core/base.pyi index 744228c7..1cd75252 100644 --- a/pandas-stubs/core/base.pyi +++ b/pandas-stubs/core/base.pyi @@ -106,7 +106,7 @@ class IndexOpsMixin(OpsMixin, Generic[S1]): @property def is_monotonic_increasing(self) -> bool: ... def factorize( - self, sort: bool = ... + self, sort: bool = ..., use_na_sentinel: bool = ... ) -> tuple[np.ndarray, np.ndarray | Index | Categorical]: ... def searchsorted( self, value, side: Literal["left", "right"] = ..., sorter=... diff --git a/tests/test_indexes.py b/tests/test_indexes.py index 75c9fc11..aab49c40 100644 --- a/tests/test_indexes.py +++ b/tests/test_indexes.py @@ -6,6 +6,8 @@ import numpy as np from numpy import typing as npt import pandas as pd +from pandas.core.arrays.categorical import Categorical +from pandas.core.indexes.base import Index from typing_extensions import ( Never, assert_type, @@ -1160,3 +1162,16 @@ def test_value_counts() -> None: pd.Series, float, ) + + +def test_index_factorize() -> None: + """Test Index.factorize method.""" + codes, idx_uniques = pd.Index(["b", "b", "a", "c", "b"]).factorize() + check(assert_type(codes, np.ndarray), np.ndarray) + check(assert_type(idx_uniques, np.ndarray | Index | Categorical), pd.Index) + + codes, idx_uniques = pd.Index(["b", "b", "a", "c", "b"]).factorize( + use_na_sentinel=False + ) + check(assert_type(codes, np.ndarray), np.ndarray) + check(assert_type(idx_uniques, np.ndarray | Index | Categorical), pd.Index)