From 0f6794644e44dfbdde55ab8ad38946aa55077241 Mon Sep 17 00:00:00 2001 From: Simon Lin Date: Fri, 10 Jan 2025 22:22:25 +1100 Subject: [PATCH 1/4] c --- crates/polars-core/src/frame/column/mod.rs | 18 ++++++++++++++++-- py-polars/tests/unit/interop/test_interop.py | 5 +++++ py-polars/tests/unit/test_scalar.py | 5 +++++ 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/crates/polars-core/src/frame/column/mod.rs b/crates/polars-core/src/frame/column/mod.rs index a1426bd225cb..f1ccf49ab2ce 100644 --- a/crates/polars-core/src/frame/column/mod.rs +++ b/crates/polars-core/src/frame/column/mod.rs @@ -1091,7 +1091,20 @@ impl Column { match self { Column::Series(s) => s.rechunk().into(), Column::Partitioned(_) => self.clone(), - Column::Scalar(_) => self.clone(), + Column::Scalar(s) => { + if s.lazy_as_materialized_series() + .filter(|x| x.n_chunks() > 1) + .is_some() + { + Column::Scalar(ScalarColumn::new( + s.name().clone(), + s.scalar().clone(), + s.len(), + )) + } else { + self.clone() + } + }, } } @@ -1700,7 +1713,8 @@ impl Column { pub fn n_chunks(&self) -> usize { match self { Column::Series(s) => s.n_chunks(), - Column::Scalar(_) | Column::Partitioned(_) => 1, + Column::Scalar(s) => s.lazy_as_materialized_series().map_or(1, |x| x.n_chunks()), + Column::Partitioned(_) => 1, } } diff --git a/py-polars/tests/unit/interop/test_interop.py b/py-polars/tests/unit/interop/test_interop.py index 90d3ab4a85c9..d203095070f9 100644 --- a/py-polars/tests/unit/interop/test_interop.py +++ b/py-polars/tests/unit/interop/test_interop.py @@ -857,3 +857,8 @@ def test_from_arrow_string_cache_20271() -> None: assert_series_equal( df.to_series().to_physical(), pl.Series("b", [3, 4]), check_dtypes=False ) + + +def test_to_arrow_empty_chunks_20627() -> None: + df = pl.concat(2 * [pl.Series([1])]).filter(pl.Series([False, True])).to_frame() + assert df.to_arrow().shape == (1, 1) diff --git a/py-polars/tests/unit/test_scalar.py b/py-polars/tests/unit/test_scalar.py index 825d87723b67..0fdb74cb8843 100644 --- a/py-polars/tests/unit/test_scalar.py +++ b/py-polars/tests/unit/test_scalar.py @@ -79,3 +79,8 @@ def test_scalar_identification_function_expr_in_binary() -> None: pl.select(x).with_columns(o=pl.col("x").null_count() > 0), pl.select(x, o=False), ) + + +def test_scalar_rechunk_20627() -> None: + df = pl.concat(2 * [pl.Series([1])]).filter(pl.Series([False, True])).to_frame() + assert df.rechunk().to_series().n_chunks() == 1 From 45970a8dd9b57286d144850718a4ab0effff8de4 Mon Sep 17 00:00:00 2001 From: Simon Lin Date: Fri, 10 Jan 2025 22:41:17 +1100 Subject: [PATCH 2/4] assert for partitioned --- crates/polars-core/src/frame/column/mod.rs | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/crates/polars-core/src/frame/column/mod.rs b/crates/polars-core/src/frame/column/mod.rs index f1ccf49ab2ce..99bf7fa8156a 100644 --- a/crates/polars-core/src/frame/column/mod.rs +++ b/crates/polars-core/src/frame/column/mod.rs @@ -1090,7 +1090,12 @@ impl Column { pub fn rechunk(&self) -> Column { match self { Column::Series(s) => s.rechunk().into(), - Column::Partitioned(_) => self.clone(), + Column::Partitioned(s) => { + if let Some(s) = s.lazy_as_materialized_series() { + debug_assert_eq!(s.n_chunks(), 1) + } + self.clone() + }, Column::Scalar(s) => { if s.lazy_as_materialized_series() .filter(|x| x.n_chunks() > 1) @@ -1714,7 +1719,12 @@ impl Column { match self { Column::Series(s) => s.n_chunks(), Column::Scalar(s) => s.lazy_as_materialized_series().map_or(1, |x| x.n_chunks()), - Column::Partitioned(_) => 1, + Column::Partitioned(s) => { + if let Some(s) = s.lazy_as_materialized_series() { + debug_assert_eq!(s.n_chunks(), 1) + } + 1 + }, } } From 1a4646223373bedf5911c5845bcf8f837115d5c6 Mon Sep 17 00:00:00 2001 From: Simon Lin Date: Fri, 10 Jan 2025 22:50:19 +1100 Subject: [PATCH 3/4] c --- crates/polars-core/src/frame/column/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/polars-core/src/frame/column/mod.rs b/crates/polars-core/src/frame/column/mod.rs index 99bf7fa8156a..54ad9758a043 100644 --- a/crates/polars-core/src/frame/column/mod.rs +++ b/crates/polars-core/src/frame/column/mod.rs @@ -1092,6 +1092,7 @@ impl Column { Column::Series(s) => s.rechunk().into(), Column::Partitioned(s) => { if let Some(s) = s.lazy_as_materialized_series() { + // This should always hold for partitioned. debug_assert_eq!(s.n_chunks(), 1) } self.clone() From 5bde17513db31e334ce01036e2f340dad03b603d Mon Sep 17 00:00:00 2001 From: Simon Lin Date: Fri, 10 Jan 2025 22:50:27 +1100 Subject: [PATCH 4/4] c --- crates/polars-core/src/frame/column/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/polars-core/src/frame/column/mod.rs b/crates/polars-core/src/frame/column/mod.rs index 54ad9758a043..032b748a74c8 100644 --- a/crates/polars-core/src/frame/column/mod.rs +++ b/crates/polars-core/src/frame/column/mod.rs @@ -1722,6 +1722,7 @@ impl Column { Column::Scalar(s) => s.lazy_as_materialized_series().map_or(1, |x| x.n_chunks()), Column::Partitioned(s) => { if let Some(s) = s.lazy_as_materialized_series() { + // This should always hold for partitioned. debug_assert_eq!(s.n_chunks(), 1) } 1