From e34dc63857612fdc90af74f647cf809183bc5ad8 Mon Sep 17 00:00:00 2001 From: Akmal Soliev Date: Thu, 21 Mar 2024 11:53:21 +0100 Subject: [PATCH 1/2] WIP: Fixing month delta --- src/business_days.rs | 2 + src/is_workday.rs | 1 + src/month_delta.rs | 12 ++--- src/timezone.rs | 3 ++ src/to_julian.rs | 1 + tests/test_month_delta.py | 93 ++++++--------------------------------- 6 files changed, 25 insertions(+), 87 deletions(-) diff --git a/src/business_days.rs b/src/business_days.rs index 94ca787..c40ca0a 100644 --- a/src/business_days.rs +++ b/src/business_days.rs @@ -183,6 +183,7 @@ pub(crate) fn impl_advance_n_days( s.datetime()?, None, &StringChunked::from_iter(std::iter::once("raise")), + NonExistent::Raise, )?; let out = match n.len() { 1 => { @@ -217,6 +218,7 @@ pub(crate) fn impl_advance_n_days( &out?.into_datetime(*time_unit, None), time_zone.as_deref(), &StringChunked::from_iter(std::iter::once("raise")), + NonExistent::Raise, )?; out.cast(original_dtype) } diff --git a/src/is_workday.rs b/src/is_workday.rs index 8022642..9175821 100644 --- a/src/is_workday.rs +++ b/src/is_workday.rs @@ -28,6 +28,7 @@ pub(crate) fn impl_is_workday( dates.datetime()?, None, &StringChunked::from_iter(std::iter::once("raise")), + NonExistent::Raise, )?; let out: BooleanChunked = ca.apply_values_generic(|date| { is_workday_date((date / multiplier) as i32, weekmask, holidays) diff --git a/src/month_delta.rs b/src/month_delta.rs index 02d33f6..bc7ce6c 100644 --- a/src/month_delta.rs +++ b/src/month_delta.rs @@ -66,17 +66,13 @@ fn add_month(ts: NaiveDate, n_months: i64) -> NaiveDate { /// let end_date = NaiveDate::from_ymd(2023, 4, 1); /// assert_eq!(get_m_diff(start_date, end_date), 3); /// ``` -fn get_m_diff(mut left: NaiveDate, right: NaiveDate) -> i32 { +fn get_m_diff(left: NaiveDate, right: NaiveDate) -> i32 { let mut n = 0; - if left.year() + 2 < right.year() { + if right.year() + 1 > left.year() { n = (right.year() - left.year() - 1) * 12; - left = add_month(left, n.into()); } - while left < right { - left = add_month(left, 1); - if left <= right { - n += 1; - } + while add_month(left, (n+1).into()) <= right { + n += 1; } n } diff --git a/src/timezone.rs b/src/timezone.rs index 4b36fa7..def3a13 100644 --- a/src/timezone.rs +++ b/src/timezone.rs @@ -35,6 +35,9 @@ fn naive_local_to_naive_utc_in_new_time_zone( Ambiguous::Raise => { polars_bail!(ComputeError: "datetime '{}' is ambiguous in time zone '{}'. Please use `ambiguous` to tell how it should be localized.", ndt, to_tz) } + Ambiguous::Null => { + unimplemented!("Ambiguous::Null is not yet supported"); + } }, LocalResult::None => polars_bail!(ComputeError: "datetime '{}' is non-existent in time zone '{}'. Non-existent datetimes are not yet supported", diff --git a/src/to_julian.rs b/src/to_julian.rs index 2af749a..91b5a7c 100644 --- a/src/to_julian.rs +++ b/src/to_julian.rs @@ -61,6 +61,7 @@ pub(crate) fn impl_to_julian_date(s: &Series) -> PolarsResult { s.datetime()?, None, &StringChunked::from_iter(std::iter::once("raise")), + NonExistent::Raise, )?; let chunks = ca.downcast_iter().map(|arr| -> Float64Array { arr.into_iter() diff --git a/tests/test_month_delta.py b/tests/test_month_delta.py index 0647137..04c0f11 100644 --- a/tests/test_month_delta.py +++ b/tests/test_month_delta.py @@ -1,91 +1,27 @@ -import polars as pl -import polars_xdt as xdt from datetime import date -from dateutil.relativedelta import relativedelta - -from hypothesis import given, strategies as st, assume +import polars as pl +from dateutil.relativedelta import relativedelta +from hypothesis import example, given, settings +from hypothesis import strategies as st -def test_month_delta(): - df = pl.DataFrame( - { - "start_date": [ - date(2024, 1, 1), - date(2024, 1, 1), - date(2023, 9, 1), - date(2023, 1, 4), - date(2022, 6, 4), - date(2023, 1, 1), - date(2023, 1, 1), - date(2022, 2, 1), - date(2022, 2, 1), - date(2024, 3, 1), - date(2024, 3, 31), - date(2022, 2, 28), - date(2023, 1, 31), - date(2019, 12, 31), - date(2024, 1, 31), - date(1970, 1, 2), - ], - "end_date": [ - date(2024, 1, 4), - date(2024, 1, 31), - date(2023, 11, 1), - date(2022, 1, 4), - date(2022, 1, 4), - date(2022, 12, 31), - date(2021, 12, 31), - date(2022, 3, 1), - date(2023, 3, 1), - date(2023, 2, 28), - date(2023, 2, 28), - date(2023, 1, 31), - date(2022, 2, 28), - date(2023, 1, 1), - date(2024, 4, 30), - date(1971, 1, 1), - ], - }, - ) - - assert_month_diff = [ - 0, # 2024-01-01 to 2024-01-04 - 0, # 2024-01-01 to 2024-01-31 - 2, # 2023-09-01 to 2023-11-01 - -12, # 2023-01-04 to 2022-01-04 - -5, # 2022-06-04 to 2022-01-04 - 0, # 2023-01-01 to 2022-12-31 - -12, # 2023-01-01 to 2021-12-31 - 1, # 2022-02-01 to 2022-03-01 - 13, # 2022-02-01 to 2023-03-01 - -12, # 2024-03-01 to 2023-02-28 - -13, # 2024-03-31 to 2023-02-28 - 11, # 2022-02-28 to 2023-01-31 - -11, # 2023-01-31 to 2022-02-28 - 36, # 2019-12-31 to 2023-01-01 - 3, # 2024-01-31 to 2024-04-30 - 11, # 1970-01-02 to 1971-01-01 - ] - df = df.with_columns( - # For easier visual debugging purposes - pl.Series(name="assert_month_delta", values=assert_month_diff), - month_delta=xdt.month_delta("start_date", "end_date"), - ) - # pl.Config.set_tbl_rows(50) - # print(df) - month_diff_list = df.get_column("month_delta").to_list() - assert assert_month_diff == month_diff_list, ( - "The month difference list did not match the expected values.\n" - "Please check the function: 'month_diff.rs' for discrepancies." - ) +import polars_xdt as xdt @given( start_date=st.dates( - min_value=date(1960, 1, 1), max_value=date(2024, 12, 31) + min_value=date(1824, 1, 1), max_value=date(2024, 12, 31) ), end_date=st.dates(min_value=date(1960, 1, 1), max_value=date(2024, 12, 31)), ) +@example(start_date=date(2022, 2, 28), end_date=date(2024, 2, 29)) # Leap year +@example(start_date=date(2024, 1, 1), end_date=date(2024, 1, 31)) # Same month +@example(start_date=date(1973, 1, 1), end_date=date(1973, 1, 1)) # Same date +@example(start_date=date(2019, 12, 31), end_date=date(2020, 1, 1)) # Border +@example(start_date=date(2018, 12, 1), end_date=date(2020, 1, 1)) # End of year +@example(start_date=date(2022, 12, 1), end_date=date(2020, 1, 1)) # Negative +@example(start_date=date(2000, 3, 29), end_date=date(2003, 1, 28)) # Failed test +@settings(max_examples=10_000) def test_month_delta_hypothesis(start_date: date, end_date: date) -> None: df = pl.DataFrame( { @@ -110,5 +46,4 @@ def test_month_delta_hypothesis(start_date: date, end_date: date) -> None: if end_date > start_date: break expected -= 1 - assert result == expected From 62cc30d20b8238c0be9500c0b06117fd8e954e93 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Thu, 21 Mar 2024 11:41:46 +0000 Subject: [PATCH 2/2] fixup --- src/month_delta.rs | 29 +++++++++++++++++------------ tests/test_month_delta.py | 18 +++++++----------- 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/src/month_delta.rs b/src/month_delta.rs index bc7ce6c..c6be7f2 100644 --- a/src/month_delta.rs +++ b/src/month_delta.rs @@ -68,11 +68,20 @@ fn add_month(ts: NaiveDate, n_months: i64) -> NaiveDate { /// ``` fn get_m_diff(left: NaiveDate, right: NaiveDate) -> i32 { let mut n = 0; - if right.year() + 1 > left.year() { - n = (right.year() - left.year() - 1) * 12; - } - while add_month(left, (n+1).into()) <= right { - n += 1; + if right >= left { + if right.year() + 1 > left.year() { + n = (right.year() - left.year() - 1) * 12; + } + while add_month(left, (n + 1).into()) <= right { + n += 1; + } + } else { + if left.year() + 1 > right.year() { + n = -(left.year() - right.year() - 1) * 12; + } + while add_month(left, (n - 1).into()) >= right { + n -= 1; + } } n } @@ -119,13 +128,9 @@ pub(crate) fn impl_month_delta(start_dates: &Series, end_dates: &Series) -> Pola .as_date_iter() .zip(end_dates.as_date_iter()) .map(|(s_arr, e_arr)| { - s_arr.zip(e_arr).map(|(start_date, end_date)| { - if start_date > end_date { - -get_m_diff(end_date, start_date) - } else { - get_m_diff(start_date, end_date) - } - }) + s_arr + .zip(e_arr) + .map(|(start_date, end_date)| get_m_diff(start_date, end_date)) }) .collect(); diff --git a/tests/test_month_delta.py b/tests/test_month_delta.py index 04c0f11..0de669c 100644 --- a/tests/test_month_delta.py +++ b/tests/test_month_delta.py @@ -10,7 +10,7 @@ @given( start_date=st.dates( - min_value=date(1824, 1, 1), max_value=date(2024, 12, 31) + min_value=date(1924, 1, 1), max_value=date(2024, 12, 31) ), end_date=st.dates(min_value=date(1960, 1, 1), max_value=date(2024, 12, 31)), ) @@ -20,8 +20,10 @@ @example(start_date=date(2019, 12, 31), end_date=date(2020, 1, 1)) # Border @example(start_date=date(2018, 12, 1), end_date=date(2020, 1, 1)) # End of year @example(start_date=date(2022, 12, 1), end_date=date(2020, 1, 1)) # Negative -@example(start_date=date(2000, 3, 29), end_date=date(2003, 1, 28)) # Failed test -@settings(max_examples=10_000) +@example( + start_date=date(2000, 3, 29), end_date=date(2003, 1, 28) +) # Failed test +@settings(max_examples=1000) def test_month_delta_hypothesis(start_date: date, end_date: date) -> None: df = pl.DataFrame( { @@ -35,15 +37,9 @@ def test_month_delta_hypothesis(start_date: date, end_date: date) -> None: expected = 0 if start_date <= end_date: - while True: - start_date = start_date + relativedelta(months=1) - if start_date > end_date: - break + while start_date + relativedelta(months=expected + 1) <= end_date: expected += 1 else: - while True: - end_date = end_date + relativedelta(months=1) - if end_date > start_date: - break + while start_date + relativedelta(months=expected - 1) >= end_date: expected -= 1 assert result == expected