From 42bd17d5af4da0c1987a1531c43e45f8ff722592 Mon Sep 17 00:00:00 2001 From: Akmal Soliev Date: Thu, 8 Feb 2024 14:52:11 +0100 Subject: [PATCH 1/7] [FIX] subtract function for same month dates --- polars_xdt/__init__.py | 2 + polars_xdt/functions.py | 68 +++++++++++++++++++++++++ src/expressions.rs | 8 +++ src/lib.rs | 1 + src/month_delta.rs | 101 ++++++++++++++++++++++++++++++++++++++ tests/test_month_delta.py | 77 +++++++++++++++++++++++++++++ 6 files changed, 257 insertions(+) create mode 100644 src/month_delta.rs create mode 100644 tests/test_month_delta.py diff --git a/polars_xdt/__init__.py b/polars_xdt/__init__.py index 9099b37..ab7813a 100644 --- a/polars_xdt/__init__.py +++ b/polars_xdt/__init__.py @@ -7,6 +7,7 @@ format_localized, from_local_datetime, is_workday, + month_delta, month_name, offset_by, to_julian_date, @@ -29,5 +30,6 @@ "to_julian_date", "to_local_datetime", "workday_count", + "month_delta", "__version__", ] diff --git a/polars_xdt/functions.py b/polars_xdt/functions.py index 8b5edf8..3c9d2cb 100644 --- a/polars_xdt/functions.py +++ b/polars_xdt/functions.py @@ -721,3 +721,71 @@ def workday_count( "holidays": holidays_int, }, ) + + +def month_delta( + start_dates: IntoExpr, + end_dates: IntoExpr, +) -> pl.Expr: + """ + Calculate the number of months between two Series. + + Parameters + ---------- + start_dates + A Series object containing the start dates. + end_dates + A Series object containing the end dates. + + Returns + ------- + polars.Expr + + Example + ------- + >>> from datetime import date + >>> import polars as pl + >>> import polars_xdt as xdt + >>> df = pl.DataFrame( + ... { + ... "start_date": [ + ... date(2024, 3, 1), + ... date(2024, 3, 31), + ... date(2022, 2, 28), + ... date(2023, 1, 31), + ... date(2019, 12, 31), + ... ], + ... "end_date": [ + ... date(2023, 2, 28), + ... date(2023, 2, 28), + ... date(2023, 2, 28), + ... date(2023, 1, 31), + ... date(2023, 1, 1), + ... ], + ... }, + ... ) + >>> df.with_columns( + ... xdt.month_delta("start_date", "end_date").alias("month_delta") + ... ) + shape: (5, 3) + ┌────────────┬────────────┬─────────────┐ + │ start_date ┆ end_date ┆ month_delta │ + │ --- ┆ --- ┆ --- │ + │ date ┆ date ┆ i32 │ + ╞════════════╪════════════╪═════════════╡ + │ 2024-03-01 ┆ 2023-02-28 ┆ -12 │ + │ 2024-03-31 ┆ 2023-02-28 ┆ -14 │ + │ 2022-02-28 ┆ 2023-02-28 ┆ 12 │ + │ 2023-01-31 ┆ 2023-01-31 ┆ 0 │ + │ 2019-12-31 ┆ 2023-01-01 ┆ 36 │ + └────────────┴────────────┴─────────────┘ + """ + start_dates = parse_into_expr(start_dates) + end_dates = parse_into_expr(end_dates) + + return start_dates.register_plugin( + lib=lib, + symbol="month_delta", + is_elementwise=True, + args=[end_dates], + ) diff --git a/src/expressions.rs b/src/expressions.rs index 9f5d8a2..3d88160 100644 --- a/src/expressions.rs +++ b/src/expressions.rs @@ -2,6 +2,7 @@ use crate::business_days::*; use crate::format_localized::*; use crate::is_workday::*; +use crate::month_delta::*; use crate::sub::*; use crate::timezone::*; use crate::to_julian::*; @@ -85,6 +86,13 @@ fn workday_count(inputs: &[Series], kwargs: BusinessDayKwargs) -> PolarsResult PolarsResult { + let start_dates = &inputs[0]; + let end_dates = &inputs[1]; + impl_month_delta(start_dates, end_dates) +} + #[polars_expr(output_type=Boolean)] fn is_workday(inputs: &[Series], kwargs: BusinessDayKwargs) -> PolarsResult { let dates = &inputs[0]; diff --git a/src/lib.rs b/src/lib.rs index 5698d4f..9473b16 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,6 +2,7 @@ mod business_days; mod expressions; mod format_localized; mod is_workday; +mod month_delta; mod sub; mod timezone; mod to_julian; diff --git a/src/month_delta.rs b/src/month_delta.rs new file mode 100644 index 0000000..74f1a19 --- /dev/null +++ b/src/month_delta.rs @@ -0,0 +1,101 @@ +use chrono::{Datelike, NaiveDate}; +use polars::prelude::*; + +fn last_day_of_month(date: NaiveDate) -> NaiveDate { + if date.month() == 12 { + NaiveDate::from_ymd_opt(date.year() + 1, 1, 1) + .unwrap() + .pred_opt() + .unwrap() + } else { + date.with_day(1) + .unwrap() + .with_month(date.month() + 1) + .unwrap() + .pred_opt() + .unwrap() + } +} + +pub(crate) fn impl_month_delta(start_dates: &Series, end_dates: &Series) -> PolarsResult { + if (start_dates.dtype() != &DataType::Date) || (end_dates.dtype() != &DataType::Date) { + polars_bail!(InvalidOperation: "polars_xdt.month_delta only works on Date type. Please cast to Date first."); + } + let start_dates = start_dates.date()?; + let end_dates = end_dates.date()?; + + let month_diff: Int32Chunked = start_dates + .as_date_iter() + .zip(end_dates.as_date_iter()) + .map(|(s_arr, e_arr)| { + s_arr.zip(e_arr).map(|(start_date, end_date)| { + let year_diff = end_date.year() - start_date.year(); + let mut month_diff = end_date.month() as i32 - start_date.month() as i32; + month_diff += year_diff * 12; + + // Check 1: Check if the actual number of days difference matches + // assuming both dates start on the first + let actual_days_diff = end_date.signed_duration_since(start_date).num_days(); + let expected_days_diff = { + let start_dt = start_date.with_day(1).unwrap(); // start date at the beginning of the month + let end_dt = end_date.with_day(1).unwrap(); // end date at the beginning of a month + end_dt.signed_duration_since(start_dt).num_days() // expected day difference as full months + }; + + // Calculates if the date difference spans entire months + // If do then add additional month to the calculation + let addition_condition: bool = { + actual_days_diff == expected_days_diff + && end_date.month() != start_date.month() + && end_date.day() != start_date.day() + }; + + // Determines if the end date is earlier in the month than the start date, + // but not an entire month earlier + let subtraction_condition: bool = { + expected_days_diff.abs() > actual_days_diff.abs() + }; + + // Check 2: Check if both dates fall on the last days of + // their respective months + let end_date_end = last_day_of_month(end_date); + let start_date_end = last_day_of_month(start_date); + let last_month_days = { + // End date is the last day of its month + end_date.day() == end_date_end.day() && + // Start date is the last day of its month + start_date.day() == start_date_end.day() && + end_date.day() != start_date.day() && + start_date.month() != end_date.month() + }; + + // Apply corrections based on the conditions checked earlier + // Use absolute value to determine the magnitude of the change + let mut abs_month_diff = month_diff.abs(); + + if addition_condition { + // Add an extra month if the entire months have been spanned + abs_month_diff += 1 + } + if last_month_days { + // Add an extra month for end cases where both dates are at month-end + abs_month_diff += 1 + } + if subtraction_condition { + // Subtract a month if the start date is later in the month than the end date + abs_month_diff -= 1 + } + + // Return the final month difference + // if start date is after the end date then return negative + if month_diff < 0 { + -abs_month_diff + } else { + abs_month_diff + } + }) + }) + .collect(); + + Ok(month_diff.into_series()) +} diff --git a/tests/test_month_delta.py b/tests/test_month_delta.py new file mode 100644 index 0000000..1fe8b72 --- /dev/null +++ b/tests/test_month_delta.py @@ -0,0 +1,77 @@ +import polars as pl +import polars_xdt as xdt +from datetime import date + + +def test_month_delta(): + df = pl.DataFrame( + { + "start_date": [ + date(2024, 1, 1), + date(2024, 1, 1), + date(2023, 9, 1), + date(2023, 1, 4), + date(2022, 6, 4), + date(2023, 1, 1), + date(2023, 1, 1), + date(2022, 2, 1), + date(2022, 2, 1), + date(2024, 3, 1), + date(2024, 3, 31), + date(2022, 2, 28), + date(2023, 1, 31), + date(2019, 12, 31), + date(2024, 1, 31), + date(1970, 1, 2), + ], + "end_date": [ + date(2024, 1, 4), + date(2024, 1, 31), + date(2023, 11, 1), + date(2022, 1, 4), + date(2022, 1, 4), + date(2022, 12, 31), + date(2021, 12, 31), + date(2022, 3, 1), + date(2023, 3, 1), + date(2023, 2, 28), + date(2023, 2, 28), + date(2023, 1, 31), + date(2022, 2, 28), + date(2023, 1, 1), + date(2024, 4, 30), + date(1971, 1, 1), + ], + }, + ) + + expected_month_diff = [ + 0, + 0, + 2, + -12, + -5, + 0, + -12, + 1, + 13, + -12, + -14, + 12, + -12, + 36, + 3, + 11, + ] + df = df.with_columns( + # For easier visual debugging purposes + pl.Series(name="out_month_delta", values=expected_month_diff), + month_delta=xdt.month_delta("start_date", "end_date"), + ) + + month_diff_list = df.get_column("month_delta").to_list() + + assert expected_month_diff == month_diff_list, ( + "The month difference list did not match the expected values.\n" + "Please check the function: 'month_diff.rs' for discrepancies." + ) From c7c321bf73f05cff870ee05687dd58fd4b438077 Mon Sep 17 00:00:00 2001 From: Akmal Soliev Date: Thu, 8 Feb 2024 15:01:34 +0100 Subject: [PATCH 2/7] [FIX] pre-commit fix --- polars_xdt/functions.py | 10 +++++----- src/month_delta.rs | 5 ++--- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/polars_xdt/functions.py b/polars_xdt/functions.py index 3c9d2cb..3b70c96 100644 --- a/polars_xdt/functions.py +++ b/polars_xdt/functions.py @@ -749,11 +749,11 @@ def month_delta( >>> df = pl.DataFrame( ... { ... "start_date": [ - ... date(2024, 3, 1), - ... date(2024, 3, 31), - ... date(2022, 2, 28), - ... date(2023, 1, 31), - ... date(2019, 12, 31), + ... date(2024, 3, 1), + ... date(2024, 3, 31), + ... date(2022, 2, 28), + ... date(2023, 1, 31), + ... date(2019, 12, 31), ... ], ... "end_date": [ ... date(2023, 2, 28), diff --git a/src/month_delta.rs b/src/month_delta.rs index 74f1a19..659175b 100644 --- a/src/month_delta.rs +++ b/src/month_delta.rs @@ -52,9 +52,8 @@ pub(crate) fn impl_month_delta(start_dates: &Series, end_dates: &Series) -> Pola // Determines if the end date is earlier in the month than the start date, // but not an entire month earlier - let subtraction_condition: bool = { - expected_days_diff.abs() > actual_days_diff.abs() - }; + let subtraction_condition: bool = + { expected_days_diff.abs() > actual_days_diff.abs() }; // Check 2: Check if both dates fall on the last days of // their respective months From dea63f3d157c381a9b6261343fd76b155a12b386 Mon Sep 17 00:00:00 2001 From: Akmal Soliev Date: Wed, 21 Feb 2024 21:55:56 +0100 Subject: [PATCH 3/7] [REFACTOR] Restructured the code to make it clearer --- polars_xdt/functions.py | 1 + src/month_delta.rs | 87 ++++++++++++++++++++--------------------- 2 files changed, 43 insertions(+), 45 deletions(-) diff --git a/polars_xdt/functions.py b/polars_xdt/functions.py index f3ce762..8322f16 100644 --- a/polars_xdt/functions.py +++ b/polars_xdt/functions.py @@ -789,6 +789,7 @@ def month_delta( │ 2023-01-31 ┆ 2023-01-31 ┆ 0 │ │ 2019-12-31 ┆ 2023-01-01 ┆ 36 │ └────────────┴────────────┴─────────────┘ + """ start_dates = parse_into_expr(start_dates) end_dates = parse_into_expr(end_dates) diff --git a/src/month_delta.rs b/src/month_delta.rs index 659175b..a13973d 100644 --- a/src/month_delta.rs +++ b/src/month_delta.rs @@ -1,7 +1,7 @@ use chrono::{Datelike, NaiveDate}; use polars::prelude::*; -fn last_day_of_month(date: NaiveDate) -> NaiveDate { +fn get_last_month_date(date: NaiveDate) -> NaiveDate { if date.month() == 12 { NaiveDate::from_ymd_opt(date.year() + 1, 1, 1) .unwrap() @@ -17,6 +17,44 @@ fn last_day_of_month(date: NaiveDate) -> NaiveDate { } } +fn get_last_day_bool(start_date: NaiveDate, end_date: NaiveDate) -> bool { + // Check if both dates fall on the last days of their respective months + let end_date_end = get_last_month_date(end_date); + let start_date_end = get_last_month_date(start_date); + { + // End date is the last day of its month + end_date.day() == end_date_end.day() && + // Start date is the last day of its month + start_date.day() == start_date_end.day() && + end_date.day() != start_date.day() && + start_date.month() != end_date.month() + } +} + +fn get_month_span_indicator(start_date: NaiveDate, end_date: NaiveDate) -> i32 { + // Check 1: Check if the actual number of days difference matches + // assuming both dates start on the first + let actual_days_diff = end_date.signed_duration_since(start_date).num_days(); + let expected_days_diff = { + let start_dt = start_date.with_day(1).unwrap(); // start date at the beginning of the month + let end_dt = end_date.with_day(1).unwrap(); // end date at the beginning of a month + end_dt.signed_duration_since(start_dt).num_days() // expected day difference as full months + }; + + // Calculates if the date difference spans entire months + // If do then add additional month to the calculation + if actual_days_diff == expected_days_diff + && end_date.month() != start_date.month() + && end_date.day() != start_date.day() + { + 1 + } else if expected_days_diff.abs() > actual_days_diff.abs() { + -1 + } else { + 0 + } +} + pub(crate) fn impl_month_delta(start_dates: &Series, end_dates: &Series) -> PolarsResult { if (start_dates.dtype() != &DataType::Date) || (end_dates.dtype() != &DataType::Date) { polars_bail!(InvalidOperation: "polars_xdt.month_delta only works on Date type. Please cast to Date first."); @@ -33,57 +71,16 @@ pub(crate) fn impl_month_delta(start_dates: &Series, end_dates: &Series) -> Pola let mut month_diff = end_date.month() as i32 - start_date.month() as i32; month_diff += year_diff * 12; - // Check 1: Check if the actual number of days difference matches - // assuming both dates start on the first - let actual_days_diff = end_date.signed_duration_since(start_date).num_days(); - let expected_days_diff = { - let start_dt = start_date.with_day(1).unwrap(); // start date at the beginning of the month - let end_dt = end_date.with_day(1).unwrap(); // end date at the beginning of a month - end_dt.signed_duration_since(start_dt).num_days() // expected day difference as full months - }; - - // Calculates if the date difference spans entire months - // If do then add additional month to the calculation - let addition_condition: bool = { - actual_days_diff == expected_days_diff - && end_date.month() != start_date.month() - && end_date.day() != start_date.day() - }; - - // Determines if the end date is earlier in the month than the start date, - // but not an entire month earlier - let subtraction_condition: bool = - { expected_days_diff.abs() > actual_days_diff.abs() }; - - // Check 2: Check if both dates fall on the last days of - // their respective months - let end_date_end = last_day_of_month(end_date); - let start_date_end = last_day_of_month(start_date); - let last_month_days = { - // End date is the last day of its month - end_date.day() == end_date_end.day() && - // Start date is the last day of its month - start_date.day() == start_date_end.day() && - end_date.day() != start_date.day() && - start_date.month() != end_date.month() - }; - // Apply corrections based on the conditions checked earlier // Use absolute value to determine the magnitude of the change let mut abs_month_diff = month_diff.abs(); - if addition_condition { - // Add an extra month if the entire months have been spanned - abs_month_diff += 1 - } - if last_month_days { + abs_month_diff += get_month_span_indicator(start_date, end_date); + + if get_last_day_bool(start_date, end_date) { // Add an extra month for end cases where both dates are at month-end abs_month_diff += 1 } - if subtraction_condition { - // Subtract a month if the start date is later in the month than the end date - abs_month_diff -= 1 - } // Return the final month difference // if start date is after the end date then return negative From b89f09471c656eb4e507e45a1c7636ff27e6204e Mon Sep 17 00:00:00 2001 From: Akmal Soliev Date: Wed, 21 Feb 2024 23:38:55 +0100 Subject: [PATCH 4/7] [DOC] Added comments clarifying the functions --- src/month_delta.rs | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/month_delta.rs b/src/month_delta.rs index a13973d..a610b89 100644 --- a/src/month_delta.rs +++ b/src/month_delta.rs @@ -1,8 +1,10 @@ use chrono::{Datelike, NaiveDate}; use polars::prelude::*; +// Function to get the date of the last day of the current month for a given date. fn get_last_month_date(date: NaiveDate) -> NaiveDate { if date.month() == 12 { + // If it is December, move to the next year and set the month to January. NaiveDate::from_ymd_opt(date.year() + 1, 1, 1) .unwrap() .pred_opt() @@ -17,8 +19,8 @@ fn get_last_month_date(date: NaiveDate) -> NaiveDate { } } +// Function that checks if both dates fall on the last days of their respective months. fn get_last_day_bool(start_date: NaiveDate, end_date: NaiveDate) -> bool { - // Check if both dates fall on the last days of their respective months let end_date_end = get_last_month_date(end_date); let start_date_end = get_last_month_date(start_date); { @@ -31,9 +33,12 @@ fn get_last_day_bool(start_date: NaiveDate, end_date: NaiveDate) -> bool { } } -fn get_month_span_indicator(start_date: NaiveDate, end_date: NaiveDate) -> i32 { - // Check 1: Check if the actual number of days difference matches - // assuming both dates start on the first +// Function to calculate the span of months between two dates as an integer. +// This function specifically checks if the span between the two dates covers whole months, +// and under certain conditions, adjusts the count by 1 or -1 to reflect partial months. +fn get_month_span_int(start_date: NaiveDate, end_date: NaiveDate) -> i32 { + // Check if the actual number of days difference matches assuming both + // dates start on the first let actual_days_diff = end_date.signed_duration_since(start_date).num_days(); let expected_days_diff = { let start_dt = start_date.with_day(1).unwrap(); // start date at the beginning of the month @@ -49,8 +54,11 @@ fn get_month_span_indicator(start_date: NaiveDate, end_date: NaiveDate) -> i32 { { 1 } else if expected_days_diff.abs() > actual_days_diff.abs() { + // If the expected difference (in absolute terms) is greater than the actual difference, + // it indicates a partial month span, and we return -1 to adjust the month span downwards. -1 } else { + // If none of the conditions were met 0 } } @@ -75,7 +83,7 @@ pub(crate) fn impl_month_delta(start_dates: &Series, end_dates: &Series) -> Pola // Use absolute value to determine the magnitude of the change let mut abs_month_diff = month_diff.abs(); - abs_month_diff += get_month_span_indicator(start_date, end_date); + abs_month_diff += get_month_span_int(start_date, end_date); if get_last_day_bool(start_date, end_date) { // Add an extra month for end cases where both dates are at month-end From 155e6753fd60cea623fb9b4317bf2f8e6d774a1c Mon Sep 17 00:00:00 2001 From: Akmal Soliev Date: Mon, 26 Feb 2024 13:49:28 +0100 Subject: [PATCH 5/7] [REFACTOR] Applied recommendation --- polars_xdt/functions.py | 2 ++ src/month_delta.rs | 14 +++++++------- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/polars_xdt/functions.py b/polars_xdt/functions.py index 8869582..485a5ec 100644 --- a/polars_xdt/functions.py +++ b/polars_xdt/functions.py @@ -799,6 +799,8 @@ def month_delta( symbol="month_delta", is_elementwise=True, args=[end_dates], + ) + def arg_previous_greater(expr: IntoExpr) -> pl.Expr: """ diff --git a/src/month_delta.rs b/src/month_delta.rs index a610b89..1d576b7 100644 --- a/src/month_delta.rs +++ b/src/month_delta.rs @@ -37,7 +37,7 @@ fn get_last_day_bool(start_date: NaiveDate, end_date: NaiveDate) -> bool { // This function specifically checks if the span between the two dates covers whole months, // and under certain conditions, adjusts the count by 1 or -1 to reflect partial months. fn get_month_span_int(start_date: NaiveDate, end_date: NaiveDate) -> i32 { - // Check if the actual number of days difference matches assuming both + // Check if the actual number of days difference matches assuming both // dates start on the first let actual_days_diff = end_date.signed_duration_since(start_date).num_days(); let expected_days_diff = { @@ -46,21 +46,21 @@ fn get_month_span_int(start_date: NaiveDate, end_date: NaiveDate) -> i32 { end_dt.signed_duration_since(start_dt).num_days() // expected day difference as full months }; + let mut n = 0; // Calculates if the date difference spans entire months // If do then add additional month to the calculation if actual_days_diff == expected_days_diff && end_date.month() != start_date.month() && end_date.day() != start_date.day() { - 1 + n += 1 } else if expected_days_diff.abs() > actual_days_diff.abs() { // If the expected difference (in absolute terms) is greater than the actual difference, // it indicates a partial month span, and we return -1 to adjust the month span downwards. - -1 - } else { - // If none of the conditions were met - 0 - } + n -= 1 + } + + n } pub(crate) fn impl_month_delta(start_dates: &Series, end_dates: &Series) -> PolarsResult { From 0fb0c269899d1a3b71c0f8607c58080e1b744f73 Mon Sep 17 00:00:00 2001 From: Akmal Soliev Date: Thu, 7 Mar 2024 00:23:20 +0100 Subject: [PATCH 6/7] [NEW] Restructure and improvements --- Cargo.toml | 2 +- README.md | 1 + polars_xdt/functions.py | 26 ++------ polars_xdt/ranges.py | 9 +-- src/ewma_by_time.rs | 26 +++----- src/expressions.rs | 21 ++----- src/month_delta.rs | 122 +++++++++++++++---------------------- tests/test_ewma_by_time.py | 6 +- tests/test_month_delta.py | 42 ++++++------- 9 files changed, 96 insertions(+), 159 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 564f8ab..aea15a4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "polars_xdt" -version = "0.14.3" +version = "0.14.5" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/README.md b/README.md index 5ccad90..88a6a91 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,7 @@ eXtra stuff for DateTimes in [Polars](https://www.pola.rs/). - ✅ convert to and from multiple time zones - ✅ format datetime in different locales - ✅ convert to Julian Dates +- ✅ time-based EWMA Installation ------------ diff --git a/polars_xdt/functions.py b/polars_xdt/functions.py index b6c2085..55c9f2d 100644 --- a/polars_xdt/functions.py +++ b/polars_xdt/functions.py @@ -784,7 +784,7 @@ def month_delta( │ date ┆ date ┆ i32 │ ╞════════════╪════════════╪═════════════╡ │ 2024-03-01 ┆ 2023-02-28 ┆ -12 │ - │ 2024-03-31 ┆ 2023-02-28 ┆ -14 │ + │ 2024-03-31 ┆ 2023-02-28 ┆ -13 │ │ 2022-02-28 ┆ 2023-02-28 ┆ 12 │ │ 2023-01-31 ┆ 2023-01-31 ┆ 0 │ │ 2019-12-31 ┆ 2023-01-01 ┆ 36 │ @@ -903,13 +903,12 @@ def ewma_by_time( *, times: IntoExpr, half_life: timedelta, - adjust: bool = True, ) -> pl.Expr: r""" Calculate time-based exponentially weighted moving average. Given observations :math:`x_1, x_2, \ldots, x_n` at times - :math:`t_1, t_2, \ldots, t_n`, the **unadjusted** EWMA is calculated as + :math:`t_1, t_2, \ldots, t_n`, the EWMA is calculated as .. math:: @@ -921,16 +920,6 @@ def ewma_by_time( where :math:`\lambda` equals :math:`\ln(2) / \text{half_life}`. - The **adjusted** version is - - .. math:: - - y_0 &= x_0 - - \alpha_i &= (\alpha_{i-1} + 1) \exp(-\lambda(t_i - t_{i-1})) - - y_i &= (x_i + \alpha_i y_{i-1}) / (1. + \alpha_i); - Parameters ---------- values @@ -939,9 +928,6 @@ def ewma_by_time( Times corresponding to `values`. Should be ``DateTime`` or ``Date``. half_life Unit over which observation decays to half its value. - adjust - Whether to adjust the result to account for the bias towards the - initial value. Defaults to True. Returns ------- @@ -977,10 +963,10 @@ def ewma_by_time( │ i64 ┆ date ┆ f64 │ ╞════════╪════════════╪══════════╡ │ 0 ┆ 2020-01-01 ┆ 0.0 │ - │ 1 ┆ 2020-01-03 ┆ 0.585786 │ - │ 2 ┆ 2020-01-10 ┆ 1.523889 │ + │ 1 ┆ 2020-01-03 ┆ 0.292893 │ + │ 2 ┆ 2020-01-10 ┆ 1.492474 │ │ null ┆ 2020-01-15 ┆ null │ - │ 4 ┆ 2020-01-17 ┆ 3.233686 │ + │ 4 ┆ 2020-01-17 ┆ 3.254508 │ └────────┴────────────┴──────────┘ """ @@ -993,5 +979,5 @@ def ewma_by_time( symbol="ewma_by_time", is_elementwise=False, args=[values], - kwargs={"half_life": half_life_us, "adjust": adjust}, + kwargs={"half_life": half_life_us}, ) diff --git a/polars_xdt/ranges.py b/polars_xdt/ranges.py index 1d820c2..1a3d852 100644 --- a/polars_xdt/ranges.py +++ b/polars_xdt/ranges.py @@ -25,8 +25,7 @@ def date_range( eager: Literal[False] = ..., weekend: Sequence[str] = ..., holidays: Sequence[date] | None = ..., -) -> pl.Expr: - ... +) -> pl.Expr: ... @overload @@ -41,8 +40,7 @@ def date_range( eager: Literal[True], weekend: Sequence[str] = ..., holidays: Sequence[date] | None = ..., -) -> pl.Series: - ... +) -> pl.Series: ... @overload @@ -57,8 +55,7 @@ def date_range( eager: bool = ..., weekend: Sequence[str] = ..., holidays: Sequence[date] | None = ..., -) -> pl.Series | pl.Expr: - ... +) -> pl.Series | pl.Expr: ... def date_range( # noqa: PLR0913 diff --git a/src/ewma_by_time.rs b/src/ewma_by_time.rs index 51debf2..1f50fcb 100644 --- a/src/ewma_by_time.rs +++ b/src/ewma_by_time.rs @@ -1,12 +1,10 @@ use polars::prelude::*; use polars_arrow::array::PrimitiveArray; -use pyo3_polars::export::polars_core::export::num::Pow; pub(crate) fn impl_ewma_by_time_float( times: &Int64Chunked, values: &Float64Chunked, half_life: i64, - adjust: bool, time_unit: TimeUnit, ) -> Float64Chunked { let mut out = Vec::with_capacity(times.len()); @@ -22,7 +20,6 @@ pub(crate) fn impl_ewma_by_time_float( let mut prev_time: i64 = times.get(0).unwrap(); let mut prev_result = values.get(0).unwrap(); - let mut prev_alpha = 0.0; out.push(Some(prev_result)); values .iter() @@ -32,18 +29,10 @@ pub(crate) fn impl_ewma_by_time_float( match (time, value) { (Some(time), Some(value)) => { let delta_time = time - prev_time; - let result: f64; - if adjust { - let alpha = - (prev_alpha + 1.) * Pow::pow(0.5, delta_time as f64 / half_life as f64); - result = (value + alpha * prev_result) / (1. + alpha); - prev_alpha = alpha; - } else { - // equivalent to: - // alpha = exp(-delta_time*ln(2) / half_life) - prev_alpha = (0.5_f64).powf(delta_time as f64 / half_life as f64); - result = (1. - prev_alpha) * value + prev_alpha * prev_result; - } + // equivalent to: + // alpha = exp(-delta_time*ln(2) / half_life) + let alpha = (0.5_f64).powf(delta_time as f64 / half_life as f64); + let result = (1. - alpha) * value + alpha * prev_result; prev_time = time; prev_result = result; out.push(Some(result)); @@ -59,24 +48,23 @@ pub(crate) fn impl_ewma_by_time( times: &Int64Chunked, values: &Series, half_life: i64, - adjust: bool, time_unit: TimeUnit, ) -> Series { match values.dtype() { DataType::Float64 => { let values = values.f64().unwrap(); - impl_ewma_by_time_float(times, values, half_life, adjust, time_unit).into_series() + impl_ewma_by_time_float(times, values, half_life, time_unit).into_series() } DataType::Int64 | DataType::Int32 => { let values = values.cast(&DataType::Float64).unwrap(); let values = values.f64().unwrap(); - impl_ewma_by_time_float(times, values, half_life, adjust, time_unit).into_series() + impl_ewma_by_time_float(times, values, half_life, time_unit).into_series() } DataType::Float32 => { // todo: preserve Float32 in this case let values = values.cast(&DataType::Float64).unwrap(); let values = values.f64().unwrap(); - impl_ewma_by_time_float(times, values, half_life, adjust, time_unit).into_series() + impl_ewma_by_time_float(times, values, half_life, time_unit).into_series() } dt => panic!("Expected values to be signed numeric, got {:?}", dt), } diff --git a/src/expressions.rs b/src/expressions.rs index 0fd1c59..2635ac1 100644 --- a/src/expressions.rs +++ b/src/expressions.rs @@ -179,7 +179,6 @@ fn arg_previous_greater(inputs: &[Series]) -> PolarsResult { #[derive(Deserialize)] struct EwmTimeKwargs { half_life: i64, - adjust: bool, } #[polars_expr(output_type=Float64)] @@ -188,26 +187,18 @@ fn ewma_by_time(inputs: &[Series], kwargs: EwmTimeKwargs) -> PolarsResult { let time = &inputs[0].datetime().unwrap(); - Ok(impl_ewma_by_time( - &time.0, - values, - kwargs.half_life, - kwargs.adjust, - time.time_unit(), + Ok( + impl_ewma_by_time(&time.0, values, kwargs.half_life, time.time_unit()) + .into_series(), ) - .into_series()) } DataType::Date => { let binding = &inputs[0].cast(&DataType::Datetime(TimeUnit::Milliseconds, None))?; let time = binding.datetime().unwrap(); - Ok(impl_ewma_by_time( - &time.0, - values, - kwargs.half_life, - kwargs.adjust, - time.time_unit(), + Ok( + impl_ewma_by_time(&time.0, values, kwargs.half_life, time.time_unit()) + .into_series(), ) - .into_series()) } _ => polars_bail!(InvalidOperation: "First argument should be a date or datetime type."), } diff --git a/src/month_delta.rs b/src/month_delta.rs index 1d576b7..1c58683 100644 --- a/src/month_delta.rs +++ b/src/month_delta.rs @@ -1,65 +1,56 @@ -use chrono::{Datelike, NaiveDate}; +use chrono::Datelike; +use chrono::NaiveDate; use polars::prelude::*; -// Function to get the date of the last day of the current month for a given date. -fn get_last_month_date(date: NaiveDate) -> NaiveDate { - if date.month() == 12 { - // If it is December, move to the next year and set the month to January. - NaiveDate::from_ymd_opt(date.year() + 1, 1, 1) - .unwrap() - .pred_opt() - .unwrap() - } else { - date.with_day(1) - .unwrap() - .with_month(date.month() + 1) - .unwrap() - .pred_opt() - .unwrap() +fn add_month(ts: NaiveDate, n_months: i64) -> NaiveDate { + // Have to define, because it is hidden + const DAYS_PER_MONTH: [[i64; 12]; 2] = [ + //J F M A M J J A S O N D + [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31], // non-leap year + [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31], // leap year + ]; + let months = n_months; + + // Retrieve the current date and increment the values + // based on the number of months + + let mut year = ts.year(); + let mut month = ts.month() as i32; + let mut day = ts.day(); + year += (months / 12) as i32; + month += (months % 12) as i32; + + // if the month overflowed or underflowed, adjust the year + // accordingly. Because we add the modulo for the months + // the year will only adjust by one + if month > 12 { + year += 1; + month -= 12; + } else if month <= 0 { + year -= 1; + month += 12; } -} -// Function that checks if both dates fall on the last days of their respective months. -fn get_last_day_bool(start_date: NaiveDate, end_date: NaiveDate) -> bool { - let end_date_end = get_last_month_date(end_date); - let start_date_end = get_last_month_date(start_date); - { - // End date is the last day of its month - end_date.day() == end_date_end.day() && - // Start date is the last day of its month - start_date.day() == start_date_end.day() && - end_date.day() != start_date.day() && - start_date.month() != end_date.month() + // Adding this not to import copy pasta again + let leap_year = year % 400 == 0 || (year % 4 == 0 && year % 100 != 0); + // Normalize the day if we are past the end of the month. + let last_day_of_month = DAYS_PER_MONTH[leap_year as usize][(month - 1) as usize] as u32; + + if day > last_day_of_month { + day = last_day_of_month } -} -// Function to calculate the span of months between two dates as an integer. -// This function specifically checks if the span between the two dates covers whole months, -// and under certain conditions, adjusts the count by 1 or -1 to reflect partial months. -fn get_month_span_int(start_date: NaiveDate, end_date: NaiveDate) -> i32 { - // Check if the actual number of days difference matches assuming both - // dates start on the first - let actual_days_diff = end_date.signed_duration_since(start_date).num_days(); - let expected_days_diff = { - let start_dt = start_date.with_day(1).unwrap(); // start date at the beginning of the month - let end_dt = end_date.with_day(1).unwrap(); // end date at the beginning of a month - end_dt.signed_duration_since(start_dt).num_days() // expected day difference as full months - }; + NaiveDate::from_ymd_opt(year, month as u32, day).unwrap() +} +fn get_m_diff(mut left: NaiveDate, right: NaiveDate) -> i32 { let mut n = 0; - // Calculates if the date difference spans entire months - // If do then add additional month to the calculation - if actual_days_diff == expected_days_diff - && end_date.month() != start_date.month() - && end_date.day() != start_date.day() - { - n += 1 - } else if expected_days_diff.abs() > actual_days_diff.abs() { - // If the expected difference (in absolute terms) is greater than the actual difference, - // it indicates a partial month span, and we return -1 to adjust the month span downwards. - n -= 1 - } - + while left < right { + left = add_month(left, 1); + if left <= right { + n += 1; + } + } n } @@ -75,27 +66,10 @@ pub(crate) fn impl_month_delta(start_dates: &Series, end_dates: &Series) -> Pola .zip(end_dates.as_date_iter()) .map(|(s_arr, e_arr)| { s_arr.zip(e_arr).map(|(start_date, end_date)| { - let year_diff = end_date.year() - start_date.year(); - let mut month_diff = end_date.month() as i32 - start_date.month() as i32; - month_diff += year_diff * 12; - - // Apply corrections based on the conditions checked earlier - // Use absolute value to determine the magnitude of the change - let mut abs_month_diff = month_diff.abs(); - - abs_month_diff += get_month_span_int(start_date, end_date); - - if get_last_day_bool(start_date, end_date) { - // Add an extra month for end cases where both dates are at month-end - abs_month_diff += 1 - } - - // Return the final month difference - // if start date is after the end date then return negative - if month_diff < 0 { - -abs_month_diff + if start_date > end_date { + -get_m_diff(end_date, start_date) } else { - abs_month_diff + get_m_diff(start_date, end_date) } }) }) diff --git a/tests/test_ewma_by_time.py b/tests/test_ewma_by_time.py index 6886477..4fa9e65 100644 --- a/tests/test_ewma_by_time.py +++ b/tests/test_ewma_by_time.py @@ -26,10 +26,10 @@ def test_ewma_by_time(): { "ewma": [ 0.0, - 0.585786437626905, - 1.52388878049859, + 0.2928932188134524, + 1.4924741174358913, None, - 3.2336858398518338, + 3.2545080948503213, ] } ) diff --git a/tests/test_month_delta.py b/tests/test_month_delta.py index 1fe8b72..753f356 100644 --- a/tests/test_month_delta.py +++ b/tests/test_month_delta.py @@ -45,33 +45,33 @@ def test_month_delta(): }, ) - expected_month_diff = [ - 0, - 0, - 2, - -12, - -5, - 0, - -12, - 1, - 13, - -12, - -14, - 12, - -12, - 36, - 3, - 11, + assert_month_diff = [ + 0, # 2024-01-01 to 2024-01-04 + 0, # 2024-01-01 to 2024-01-31 + 2, # 2023-09-01 to 2023-11-01 + -12, # 2023-01-04 to 2022-01-04 + -5, # 2022-06-04 to 2022-01-04 + 0, # 2023-01-01 to 2022-12-31 + -12, # 2023-01-01 to 2021-12-31 + 1, # 2022-02-01 to 2022-03-01 + 13, # 2022-02-01 to 2023-03-01 + -12, # 2024-03-01 to 2023-02-28 + -13, # 2024-03-31 to 2023-02-28 + 11, # 2022-02-28 to 2023-01-31 + -11, # 2023-01-31 to 2022-02-28 + 36, # 2019-12-31 to 2023-01-01 + 3, # 2024-01-31 to 2024-04-30 + 11, # 1970-01-02 to 1971-01-01 ] df = df.with_columns( # For easier visual debugging purposes - pl.Series(name="out_month_delta", values=expected_month_diff), + pl.Series(name="assert_month_delta", values=assert_month_diff), month_delta=xdt.month_delta("start_date", "end_date"), ) - + # pl.Config.set_tbl_rows(50) + # print(df) month_diff_list = df.get_column("month_delta").to_list() - - assert expected_month_diff == month_diff_list, ( + assert assert_month_diff == month_diff_list, ( "The month difference list did not match the expected values.\n" "Please check the function: 'month_diff.rs' for discrepancies." ) From 567b7395262ad9b23a9823ae633a60c90dd4d0e9 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 9 Mar 2024 09:50:36 +0000 Subject: [PATCH 7/7] fixup, add hypothesis test --- docs/API.rst | 1 + polars_xdt/functions.py | 4 ++-- polars_xdt/ranges.py | 9 ++++++--- pyproject.toml | 3 ++- requirements.txt | 1 + tests/test_month_delta.py | 37 +++++++++++++++++++++++++++++++++++++ 6 files changed, 49 insertions(+), 6 deletions(-) diff --git a/docs/API.rst b/docs/API.rst index 405d5af..f1da729 100644 --- a/docs/API.rst +++ b/docs/API.rst @@ -13,6 +13,7 @@ API polars_xdt.from_local_datetime polars_xdt.is_workday polars_xdt.month_name + polars_xdt.month_delta polars_xdt.offset_by polars_xdt.to_local_datetime polars_xdt.to_julian_date diff --git a/polars_xdt/functions.py b/polars_xdt/functions.py index 55c9f2d..f6e462d 100644 --- a/polars_xdt/functions.py +++ b/polars_xdt/functions.py @@ -751,8 +751,8 @@ def month_delta( ------- polars.Expr - Example - ------- + Examples + -------- >>> from datetime import date >>> import polars as pl >>> import polars_xdt as xdt diff --git a/polars_xdt/ranges.py b/polars_xdt/ranges.py index 1a3d852..1d820c2 100644 --- a/polars_xdt/ranges.py +++ b/polars_xdt/ranges.py @@ -25,7 +25,8 @@ def date_range( eager: Literal[False] = ..., weekend: Sequence[str] = ..., holidays: Sequence[date] | None = ..., -) -> pl.Expr: ... +) -> pl.Expr: + ... @overload @@ -40,7 +41,8 @@ def date_range( eager: Literal[True], weekend: Sequence[str] = ..., holidays: Sequence[date] | None = ..., -) -> pl.Series: ... +) -> pl.Series: + ... @overload @@ -55,7 +57,8 @@ def date_range( eager: bool = ..., weekend: Sequence[str] = ..., holidays: Sequence[date] | None = ..., -) -> pl.Series | pl.Expr: ... +) -> pl.Series | pl.Expr: + ... def date_range( # noqa: PLR0913 diff --git a/pyproject.toml b/pyproject.toml index 0284020..5fe715a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -86,6 +86,7 @@ line-length = 80 [[tool.mypy.overrides]] module = [ - "pandas" + "pandas", + "dateutil.*", ] ignore_missing_imports = true diff --git a/requirements.txt b/requirements.txt index 3802661..0b7e3bf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ maturin +python-dateutil polars hypothesis numpy diff --git a/tests/test_month_delta.py b/tests/test_month_delta.py index 753f356..0647137 100644 --- a/tests/test_month_delta.py +++ b/tests/test_month_delta.py @@ -1,6 +1,9 @@ import polars as pl import polars_xdt as xdt from datetime import date +from dateutil.relativedelta import relativedelta + +from hypothesis import given, strategies as st, assume def test_month_delta(): @@ -75,3 +78,37 @@ def test_month_delta(): "The month difference list did not match the expected values.\n" "Please check the function: 'month_diff.rs' for discrepancies." ) + + +@given( + start_date=st.dates( + min_value=date(1960, 1, 1), max_value=date(2024, 12, 31) + ), + end_date=st.dates(min_value=date(1960, 1, 1), max_value=date(2024, 12, 31)), +) +def test_month_delta_hypothesis(start_date: date, end_date: date) -> None: + df = pl.DataFrame( + { + "start_date": [start_date], + "end_date": [end_date], + } + ) + result = df.select(result=xdt.month_delta("start_date", "end_date"))[ + "result" + ].item() + + expected = 0 + if start_date <= end_date: + while True: + start_date = start_date + relativedelta(months=1) + if start_date > end_date: + break + expected += 1 + else: + while True: + end_date = end_date + relativedelta(months=1) + if end_date > start_date: + break + expected -= 1 + + assert result == expected