Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[NEW] month_delta function #57

Merged
merged 12 commits into from
Mar 9, 2024
2 changes: 2 additions & 0 deletions polars_xdt/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
format_localized,
from_local_datetime,
is_workday,
month_delta,
month_name,
offset_by,
to_julian_date,
Expand All @@ -29,5 +30,6 @@
"to_julian_date",
"to_local_datetime",
"workday_count",
"month_delta",
"__version__",
]
69 changes: 69 additions & 0 deletions polars_xdt/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -731,3 +731,72 @@ def workday_count(
"holidays": holidays_int,
},
)


def month_delta(
    start_dates: IntoExpr,
    end_dates: IntoExpr,
) -> pl.Expr:
    """
    Compute the signed number of months from ``start_dates`` to ``end_dates``.

    Both arguments are turned into expressions with ``parse_into_expr`` and
    passed to the ``month_delta`` plugin symbol, which is registered as an
    elementwise operation. The result is negative when the start date lies
    after the end date (see the example below).

    Parameters
    ----------
    start_dates
        Expression, or column name as in the example below, holding the
        start dates.
    end_dates
        Expression, or column name as in the example below, holding the
        end dates.

    Returns
    -------
    polars.Expr

    Example
    -------
    >>> from datetime import date
    >>> import polars as pl
    >>> import polars_xdt as xdt
    >>> df = pl.DataFrame(
    ...     {
    ...         "start_date": [
    ...             date(2024, 3, 1),
    ...             date(2024, 3, 31),
    ...             date(2022, 2, 28),
    ...             date(2023, 1, 31),
    ...             date(2019, 12, 31),
    ...         ],
    ...         "end_date": [
    ...             date(2023, 2, 28),
    ...             date(2023, 2, 28),
    ...             date(2023, 2, 28),
    ...             date(2023, 1, 31),
    ...             date(2023, 1, 1),
    ...         ],
    ...     },
    ... )
    >>> df.with_columns(
    ...     xdt.month_delta("start_date", "end_date").alias("month_delta")
    ... )
    shape: (5, 3)
    ┌────────────┬────────────┬─────────────┐
    │ start_date ┆ end_date   ┆ month_delta │
    │ ---        ┆ ---        ┆ ---         │
    │ date       ┆ date       ┆ i32         │
    ╞════════════╪════════════╪═════════════╡
    │ 2024-03-01 ┆ 2023-02-28 ┆ -12         │
    │ 2024-03-31 ┆ 2023-02-28 ┆ -14         │
    │ 2022-02-28 ┆ 2023-02-28 ┆ 12          │
    │ 2023-01-31 ┆ 2023-01-31 ┆ 0           │
    │ 2019-12-31 ┆ 2023-01-01 ┆ 36          │
    └────────────┴────────────┴─────────────┘

    """
    # The start expression is the receiver of the plugin call; the end
    # expression travels along as the single extra argument.
    return parse_into_expr(start_dates).register_plugin(
        lib=lib,
        symbol="month_delta",
        args=[parse_into_expr(end_dates)],
        is_elementwise=True,
    )
8 changes: 8 additions & 0 deletions src/expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
use crate::business_days::*;
use crate::format_localized::*;
use crate::is_workday::*;
use crate::month_delta::*;
use crate::sub::*;
use crate::timezone::*;
use crate::to_julian::*;
Expand Down Expand Up @@ -85,6 +86,13 @@ fn workday_count(inputs: &[Series], kwargs: BusinessDayKwargs) -> PolarsResult<S
impl_workday_count(begin_dates, end_dates, &weekmask, holidays)
}

// Plugin entry point for `month_delta`: the first input holds the start dates,
// the second the end dates. All validation and computation is delegated to
// `impl_month_delta`; the declared output type is Int32.
#[polars_expr(output_type=Int32)]
fn month_delta(inputs: &[Series]) -> PolarsResult<Series> {
    let (start_dates, end_dates) = (&inputs[0], &inputs[1]);
    impl_month_delta(start_dates, end_dates)
}

#[polars_expr(output_type=Boolean)]
fn is_workday(inputs: &[Series], kwargs: BusinessDayKwargs) -> PolarsResult<Series> {
let dates = &inputs[0];
Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ mod business_days;
mod expressions;
mod format_localized;
mod is_workday;
mod month_delta;
mod sub;
mod timezone;
mod to_julian;
Expand Down
105 changes: 105 additions & 0 deletions src/month_delta.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
use chrono::{Datelike, NaiveDate};
use polars::prelude::*;

// Returns the last day of the month that `date` falls in.
//
// The result is obtained by building the first day of the following month
// (rolling over into January of the next year for December) and stepping
// back one day.
fn get_last_month_date(date: NaiveDate) -> NaiveDate {
    let (next_year, next_month) = match date.month() {
        12 => (date.year() + 1, 1),
        month => (date.year(), month + 1),
    };
    // Panics only if the following month is outside chrono's representable range.
    NaiveDate::from_ymd_opt(next_year, next_month, 1)
        .unwrap()
        .pred_opt()
        .unwrap()
}

// Returns true when both dates are the last day of their respective months,
// while differing in both day-of-month and month number.
fn get_last_day_bool(start_date: NaiveDate, end_date: NaiveDate) -> bool {
    let end_is_month_end = end_date.day() == get_last_month_date(end_date).day();
    let start_is_month_end = start_date.day() == get_last_month_date(start_date).day();
    let days_differ = end_date.day() != start_date.day();
    let months_differ = start_date.month() != end_date.month();

    end_is_month_end && start_is_month_end && days_differ && months_differ
}

// Correction term for the raw calendar-month count between two dates.
//
// Compares the actual day distance between the dates with the distance
// between the first days of their months:
//   * returns -1 when the first-of-month distance is larger in absolute
//     terms, i.e. the last month of the span is only partially covered;
//   * returns 0 otherwise.
//
// An earlier version also had a `+1` branch guarded by
// `actual == expected && end.month() != start.month() && end.day() != start.day()`.
// Since `actual == expected + (end.day() - start.day())`, the day distances
// can only be equal when the days-of-month are equal, so that branch could
// never be taken and has been removed.
fn get_month_span_int(start_date: NaiveDate, end_date: NaiveDate) -> i32 {
    // Signed number of days from start to end.
    let actual_days_diff = end_date.signed_duration_since(start_date).num_days();
    // Signed number of days between the first days of the two months.
    let expected_days_diff = {
        // Day 1 exists in every month, so `with_day(1)` cannot fail.
        let start_dt = start_date.with_day(1).unwrap();
        let end_dt = end_date.with_day(1).unwrap();
        end_dt.signed_duration_since(start_dt).num_days()
    };

    if expected_days_diff.abs() > actual_days_diff.abs() {
        // Partial month span: adjust the month count downwards.
        -1
    } else {
        0
    }
}

pub(crate) fn impl_month_delta(start_dates: &Series, end_dates: &Series) -> PolarsResult<Series> {
akmalsoliev marked this conversation as resolved.
Show resolved Hide resolved
if (start_dates.dtype() != &DataType::Date) || (end_dates.dtype() != &DataType::Date) {
polars_bail!(InvalidOperation: "polars_xdt.month_delta only works on Date type. Please cast to Date first.");
}
let start_dates = start_dates.date()?;
let end_dates = end_dates.date()?;

let month_diff: Int32Chunked = start_dates
.as_date_iter()
.zip(end_dates.as_date_iter())
.map(|(s_arr, e_arr)| {
s_arr.zip(e_arr).map(|(start_date, end_date)| {
let year_diff = end_date.year() - start_date.year();
akmalsoliev marked this conversation as resolved.
Show resolved Hide resolved
let mut month_diff = end_date.month() as i32 - start_date.month() as i32;
month_diff += year_diff * 12;

// Apply corrections based on the conditions checked earlier
// Use absolute value to determine the magnitude of the change
let mut abs_month_diff = month_diff.abs();

abs_month_diff += get_month_span_int(start_date, end_date);

if get_last_day_bool(start_date, end_date) {
// Add an extra month for end cases where both dates are at month-end
abs_month_diff += 1
}

// Return the final month difference
// if start date is after the end date then return negative
if month_diff < 0 {
-abs_month_diff
} else {
abs_month_diff
}
})
})
.collect();

Ok(month_diff.into_series())
}
77 changes: 77 additions & 0 deletions tests/test_month_delta.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import polars as pl
import polars_xdt as xdt
from datetime import date


def test_month_delta():
    """Check ``xdt.month_delta`` against a table of start/end date pairs."""
    # Each row is (start_date, end_date, expected_month_delta). Keeping the
    # three values side by side avoids misaligning parallel lists.
    cases = [
        (date(2024, 1, 1), date(2024, 1, 4), 0),
        (date(2024, 1, 1), date(2024, 1, 31), 0),
        (date(2023, 9, 1), date(2023, 11, 1), 2),
        (date(2023, 1, 4), date(2022, 1, 4), -12),
        (date(2022, 6, 4), date(2022, 1, 4), -5),
        (date(2023, 1, 1), date(2022, 12, 31), 0),
        (date(2023, 1, 1), date(2021, 12, 31), -12),
        (date(2022, 2, 1), date(2022, 3, 1), 1),
        (date(2022, 2, 1), date(2023, 3, 1), 13),
        (date(2024, 3, 1), date(2023, 2, 28), -12),
        (date(2024, 3, 31), date(2023, 2, 28), -14),
        (date(2022, 2, 28), date(2023, 1, 31), 12),
        (date(2023, 1, 31), date(2022, 2, 28), -12),
        (date(2019, 12, 31), date(2023, 1, 1), 36),
        (date(2024, 1, 31), date(2024, 4, 30), 3),
        (date(1970, 1, 2), date(1971, 1, 1), 11),
    ]
    expected_month_diff = [expected for _, _, expected in cases]

    df = pl.DataFrame(
        {
            "start_date": [start for start, _, _ in cases],
            "end_date": [end for _, end, _ in cases],
        },
    )
    df = df.with_columns(
        # For easier visual debugging purposes
        pl.Series(name="expected_month_delta", values=expected_month_diff),
        month_delta=xdt.month_delta("start_date", "end_date"),
    )

    month_diff_list = df.get_column("month_delta").to_list()

    assert expected_month_diff == month_diff_list, (
        "The month difference list did not match the expected values.\n"
        "Please check 'src/month_delta.rs' for discrepancies."
    )
Loading