Skip to content

Commit

Permalink
Merge pull request #54 from pola-rs/offsets
Browse files Browse the repository at this point in the history
add base_utc_offset and dst_offset
  • Loading branch information
MarcoGorelli authored Jan 7, 2024
2 parents ecd6701 + b2426e9 commit 7fed49c
Show file tree
Hide file tree
Showing 6 changed files with 276 additions and 2 deletions.
2 changes: 2 additions & 0 deletions docs/API.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ API

polars_xdt.date_range
polars_xdt.workday_count
polars_xdt.ExprXDTNamespace.base_utc_offset
polars_xdt.ExprXDTNamespace.ceil
polars_xdt.ExprXDTNamespace.dst_offset
polars_xdt.ExprXDTNamespace.format_localized
polars_xdt.ExprXDTNamespace.from_local_datetime
polars_xdt.ExprXDTNamespace.is_workday
Expand Down
94 changes: 94 additions & 0 deletions polars_xdt/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,100 @@ def ceil(
)
return cast(XDTExpr, result)

def base_utc_offset(self) -> XDTExpr:
    """
    Base offset from UTC.

    For most time zones this is the same for every datetime, but it can
    change in the rare case that a country switches time zone, as
    Samoa (Apia) did at the end of 2011.

    Returns
    -------
    Expr
        Expression of data type :class:`Duration`.

    See Also
    --------
    dst_offset : Additional offset currently in effect (typically due to
        daylight saving time).

    Examples
    --------
    >>> from datetime import datetime
    >>> import polars_xdt  # noqa: F401
    >>> df = pl.DataFrame(
    ...     {
    ...         "ts": [datetime(2011, 12, 29), datetime(2012, 1, 1)],
    ...     }
    ... )
    >>> df = df.with_columns(
    ...     pl.col("ts").dt.replace_time_zone("Pacific/Apia")
    ... )
    >>> df.with_columns(
    ...     pl.col("ts").xdt.base_utc_offset().alias("base_utc_offset")
    ... )
    shape: (2, 2)
    ┌────────────────────────────┬─────────────────┐
    │ ts                         ┆ base_utc_offset │
    │ ---                        ┆ ---             │
    │ datetime[μs, Pacific/Apia] ┆ duration[ms]    │
    ╞════════════════════════════╪═════════════════╡
    │ 2011-12-29 00:00:00 -10    ┆ -11h            │
    │ 2012-01-01 00:00:00 +14    ┆ 13h             │
    └────────────────────────────┴─────────────────┘
    """
    # Dispatch to the compiled plugin symbol of the same name; the
    # expression itself is the only input, so no extra args are passed.
    plugin_expr = self._expr.register_plugin(
        lib=lib,
        symbol="base_utc_offset",
        is_elementwise=True,
        args=[],
    )
    return cast(XDTExpr, plugin_expr)

def dst_offset(self) -> XDTExpr:
    """
    Additional offset currently in effect (typically due to daylight saving time).

    Returns
    -------
    Expr
        Expression of data type :class:`Duration`.

    See Also
    --------
    base_utc_offset : Base offset from UTC.

    Examples
    --------
    >>> from datetime import datetime
    >>> import polars_xdt  # noqa: F401
    >>> df = pl.DataFrame(
    ...     {
    ...         "ts": [datetime(2020, 10, 25), datetime(2020, 10, 26)],
    ...     }
    ... )
    >>> df = df.with_columns(
    ...     pl.col("ts").dt.replace_time_zone("Europe/London")
    ... )
    >>> df.with_columns(pl.col("ts").xdt.dst_offset().alias("dst_offset"))
    shape: (2, 2)
    ┌─────────────────────────────┬──────────────┐
    │ ts                          ┆ dst_offset   │
    │ ---                         ┆ ---          │
    │ datetime[μs, Europe/London] ┆ duration[ms] │
    ╞═════════════════════════════╪══════════════╡
    │ 2020-10-25 00:00:00 BST     ┆ 1h           │
    │ 2020-10-26 00:00:00 GMT     ┆ 0ms          │
    └─────────────────────────────┴──────────────┘
    """
    # Dispatch to the compiled plugin symbol of the same name; the
    # expression itself is the only input, so no extra args are passed.
    plugin_expr = self._expr.register_plugin(
        lib=lib,
        symbol="dst_offset",
        is_elementwise=True,
        args=[],
    )
    return cast(XDTExpr, plugin_expr)


class XDTExpr(pl.Expr):
@property
Expand Down
37 changes: 35 additions & 2 deletions src/expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,12 @@ use crate::is_workday::*;
use crate::sub::*;
use crate::timezone::*;
use crate::to_julian::*;
use crate::utc_offsets::*;
use chrono_tz::Tz;
use polars::prelude::*;
use pyo3_polars::derive::polars_expr;
use serde::Deserialize;
use std::str::FromStr;
#[derive(Deserialize)]
pub struct BusinessDayKwargs {
holidays: Vec<i32>,
Expand All @@ -26,10 +29,16 @@ pub struct FormatLocalizedKwargs {
locale: String,
}

fn bday_output(input_fields: &[Field]) -> PolarsResult<Field> {
/// Output-schema callback: the result field is a copy of the first input field
/// (same name, same dtype).
fn same_output(input_fields: &[Field]) -> PolarsResult<Field> {
    Ok(input_fields[0].clone())
}
/// Output-schema callback: the result keeps the first input field's name and
/// has dtype `Duration` in milliseconds.
fn duration_ms(input_fields: &[Field]) -> PolarsResult<Field> {
    let name = input_fields[0].name();
    let dtype = DataType::Duration(TimeUnit::Milliseconds);
    Ok(Field::new(name, dtype))
}

pub fn to_local_datetime_output(input_fields: &[Field]) -> PolarsResult<Field> {
let field = input_fields[0].clone();
Expand All @@ -53,7 +62,7 @@ pub fn from_local_datetime_output(input_fields: &[Field]) -> PolarsResult<Field>
Ok(Field::new(&field.name, dtype))
}

#[polars_expr(output_type_func=bday_output)]
#[polars_expr(output_type_func=same_output)]
fn advance_n_days(inputs: &[Series], kwargs: BusinessDayKwargs) -> PolarsResult<Series> {
let s = &inputs[0];
let n = &inputs[1].cast(&DataType::Int32)?;
Expand Down Expand Up @@ -110,3 +119,27 @@ fn to_julian_date(inputs: &[Series]) -> PolarsResult<Series> {
let s = &inputs[0];
impl_to_julian_date(s)
}

/// Plugin entry point: base UTC offset for each value of a time-zone-aware
/// Datetime column.
///
/// `inputs[0]` must have dtype `Datetime(_, Some(time_zone))`. The per-value
/// work is delegated to `impl_base_utc_offset`, and the resulting duration
/// column is returned as a `Series`.
///
/// # Errors
///
/// * `ComputeError` if the column's time zone string cannot be parsed.
/// * `InvalidOperation` if the input is not a Datetime carrying a time zone
///   (this includes Datetime columns without a time zone).
#[polars_expr(output_type_func=duration_ms)]
fn base_utc_offset(inputs: &[Series]) -> PolarsResult<Series> {
    let s = &inputs[0];
    match s.dtype() {
        DataType::Datetime(time_unit, Some(time_zone)) => {
            // Report an unparseable time zone as an error rather than
            // panicking inside the plugin.
            let time_zone = match Tz::from_str(time_zone) {
                Ok(tz) => tz,
                Err(_) => {
                    polars_bail!(ComputeError: "unable to parse time zone: '{}'", time_zone)
                }
            };
            Ok(impl_base_utc_offset(s.datetime()?, time_unit, &time_zone).into_series())
        }
        _ => polars_bail!(InvalidOperation: "base_utc_offset only works on Datetime type."),
    }
}

/// Plugin entry point: DST offset for each value of a time-zone-aware
/// Datetime column.
///
/// `inputs[0]` must have dtype `Datetime(_, Some(time_zone))`. The per-value
/// work is delegated to `impl_dst_offset`, and the resulting duration column
/// is returned as a `Series`.
///
/// # Errors
///
/// * `ComputeError` if the column's time zone string cannot be parsed.
/// * `InvalidOperation` if the input is not a Datetime carrying a time zone
///   (this includes Datetime columns without a time zone).
#[polars_expr(output_type_func=duration_ms)]
fn dst_offset(inputs: &[Series]) -> PolarsResult<Series> {
    let s = &inputs[0];
    match s.dtype() {
        DataType::Datetime(time_unit, Some(time_zone)) => {
            // Report an unparseable time zone as an error rather than
            // panicking inside the plugin.
            let time_zone = match Tz::from_str(time_zone) {
                Ok(tz) => tz,
                Err(_) => {
                    polars_bail!(ComputeError: "unable to parse time zone: '{}'", time_zone)
                }
            };
            Ok(impl_dst_offset(s.datetime()?, time_unit, &time_zone).into_series())
        }
        // NOTE(review): the message names `base_utc_offset` even though this is
        // `dst_offset`. It is kept verbatim because the Python test suite matches
        // on this exact text; change both together if it is ever corrected.
        _ => polars_bail!(InvalidOperation: "base_utc_offset only works on Datetime type."),
    }
}
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ mod is_workday;
mod sub;
mod timezone;
mod to_julian;
mod utc_offsets;

use pyo3::types::PyModule;
use pyo3::{pymodule, PyResult, Python};
Expand Down
43 changes: 43 additions & 0 deletions src/utc_offsets.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
use chrono::TimeZone;
use chrono_tz::OffsetComponents;
use chrono_tz::Tz;
use polars::prelude::*;
use polars_arrow::temporal_conversions::{
timestamp_ms_to_datetime, timestamp_ns_to_datetime, timestamp_us_to_datetime,
};

/// Maps every timestamp in `ca` to the base UTC offset of `time_zone` at that
/// instant, expressed in milliseconds.
///
/// Each physical value is decoded as a UTC timestamp in `time_unit`, localized
/// to `time_zone`, and replaced by the offset's `base_utc_offset()` in
/// milliseconds. The result is always a millisecond `Duration` column,
/// whatever the input time unit.
pub(crate) fn impl_base_utc_offset(
    ca: &DatetimeChunked,
    time_unit: &TimeUnit,
    time_zone: &Tz,
) -> DurationChunked {
    // Choose the decoder for the column's time unit once, outside the
    // per-value closure.
    let to_naive_utc = match time_unit {
        TimeUnit::Nanoseconds => timestamp_ns_to_datetime,
        TimeUnit::Microseconds => timestamp_us_to_datetime,
        TimeUnit::Milliseconds => timestamp_ms_to_datetime,
    };
    let offset_ms = |timestamp: i64| -> i64 {
        let naive_utc = to_naive_utc(timestamp);
        let localized = time_zone.from_utc_datetime(&naive_utc);
        localized.offset().base_utc_offset().num_milliseconds()
    };
    ca.0.apply_values(offset_ms)
        .into_duration(TimeUnit::Milliseconds)
}

/// Maps every timestamp in `ca` to the DST offset of `time_zone` at that
/// instant, expressed in milliseconds.
///
/// Each physical value is decoded as a UTC timestamp in `time_unit`, localized
/// to `time_zone`, and replaced by the offset's `dst_offset()` in
/// milliseconds. The result is always a millisecond `Duration` column,
/// whatever the input time unit.
pub(crate) fn impl_dst_offset(
    ca: &DatetimeChunked,
    time_unit: &TimeUnit,
    time_zone: &Tz,
) -> DurationChunked {
    // Choose the decoder for the column's time unit once, outside the
    // per-value closure.
    let to_naive_utc = match time_unit {
        TimeUnit::Nanoseconds => timestamp_ns_to_datetime,
        TimeUnit::Microseconds => timestamp_us_to_datetime,
        TimeUnit::Milliseconds => timestamp_ms_to_datetime,
    };
    let offset_ms = |timestamp: i64| -> i64 {
        let naive_utc = to_naive_utc(timestamp);
        let localized = time_zone.from_utc_datetime(&naive_utc);
        localized.offset().dst_offset().num_milliseconds()
    };
    ca.0.apply_values(offset_ms)
        .into_duration(TimeUnit::Milliseconds)
}
101 changes: 101 additions & 0 deletions tests/offsets_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
from __future__ import annotations
import pytest
import polars as pl
import polars_xdt as xdt
from datetime import datetime

from typing import TYPE_CHECKING
from polars.testing import assert_series_equal
from polars.exceptions import ComputeError
if TYPE_CHECKING:
from polars.type_aliases import TimeUnit

@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
def test_base_utc_offset(time_unit: TimeUnit) -> None:
    """base_utc_offset gives -11h then +13h for Pacific/Apia, for every time unit."""
    timestamps = pl.datetime_range(
        datetime(2011, 12, 29),
        datetime(2012, 1, 1),
        "2d",
        time_zone="Pacific/Apia",
        eager=True,
    )
    frame = timestamps.dt.cast_time_unit(time_unit).to_frame("a")
    offsets = xdt.col("a").xdt.base_utc_offset().alias("base_utc_offset")
    result = frame.select(offsets)["base_utc_offset"]

    hour_ms = 3600 * 1000
    expected = pl.Series(
        "base_utc_offset",
        [-11 * hour_ms, 13 * hour_ms],
        dtype=pl.Duration("ms"),
    )
    assert_series_equal(result, expected)


def test_base_utc_offset_lazy_schema() -> None:
    """The lazy schema reports base_utc_offset as a millisecond Duration column."""
    timestamps = pl.datetime_range(
        datetime(2020, 10, 25),
        datetime(2020, 10, 26),
        time_zone="Europe/London",
        eager=True,
    )
    lazy_frame = pl.DataFrame({"ts": timestamps}).lazy()
    with_offset = lazy_frame.with_columns(
        base_utc_offset=xdt.col("ts").xdt.base_utc_offset()
    )
    result = with_offset.schema

    expected = {
        "ts": pl.Datetime(time_unit="us", time_zone="Europe/London"),
        "base_utc_offset": pl.Duration(time_unit="ms"),
    }
    assert result == expected


def test_base_utc_offset_invalid() -> None:
    """base_utc_offset on a datetime column without a time zone raises ComputeError."""
    naive = pl.datetime_range(
        datetime(2011, 12, 29),
        datetime(2012, 1, 1),
        "2d",
        eager=True,
    )
    frame = naive.to_frame("a")
    expected_message = r"base_utc_offset only works on Datetime type"
    with pytest.raises(ComputeError, match=expected_message):
        frame.select(xdt.col("a").xdt.base_utc_offset())


@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
def test_dst_offset(time_unit: TimeUnit) -> None:
    """dst_offset gives 1h then 0 for Europe/London on 2020-10-25/26, for every time unit."""
    timestamps = pl.datetime_range(
        datetime(2020, 10, 25),
        datetime(2020, 10, 26),
        time_zone="Europe/London",
        eager=True,
    )
    frame = timestamps.dt.cast_time_unit(time_unit).to_frame("a")
    offsets = xdt.col("a").xdt.dst_offset().alias("dst_offset")
    result = frame.select(offsets)["dst_offset"]

    expected = pl.Series(
        "dst_offset",
        [3_600 * 1_000, 0],
        dtype=pl.Duration("ms"),
    )
    assert_series_equal(result, expected)


def test_dst_offset_lazy_schema() -> None:
    """The lazy schema reports dst_offset as a millisecond Duration column."""
    timestamps = pl.datetime_range(
        datetime(2020, 10, 25),
        datetime(2020, 10, 26),
        time_zone="Europe/London",
        eager=True,
    )
    lazy_frame = pl.DataFrame({"ts": timestamps}).lazy()
    with_offset = lazy_frame.with_columns(
        dst_offset=xdt.col("ts").xdt.dst_offset()
    )
    result = with_offset.schema

    expected = {
        "ts": pl.Datetime(time_unit="us", time_zone="Europe/London"),
        "dst_offset": pl.Duration(time_unit="ms"),
    }
    assert result == expected


def test_dst_offset_invalid() -> None:
    """dst_offset on a datetime column without a time zone raises ComputeError."""
    naive = pl.datetime_range(
        datetime(2011, 12, 29),
        datetime(2012, 1, 1),
        "2d",
        eager=True,
    )
    frame = naive.to_frame("a")
    # The expected text names base_utc_offset: this is the message the
    # test has always matched for dst_offset.
    expected_message = r"base_utc_offset only works on Datetime type"
    with pytest.raises(ComputeError, match=expected_message):
        frame.select(xdt.col("a").xdt.dst_offset())

0 comments on commit 7fed49c

Please sign in to comment.