Skip to content

Commit

Permalink
Merge pull request #2 from MarcoGorelli/holidays
Browse files Browse the repository at this point in the history
Holidays
  • Loading branch information
MarcoGorelli authored Oct 13, 2023
2 parents 27ea43f + 461fc9c commit c04fe3e
Show file tree
Hide file tree
Showing 8 changed files with 321 additions and 78 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ Cargo.lock
target/
.hypothesis
*.pyc
.vscode
45 changes: 36 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,24 +22,37 @@ $ pip install polars-business
To use it, you'll need to `import polars_business`, and then you'll have a `.business` accessor
on your expressions!

Currently there's only a single function: `advance_n_days`.
Currently there's only a single function: `advance_n_days`. It takes arguments:
- `n`: number of days to advance. This can be an expression.
- `holidays`: list of holidays in `datetime.date` format. The Python `holidays` package may
be useful here. You can install it with `pip install holidays`, and then you can get a list
of holidays for a given country (for example, `'UK'`) with:
```
import holidays
list(holidays.country_holidays('UK', years=[2020, 2021, 2022, 2023]))
```

Example
-------

Here's an example of how to shift a date range forwards by 5 business days (i.e. Monday to Friday, excluding weekends):
Given some dates, can you shift them all forwards by 5 business days (according to the UK holiday calendar)?

With `polars-business`, this is easy:
```python
from datetime import date

import holidays
import polars as pl
import polars_business

from datetime import date

uk_holidays = holidays.country_holidays('UK', years=[2023, 2024])
df = pl.DataFrame({
"dates": pl.date_range(date(2000, 1, 1), date(9999, 1, 1), eager=True),
"dates": [date(2023, 4, 2), date(2023, 9, 1), date(2024, 1, 4)]
})
df = df.filter(pl.col('dates').dt.weekday() <6)

print(df.with_columns(dates_shifted=pl.col('dates').business.advance_n_days(n=5)))
print(df.with_columns(dates_shifted=pl.col('dates').business.advance_n_days(n=5, holidays=uk_holidays)))
```

Note
Expand All @@ -50,13 +63,27 @@ What to expect
----------------
The following will hopefully come relatively soon:
- support for `Datetime`s
- support for custom holiday calendars
- support for rolling forwards/backwards to the next
valid business date (if not already on one)

Ideas for future development:
- business date range
- support for custom mask
- support for custom week mask

Benchmarks
----------

The following timings (in seconds; lower is better) can be verified using the `perf.py` script.

### Adding 17 business days to 10 million dates (no holidays)

- Polars-business 0.058
- NumPy 0.092
- pandas 0.801

### Adding 17 business days to 10 million dates (UK holidays for 2020-2023)

Currently there's only a single function: `advance_n_days`.
- Polars-business 0.406
- NumPy 0.417
- pandas: omitted as pandas doesn't (yet) vectorise `CustomBusinessDay`, so
we'd likely be talking about minutes
29 changes: 22 additions & 7 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,39 @@
SHELL=/bin/bash

venv: ## Set up virtual environment
python3 -m venv venv
venv/bin/pip install -r requirements.txt
. ../.venv/bin/activate

install: venv
unset CONDA_PREFIX && \
source venv/bin/activate && maturin develop -m polars_business/Cargo.toml
source ../.venv/bin/activate && maturin develop -m polars_business/Cargo.toml

install-release: venv
unset CONDA_PREFIX && \
source venv/bin/activate && maturin develop --release -m polars_business/Cargo.toml
source ../.venv/bin/activate && maturin develop --release -m polars_business/Cargo.toml

build: venv
unset CONDA_PREFIX && \
source ../.venv/bin/activate && maturin develop --release -m polars_business/Cargo.toml

clean:
-@rm -r venv
-@rm -r ../.venv
-@cd polars_business && cargo clean


run: install
source venv/bin/activate && python run.py
source ../.venv/bin/activate && python run.py

run-release: install-release
source venv/bin/activate && python run.py
source ../.venv/bin/activate && python run.py

test: build
pytest ../tests

clippy: venv
cargo clippy -p polars_business

fmt: venv
cargo fmt --all
black .

pre-commit: clippy fmt
75 changes: 75 additions & 0 deletions src/perf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@

import timeit
import numpy as np

# BENCHMARK 1: NO HOLIDAYS INVOLVED

# Setup snippet passed to `timeit.Timer` by `time_it`; it runs before the
# timed statement, so none of this work is included in the reported timings.
# It builds the dates from 2020-01-01 (inclusive) to 2024-01-01 (exclusive),
# keeps only those with `weekday() < 6` (Monday to Friday), draws 10 million
# random samples from them, and stores the samples in both a polars and a
# pandas DataFrame under the column name 'ts'.
# NOTE(review): `holidays` and `warnings` are imported but not used by any
# benchmark 1 statement.
setup = """
import polars as pl
import polars_business
from datetime import date
import numpy as np
import pandas as pd
import holidays
import warnings
dates = pl.date_range(date(2020, 1, 1), date(2024, 1, 1), closed='left', eager=True)
dates = dates.filter(dates.dt.weekday() < 6)
size = 10_000_000
input_dates = np.random.choice(dates, size)
df = pl.DataFrame({
'ts': input_dates,
})
df_pd = pd.DataFrame({
'ts': input_dates,
})
"""

def time_it(statement):
    """Return the best per-execution time, in seconds, for ``statement``.

    The statement is timed with ``timeit`` using the module-level ``setup``
    snippet (looked up at call time, so whichever ``setup`` is currently
    assigned is the one used). Seven repeats are taken, each running the
    statement three times; every repeat total is divided by three and the
    smallest resulting value is returned, rounded to three decimals.
    """
    timer = timeit.Timer(stmt=statement, setup=setup)
    # Each repeat reports the total for 3 executions; normalise to one.
    per_execution = np.array(timer.repeat(7, 3)) / 3
    return round(min(per_execution), 3)

# Time the same operation (advance every date in 'ts' by 17 business days,
# no holiday calendar) with each library and print the best per-execution
# time returned by `time_it`.
print('Polars-business: ', time_it("result_pl = df.select(pl.col('ts').business.advance_n_days(n=17))"))

print('NumPy: ', time_it("result_np = np.busday_offset(input_dates, 17)"))

print('pandas: ', time_it("result_pd = df_pd['ts'] + pd.tseries.offsets.BusinessDay(17)"))

# BENCHMARK 2: WITH HOLIDAYS

# Rebinds the module-level `setup` that `time_it` reads, so every benchmark
# from here on uses this snippet. Compared with the benchmark 1 setup it also
# builds `uk_holidays` (UK holidays for 2020-2023 from the `holidays` package)
# and removes those dates, as well as dates failing `weekday() < 6`, from the
# pool that the 10 million input dates are sampled from.
setup = """
import polars as pl
import polars_business
from datetime import date
import numpy as np
import pandas as pd
import holidays
import warnings
uk_holidays = list(holidays.country_holidays('UK', years=[2020, 2021, 2022, 2023]))
dates = pl.date_range(date(2020, 1, 1), date(2024, 1, 1), closed='left', eager=True)
dates = dates.filter(~dates.is_in(uk_holidays))
dates = dates.filter(dates.dt.weekday() < 6)
size = 10_000_000
input_dates = np.random.choice(dates, size)
df = pl.DataFrame({
'ts': input_dates,
})
df_pd = pd.DataFrame({
'ts': input_dates,
})
"""

# Same 17-business-day shift as benchmark 1, now passing `uk_holidays` as the
# holiday calendar. Only polars-business and NumPy are timed here; no pandas
# statement is run for this benchmark.
print('Polars-business: ', time_it("result_pl = df.select(pl.col('ts').business.advance_n_days(n=17, holidays=uk_holidays))"))

print('NumPy: ', time_it("result_np = np.busday_offset(input_dates, 17, holidays=uk_holidays)"))
26 changes: 17 additions & 9 deletions src/polars_business/polars_business/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,22 @@ class BusinessDayTools:
def __init__(self, expr: pl.Expr):
    # Keep the expression cast to Int32: the plugin call in this class is
    # registered on this casted expression, not on the caller's original one.
    self._expr = expr.cast(pl.Int32)


def advance_n_days(self, n, holidays=None) -> pl.Expr:
    """Return an expression that advances each value by ``n`` business days.

    The computation itself is delegated to the ``advance_n_days`` symbol of
    the compiled plugin library ``lib``; this method only assembles the
    plugin arguments and registers the call as an elementwise plugin.

    Parameters
    ----------
    n
        Number of business days to advance by. It is forwarded to the
        plugin unchanged as its first argument (the README notes that this
        can be an expression).
    holidays
        Optional iterable of holiday dates. When given, duplicates are
        removed and the dates are sent to the plugin as a one-row
        ``List(Int32)`` series, matching the ``Int32`` representation this
        accessor casts its own expression to. When ``None`` (the default),
        the plugin is called with ``n`` only.

    Returns
    -------
    pl.Expr
        The expression produced by registering the plugin call.
    """
    plugin_args = [n]
    if holidays is not None:
        # One list-valued row holding the de-duplicated holidays.
        holiday_series = pl.Series([list(set(holidays))]).cast(pl.List(pl.Int32))
        plugin_args.append(holiday_series)

    return self._expr._register_plugin(
        lib=lib,
        symbol="advance_n_days",
        is_elementwise=True,
        args=plugin_args,
    )
Loading

0 comments on commit c04fe3e

Please sign in to comment.