diff --git a/polars_business/bump_version.py b/polars_business/bump_version.py index fd25b0d..6917dcc 100644 --- a/polars_business/bump_version.py +++ b/polars_business/bump_version.py @@ -4,26 +4,28 @@ how = sys.argv[1] -with open("polars_business/pyproject.toml", "r", encoding='utf-8') as f: +with open("polars_business/pyproject.toml", "r", encoding="utf-8") as f: content = f.read() old_version = re.search(r'version = "(.*)"', content).group(1) -version = old_version.split('.') -if how == 'patch': - version = '.'.join(version[:-1] + [str(int(version[-1]) + 1)]) -elif how == 'minor': - version = '.'.join(version[:-2] + [str(int(version[-2]) + 1), '0']) -elif how == 'major': - version = '.'.join([str(int(version[0]) + 1), '0', '0']) +version = old_version.split(".") +if how == "patch": + version = ".".join(version[:-1] + [str(int(version[-1]) + 1)]) +elif how == "minor": + version = ".".join(version[:-2] + [str(int(version[-2]) + 1), "0"]) +elif how == "major": + version = ".".join([str(int(version[0]) + 1), "0", "0"]) content = content.replace(f'version = "{old_version}"', f'version = "{version}"') -with open("polars_business/pyproject.toml", "w", encoding='utf-8') as f: +with open("polars_business/pyproject.toml", "w", encoding="utf-8") as f: f.write(content) -with open("polars_business/polars_business/__init__.py", "r", encoding='utf-8') as f: +with open("polars_business/polars_business/__init__.py", "r", encoding="utf-8") as f: content = f.read() -content = content.replace(f'__version__ = "{old_version}"', f'__version__ = "{version}"') -with open("polars_business/polars_business/__init__.py", "w", encoding='utf-8') as f: +content = content.replace( + f'__version__ = "{old_version}"', f'__version__ = "{version}"' +) +with open("polars_business/polars_business/__init__.py", "w", encoding="utf-8") as f: f.write(content) -subprocess.run(['git', 'commit', '-a', '-m', f'Bump version to {version}']) -subprocess.run(['git', 'tag', '-a', version, '-m', version]) -subprocess.run(['git', 'push', '--follow-tags']) +subprocess.run(["git", "commit", "-a", "-m", f"Bump version to {version}"]) +subprocess.run(["git", "tag", "-a", version, "-m", version]) +subprocess.run(["git", "push", "--follow-tags"]) diff --git a/polars_business/perf.py b/polars_business/perf.py index 79915ab..dd72919 100644 --- a/polars_business/perf.py +++ b/polars_business/perf.py @@ -1,9 +1,8 @@ - import timeit import numpy as np -BENCHMARKS = [1, 2, 3, 4] -# BENCHMARKS = [1, 2] +# BENCHMARKS = [1, 2, 3, 4] +BENCHMARKS = [1] # BENCHMARK 1: NO HOLIDAYS INVOLVED @@ -30,21 +29,31 @@ }) """ + def time_it(statement): - results = np.array(timeit.Timer( - stmt=statement, - setup=setup, + results = ( + np.array( + timeit.Timer( + stmt=statement, + setup=setup, + ).repeat(7, 3) ) - .repeat(7, 3) - )/3 + / 3 + ) return round(min(results), 3) + if 1 in BENCHMARKS: - print('Polars-business: ', time_it("result_pl = df.select(pl.col('ts').business.advance_n_days(n=17))")) - print('NumPy: ', time_it("result_np = np.busday_offset(input_dates, 17)")) + print( + "Polars-business: ", + time_it("result_pl = df.select(pl.col('ts').business.advance_n_days(n=17))"), + ) + print("NumPy: ", time_it("result_np = np.busday_offset(input_dates, 17)")) # uncomment, too slow... -# print('pandas: ', time_it("result_pd = df_pd['ts'] + pd.tseries.offsets.BusinessDay(17)")) +print( + "pandas: ", time_it("result_pd = df_pd['ts'] + pd.tseries.offsets.BusinessDay(17)") +) # BENCHMARK 2: WITH HOLIDAYS @@ -75,8 +84,16 @@ def time_it(statement): """ if 2 in BENCHMARKS: - print('Polars-business: ', time_it("result_pl = df.select(pl.col('ts').business.advance_n_days(n=17, holidays=uk_holidays))")) - print('NumPy: ', time_it("result_np = np.busday_offset(input_dates, 17, holidays=uk_holidays)")) + print( + "Polars-business: ", + time_it( + "result_pl = df.select(pl.col('ts').business.advance_n_days(n=17, holidays=uk_holidays))" + ), + ) + print( + "NumPy: ", + time_it("result_np = np.busday_offset(input_dates, 17, holidays=uk_holidays)"), + ) # BENCHMARK 3: WITH weekends @@ -106,8 +123,16 @@ def time_it(statement): """ if 3 in BENCHMARKS: - print('Polars-business: ', time_it("result_pl = df.select(pl.col('ts').business.advance_n_days(n=17, weekend=weekend))")) - print('NumPy: ', time_it("result_np = np.busday_offset(input_dates, 17, weekmask='1111001')")) + print( + "Polars-business: ", + time_it( + "result_pl = df.select(pl.col('ts').business.advance_n_days(n=17, weekend=weekend))" + ), + ) + print( + "NumPy: ", + time_it("result_np = np.busday_offset(input_dates, 17, weekmask='1111001')"), + ) # BENCHMARK 4: WITH weekends and holidays @@ -139,5 +164,15 @@ def time_it(statement): """ if 4 in BENCHMARKS: - print('Polars-business: ', time_it("result_pl = df.select(pl.col('ts').business.advance_n_days(n=17, weekend=weekend, holidays=uk_holidays))")) - print('NumPy: ', time_it("result_np = np.busday_offset(input_dates, 17, weekmask='1111001', holidays=uk_holidays)")) + print( + "Polars-business: ", + time_it( + "result_pl = df.select(pl.col('ts').business.advance_n_days(n=17, weekend=weekend, holidays=uk_holidays))" + ), + ) + print( + "NumPy: ", + time_it( + "result_np = np.busday_offset(input_dates, 17, weekmask='1111001', holidays=uk_holidays)" + ), + ) diff --git a/polars_business/polars_business/polars_business/__init__.py b/polars_business/polars_business/polars_business/__init__.py index a34f064..f6a25d3 100644 --- a/polars_business/polars_business/polars_business/__init__.py +++ b/polars_business/polars_business/polars_business/__init__.py @@ -7,15 +7,7 @@ __version__ = "0.1.15" -mapping = { - 'Mon': 0, - 'Tue': 1, - 'Wed': 2, - 'Thu': 3, - 'Fri': 4, - 'Sat': 5, - 'Sun': 6 -} +mapping = {"Mon": 0, "Tue": 1, "Wed": 2, "Thu": 3, "Fri": 4, "Sat": 5, "Sun": 6} @pl.api.register_expr_namespace("business") @@ -23,14 +15,15 @@ class BusinessDayTools: def __init__(self, expr: pl.Expr): self._expr = expr - def advance_n_days(self, n, weekend=('Sat', 'Sun'), holidays=None) -> pl.Expr: - + def advance_n_days(self, n, weekend=("Sat", "Sun"), holidays=None) -> pl.Expr: if not holidays: - holidays = [] + holidays = [] else: - holidays = sorted({(holiday - date(1970, 1, 1)).days for holiday in holidays}) - if weekend == ('Sat', 'Sun'): - weekend = [5,6] + holidays = sorted( + {(holiday - date(1970, 1, 1)).days for holiday in holidays} + ) + if weekend == ("Sat", "Sun"): + weekend = [5, 6] else: weekend = sorted({mapping[name] for name in weekend}) @@ -39,10 +32,10 @@ def advance_n_days(self, n, weekend=('Sat', 'Sun'), holidays=None) -> pl.Expr: symbol="advance_n_days", is_elementwise=True, args=[n], - kwargs = { - 'holidays': holidays, - 'weekend': weekend, - } + kwargs={ + "holidays": holidays, + "weekend": weekend, + }, ) # elif holidays is not None and weekend is None: # holidays = pl.Series([list(set(holidays))]).cast(pl.List(pl.Int32)) diff --git a/polars_business/polars_business/src/business_days.rs b/polars_business/polars_business/src/business_days.rs index c76f260..9e299ee 100644 --- a/polars_business/polars_business/src/business_days.rs +++ b/polars_business/polars_business/src/business_days.rs @@ -1,6 +1,6 @@ -use polars::prelude::*; -use polars::prelude::arity::try_binary_elementwise; use ahash::AHashMap; +use polars::prelude::arity::try_binary_elementwise; +use polars::prelude::*; pub(crate) fn weekday(x: i32) -> i32 { // the first modulo might return a negative number, so we add 7 and take @@ -67,7 +67,11 @@ fn roll(n_days: i32, x_weekday: i32, weekend: &[i32]) -> i32 { n_days } -pub(crate) fn calculate_n_days_with_holidays(x: i32, n: i32, holidays: &[i32]) -> PolarsResult { +pub(crate) fn calculate_n_days_with_holidays( + x: i32, + n: i32, + holidays: &[i32], +) -> PolarsResult { let x_mod_7 = x % 7; let x_weekday = weekday(x_mod_7); @@ -80,17 +84,17 @@ pub(crate) fn calculate_n_days_with_holidays(x: i32, n: i32, holidays: &[i32]) - if holidays.binary_search(&x).is_ok() { polars_bail!(ComputeError: format!("date {} is not a business date, cannot advance. `roll` argument coming soon.", x)) } - let mut count_hols = count_holidays(x, x + n_days, &holidays); + let mut count_hols = count_holidays(x, x + n_days, holidays); while count_hols > 0 { let n_days_before = n_days; if n_days > 0 { n_days = n_days + calculate_n_days_without_holidays_fast(count_hols, weekday(x_mod_7 + n_days)); - count_hols = count_holidays(x + n_days_before + 1, x + n_days, &holidays); + count_hols = count_holidays(x + n_days_before + 1, x + n_days, holidays); } else { n_days = n_days + calculate_n_days_without_holidays_fast(-count_hols, weekday(x_mod_7 + n_days)); - count_hols = count_holidays(x + n_days_before - 1, x + n_days, &holidays); + count_hols = count_holidays(x + n_days_before - 1, x + n_days, holidays); } } Ok(n_days) @@ -116,7 +120,7 @@ pub(crate) fn calculate_n_days_with_weekend_and_holidays( if holidays.binary_search(&x).is_ok() { polars_bail!(ComputeError: format!("date {} is not a business date, cannot advance. `roll` argument coming soon.", x)) } - let mut count_hols = count_holidays(x, x + n_days, &holidays); + let mut count_hols = count_holidays(x, x + n_days, holidays); while count_hols > 0 { let n_days_before = n_days; if n_days > 0 { @@ -127,7 +131,7 @@ pub(crate) fn calculate_n_days_with_weekend_and_holidays( weekend.len() as i32, cache, ); - count_hols = count_holidays(x + n_days_before + 1, x + n_days, &holidays); + count_hols = count_holidays(x + n_days_before + 1, x + n_days, holidays); } else { n_days = n_days + calculate_n_days_without_holidays_slow( @@ -136,7 +140,7 @@ pub(crate) fn calculate_n_days_with_weekend_and_holidays( weekend.len() as i32, cache, ); - count_hols = count_holidays(x + n_days_before - 1, x + n_days, &holidays); + count_hols = count_holidays(x + n_days_before - 1, x + n_days, holidays); } } Ok(n_days) @@ -265,22 +269,26 @@ pub(crate) fn impl_advance_n_days( if x_weekday >= 5 { polars_bail!(ComputeError: format!("date {} is not a business date, cannot advance. `roll` argument coming soon.", x_date)) } - Ok(x_date + (calculate_n_days_without_holidays_fast(n, x_weekday))) - .map(Some) + Ok(Some( + x_date + (calculate_n_days_without_holidays_fast(n, x_weekday)), + )) } else if !holidays.is_empty() && weekend == [5, 6] { - Ok(x_date + calculate_n_days_with_holidays(x_date, n, &holidays)?) - .map(Some) + Ok(Some( + x_date + calculate_n_days_with_holidays(x_date, n, &holidays)?, + )) } else if holidays.is_empty() && weekend != [5, 6] { let cache = cache.as_ref().unwrap(); - Ok(x_date + calculate_n_days_with_weekend(x_date, n, &weekend, cache)?) - .map(Some) + Ok(Some( + x_date + calculate_n_days_with_weekend(x_date, n, &weekend, cache)?, + )) } else { let cache = cache.as_ref().unwrap(); - Ok(x_date - + calculate_n_days_with_weekend_and_holidays( - x_date, n, &weekend, &cache, &holidays, - )?) - .map(Some) + Ok(Some( + x_date + + calculate_n_days_with_weekend_and_holidays( + x_date, n, &weekend, cache, &holidays, + )?, + )) } } _ => Ok(None), @@ -352,27 +360,31 @@ pub(crate) fn impl_advance_n_days( if x_weekday >= 5 { polars_bail!(ComputeError: format!("date {} is not a business date, cannot advance. `roll` argument coming soon.", x_date)) } - Ok(x+(calculate_n_days_without_holidays_fast(n, x_weekday) as i64 *multiplier)).map(Some) + Ok(Some( + x + (calculate_n_days_without_holidays_fast(n, x_weekday) as i64 + * multiplier), + )) } else if !holidays.is_empty() && weekend == [5, 6] { - Ok( - x + calculate_n_days_with_holidays(x_date, n, &holidays)? - as i64 + Ok(Some( + x + calculate_n_days_with_holidays(x_date, n, &holidays)? as i64 * multiplier, - ).map(Some) + )) } else if holidays.is_empty() && weekend != [5, 6] { let cache = cache.as_ref().unwrap(); let x_date = (x / multiplier) as i32; - Ok( - x + calculate_n_days_with_weekend(x_date, n, &weekend, &cache)? + Ok(Some( + x + calculate_n_days_with_weekend(x_date, n, &weekend, cache)? as i64 * multiplier, - ).map(Some) + )) } else { let cache = cache.as_ref().unwrap(); - Ok(x + calculate_n_days_with_weekend_and_holidays( - x_date, n, &weekend, &cache, &holidays, - )? as i64 - * multiplier).map(Some) + Ok(Some( + x + calculate_n_days_with_weekend_and_holidays( + x_date, n, &weekend, cache, &holidays, + )? as i64 + * multiplier, + )) } } _ => Ok(None), @@ -389,4 +401,4 @@ pub(crate) fn impl_advance_n_days( polars_bail!(ComputeError: format!("expected Datetime or Date dtype, got: {}", original_dtype)) } } -} \ No newline at end of file +} diff --git a/polars_business/polars_business/src/expressions.rs b/polars_business/polars_business/src/expressions.rs index f6edf6d..dce3fc1 100644 --- a/polars_business/polars_business/src/expressions.rs +++ b/polars_business/polars_business/src/expressions.rs @@ -1,7 +1,7 @@ +use crate::business_days::*; use polars::prelude::*; use pyo3_polars::derive::polars_expr; use serde::Deserialize; -use crate::business_days::*; #[derive(Deserialize)] pub struct BusinessDayKwargs { diff --git a/polars_business/polars_business/src/lib.rs b/polars_business/polars_business/src/lib.rs index 4deb7c6..147cedc 100644 --- a/polars_business/polars_business/src/lib.rs +++ b/polars_business/polars_business/src/lib.rs @@ -1,2 +1,2 @@ -mod expressions; mod business_days; +mod expressions; diff --git a/polars_business/requirements.txt b/polars_business/requirements.txt index 6471a4a..d2b61f7 100644 --- a/polars_business/requirements.txt +++ b/polars_business/requirements.txt @@ -4,3 +4,4 @@ hypothesis numpy pandas pytest +holidays diff --git a/polars_business/run.py b/polars_business/run.py index 5874c8b..865e70d 100644 --- a/polars_business/run.py +++ b/polars_business/run.py @@ -7,15 +7,11 @@ start = datetime(2000, 1, 18) n = 11 -weekend = ['Sat', 'Sun'] -holidays = []#[date(2000, 1, 29)] +weekend = ["Sat", "Sun"] +holidays = [] # [date(2000, 1, 29)] weekmask = [0 if reverse_mapping[i] in weekend else 1 for i in range(7)] -df = pl.DataFrame( - { - "dates": [start] - } -) +df = pl.DataFrame({"dates": [start]}) df = df.with_columns(start_wday=pl.col("dates").dt.strftime("%a")) print( diff --git a/polars_business/tests/test_business_offsets.py b/polars_business/tests/test_business_offsets.py index b7b6627..90f0356 100644 --- a/polars_business/tests/test_business_offsets.py +++ b/polars_business/tests/test_business_offsets.py @@ -11,24 +11,41 @@ reverse_mapping = {value: key for key, value in polars_business.mapping.items()} + def get_result(date, dtype, **kwargs): if dtype == pl.Date: - result = pl.DataFrame({'ts': [date]}).select(pl.col('ts').business.advance_n_days(**kwargs))['ts'].item() + result = ( + pl.DataFrame({"ts": [date]}) + .select(pl.col("ts").business.advance_n_days(**kwargs))["ts"] + .item() + ) else: - result = pl.DataFrame({'ts': [dt.datetime(date.year, date.month, date.day)]}).select( - pl.col('ts').dt.cast_time_unit(dtype.time_unit) - .dt.replace_time_zone(dtype.time_zone) - .business.advance_n_days(**kwargs) - .dt.date() - )['ts'].item() + result = ( + pl.DataFrame({"ts": [dt.datetime(date.year, date.month, date.day)]}) + .select( + pl.col("ts") + .dt.cast_time_unit(dtype.time_unit) + .dt.replace_time_zone(dtype.time_zone) + .business.advance_n_days(**kwargs) + .dt.date() + )["ts"] + .item() + ) return result @given( date=st.dates(min_value=dt.date(1000, 1, 1), max_value=dt.date(9999, 12, 31)), n=st.integers(min_value=-30, max_value=30), - dtype = st.sampled_from([pl.Date, pl.Datetime('ms'), pl.Datetime('ms', 'Asia/Kathmandu'), pl.Datetime('us', 'Europe/London')]), - function = st.sampled_from([lambda x: x, lambda x: pl.Series([x])]) + dtype=st.sampled_from( + [ + pl.Date, + pl.Datetime("ms"), + pl.Datetime("ms", "Asia/Kathmandu"), + pl.Datetime("us", "Europe/London"), + ] + ), + function=st.sampled_from([lambda x: x, lambda x: pl.Series([x])]), ) def test_against_np_busday_offset(date: dt.date, n: int, dtype, function) -> None: # how to do this... @@ -46,50 +63,100 @@ def test_against_np_busday_offset(date: dt.date, n: int, dtype, function) -> Non def test_against_pandas_bday_offset(date: dt.date, n: int) -> None: # maybe just remove this one? assume(date.weekday() < 5) - result = pl.DataFrame({'ts': [date]}).select(pl.col('ts').business.advance_n_days(n=n))['ts'].item() + result = ( + pl.DataFrame({"ts": [date]}) + .select(pl.col("ts").business.advance_n_days(n=n))["ts"] + .item() + ) expected = pd.Timestamp(date) + pd.tseries.offsets.BusinessDay(n) assert pd.Timestamp(result) == expected - @given( date=st.dates(min_value=dt.date(2000, 1, 1), max_value=dt.date(2000, 12, 31)), n=st.integers(min_value=-30, max_value=30), - holidays = st.lists(st.dates(min_value=dt.date(2000, 1, 1), max_value=dt.date(2000, 12, 31)), min_size=1, max_size=300), - dtype = st.sampled_from([pl.Date, pl.Datetime('ms'), pl.Datetime('ms', 'Asia/Kathmandu'), pl.Datetime('us', 'Europe/London')]), - function = st.sampled_from([lambda x: x, lambda x: pl.Series([x])]), + holidays=st.lists( + st.dates(min_value=dt.date(2000, 1, 1), max_value=dt.date(2000, 12, 31)), + min_size=1, + max_size=300, + ), + dtype=st.sampled_from( + [ + pl.Date, + pl.Datetime("ms"), + pl.Datetime("ms", "Asia/Kathmandu"), + pl.Datetime("us", "Europe/London"), + ] + ), + function=st.sampled_from([lambda x: x, lambda x: pl.Series([x])]), ) -def test_against_np_busday_offset_with_holidays(date: dt.date, n: int, holidays: list[dt.date], dtype, function) -> None: +def test_against_np_busday_offset_with_holidays( + date: dt.date, n: int, holidays: list[dt.date], dtype, function +) -> None: assume(date.weekday() < 5) assume(date not in holidays) # TODO: remove once unwrap is removed result = get_result(date, dtype, n=function(n), holidays=holidays) expected = np.busday_offset(date, n, holidays=holidays) assert np.datetime64(result) == expected + @given( date=st.dates(min_value=dt.date(2000, 1, 1), max_value=dt.date(2000, 12, 31)), n=st.integers(min_value=-30, max_value=30), - weekend = st.lists(st.sampled_from(['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']), min_size=0, max_size=7), - dtype = st.sampled_from([pl.Date, pl.Datetime('ms'), pl.Datetime('ms', 'Asia/Kathmandu'), pl.Datetime('us', 'Europe/London')]), - function = st.sampled_from([lambda x: x, lambda x: pl.Series([x])]), + weekend=st.lists( + st.sampled_from(["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]), + min_size=0, + max_size=7, + ), + dtype=st.sampled_from( + [ + pl.Date, + pl.Datetime("ms"), + pl.Datetime("ms", "Asia/Kathmandu"), + pl.Datetime("us", "Europe/London"), + ] + ), + function=st.sampled_from([lambda x: x, lambda x: pl.Series([x])]), ) -def test_against_np_busday_offset_with_weekends(date: dt.date, n: int, weekend: list[dt.date], dtype, function) -> None: +def test_against_np_busday_offset_with_weekends( + date: dt.date, n: int, weekend: list[dt.date], dtype, function +) -> None: assume(reverse_mapping[date.weekday()] not in weekend) result = get_result(date, dtype, n=function(n), weekend=weekend) weekmask = [0 if reverse_mapping[i] in weekend else 1 for i in range(7)] expected = np.busday_offset(date, n, weekmask=weekmask) assert np.datetime64(result) == expected + @given( date=st.dates(min_value=dt.date(2000, 1, 1), max_value=dt.date(2000, 12, 31)), n=st.integers(min_value=-30, max_value=30), - weekend = st.lists(st.sampled_from(['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']), min_size=0, max_size=7), - holidays = st.lists(st.dates(min_value=dt.date(2000, 1, 1), max_value=dt.date(2000, 12, 31)), min_size=1, max_size=300), - dtype = st.sampled_from([pl.Date, pl.Datetime('ms'), pl.Datetime('ms', 'Asia/Kathmandu'), pl.Datetime('us', 'Europe/London')]), - function = st.sampled_from([lambda x: x, lambda x: pl.Series([x])]), + weekend=st.lists( + st.sampled_from(["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]), + min_size=0, + max_size=7, + ), + holidays=st.lists( + st.dates(min_value=dt.date(2000, 1, 1), max_value=dt.date(2000, 12, 31)), + min_size=1, + max_size=300, + ), + dtype=st.sampled_from( + [ + pl.Date, + pl.Datetime("ms"), + pl.Datetime("ms", "Asia/Kathmandu"), + pl.Datetime("us", "Europe/London"), + ] + ), + function=st.sampled_from([lambda x: x, lambda x: pl.Series([x])]), ) -def test_against_np_busday_offset_with_weekends_and_holidays(date: dt.date, n: int, weekend: list[int], holidays: list[dt.date], dtype, function) -> None: - assume(reverse_mapping[date.weekday()] not in weekend) # TODO: remove once unwrap is removed +def test_against_np_busday_offset_with_weekends_and_holidays( + date: dt.date, n: int, weekend: list[int], holidays: list[dt.date], dtype, function +) -> None: + assume( + reverse_mapping[date.weekday()] not in weekend + ) # TODO: remove once unwrap is removed assume(date not in holidays) # TODO: remove once unwrap is removed result = get_result(date, dtype, n=function(n), weekend=weekend, holidays=holidays) weekmask = [0 if reverse_mapping[i] in weekend else 1 for i in range(7)]