diff --git a/.github/workflows/ci-tests.yaml b/.github/workflows/ci-tests.yaml index c4f9dcf..077e5ce 100644 --- a/.github/workflows/ci-tests.yaml +++ b/.github/workflows/ci-tests.yaml @@ -21,16 +21,13 @@ jobs: - name: Checkout source uses: actions/checkout@v2 - - name: Setup R - uses: r-lib/actions/setup-r@v2 - with: - r-version: '4.2.0' + - name: Setup r2u + uses: eddelbuettel/github-actions/r2u-setup@master - - name: install fwildclusterboot for testing - run: Rscript -e 'install.packages("fwildclusterboot", repos="https://cloud.r-project.org")' + - name: install R packages + run: Rscript -e 'install.packages(c("fwildclusterboot"))' shell: bash - - name: Setup python uses: actions/setup-python@v2 with: diff --git a/.gitignore b/.gitignore index b526339..eada79c 100644 --- a/.gitignore +++ b/.gitignore @@ -8,4 +8,5 @@ *.egg-info/ settings.json dist/ -build/ \ No newline at end of file +build/ +*.lock \ No newline at end of file diff --git a/poetry.lock b/poetry.lock index 67ed62f..7ef07dc 100644 --- a/poetry.lock +++ b/poetry.lock @@ -15,6 +15,34 @@ files = [ six = ">=1.6.1,<2.0" wheel = ">=0.23.0,<1.0" +[[package]] +name = "backports-zoneinfo" +version = "0.2.1" +description = "Backport of the standard library zoneinfo module" +optional = false +python-versions = ">=3.6" +files = [ + {file = "backports.zoneinfo-0.2.1-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:da6013fd84a690242c310d77ddb8441a559e9cb3d3d59ebac9aca1a57b2e18bc"}, + {file = "backports.zoneinfo-0.2.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:89a48c0d158a3cc3f654da4c2de1ceba85263fafb861b98b59040a5086259722"}, + {file = "backports.zoneinfo-0.2.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:1c5742112073a563c81f786e77514969acb58649bcdf6cdf0b4ed31a348d4546"}, + {file = "backports.zoneinfo-0.2.1-cp36-cp36m-win32.whl", hash = "sha256:e8236383a20872c0cdf5a62b554b27538db7fa1bbec52429d8d106effbaeca08"}, + {file = "backports.zoneinfo-0.2.1-cp36-cp36m-win_amd64.whl", hash = "sha256:8439c030a11780786a2002261569bdf362264f605dfa4d65090b64b05c9f79a7"}, + {file = "backports.zoneinfo-0.2.1-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:f04e857b59d9d1ccc39ce2da1021d196e47234873820cbeaad210724b1ee28ac"}, + {file = "backports.zoneinfo-0.2.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:17746bd546106fa389c51dbea67c8b7c8f0d14b5526a579ca6ccf5ed72c526cf"}, + {file = "backports.zoneinfo-0.2.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:5c144945a7752ca544b4b78c8c41544cdfaf9786f25fe5ffb10e838e19a27570"}, + {file = "backports.zoneinfo-0.2.1-cp37-cp37m-win32.whl", hash = "sha256:e55b384612d93be96506932a786bbcde5a2db7a9e6a4bb4bffe8b733f5b9036b"}, + {file = "backports.zoneinfo-0.2.1-cp37-cp37m-win_amd64.whl", hash = "sha256:a76b38c52400b762e48131494ba26be363491ac4f9a04c1b7e92483d169f6582"}, + {file = "backports.zoneinfo-0.2.1-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:8961c0f32cd0336fb8e8ead11a1f8cd99ec07145ec2931122faaac1c8f7fd987"}, + {file = "backports.zoneinfo-0.2.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:e81b76cace8eda1fca50e345242ba977f9be6ae3945af8d46326d776b4cf78d1"}, + {file = "backports.zoneinfo-0.2.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:7b0a64cda4145548fed9efc10322770f929b944ce5cee6c0dfe0c87bf4c0c8c9"}, + {file = "backports.zoneinfo-0.2.1-cp38-cp38-win32.whl", hash = "sha256:1b13e654a55cd45672cb54ed12148cd33628f672548f373963b0bff67b217328"}, + {file = "backports.zoneinfo-0.2.1-cp38-cp38-win_amd64.whl", hash = "sha256:4a0f800587060bf8880f954dbef70de6c11bbe59c673c3d818921f042f9954a6"}, + {file = "backports.zoneinfo-0.2.1.tar.gz", hash = "sha256:fadbfe37f74051d024037f223b8e001611eac868b5c5b06144ef4d8b799862f2"}, +] + +[package.extras] +tzdata = ["tzdata"] + [[package]] name = "build" version = "1.0.3" @@ -1669,6 +1697,34 @@ files = [ [package.dependencies] requests = ">=2.0.1,<3.0.0" +[[package]] +name = "rpy2" +version = "3.5.16" +description = "Python interface to the R language (embedded R)" +optional = false +python-versions = ">=3.7" +files = [ + {file = "rpy2-3.5.16-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:c748fc74ba01a51f6aca0a5f9e7bf637cd09413ffa031dd79b49b7a9fe97770b"}, + {file = "rpy2-3.5.16-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f076b34bd79f62ae583e75acc1b305ba73a6639ea5c9a44dc53896709ccd8ba0"}, + {file = "rpy2-3.5.16-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:3bb396851710856c6544c4278988b2abfe01d5a392278e5157b97148e62079c4"}, + {file = "rpy2-3.5.16-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:c067769dbade7faccdc8d2181ae5e29329fe0e66289fb291436a546da2f5e881"}, + {file = "rpy2-3.5.16.tar.gz", hash = "sha256:837e2f74583658a5c4c339761a73f9434f33ef9ced3e30c64da7562165c2801b"}, +] + +[package.dependencies] +"backports.zoneinfo" = {version = "*", markers = "python_version < \"3.9\""} +cffi = ">=1.15.1" +jinja2 = "*" +packaging = {version = "*", markers = "platform_system == \"Windows\""} +tzlocal = "*" + +[package.extras] +all = ["ipython", "numpy", "pandas (>=1.3.5)", "pytest"] +pandas = ["numpy", "pandas (>=1.3.5)"] +test = ["ipython", "numpy", "pandas (>=1.3.5)", "pytest"] +test-minimal = ["coverage", "pytest (>=8)", "pytest-cov"] +types = ["mypy", "types-tzlocal"] + [[package]] name = "scipy" version = "1.9.3" @@ -1855,6 +1911,24 @@ files = [ {file = "tzdata-2023.3.tar.gz", hash = "sha256:11ef1e08e54acb0d4f95bdb1be05da659673de4acbd21bf9c69e94cc5e907a3a"}, ] +[[package]] +name = "tzlocal" +version = "5.2" +description = "tzinfo object for the local timezone" +optional = false +python-versions = ">=3.8" +files = [ + {file = "tzlocal-5.2-py3-none-any.whl", hash = "sha256:49816ef2fe65ea8ac19d19aa7a1ae0551c834303d5014c6d5a62e4cbda8047b8"}, + {file = "tzlocal-5.2.tar.gz", hash = "sha256:8d399205578f1a9342816409cc1e46a93ebd5755e39ea2d85334bea911bf0e6e"}, +] + +[package.dependencies] +"backports.zoneinfo" = {version = "*", markers = "python_version < \"3.9\""} +tzdata = {version = "*", markers = "platform_system == \"Windows\""} + +[package.extras] +devenv = ["check-manifest", "pytest (>=4.3)", "pytest-cov", "pytest-mock (>=3.3)", "zest.releaser"] + [[package]] name = "urllib3" version = "2.0.7" @@ -2047,4 +2121,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = ">=3.8,<4.0" -content-hash = "c1bdd7ba1dbf4d249e6aa0d336ebb5a70c8e35ff81235a94cc57055cb45bf884" +content-hash = "aef650de2ae05e2f8feba858435d9c59ab7cc0d15c66606d2ad1b6d8a41b01b8" diff --git a/pyproject.toml b/pyproject.toml index dfec8b6..f934802 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,6 +48,7 @@ pymdown-extensions = ">=10.0" mkdocstrings-python-legacy = "^0.2.3" mkdocstrings = {version = "^0.19.0", extras = ["python"], optional = true } pymdown-extensions = ">=10.0" +rpy2 = "^3.5.16" [build-system] diff --git a/tests/test_seeds.py b/tests/test_seeds.py index 26ea201..abe6cab 100644 --- a/tests/test_seeds.py +++ b/tests/test_seeds.py @@ -44,3 +44,16 @@ def test_results_from_same_seed(data): np.random.seed(123) b2 = wildboottest(model, param = "X1", cluster = x, B= 999) pd.testing.assert_frame_equal(a2,b2) + +def test_seeds_and_rng(data): + model = sm.ols(formula='Y ~ X1 + X2', data=data) + + cluster_list = [data.cluster, None] + + for x in cluster_list: + + # specifying seed and rng with that seed -> same results + a = wildboottest(model, param = "X1", cluster = x, B= 999, seed=876587) + rng = np.random.default_rng(seed=876587) + b = wildboottest(model, param = "X1", cluster = x, B= 999, seed=rng) + pd.testing.assert_frame_equal(a,b) \ No newline at end of file diff --git a/tests/test_weights.py b/tests/test_weights.py index 090af40..002217b 100644 --- a/tests/test_weights.py +++ b/tests/test_weights.py @@ -4,7 +4,6 @@ import numpy as np import pandas as pd -np.random.seed(89756) ts = list(wild_draw_fun_dict.keys()) full_enum = [True, False] @@ -13,6 +12,7 @@ @pytest.fixture def data(): + np.random.seed(12315) N = 100 k = 2 G= 20 @@ -46,9 +46,11 @@ def test_different_weights(data): X, y, cluster, bootcluster, R, B = data results_dict = {} + + rng = np.random.default_rng(seed=0) for w in ts: - boot = WildboottestCL(X = X, Y = y, cluster = cluster, bootcluster = bootcluster, R = R, B = 99999, seed = 12341) + boot = WildboottestCL(X = X, Y = y, cluster = cluster, bootcluster = bootcluster, R = R, B = 99999, seed = rng) boot.get_scores(bootstrap_type = "11", impose_null = True) boot.get_weights(weights_type = w) boot.get_numer() @@ -60,7 +62,9 @@ def test_different_weights(data): results_dict[w] = boot.pvalue results_series = pd.Series(results_dict) + print(results_series) mapd = (results_series - results_series.mean()).abs().mean() / results_series.mean() + print(mapd) assert mapd <= .1# make sure mean absolute percentage deviation is less than 10% (ad hoc) \ No newline at end of file diff --git a/wildboottest/wildboottest.py b/wildboottest/wildboottest.py index 4733c11..7a413cf 100644 --- a/wildboottest/wildboottest.py +++ b/wildboottest/wildboottest.py @@ -5,6 +5,7 @@ from wildboottest.weights import draw_weights import warnings from typing import Union, Tuple, Callable +from numpy.random import Generator class WildDrawFunctionException(Exception): pass @@ -55,7 +56,7 @@ def __init__(self, X : Union[np.ndarray, pd.DataFrame, pd.Series], R : Union[np.ndarray, pd.DataFrame], r: Union[np.ndarray, float], B: int, - seed: Union[int, None] = None) -> None: + seed: Union[int, Generator, None] = None) -> None: """Initializes the Heteroskedastic Wild Bootstrap Class @@ -64,7 +65,9 @@ def __init__(self, X : Union[np.ndarray, pd.DataFrame, pd.Series], Y (Union[np.ndarray, pd.DataFrame, pd.Series]): Endogenous variable array or dataframe R (Union[np.ndarray, pd.DataFrame]): Constraint matrix for running bootstrap B (int): bootstrap iterations - seed (Union[int, None], optional): Random seed for random weight types. Defaults to None. + seed (Union[int, Generator, None], optional): Random seed for random weight types. + If an integer, will be used as a seed in a numpy default random generator, or a numpy random generator + can also be specified and used. Defaults to None. Raises: TypeError: Raise if input arrays are lists @@ -85,10 +88,12 @@ def __init__(self, X : Union[np.ndarray, pd.DataFrame, pd.Series], else: self.Y = Y - if seed is None: - seed = np.random.randint(low = 1, high = (2**32 - 1), size = 1, dtype=np.int64) - - self.rng = np.random.default_rng(seed = seed) + if isinstance(seed, int): + self.rng = np.random.default_rng(seed=seed) + elif isinstance(seed, Generator): + self.rng = seed + else: + self.rng = np.random.default_rng() self.N = X.shape[0] self.k = X.shape[1] @@ -274,7 +279,7 @@ def __init__(self, X : Union[np.ndarray, pd.DataFrame, pd.Series], R : Union[np.ndarray, pd.DataFrame], B: int, bootcluster: Union[np.ndarray, pd.DataFrame, pd.Series, None] = None, - seed: Union[int, None] = None, + seed: Union[int, Generator, None] = None, parallel: bool = True) -> None: """Initializes the Wild Cluster Bootstrap Class @@ -285,7 +290,9 @@ def __init__(self, X : Union[np.ndarray, pd.DataFrame, pd.Series], R (Union[np.ndarray, pd.DataFrame]): Constraint matrix for running bootstrap B (int): bootstrap iterations bootcluster (Union[np.ndarray, pd.DataFrame, pd.Series, None], optional): Sub-cluster array. Defaults to None. - seed (Union[int, None], optional): Random seed for random weight types. Defaults to None. + seed (Union[int, Generator, None], optional): Random seed for random weight types. + If an integer, will be used as a seed in a numpy default random generator, or a numpy random generator + can also be specified and used. Defaults to None. parallel (bool, optional): Whether to run the bootstrap in parallel. Defaults to True. Raises: TypeError: Raise if input arrays are lists @@ -326,11 +333,13 @@ def __init__(self, X : Union[np.ndarray, pd.DataFrame, pd.Series], self.bootclustid = np.unique(bootcluster) self.bootcluster = bootcluster - if seed is None: - seed = np.random.randint(low = 1, high = (2**32 - 1), size = 1, dtype=np.int64) - - self.rng = np.random.default_rng(seed = seed) - + if isinstance(seed, int): + self.rng = np.random.default_rng(seed=seed) + elif isinstance(seed, Generator): + self.rng = seed + else: + self.rng = np.random.default_rng() + self.N_G_bootcluster = len(self.bootclustid) self.G = len(self.clustid) @@ -647,7 +656,7 @@ def wildboottest(model : 'OLS', weights_type: str = 'rademacher', impose_null: bool = True, bootstrap_type: str = '11', - seed: Union[str, None] = None, + seed: Union[int, Generator, None] = None, adj: bool = True, cluster_adj: bool = True, parallel: bool = True, @@ -666,7 +675,9 @@ def wildboottest(model : 'OLS', Defaults to True. bootstrap_type (str, optional):A string of length one. Allows to choose the bootstrap type to be run. Either '11', '31', '13' or '33'. '11' by default. Defaults to '11'. - seed (Union[str, None], optional): Option to provide a random seed. Defaults to None. + seed (Union[int, Generator, None], optional): Random seed for random weight types. + If an integer, will be used as a seed in a numpy default random generator, or a numpy random generator + can also be specified and used. Defaults to None. adj (bool, optional): Whether to adjust for small sample. Defaults to True. cluster_adj (bool, optional): Whether to do a cluster-robust small sample correction. Defaults to True. parallel (bool, optional): Whether to run the bootstrap in parallel. Defaults to True.