-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbootstrap.py
107 lines (90 loc) · 3.75 KB
/
bootstrap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import numpy as np
from numpy.random import Generator, SeedSequence, PCG64
import hashlib
def get_rng(seed: str):
"""Generate a random number generator based on a seed string."""
# Over python iteration the traditional hash was changed. So, here we fix it to md5
hash = hashlib.md5(seed.encode("utf-8")).hexdigest() # Convert string to a hash
seed_int = int(hash, 16) % (10 ** 6) # Convert hash to an fixed size integer
print("Seed to md5 hash:", seed, "->", hash, "->", seed_int)
# Create instance of random number generator explicitly to ensure long time support
# PCG64 -> https://www.pcg-random.org/
# see https://numpy.org/doc/stable/reference/random/generated/numpy.random.seed.html
rng = Generator(PCG64(SeedSequence(seed_int)))
return rng
def bs_corrs(corr, Nbs, Mbs=None, seed=None, return_bs_list=False, return_mbs=False):
''' generate bootstrap resampling of correlation function data
Args:
- corr: numpy array of data (Ncfg, Nt, ...)
- Nbs: the number of bootstrap samples to generate
- Mbs: the number of random draws per bootstrap to generate
if Mbs != Ncfg, you will have to appropriately rescale
the fluctuations by sqrt( Mbs / Ncfg)
- seed: a string that will be hashed to seed the random number generator
Return:
return_mbs=False
corr_bs: an array of shape (Nbs, Nt, ...)
return_mbs=True
corr_bs: an array of shape (Nbs, Mbs, Nt, ...)
return_bs_list=True
corr_bs, bs_list.shape = (Nbs, Mbs)
'''
Ncfg = corr.shape[0]
if Mbs:
m_bs = Mbs
else:
m_bs = Ncfg
# seed the random number generator
rng = get_rng(seed) if seed else np.random.default_rng()
# make BS list: [low, high)
bs_list = rng.integers(low=0, high=Ncfg, size=[Nbs, m_bs])
# make BS corrs
corr_bs = np.zeros(tuple([Nbs, m_bs]) + corr.shape[1:], dtype=corr.dtype)
for bs in range(Nbs):
corr_bs[bs] = corr[bs_list[bs]]
# if return_mbs, return (Nbs, Mbs, Nt, ...) array
# otherwise, return mean over Mbs axis
if return_mbs:
bs_mean = corr_bs.mean(axis=(0,1))
d_corr_bs = corr_bs - bs_mean
corr_bs = bs_mean + d_corr_bs * np.sqrt( m_bs / Ncfg)
else:
corr_bs = corr_bs.mean(axis=1)
bs_mean = corr_bs.mean(axis=0)
d_corr_bs = corr_bs - bs_mean
corr_bs = bs_mean + d_corr_bs * np.sqrt( m_bs / Ncfg)
if return_bs_list:
return corr_bs, bs_list
else:
return corr_bs
def bs_prior(Nbs, mean=0., sdev=1., seed=None):
''' Generate bootstrap distribution of prior central values
Args:
Nbs : number of values to return
mean : mean of Gaussian distribution
sdev : width of Gaussian distribution
seed : string to seed random number generator
Return:
a numpy array of length Nbs of normal(mean, sdev) values
'''
# seed the random number generator
rng = get_rng(seed) if seed else np.random.default_rng()
return rng.normal(loc=mean, scale=sdev, size=Nbs)
def block_data(data, bl):
''' Generate "blocked" or "binned" data from original data
Args:
data : data of shape is (Ncfg, ...)
bl : block length in axis=0 units
Return:
block data : data of shape (Ncfg//bl, ...)
'''
ncfg, nt_gf = data.shape
if ncfg % bl == 0:
nb = ncfg // bl
else:
nb = ncfg // bl + 1
corr_bl = np.zeros([nb, nt_gf], dtype=data.dtype)
for b in range(nb-1):
corr_bl[b] = data[b*bl:(b+1)*bl].mean(axis=0)
corr_bl[nb-1] = data[(nb-1)*bl:].mean(axis=0)
return corr_bl