import numpy as np
import scipy.stats as stat
import matplotlib.pyplot as plt
#========================================
def powerlaw(n, lam, a, b):
#========================================
    """Discrete power-law pmf with exponent lam, truncated to the support a..b."""
    zeta = np.sum(1.0/np.arange(a, b+1)**lam)  # normalisation constant (truncated zeta)
    return n**(-lam)/zeta
#========================================
def lognormal(n, mu, sig):
#========================================
    """Lognormal pdf with parameters mu and sig of the underlying normal."""
    return 1.0/(n*np.sqrt(2*np.pi*sig**2)) * np.exp(-(np.log(n)-mu)**2/(2*sig**2))
#========================================
def LogLikelihood(lam, sizes, M, a, b):
#========================================
    """
    Log-likelihood of the data under a discrete power law on a..b,
    evaluated in one vectorised pass for every draw in lam.
    """
    # Normalisation constant for each lambda draw: each column of zetamat holds
    # n**(-lambda) for every n in a..b, so summing over the support gives the
    # constant that makes each pmf sum to one.
    zetamat = np.power.outer(1.0/np.arange(a, b+1), lam)
    zeta = np.sum(zetamat, 0)             # one normalisation constant per lambda draw
    norm = -M*np.log(zeta)                # contribution of zeta to the log-likelihood
    nprod = -lam*np.sum(np.log(sizes))    # data term: -lambda * sum(log sizes)
    loglik = nprod + norm
    return loglik
#=============================================
def LogLikelihood_LN(mu, sig, sizes, M, a, b):
#=============================================
    """
    Log-likelihood of the data under a lognormal, evaluated in one vectorised
    pass for every (mu, sig) draw. a and b are unused here (the lognormal is
    not truncated); they are kept so both likelihoods share one interface.
    """
    T1 = -np.sum(np.log(sizes))                        # -sum(log n), shared by all draws
    T2_mat = np.subtract.outer(np.log(sizes), mu)**2   # squared residuals, one column per mu draw
    T2 = -np.sum(T2_mat, 0)/(2*sig**2)                 # quadratic term per (mu, sig) draw
    T0 = -M*np.log(np.sqrt(2*np.pi) * sig)             # normalisation term per sig draw
    loglik = T0 + T1 + T2
    return loglik
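# Hedged sanity check (not part of the original file): for a single scalar
# (mu, sig), the vectorised log-likelihood should match scipy's lognorm with
# shape s=sig and scale exp(mu). The defaults and seed are illustrative.
def _check_lognormal_loglik(mu=0.5, sig=1.2, seed=0):
    rng = np.random.default_rng(seed)
    sizes = rng.lognormal(mu, sig, size=200)
    ours = LogLikelihood_LN(np.array([mu]), np.array([sig]), sizes, len(sizes), 1, 100)
    ref = np.sum(stat.lognorm.logpdf(sizes, s=sig, scale=np.exp(mu)))
    print("difference (should be ~0):", ours[0] - ref)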
#=============================================
def IS(npart, sizes, M, a, b):
#=============================================
    """
    Importance sampler for the power-law exponent: Monte Carlo draws from a
    proposal distribution, reweighted towards the posterior.
    OUTPUT: posterior mean exponent, log marginal likelihood, log-likelihood at
    each draw, and the effective sample size (a measure of sampler quality).
    """
    lambda_sample = np.random.uniform(0.1, 5, npart)   # proposal: lambda ~ Uniform(0.1, 5)
    # Log importance weights: log-likelihood of the data at each lambda draw,
    # plus the log prior density (Normal(1, 3)), minus the log proposal density,
    # which cancels the effect of sampling from the proposal rather than the prior.
    weights = (LogLikelihood(lambda_sample, sizes, M, a, b)
               + stat.norm.logpdf(lambda_sample, 1, 3)
               - stat.uniform.logpdf(lambda_sample, 0.1, 4.9))  # scipy's scale is the width, so Uniform(0.1, 5) needs scale 4.9
    maxw = np.max(weights)
    w2 = np.exp(weights - maxw)            # shift by the max log-weight for numerical stability
    w2_sum = np.sum(w2)
    ESS = 1.0/np.sum((w2/w2_sum)**2)
    mean_lambda = np.dot(lambda_sample, w2)/w2_sum     # weighted posterior mean of lambda
    # Log marginal likelihood: log of the empirical mean of the (unshifted) weights
    marglik = maxw + np.log(w2_sum) - np.log(npart)
    return [mean_lambda, marglik, LogLikelihood(lambda_sample, sizes, M, a, b), ESS]
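# Hedged usage sketch for IS (assumptions: support 1..100, true exponent 2.5;
# the sample size, particle count and seed are illustrative, not from the
# original script).
def _demo_IS(seed=0):
    rng = np.random.default_rng(seed)
    a, b = 1, 100
    support = np.arange(a, b + 1)
    p = support**-2.5
    p /= p.sum()
    sizes = rng.choice(support, size=1000, p=p)
    mean_lambda, marglik, _, ESS = IS(20000, sizes, len(sizes), a, b)
    print(f"posterior mean lambda = {mean_lambda:.3f}, "
          f"log marginal likelihood = {marglik:.1f}, ESS = {ESS:.0f}")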
#=============================================
def IS_LN(npart, sizes, M, a, b):
#=============================================
    """
    Importance sampler for the lognormal parameters (mu, sig).
    OUTPUT: highest-weight (MAP-style) mu and sig, log marginal likelihood,
    log-likelihood at each draw, and the effective sample size.
    """
    mu_sample = np.random.uniform(-2.0, 2.0, npart)    # proposal: mu ~ Uniform(-2, 2)
    sig_sample = np.random.uniform(0.1, 5.0, npart)    # proposal: sig ~ Uniform(0.1, 5)
    # With a uniform prior identical to the proposal, the prior and proposal
    # densities cancel, so the log weights reduce to the log-likelihood alone.
    weights = LogLikelihood_LN(mu_sample, sig_sample, sizes, M, a, b)
    maxw = np.max(weights)
    w2 = np.exp(weights - maxw)            # shift by the max log-weight for numerical stability
    w2_sum = np.sum(w2)
    ESS = 1.0/np.sum((w2/w2_sum)**2)
    # Report the single highest-weight draw: a MAP-style point estimate,
    # not the weighted posterior mean.
    wmax_ID = np.argmax(w2)
    mean_mu = mu_sample[wmax_ID]
    mean_sig = sig_sample[wmax_ID]
    # Log marginal likelihood: log of the empirical mean of the (unshifted) weights
    marglik = maxw + np.log(w2_sum) - np.log(npart)
    return [mean_mu, mean_sig, marglik, LogLikelihood_LN(mu_sample, sig_sample, sizes, M, a, b), ESS]
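# Hedged model-comparison sketch (not in the original script): with matched
# priors, the difference of the two log marginal likelihoods is the log Bayes
# factor of power law over lognormal. npart here is an illustrative default.
def _demo_model_comparison(sizes, a, b, npart=20000):
    _, marglik_pl, _, _ = IS(npart, sizes, len(sizes), a, b)
    _, _, marglik_ln, _, _ = IS_LN(npart, sizes, len(sizes), a, b)
    log_bayes = marglik_pl - marglik_ln
    print(f"log Bayes factor (power law vs lognormal) = {log_bayes:.1f}")
    return log_bayes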
#=============================================
def plot_samples(npart, sizes, M, a, b):
#=============================================
    """Histogram of lambda draws weighted by their importance weights."""
    lambda_sample = np.random.uniform(0.1, 5, npart)
    weights = LogLikelihood(lambda_sample, sizes, M, a, b)
    maxw = np.max(weights)
    w2 = np.exp(weights - maxw)
    plt.hist(lambda_sample, weights=w2, bins=np.linspace(2.5, 2.8))  # zoom on the region where the posterior mass sits
    plt.show()
#=============================================
def plotcomp(lam, mu, sig, sizes, a, b):
#=============================================
    """Overlay the fitted power-law and lognormal densities on the data histogram."""
    x = np.linspace(a, b, 40)
    plt.hist(sizes, 40, log=True, density=True)
    plt.plot(x, powerlaw(x, lam, a, b))
    plt.plot(x, lognormal(x, mu, sig))
    plt.show()
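# Minimal end-to-end sketch, guarded so that importing the module stays
# side-effect free; every constant below is an illustrative assumption.
if __name__ == "__main__":
    _demo_powerlaw_loglik()
    _check_lognormal_loglik()
    _demo_IS()
    # Synthetic power-law data for the model comparison
    rng = np.random.default_rng(1)
    support = np.arange(1, 101)
    p = support**-2.5
    p /= p.sum()
    sizes = rng.choice(support, size=1000, p=p)
    _demo_model_comparison(sizes, 1, 100)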