-
Notifications
You must be signed in to change notification settings - Fork 59
/
Copy pathmcmc_gtb.py
129 lines (98 loc) · 4.08 KB
/
mcmc_gtb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/usr/bin/env python
"""
Markov Chain Monte Carlo (MCMC) sampler for polygenic prediction with continuous shrinkage (CS) priors.
"""
import numpy as np
from scipy import linalg
from numpy import random
import gigrnd
def mcmc(a, b, phi, sst_dict, n, ld_blk, blk_size, n_iter, n_burnin, thin, chrom, out_dir, beta_std, write_psi, write_pst, seed):
print('... MCMC ...')
# seed
if seed != None:
random.seed(seed)
# derived stats
beta_mrg = np.array(sst_dict['BETA'], ndmin=2).T
maf = np.array(sst_dict['MAF'], ndmin=2).T
n_pst = int((n_iter-n_burnin)/thin)
p = len(sst_dict['SNP'])
n_blk = len(ld_blk)
# initialization
beta = np.zeros((p,1))
psi = np.ones((p,1))
sigma = 1.0
if phi == None:
phi = 1.0; phi_updt = True
else:
phi_updt = False
if write_pst == 'TRUE':
beta_pst = np.zeros((p,n_pst))
beta_est = np.zeros((p,1))
psi_est = np.zeros((p,1))
sigma_est = 0.0
phi_est = 0.0
# MCMC
pp = 0
for itr in range(1,n_iter+1):
if itr % 100 == 0:
print('--- iter-' + str(itr) + ' ---')
mm = 0; quad = 0.0
for kk in range(n_blk):
if blk_size[kk] == 0:
continue
else:
idx_blk = range(mm,mm+blk_size[kk])
dinvt = ld_blk[kk]+np.diag(1.0/psi[idx_blk].T[0])
dinvt_chol = linalg.cholesky(dinvt)
beta_tmp = linalg.solve_triangular(dinvt_chol, beta_mrg[idx_blk], trans='T') + np.sqrt(sigma/n)*random.randn(len(idx_blk),1)
beta[idx_blk] = linalg.solve_triangular(dinvt_chol, beta_tmp, trans='N')
quad += np.dot(np.dot(beta[idx_blk].T, dinvt), beta[idx_blk])
mm += blk_size[kk]
err = max(n/2.0*(1.0-2.0*sum(beta*beta_mrg)+quad), n/2.0*sum(beta**2/psi))
sigma = 1.0/random.gamma((n+p)/2.0, 1.0/err)
delta = random.gamma(a+b, 1.0/(psi+phi))
for jj in range(p):
psi[jj] = gigrnd.gigrnd(a-0.5, 2.0*delta[jj], n*beta[jj]**2/sigma)
psi[psi>1] = 1.0
if phi_updt == True:
w = random.gamma(1.0, 1.0/(phi+1.0))
phi = random.gamma(p*b+0.5, 1.0/(sum(delta)+w))
# posterior
if (itr>n_burnin) and (itr % thin == 0):
beta_est = beta_est + beta/n_pst
psi_est = psi_est + psi/n_pst
sigma_est = sigma_est + sigma/n_pst
phi_est = phi_est + phi/n_pst
if write_pst == 'TRUE':
beta_pst[:,[pp]] = beta
pp += 1
# convert standardized beta to per-allele beta
if beta_std == 'FALSE':
beta_est /= np.sqrt(2.0*maf*(1.0-maf))
if write_pst == 'TRUE':
beta_pst /= np.sqrt(2.0*maf*(1.0-maf))
# write posterior effect sizes
if phi_updt == True:
eff_file = out_dir + '_pst_eff_a%d_b%.1f_phiauto_chr%d.txt' % (a, b, chrom)
else:
eff_file = out_dir + '_pst_eff_a%d_b%.1f_phi%1.0e_chr%d.txt' % (a, b, phi, chrom)
with open(eff_file, 'w') as ff:
if write_pst == 'TRUE':
for snp, bp, a1, a2, beta in zip(sst_dict['SNP'], sst_dict['BP'], sst_dict['A1'], sst_dict['A2'], beta_pst):
ff.write(('%d\t%s\t%d\t%s\t%s' + '\t%.6e'*n_pst + '\n') % (chrom, snp, bp, a1, a2, *beta))
else:
for snp, bp, a1, a2, beta in zip(sst_dict['SNP'], sst_dict['BP'], sst_dict['A1'], sst_dict['A2'], beta_est):
ff.write('%d\t%s\t%d\t%s\t%s\t%.6e\n' % (chrom, snp, bp, a1, a2, beta))
# write posterior estimates of psi
if write_psi == 'TRUE':
if phi_updt == True:
psi_file = out_dir + '_pst_psi_a%d_b%.1f_phiauto_chr%d.txt' % (a, b, chrom)
else:
psi_file = out_dir + '_pst_psi_a%d_b%.1f_phi%1.0e_chr%d.txt' % (a, b, phi, chrom)
with open(psi_file, 'w') as ff:
for snp, psi in zip(sst_dict['SNP'], psi_est):
ff.write('%s\t%.6e\n' % (snp, psi))
# print estimated phi
if phi_updt == True:
print('... Estimated global shrinkage parameter: %1.2e ...' % phi_est )
print('... Done ...')