forked from rgmelko/Spin-Models-on-GPU
-
Notifications
You must be signed in to change notification settings - Fork 0
/
tfising.cu
191 lines (147 loc) · 5.76 KB
/
tfising.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
/*!
\file tfising.cu
\brief Functions to generate a Hamiltonian for the Transverse Field Ising model
*/
#include "hamiltonian.h"
__device__ float HOffBondXTFI(const int si, const int bra, const float JJ)
{
float valH;
//int S0, S1;
//int T0, T1;
valH = JJ; //contribution from the J part of the Hamiltonian
return valH;
}
__device__ float HDiagPartTFI1D(const int bra, int latticeSize, int3* d_Bond, const float JJ)
{
int S0b,S1b ; //spins (bra
int T0,T1; //site
//int P0, P1, P2, P3; //sites for plaquette (Q)
//int s0p, s1p, s2p, s3p;
float valH = 0.f;
for (int Ti=0; Ti<latticeSize; Ti++)
{
T0 = (d_Bond[Ti]).x; //lower left spin
S0b = (bra>>T0)&1;
//if (T0 != Ti) cout<<"Square error 3\n";
T1 = (d_Bond[Ti]).y; //first bond
S1b = (bra>>T1)&1; //unpack bra
valH += JJ*(S0b-0.5)*(S1b-0.5);
}//T0
return valH;
}//HdiagPart
__device__ float HDiagPartTFI2D(const int bra, int latticeSize, int3* d_Bond, const float JJ)
{
int S0b,S1b ; //spins (bra
int T0,T1; //site
//int P0, P1, P2, P3; //sites for plaquette (Q)
//int s0p, s1p, s2p, s3p;
float valH = 0.f;
for (int Ti=0; Ti<latticeSize; Ti++)
{
T0 = (d_Bond[Ti]).x; //lower left spin
S0b = (bra>>T0)&1;
//if (T0 != Ti) cout<<"Square error 3\n";
T1 = (d_Bond[Ti]).y; //first bond
S1b = (bra>>T1)&1; //unpack bra
valH += JJ*(S0b-0.5)*(S1b-0.5);
T1 = (d_Bond[Ti]).z; //second bond
S1b = (bra>>T1)&1; //unpack bra
valH += JJ*(S0b-0.5)*(S1b-0.5);
}//T0
return valH;
}//HdiagPart
__global__ void FillDiagonalsTFI(int* d_basis, f_hamiltonian H, int* d_Bond, parameters data)
{
int latticeSize = data.nsite;
int row = blockIdx.x*blockDim.x + threadIdx.x;
int site = threadIdx.x%(latticeSize);
unsigned int tempi;
__shared__ int3 tempBond[18];
//int3 tempBond[16];
if (row < H.sectorDim)
{
tempi = d_basis[row];
(tempBond[site]).x = d_Bond[site];
(tempBond[site]).y = d_Bond[latticeSize + site];
switch( data.dimension )
{
case 1 :
H.vals[row] = HDiagPartTFI1D(tempi, latticeSize, tempBond, data.J1);
break;
case 2 :
(tempBond[site].z) = d_Bond[2*latticeSize + site];
H.vals[row] = HDiagPartTFI2D(tempi, latticeSize, tempBond, data.J1);
break;
}
H.rows[row] = row;
H.cols[row] = row;
H.set[row] = 1;
}
else
{
H.rows[row] = H.sectorDim + 1;
H.cols[row] = H.sectorDim + 1;
H.set[row] = 0;
}
}
/* Function FillSparse: this function takes the empty Hamiltonian arrays and fills them up. Each thread in x handles one ket |i>, and each thread in y handles one site T0
Inputs: d_basisPosition - position information about the basis
d_basis - other basis infos
d_dim - the number of kets
H_sort - an array that will store the Hamiltonian
d_Bond - the bond information
d_latticeSize - the number of lattice sites
JJ - the coupling parameter
*/
__global__ void FillSparseTFI(int* d_basisPosition, int* d_basis, f_hamiltonian H, int* d_Bond, parameters data, int offset)
{
int latticeSize = data.nsite;
int dim = H.sectorDim;
int ii = ( blockDim.x / ( 2 * latticeSize ) ) * blockIdx.x + threadIdx.x / ( 2 * latticeSize ) + offset;// + blockIdx.y* gridDim.x * blockDim.x / (2 * latticeSize);
int T0 = threadIdx.x % ( 2 * latticeSize );
#if __CUDA_ARCH__ < 200
const int array_size = 512;
#elif __CUDA_ARCH__ >= 200
const int array_size = 1024;
#else
#error Could not detect GPU architecture
#endif
//__shared__ int2 tempBond[20];
__shared__ int tempPos[ array_size ];
__shared__ float tempVal[ array_size ];
int stride = 2 * latticeSize;
int site = T0 % ( latticeSize );
int rowTemp;
__shared__ unsigned int tempi[array_size];
__shared__ unsigned int tempod[array_size];
//int start = (bool)(dim%array_size) ? (dim/array_size + 1)*array_size : dim/array_size;
int start = ( bool )( dim % 512 ) ? ( dim / 512 + 1 ) * 512 : dim ;
bool compare;
if( ii < dim )
{
tempi[ threadIdx.x ] = d_basis[ ii ];
if ( T0 < 2 * latticeSize )
{
//Putting bond info in shared memory
// (tempBond[site]).x = d_Bond[site];
// (tempBond[site]).y = d_Bond[latticeSize + site];
// __syncthreads();
tempod[ threadIdx.x ] = tempi[ threadIdx.x ];
//-----------------Horizontal bond ---------------
tempPos[ threadIdx.x ] = ( tempod[ threadIdx.x ] ^ ( 1 << site ) );
//flip the site-th bit of row - applying the sigma_x operator
compare = ( tempPos[ threadIdx.x ] > ii ) && ( tempPos[ threadIdx.x ] < dim );
tempPos[ threadIdx.x ] = compare ? tempPos[ threadIdx.x ] : dim + 1;
tempVal[ threadIdx.x ] = HOffBondXTFI(site, tempi[ threadIdx.x ], data.J2);
rowTemp = ( T0 / latticeSize ) ? ii : tempPos[ threadIdx.x ];
rowTemp = compare ? rowTemp : dim + 1;
tempPos[ threadIdx.x ] = ( T0 / latticeSize) ? tempPos[ threadIdx.x ] : ii;
tempPos[ threadIdx.x ] = compare ? tempPos[threadIdx.x] : dim + 1;
//----Putting everything back into GPU main memory-----------
H.vals[ ii*stride + 2 * site + ( T0 / latticeSize ) + start ] = tempVal[ threadIdx.x ];
H.cols[ ii*stride + 2 * site + ( T0 / latticeSize ) + start ] = tempPos[ threadIdx.x ];
H.rows[ ii*stride + 2 * site + ( T0 / latticeSize ) + start ] = rowTemp;
H.set[ ii*stride + 2 * site + ( T0 / latticeSize ) + start ] = (int)compare;
}
}//end of ii
}//end of FillSparse