From 4c05d88ba08d6bcecf9a416309d0040ece153b26 Mon Sep 17 00:00:00 2001 From: Ethan Meitz <54505069+ejmeitz@users.noreply.github.com> Date: Tue, 17 Oct 2023 14:03:51 -0400 Subject: [PATCH] stuff --- Project.toml | 2 +- src/autodiff_test.jl | 60 +++++-- src/finite_diff_ifc.jl | 58 ++----- src/fourth_order.jl | 16 +- src/mcc.jl | 104 +++++++----- src/old.jl | 6 + src/third_order.jl | 360 ++++++++++++++++++++++++----------------- src/types.jl | 59 ++++--- test/runtests.jl | 35 ++-- 9 files changed, 407 insertions(+), 293 deletions(-) diff --git a/Project.toml b/Project.toml index 2abfeb6..ad037fa 100644 --- a/Project.toml +++ b/Project.toml @@ -20,7 +20,7 @@ Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d" cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1" [compat] -SimpleCrystals = "0.1.6" +SimpleCrystals = "0.1.7" TensorOperations = "4.0.0" julia = "1" diff --git a/src/autodiff_test.jl b/src/autodiff_test.jl index c74f6b4..15821f8 100644 --- a/src/autodiff_test.jl +++ b/src/autodiff_test.jl @@ -97,16 +97,8 @@ function third_order_AD_test(sys::SuperCellSystem{D}, pot::PairPotential, tol) w end end - #Acoustic Sum Rule #&TODO - # Threads.@threads for i in range(1, N_atoms) # index of block matrix - # for α in range(1,D) - # for β in range(1,D) - # ii = D*(i-1) + α - # jj = D*(i-1) + β # i == j because we're on diagonal - # IFC2[ii,jj] = -1*sum(IFC2[ii, β:D:end]) - # end - # end - # end + #Acoustic Sum Rule + ASR!(IFC3, N_atoms, D) IFC3 = apply_tols!(IFC3,tol) @@ -114,7 +106,7 @@ function third_order_AD_test(sys::SuperCellSystem{D}, pot::PairPotential, tol) w end -function fourth_order_AD_test(sys::SuperCellSystem{D}, pot::PairPotential, tol; float_type = Float32) where D +function fourth_order_AD_test(sys::SuperCellSystem{D}, pot::PairPotential, tol) where D vars = make_variables(:r, D) r_norm = sqrt(sum(x -> x^2, vars)) pot_symbolic = potential_nounits(pot, r_norm) @@ -124,7 +116,7 @@ function fourth_order_AD_test(sys::SuperCellSystem{D}, pot::PairPotential, tol; FO_exec = make_function(fourth_order_symbolic, vars) N_atoms = n_atoms(sys) - IFC4 = FC_val{float_type,4}[] + IFC4 = FC_val{Float64,4}[] for i in range(1, N_atoms) for j in range(i + 1, N_atoms) @@ -143,10 +135,10 @@ function fourth_order_AD_test(sys::SuperCellSystem{D}, pot::PairPotential, tol; #iiij ii = D*(i-1) + α; jj = D*(i-1) + β; kk = D*(i-1) + γ; ll = D*(j-1) + δ #& THERE WILL BE DUPLICATES SINCE THIS USES PUSH INSTEAD OF JUST OVERWRITTING DATA!!! - set_terms_fourth_order!(χ, ii, jj, kk, ll, float_type(iiij_block[α,β,γ,δ])) + set_terms_fourth_order!(χ, ii, jj, kk, ll, iiij_block[α,β,γ,δ]) #iijj ii = D*(i-1) + α; jj = D*(i-1) + β; kk = D*(j-1) + γ; ll = D*(j-1) + δ - set_terms_fourth_order!(χ, ii, jj, kk, ll, float_type(iiij_block[α,β,γ,δ])) + set_terms_fourth_order!(χ, ii, jj, kk, ll, iiij_block[α,β,γ,δ]) end end end @@ -167,7 +159,7 @@ function fourth_order_AD_test(sys::SuperCellSystem{D}, pot::PairPotential, tol; if abs(iiij_block[α,β,γ,δ]) > tol #ijjj ii = D*(i-1) + α; jj = D*(j-1) + β; kk = D*(j-1) + γ; ll = D*(j-1) + δ - set_terms_fourth_order!(χ, ii, jj, kk, ll, float_type(ijjj_block[α,β,γ,δ])) + set_terms_fourth_order!(χ, ii, jj, kk, ll, ijjj_block[α,β,γ,δ]) end end end @@ -219,4 +211,40 @@ function energy_loop_mcc(sys::SuperCellSystem{D}, pot::PairPotential, q, phi, ma return U_total -end \ No newline at end of file +end + +#q_vars = make_variable(:q, N_modes) +#r_vars = zeros(eltype(q_vars), length(q_vars)) +#for i in 1:N_modes +# r_vars[i] = dot(q_vars, phi[i,:]) +#end + +# divide r_vars by mass_sqrt + +# pass r_vars into energy_loop2 + +# function energy_loop2(pot::PairPotential, posns, r_cut, box_sizes, N_atoms) + +# U_total = 0.0 +# box_sizes = ustrip.(box_sizes) + +# for i in range(1,N_atoms) +# for j in range(i+1, N_atoms) +# r = posns[3*(i-1) + 1: 3*i] .- posns[3*(j-1) + 1: 3*j ] +# nearest_mirror_AD!(r, box_sizes) +# dist = sqrt(sum(x-> x*x, r)) +# # if dist < r_cut +# U_total += potential_nounits(pot, dist) +# # end +# end +# end + +# return U_total + +# end + +# #& not quite right but I just wanna test the AD +# function nearest_mirror_AD!(r_ij, box_sizes) +# r_ij .+= sign.(r_ij .- box_sizes./2) .* box_sizes +# return r_ij +# end \ No newline at end of file diff --git a/src/finite_diff_ifc.jl b/src/finite_diff_ifc.jl index a1423db..bfc6771 100644 --- a/src/finite_diff_ifc.jl +++ b/src/finite_diff_ifc.jl @@ -15,8 +15,7 @@ function second_order_finite_diff(sys_eq::SuperCellSystem{3}, pot::PairPotential energy_unit = zero(potential(pot,1u"Å")) - #Make mutable #& change SimpleCrystals to not use SVector - posns = [Vector(a) for a in positions(sys_eq)] + posns = positions(sys_eq) if atom_idxs[1] != atom_idxs[2] @@ -61,9 +60,8 @@ function third_order_finite_diff(sys_eq::SuperCellSystem{3}, pot::PairPotential, @assert length(atom_idxs) == 3 @assert length(cartesian_idxs) == 3 - @assert !all(atom_idxs .== atom_idxs[1]) "Cannot check self terms with finite difference" - N_atoms = n_atoms(sys) + N_atoms = n_atoms(sys_eq) z = zero(pot.σ) combos = [[z,h,-h],[z,h,h],[z,-h,h],[z,-h,-h]] @@ -71,15 +69,14 @@ function third_order_finite_diff(sys_eq::SuperCellSystem{3}, pot::PairPotential, force_unit = zero(force(pot,1u"Å")) forces = zeros(4)*force_unit - #Make mutable #& change SimpleCrystals to not use SVector - posns = [Vector(a) for a in positions(sys)] + posns = positions(sys_eq) for (c,combo) in enumerate(combos) posns[atom_idxs[2]][cartesian_idxs[2]] += combo[2] posns[atom_idxs[3]][cartesian_idxs[3]] += combo[3] - forces[c] = force_loop_j(pot, posns, force_unit, r_cut, sys.box_sizes_SC, N_atoms, atom_idxs[1], cartesian_idxs[1]) + forces[c] = force_loop_j(pot, posns, force_unit, r_cut, sys_eq.box_sizes_SC, N_atoms, atom_idxs[1], cartesian_idxs[1]) posns[atom_idxs[2]][cartesian_idxs[2]] -= combo[2] posns[atom_idxs[3]][cartesian_idxs[3]] -= combo[3] @@ -98,16 +95,15 @@ function fourth_order_finite_diff(sys_eq::SuperCellSystem{3}, pot::PairPotential @assert length(cartesian_idxs) == 4 @assert !all(atom_idxs .== atom_idxs[1]) "Cannot check self terms with finite difference" - N_atoms = n_atoms(sys) + N_atoms = n_atoms(sys_eq) combos = [[h,h,h],[h,h,-h],[h,-h,h],[h,-h,-h],[-h,h,h],[-h,h,-h],[-h,-h,h],[-h,-h,-h]] force_unit = zero(force(pot,1u"Å")) forces = zeros(8)*force_unit - #Make mutable #& change SimpleCrystals to not use SVector - posns = [Vector(a) for a in positions(sys)] - + posns = positions(sys_eq) + for (c,combo) in enumerate(combos) posns[atom_idxs[1]][cartesian_idxs[1]] += combo[1] @@ -120,7 +116,7 @@ function fourth_order_finite_diff(sys_eq::SuperCellSystem{3}, pot::PairPotential for i in range(1,N_atoms) if i != l r = posns[i] .- posns[l] - nearest_mirror!(r, sys.box_sizes_SC) + nearest_mirror!(r, sys_eq.box_sizes_SC) dist = norm(r) #Make sure mirrored particle is in cuttoff @@ -148,48 +144,28 @@ function fourth_order_finite_diff(sys_eq::SuperCellSystem{3}, pot::PairPotential return (1/(8*(h^3)))*(forces[1] - forces[2] - forces[3] + forces[4] - forces[5] + forces[6] + forces[7] - forces[8]) end +#& CURRENTLY DIFFERS by -1 FROM MINE function check_ifc(sys::SuperCellSystem{D}, ifc::DenseForceConstants{O}, pot::PairPotential, n_points; r_cut = pot.r_cut, h = 0.04*0.5291772109u"Å") where {D,O} N_atoms = n_atoms(sys) - ri = rand(1:N_atoms, (n_points,O)) #random indexes + ri = rand(1:N_atoms, (n_points,O)) #random atom indexes rci = rand(1:D, (n_points, O)) #random cartesian indexes finite_diff_funcs = [second_order_finite_diff, third_order_finite_diff, fourth_order_finite_diff] finite_diff_func = finite_diff_funcs[O-1] - ifc_fd = zeros(n_points)*ifc.units - #& ONLY WORKS FOR SPECIFIC DIMENSION - ifc_actual = [ifc[D*(ri[i,1] - 1) + rci[i,1], D*(ri[i,2] - 1) + rci[i,2]] for i in 1:n_points] - # ifc_actual = [ifc[D*(ri[i,1] - 1) + rci[i,1], D*(ri[i,2] - 1) + rci[i,2], D*(ri[i,3] - 1) + rci[i,3]] - # for i in 1:n_points] + idxs = (ri_row, rci_row) -> [D*(ri_row[o] - 1) + rci_row[o] for o in 1:O] + ifc_actual = @views [ifc[idxs(ri[i,:], rci[i,:])...] for i in 1:n_points] + ifc_fd = zeros(n_points)*ifc.units for i in 1:n_points - ifc_fd[i] = finite_diff_func(sys, pot, ri[i,:], rci[i,:]; r_cut = r_cut, h = h) + val = finite_diff_func(sys, pot, ri[i,:], rci[i,:]; r_cut = r_cut, h = h) + if ustrip.(abs(val)) > ifc.tol + ifc_fd[i] = val + end end return ifc_fd, (ifc_actual.*ifc.units) -end - -function check_ifc(sys::SuperCellSystem{D}, ifc::SparseForceConstants{O}, pot::PairPotential, n_points; - r_cut = pot.r_cut, h = 0.04*0.5291772109u"Å") where {D,O} - - random_idxs = rand(1:length(ifc), (n_points,)) - - finite_diff_funcs = [second_order_finite_diff, third_order_finite_diff, fourth_order_finite_diff] - finite_diff_func = finite_diff_funcs[O-1] - - ifc_fd = zeros(n_points)*ifc.units - ifc_actual = value.(ifc[random_idxs]) - - for p in 1:n_points - idxs = idx(ifc[random_idxs[p]]) - cart_idxs = ((idxs .- 1) .% D) .+ 1 - atom_idxs = ((idxs .- cart_idxs) ./ D) .- 1 - ifc_fd[p] = finite_diff_func(sys, pot, atom_idxs, cart_idxs; r_cut = r_cut, h = h) - end - - return ifc_fd, (ifc_actual.*ifc.units) - end \ No newline at end of file diff --git a/src/fourth_order.jl b/src/fourth_order.jl index 8fa394e..e788700 100644 --- a/src/fourth_order.jl +++ b/src/fourth_order.jl @@ -7,7 +7,7 @@ i = j and k = l (e.g. (i,i,j,j)) and terms where i = j = k (e.g. (i,i,i,j)). All more than 2 unique indices will be 0. """ function fourth_order_sparse(sys::SuperCellSystem{D}, pot::PairPotential, - tol; float_type = Float32, r_cut = pot.r_cut, nthreads::Integer = Threads.nthreads()) where D + tol; r_cut = pot.r_cut, nthreads::Integer = Threads.nthreads()) where D @assert all(r_cut .< sys.box_sizes_SC) "Cutoff larger than L/2" @@ -21,7 +21,7 @@ function fourth_order_sparse(sys::SuperCellSystem{D}, pot::PairPotential, #Create storage for each thread χ_arrays = MultiVectorStorage(FC_val{float_type,4}, nthreads) - atoms_per_thd = cld(N_atoms,nthreads) #* DONT LIKE THIS, BAD BALANCING + atoms_per_thd = cld(N_atoms,nthreads) #* DONT LIKE THIS, BAD BALANCING, USE ITERATORS.PARTITION Base.@sync for thd in 1:nthreads Base.Threads.@spawn begin @@ -43,7 +43,7 @@ function fourth_order_sparse(sys::SuperCellSystem{D}, pot::PairPotential, for β in range(1,D) for γ in range(1,D) for δ in range(1,D) - val = float_type(ustrip(ϕ₄(pot, dist, r, α, β, γ, δ))) + val = ustrip(ϕ₄(pot, dist, r, α, β, γ, δ)) if abs(val) > tol # i,i,j,j terms @@ -73,7 +73,7 @@ function fourth_order_sparse(sys::SuperCellSystem{D}, pot::PairPotential, for β in range(1,D) for γ in range(1,D) for δ in range(1,D) - val = -float_type(ustrip(ϕ₄(pot, dist, r, α, β, γ, δ))) + val = -ustrip(ϕ₄(pot, dist, r, α, β, γ, δ)) if abs(val) > tol # j,j,j,i terms @@ -97,16 +97,16 @@ function fourth_order_sparse(sys::SuperCellSystem{D}, pot::PairPotential, end #Concat arrays calculated by separate threads - χ = convert(Vector{FC_val{float_type, 4}}, χ_arrays) + χ = convert(Vector{FC_val{Float64, 4}}, χ_arrays) - #Acoustic Sum Rule (i,i,i,i terms) #& how to parallelize this part? - for i in range(1,N_atoms) + #Acoustic Sum Rule (i,i,i,i terms) + Threads.@threads for i in range(1,N_atoms) for α in range(1,D) for β in range(1,D) for γ in range(1,D) for δ in range(1,D) ii = D*(i-1) + α; jj = D*(i-1) + β; kk = D*(i-1) + γ; ll = D*(i-1) + δ - val = float_type(ϕ₄_self(sys, pot, i, α, β, γ, δ, r_cut)) + val = ϕ₄_self(sys, pot, i, α, β, γ, δ, r_cut) if abs(val) > tol push!(χ,FC_val(val, ii, jj, kk, ll)) end diff --git a/src/mcc.jl b/src/mcc.jl index 49f2c4b..82faf7e 100644 --- a/src/mcc.jl +++ b/src/mcc.jl @@ -73,46 +73,64 @@ function mcc3_blocked!(K3::CuArray{Float32, 3}, Ψ::CuArray{Float32, 3}, phi::Cu end -const BLOCK_SIZE = 128 - -#Each thread calculates fiber of K_nml -function mcc3_custom_kernel(num_psi_idxs::Int32, mcc::CuDeviceArray{Float32,1}, phi::CuDeviceArray{Float32,2}, - Ψ_vals::CuDeviceArray, is::CuDeviceArray{Int32,1}, js::CuDeviceArray{Int32,1}, ks::CuDeviceArray{Int32,1}) - - n = (blockIdx().x-1i32) * blockDim().x + threadIdx().x - m = (blockIdx().y-1i32) * blockDim().y + threadIdx().y - linearThreadIdx = (blockDim().x * threadIdx().y) + threadIdx().x #& OFF BY 1 iSSUES? - - #To store rows of phi matrix needed by current values of Psi - phi_i = CuStaticSharedArray(Float32, BLOCK_SIZE) - phi_j = CuStaticSharedArray(Float32, BLOCK_SIZE) - phi_k = CuStaticSharedArray(Float32, BLOCK_SIZE) - - - # Treat thread index as index into Ψ now and accumulate values into MCC - for i in 1:BLOCK_SIZE:num_psi_idxs - psi_idx = i + linearThreadIdx - - if psi_idx <= num_psi_idxs - #Move Phi Data Into Shared Memory - phi_i[linearThreadIdx] = phi[is[psi_idx]] - phi_j[linearThreadIdx] = phi[js[psi_idx]] - phi_k[linearThreadIdx] = phi[ks[psi_idx]] #& This might be the only one that needs to be in shared mem - sync_threads() - - #Calculate fiber of MCC - for l in 1:N_modes - mcc[n,m,l] += Ψ_vals[psi_idx]*phi_i[n]*phi_j[m]*phi_k[l] - end - sync_threads() - - end - end - -end - -# const BLOCK_SIDE = 32 -# const GRID_SIDE = ceil(sqrt(N)/BLOCK_SIDE) -# @cuda blocks = (GRID_SIDE,GRID_SIDE) threads = (BLOCK_SIDE,BLOCK_SIDE) mcc3_custom_kernel( - -# ) \ No newline at end of file +# using CUDA +# import CUDA: i32 +# N = 10000000 +# N_modes = 768; +# rand_nums = CUDA.rand(N); +# rand_idxs = rand(1:N_modes, (N,3)); +# is = CuArray{Int32}(rand_idxs[:,1]) +# js = CuArray{Int32}(rand_idxs[:,2]) +# ks = CuArray{Int32}(rand_idxs[:,3]) + +# phi = CUDA.rand(N_modes, N_modes) +# mcc_all = zeros(Float32, (N_modes, N_modes, N_modes)) + +# #256 x 256 x N_modes +# mcc_chunk_size = 256 +# mcc_chunk = CUDA.zeros(Float32, (mcc_chunk_size, mcc_chunk_size, N_modes)) + + +# #CUDA THREAD DIMENSIONS +# const BLOCK_SIDE::Int32 = 32 +# const BLOCK_SIZE::Int32 = BLOCK_SIDE*BLOCK_SIDE + +# const GRID_SIDE::Int64 = Int64(ceil(mcc_chunk_size/BLOCK_SIDE)) +# @cuda blocks = (GRID_SIDE,GRID_SIDE) threads = (BLOCK_SIDE,BLOCK_SIDE) mcc3_custom_kernel(Int32(N), Int32(0), Int32(N_modes), mcc_chunk, phi,rand_nums,is,js,ks) + +# #Each thread calculates fiber of K_nml +# function mcc3_custom_kernel(num_psi_idxs::Int32, mcc_offset::Int32, N_modes::Int32, mcc_block::CuDeviceArray{Float32,3}, phi::CuDeviceArray{Float32,2}, +# Ψ_vals::CuDeviceArray{Float32,1}, is::CuDeviceArray{Int32,1}, js::CuDeviceArray{Int32,1}, ks::CuDeviceArray{Int32,1}) + +# n::Int32 = (blockIdx().x-1i32) * blockDim().x + threadIdx().x +# m::Int32 = (blockIdx().y-1i32) * blockDim().y + threadIdx().y +# @cuprintln "$n $m" +# linearThreadIdx::Int32 = (blockDim().x * (threadIdx().y-1i32)) + threadIdx().x #& OFF BY 1 iSSUES? + +# #To store rows of phi matrix needed by current values of Psi +# phi_i = CuStaticSharedArray(Float32, BLOCK_SIZE) +# phi_j = CuStaticSharedArray(Float32, BLOCK_SIZE) +# phi_k = CuStaticSharedArray(Float32, BLOCK_SIZE) + +# # Treat thread index as index into Ψ now and accumulate values into MCC +# for i in 1:BLOCK_SIZE:num_psi_idxs +# psi_idx::Int32 = i + linearThreadIdx + +# if psi_idx <= num_psi_idxs +# #Move Phi Data Into Shared Memory +# phi_i[linearThreadIdx] = phi[is[psi_idx],linearThreadIdx] #* this read is not in order of memory +# phi_j[linearThreadIdx] = phi[js[psi_idx],linearThreadIdx] +# phi_k[linearThreadIdx] = phi[ks[psi_idx],linearThreadIdx] #& This might be the only one that needs to be in shared mem +# CUDA.sync_threads() + +# #Calculate fiber of MCC +# # if m >= n #* this could make it slower?? +# for l in 1:N_modes +# mcc_block[n,m,l] += Ψ_vals[psi_idx]*phi_i[n + mcc_offset]*phi_j[m + mcc_offset]*phi_k[l] +# end +# # end +# CUDA.sync_threads() +# end +# end +# return +# end diff --git a/src/old.jl b/src/old.jl index 41aab2b..c61f2a9 100644 --- a/src/old.jl +++ b/src/old.jl @@ -19,6 +19,12 @@ # return mapreduce(f, +, cuF3_sparse_mw) # end +# function test(cuF3_sparse_mw::CuArray{F3_val}, q::CuArray{Float32,1}) +# f = (f3_data) -> f3_data.val * q[f3_data.i] * q[f3_data.j] * q[f3_data.k] +# return mapreduce(f, +, cuF3_sparse_mw) +# end + + # """ # Takes in sparse third order data that has already been mass weighted diff --git a/src/third_order.jl b/src/third_order.jl index 13fb230..788efb2 100644 --- a/src/third_order.jl +++ b/src/third_order.jl @@ -1,72 +1,175 @@ -export third_order_IFC, third_order_test, mass_weight_sparsify_third_order, mass_weight_third_order!, F3_val +export third_order_IFC, third_order_sparse, mass_weight_sparsify_third_order, mass_weight_third_order!, F3_val -""" -Calculates analytical third order force constants for a pair potential (e.g. Lennard Jones). -For pair potentials the only non-zero terms will be self-terms (e.g. i,i,i) and terms where -i = j or j = k (e.g. 1,1,2). -""" function third_order_IFC(sys::SuperCellSystem{D}, pot::PairPotential, tol) where D @assert all(pot.r_cut .< sys.box_sizes_SC) "Cutoff larger than L/2" N_atoms = n_atoms(sys) - Ψ = zeros(D*N_atoms,D*N_atoms,D*N_atoms) - - unique_indices = with_replacement_combinations((1:N_atoms), 3) - is_not_self_term = (i,j,k) -> !(i == j && i == k) - not_all_unique = (i,j,k) -> (i == j || i == k || j == k) - - r = zeros(D)*unit(sys.atoms.position[1][1]) - for idx in unique_indices #TODO PARALLELIZE THIS, HOW TO STILL USE ITERATOR? just for i <= j <= k - i,j,k = idx - if is_not_self_term(i,j,k) && not_all_unique(i,j,k) - # println(i," ",j," ",k) - - if i == j - r .= sys.atoms.position[i] .- sys.atoms.position[k] - elseif i == k - r .= sys.atoms.position[i] .- sys.atoms.position[j] - elseif j == k - r .= sys.atoms.position[j] .- sys.atoms.position[i] - end + Ψ = zeros(D*N_atoms, D*N_atoms, D*N_atoms) + + + # pot is pair potential only loop atomic pairs + Threads.@threads for i in range(1,N_atoms) + r = zeros(D)*unit(sys.atoms.position[1][1]) #pre-allocate per thread + for j in range(i+1,N_atoms) + + #i,i,j terms + r .= sys.atoms.position[i] .- sys.atoms.position[j] + nearest_mirror!(r, sys.box_sizes_SC) + dist = norm(r) + - r .= nearest_mirror(r, sys.box_sizes_SC) + if dist < pot.r_cut + for α in range(1,D) + for β in range(1,D) + for γ in range(1,D) + val = -ustrip(ϕ₃(pot, dist, r, α, β, γ)) #* re-calculating derivatives ununecessarily in here + ii = D*(i-1) + α; jj = D*(i-1) + β; kk = D*(j-1) + γ + Ψ[ii,jj,kk] = val; Ψ[ii,kk,jj] = val + Ψ[jj,kk,ii] = val; Ψ[jj,ii,kk] = val + Ψ[kk,ii,jj] = val; Ψ[kk,jj,ii] = val + end + end + end + + end + + # #j,j,i term + r .= sys.atoms.position[j] .- sys.atoms.position[i] + nearest_mirror!(r, sys.box_sizes_SC) dist = norm(r) if dist < pot.r_cut for α in range(1,D) for β in range(1,D) for γ in range(1,D) - ii = D*(i-1) + α; jj = D*(j-1) + β; kk = D*(k-1) + γ - - val = -ustrip(ϕ₃(pot, dist, r, α, β, γ)) + val = -ustrip(ϕ₃(pot, dist, r, α, β, γ)) + ii = D*(i-1) + α; jj = D*(j-1) + β; kk = D*(j-1) + γ Ψ[ii,jj,kk] = val; Ψ[ii,kk,jj] = val Ψ[jj,kk,ii] = val; Ψ[jj,ii,kk] = val - Ψ[kk,ii,jj] = val; Ψ[kk,jj,ii] = val + Ψ[kk,ii,jj] = val; Ψ[kk,jj,ii] = val end end + end + + end + end + end + + #Self terms + Ψ = ASR!(Ψ, N_atoms, D) + + #Apply tolerances + Ψ = apply_tols!(Ψ, tol) + + #Give proper units + Ψ_unit = unit(pot.ϵ / pot.σ^3) + + return DenseForceConstants(Ψ, Ψ_unit, tol) + +end + +#* untested +function third_order_sparse(sys::SuperCellSystem{D}, pot::PairPotential, + tol; r_cut = pot.r_cut, nthreads::Integer = Threads.nthreads()) where D + + throw(error("Not yet implemented")) + + @assert all(r_cut .< sys.box_sizes_SC) "Cutoff larger than L/2" + + N_atoms = n_atoms(sys) + + if (N_atoms < nthreads) + @warn "More threads than atoms, using N_atoms as number of threads" + nthreads = N_atoms + end + + #Create storage for each thread + Ψ_arrays = MultiVectorStorage(FC_val{Float64,3}, nthreads) + atoms_per_thd = cld(N_atoms,nthreads) #* DONT LIKE THIS, BAD BALANCING, USE ITERATORS.PARTITION + + Base.@sync for thd in 1:nthreads + Base.Threads.@spawn begin + Ψ_thd = Ψ_arrays.data[thd] #Get local storage for force constants + start_idx = (thd-1)*atoms_per_thd + 1 + end_idx = min(thd*atoms_per_thd, N_atoms) + + r = zeros(D)*unit(sys.atoms.position[1][1]) #pre-allocate + for i in range(start_idx, end_idx) + for j in range(i+1,N_atoms) + + r .= sys.atoms.position[i] .- sys.atoms.position[j] + nearest_mirror!(r, sys.box_sizes_SC) + dist = norm(r) + + + if dist < pot.r_cut + for α in range(1,D) + for β in range(1,D) + for γ in range(1,D) + val = -ustrip(ϕ₃(pot, dist, r, α, β, γ)) #* re-calculating derivatives ununecessarily in here + ii = D*(i-1) + α; jj = D*(i-1) + β; kk = D*(j-1) + γ + push!(Ψ_thd, FC_val(val,ii,jj,kk)) + push!(Ψ_thd, FC_val(val,ii,kk,jj)) + push!(Ψ_thd, FC_val(val,jj,kk,ii)) + push!(Ψ_thd, FC_val(val,jj,ii,kk)) + push!(Ψ_thd, FC_val(val,kk,ii,jj)) + push!(Ψ_thd, FC_val(val,kk,jj,ii)) + end + end + end + + end + + # #j,j,i term + r .= sys.atoms.position[j] .- sys.atoms.position[i] + nearest_mirror!(r, sys.box_sizes_SC) + dist = norm(r) + + if dist < pot.r_cut + for α in range(1,D) + for β in range(1,D) + for γ in range(1,D) + val = -ustrip(ϕ₃(pot, dist, r, α, β, γ)) + ii = D*(i-1) + α; jj = D*(j-1) + β; kk = D*(j-1) + γ + push!(Ψ_thd, FC_val(val,ii,jj,kk)) + push!(Ψ_thd, FC_val(val,ii,kk,jj)) + push!(Ψ_thd, FC_val(val,jj,kk,ii)) + push!(Ψ_thd, FC_val(val,jj,ii,kk)) + push!(Ψ_thd, FC_val(val,kk,ii,jj)) + push!(Ψ_thd, FC_val(val,kk,jj,ii)) + end + end + end + + end end end end end - #Acoustic Sum Rule (technically re-calculating the whole loop above here) - Threads.@threads for i in range(1,N_atoms) + Ψ = convert(Vector{FC_val{Float64, 3}}, Ψ_arrays) + + #Acoustic Sum Rule (i,i,i,i terms) #& how to parallelize this part? + #* Can keep running track of these terms when generating others + for i in range(1,N_atoms) for α in range(1,D) for β in range(1,D) for γ in range(1,D) ii = D*(i-1) + α; jj = D*(i-1) + β; kk = D*(i-1) + γ - Ψ[ii,jj,kk] = ϕ₃_self(sys, pot, i, α, β, γ) + val = ϕ₃_self(sys, pot, i, α, β, γ) + if abs(val) > tol + push!(Ψ ,FC_val(val, ii, jj, kk)) + end end end end end - #Apply tolerances - Ψ = apply_tols!(Ψ, tol) #Give proper units Ψ_unit = unit(pot.ϵ / pot.σ^3) - return DenseForceConstants(Ψ, Ψ_unit, tol) + return SparseForceConstants(Ψ, Ψ_unit, tol) + end function mass_weight_third_order!(Ψ::ThirdOrderForceConstants, masses::AbstractVector) @@ -133,118 +236,6 @@ function ϕ₃_self(sys::SuperCellSystem, pot::PairPotential, i, α, β, γ) end -""" -Mass weights the force constant matrix such that element i,j,k is divided by -sqrt(m_i * m_j * m_k). This is useful when converting to modal coupling constants. -The force constants are also returned in sparse format as a vector of values and indices. -""" -function mass_weight_sparsify_third_order(Ψ::ThirdOrderForceConstants, masses::AbstractVector) - - N_modes = size(Ψ)[1] - D = Int(N_modes/length(masses)) - N_atoms = length(masses) - - @assert D ∈ [1,2,3] - - mass_unit = unit(masses[1]) - masses = ustrip.(masses) - - num_nonzero = sum(Ψ.values .!= 0.0) - Ψ_non_zero_mw = Vector{F3_val}(undef,(num_nonzero,)) - - count = 1 - for i in 1:N_atoms - for j in 1:N_atoms - for k in 1:N_atoms - for α in 1:D - for β in 1:D - for γ in 1:D - ii = D*(i-1) + α; jj = D*(j-1) + β; kk = D*(k-1) + γ - if Ψ[ii,jj,kk] != 0 - Ψ_non_zero_mw[count] = F3_val(ii, jj, kk, Ψ[ii,jj,kk]/sqrt(masses[i]*masses[j]*masses[k])) - count += 1 - end - end - end - end - end - end - end - @assert (count - 1) == num_nonzero - - - return SparseForceConstants(Ψ_non_zero_mw, Ψ.units/mass_unit, Ψ.tol) -end - - -function third_order_test(sys::SuperCellSystem{D}, pot::PairPotential, tol) where D - @assert all(pot.r_cut .< sys.box_sizes_SC) "Cutoff larger than L/2" - N_atoms = n_atoms(sys) - Ψ = zeros(D*N_atoms, D*N_atoms, D*N_atoms) - - - # pot is pair potential only loop atomic pairs - Threads.@threads for i in range(1,N_atoms) - r = zeros(D)*unit(sys.atoms.position[1][1]) #pre-allocate per thread - for j in range(i+1,N_atoms) - - #i,i,j terms - r .= sys.atoms.position[i] .- sys.atoms.position[j] - nearest_mirror!(r, sys.box_sizes_SC) - dist = norm(r) - - - if dist < pot.r_cut - for α in range(1,D) - for β in range(1,D) - for γ in range(1,D) - val = -ustrip(ϕ₃(pot, dist, r, α, β, γ)) #* re-calculating derivatives ununecessarily in here - ii = D*(i-1) + α; jj = D*(i-1) + β; kk = D*(j-1) + γ - Ψ[ii,jj,kk] = val; Ψ[ii,kk,jj] = val - Ψ[jj,kk,ii] = val; Ψ[jj,ii,kk] = val - Ψ[kk,ii,jj] = val; Ψ[kk,jj,ii] = val - end - end - end - - end - - # #j,j,i term - r .= sys.atoms.position[j] .- sys.atoms.position[i] - nearest_mirror!(r, sys.box_sizes_SC) - dist = norm(r) - - if dist < pot.r_cut - for α in range(1,D) - for β in range(1,D) - for γ in range(1,D) - val = -ustrip(ϕ₃(pot, dist, r, α, β, γ)) - ii = D*(i-1) + α; jj = D*(j-1) + β; kk = D*(j-1) + γ - Ψ[ii,jj,kk] = val; Ψ[ii,kk,jj] = val - Ψ[jj,kk,ii] = val; Ψ[jj,ii,kk] = val - Ψ[kk,ii,jj] = val; Ψ[kk,jj,ii] = val - end - end - end - - end - end - end - - #Self terms - Ψ = ASR!(Ψ, N_atoms, D) - - #Apply tolerances - Ψ = apply_tols!(Ψ, tol) - - #Give proper units - Ψ_unit = unit(pot.ϵ / pot.σ^3) - - return DenseForceConstants(Ψ, Ψ_unit, tol) - -end - - function ASR!(ifc3::Array{T,3}, N_atoms, D) where T #Loop all self terms Threads.@threads for i in range(1,N_atoms) @@ -267,4 +258,75 @@ function ASR!(ifc3::Array{T,3}, N_atoms, D) where T end return ifc3 -end \ No newline at end of file +end + + +#Old slower version +# """ +# Calculates analytical third order force constants for a pair potential (e.g. Lennard Jones). +# For pair potentials the only non-zero terms will be self-terms (e.g. i,i,i) and terms where +# i = j or j = k (e.g. 1,1,2). +# """ +# function third_order_IFC(sys::SuperCellSystem{D}, pot::PairPotential, tol) where D +# @assert all(pot.r_cut .< sys.box_sizes_SC) "Cutoff larger than L/2" +# N_atoms = n_atoms(sys) +# Ψ = zeros(D*N_atoms,D*N_atoms,D*N_atoms) + +# unique_indices = with_replacement_combinations((1:N_atoms), 3) +# is_not_self_term = (i,j,k) -> !(i == j && i == k) +# not_all_unique = (i,j,k) -> (i == j || i == k || j == k) + +# r = zeros(D)*unit(sys.atoms.position[1][1]) +# for idx in unique_indices #TODO PARALLELIZE THIS, HOW TO STILL USE ITERATOR? just for i <= j <= k +# i,j,k = idx +# if is_not_self_term(i,j,k) && not_all_unique(i,j,k) +# # println(i," ",j," ",k) + +# if i == j +# r .= sys.atoms.position[i] .- sys.atoms.position[k] +# elseif i == k +# r .= sys.atoms.position[i] .- sys.atoms.position[j] +# elseif j == k +# r .= sys.atoms.position[j] .- sys.atoms.position[i] +# end + +# r .= nearest_mirror(r, sys.box_sizes_SC) +# dist = norm(r) + +# if dist < pot.r_cut +# for α in range(1,D) +# for β in range(1,D) +# for γ in range(1,D) +# ii = D*(i-1) + α; jj = D*(j-1) + β; kk = D*(k-1) + γ + +# val = -ustrip(ϕ₃(pot, dist, r, α, β, γ)) +# Ψ[ii,jj,kk] = val; Ψ[ii,kk,jj] = val +# Ψ[jj,kk,ii] = val; Ψ[jj,ii,kk] = val +# Ψ[kk,ii,jj] = val; Ψ[kk,jj,ii] = val +# end +# end +# end +# end +# end +# end + +# #Acoustic Sum Rule (technically re-calculating the whole loop above here) +# Threads.@threads for i in range(1,N_atoms) +# for α in range(1,D) +# for β in range(1,D) +# for γ in range(1,D) +# ii = D*(i-1) + α; jj = D*(i-1) + β; kk = D*(i-1) + γ +# Ψ[ii,jj,kk] = ϕ₃_self(sys, pot, i, α, β, γ) +# end +# end +# end +# end + +# #Apply tolerances +# Ψ = apply_tols!(Ψ, tol) + +# #Give proper units +# Ψ_unit = unit(pot.ϵ / pot.σ^3) + +# return DenseForceConstants(Ψ, Ψ_unit, tol) +# end \ No newline at end of file diff --git a/src/types.jl b/src/types.jl index 7882684..20066b2 100644 --- a/src/types.jl +++ b/src/types.jl @@ -1,5 +1,6 @@ export SuperCellSystem, Potential, PairPotential, ThreeBodyPotential, - masses, mass, position, positions, charges, charge, n_atoms, n_atoms_per_uc + masses, mass, position, positions, charges, charge, n_atoms, n_atoms_per_uc, + SparseForceConstants, DenseForceConstants struct Atom{P,C,M} position::P @@ -75,6 +76,8 @@ end Base.size(fc::DenseForceConstants) = size(fc.values) Base.getindex(fc::DenseForceConstants{O}, idxs::Vararg{Integer, O}) where O = fc.values[idxs...] +Base.getindex(fc::DenseForceConstants{O}, idxs::CartesianIndex{O}) where O = fc.values[idxs] + n_modes(fc::DenseForceConstants) = size(fc)[1] #type aliases @@ -93,42 +96,58 @@ function FC_val(val, idxs::Vararg{Integer, O}) where O return FC_val{typeof(val), O}(val, Vector{Int32}(idxs)) end +function FC_val(val, idxs::AbstractVector{<:Integer}) + return FC_val{typeof(val), length(idxs)}(val, Int32.(idxs)) +end + +function FC_val(val, idxs::Tuple) + return FC_val{typeof(val), length(idxs)}(val, collect(idxs)) +end + struct SparseForceConstants{O,V,U,T} <: AbstractForceConstants{O} - values::AbstractVector{FC_val{V,O}} #& MAKE THIS A STRUCT ARRAY??? + values::StructArray{FC_val{V,O}} #1 entry per DoF units::U tol::T end -# struct SparseForceConstantsTest{O,V,U,T} <: AbstractForceConstant{O} -# values::StructArray{FC_val{V,O}} #& MAKE THIS A STRUCT ARRAY??? -# units::U -# tol::T -# end - -Base.length(fc::SparseForceConstants) = length(fc.values) +Base.length(fc::SparseForceConstants) = sum(length.(fc.values)) #type aliases const SparseSecondOrder{V,U,T} = SparseForceConstants{2,V,U,T} const SparseThirdOrder{V,U,T} = SparseForceConstants{3,V,U,T} const SparseFourthOrder{V,U,T} = SparseForceConstants{4,V,U,T} +function SparseForceConstants(vals::Vector{FC_val{V, O}}, units, tol) where {V,O} + return SparseForceConstants{O, V, typeof(units), typeof(tol)}(StructArray(vals), units, tol) +end # #Construct SparseForceConstants object from dense ForceConstants object -function SparseForceConstants(fc::ForceConstants{O,V,U,T}) where {O,V,U,T} - N_modes = size(fc)[1] - - idxs = with_replacement_combinations(1:N_modes, O) - values = FC_Val{V,O}[] - for idx in idxs - if fc[idx...] != 0.0 - for perm in permutations(idx) - push!(values, FC_Val(fc[perm...], perm...)) - end +function SparseForceConstants(fc::DenseForceConstants{O,V,U,T}; nthreads::Integer = Threads.nthreads()) where {O,V,U,T} + + num_nonzero = sum(fc.values .!= 0.0) + sa = StructArray{FC_val{V,O}}(undef, (num_nonzero,)) + + count = 1 + for idx in CartesianIndices(fc.values) + if fc[idx] != 0.0 + sa[count] = FC_val(fc[idx],Tuple(idx)) + count += 1 end end - return SparseForceConstants{O,V,U,T}(values, fc.units, fc.tol) + return SparseForceConstants{O,V,U,T}(sa, fc.units, fc.tol) +end + +#&NEED TO TEST THIS +function sort_by_mode(sfc::SparseForceConstants{O,V}, n_dof::Integer) where {O,V} + data = Vector{StructArray{FC_val{V,O}}}(undef, n_dof) + + for fc in sfc.values + data[fc.idxs[1]] = fc + end + + return data end # Storage is multiple vectors, acts as single vector diff --git a/test/runtests.jl b/test/runtests.jl index 6136b48..b0211e2 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -18,11 +18,14 @@ else @warn "The GPU tests will not be run as a CUDA-enabled device is not available" end -pot = LJ(3.4u"Å", 0.24037u"kcal * mol^-1", 8.5u"Å") -fcc_crystal = FCC(5.2468u"Å", :Ar, SVector(4,4,4)) -sys = SuperCellSystem(fcc_crystal) -dynmat = dynamicalMatrix(sys, pot, 1e-12); -freqs_sq, phi = get_modes(dynmat_, 3) +#& Check that calculating sparse 3rd equals generating it from the dense +#& Various U_TEP tests, per mode vs total vs parallel vs serial + +# pot = LJ(3.4u"Å", 0.24037u"kcal * mol^-1", 8.5u"Å") +# fcc_crystal = FCC(5.2468u"Å", :Ar, SVector(4,4,4)) +# sys = SuperCellSystem(fcc_crystal) +# dynmat = dynamicalMatrix(sys, pot, 1e-12); +# freqs_sq, phi = get_modes(dynmat, 3) # @testset "Supercell vs Unitcell" begin @@ -54,19 +57,21 @@ freqs_sq, phi = get_modes(dynmat_, 3) @testset "MCC3, MCC3 Blocked" begin #Load test dataset - m = 39.95 - phi, dynmat, F3, K3_actual, freqs_sq = load("./test_data/TEP.jld2", "phi", "dynmat", "F3", "K3", "freqs_sq") - N_modes = length(freqs_sq) #should be 96 + if run_gpu_tests + m = 39.95 + phi, dynmat, F3, K3_actual, freqs_sq = load("./test_data/TEP.jld2", "phi", "dynmat", "F3", "K3", "freqs_sq") + N_modes = length(freqs_sq) #should be 96 - block_size = 32 - F3 ./= sqrt(m^3) - K3_full = mcc3(CuArray{Float32}(F3), CuArray{Float32}(phi)) - K3_blocked = mcc3(CuArray{Float32}(F3), CuArray{Float32}(phi), block_size) + block_size = 32 + F3 ./= sqrt(m^3) + K3_full = mcc3(CuArray{Float32}(F3), CuArray{Float32}(phi)) + K3_blocked = mcc3(CuArray{Float32}(F3), CuArray{Float32}(phi), block_size) - max_idx = argmax(K3_actual) + max_idx = argmax(K3_actual) - @test isapprox(K3_actual, K3_full, atol = 1e-6) - @test isapprox(K3_actual, K3_blocked, atol = 1e-6) + @test isapprox(K3_actual, K3_full, atol = 1e-6) + @test isapprox(K3_actual, K3_blocked, atol = 1e-6) + end end