diff --git a/examples/09_Disorder_KPM.jl b/examples/09_Disorder_KPM.jl index c7a286c46..7c6ca5901 100644 --- a/examples/09_Disorder_KPM.jl +++ b/examples/09_Disorder_KPM.jl @@ -47,12 +47,13 @@ plot_intensities(res) sys_inhom = to_inhomogeneous(repeat_periodically(sys, (10, 10, 1))) -# Use [`symmetry_equivalent_bonds`](@ref) to iterate over all nearest neighbor -# bonds of the inhomogeneous system. Modify each AFM exchange with a noise term -# that has variance of 1/3. The newly minimized energy configuration allows for -# long wavelength modulations on top of the original 120° order. +# Iterate over all [`symmetry_equivalent_bonds`](@ref) for nearest neighbor +# sites in the inhomogeneous system. Set the AFM exchange to include a noise +# term that has a standard deviation of 1/3. The newly minimized energy +# configuration allows for long wavelength modulations on top of the original +# 120° order. -for (site1, site2, offset) in symmetry_equivalent_bonds(sys_inhom, Bond(1,1,[1,0,0])) +for (site1, site2, offset) in symmetry_equivalent_bonds(sys_inhom, Bond(1, 1, [1, 0, 0])) noise = randn()/3 set_exchange_at!(sys_inhom, 1.0 + noise, site1, site2; offset) end @@ -60,38 +61,32 @@ end minimize_energy!(sys_inhom, maxiters=5_000) plot_spins(sys_inhom; color=[S[3] for S in sys_inhom.dipoles], ndims=2) -# Traditional spin wave theory calculations become impractical for large system -# sizes. Significant acceleration is possible with the [kernel polynomial -# method](https://arxiv.org/abs/2312.08349). Enable it by selecting -# [`SpinWaveTheoryKPM`](@ref) in place of the traditional -# [`SpinWaveTheory`](@ref). Using KPM, the cost of an [`intensities`](@ref) -# calculation becomes linear in system size and scales inversely with the width -# of the line broadening `kernel`. Error tolerance is controlled through the -# dimensionless `tol` parameter. A relatively small value, `tol = 0.01`, helps -# to resolve the large intensities near the ordering wavevector. The alternative -# choice `tol = 0.1` would be twice faster, but would introduce significant -# numerical artifacts. +# Spin wave theory with exact diagonalization becomes impractical for large +# system sizes. Significant acceleration is possible with an iterative Krylov +# space solver. With [`SpinWaveTheoryKPM`](@ref), the cost of an +# [`intensities`](@ref) calculation scales linearly in the system size and +# inversely with the width of the line broadening `kernel`. Good choices for the +# dimensionless error tolerance are `tol=0.05` (more speed) or `tol=0.01` (more +# accuracy). # # Observe from the KPM calculation that disorder in the nearest-neighbor # exchange serves to broaden the discrete excitation bands into a continuum. -swt = SpinWaveTheoryKPM(sys_inhom; measure=ssf_perp(sys_inhom), tol=0.01) +swt = SpinWaveTheoryKPM(sys_inhom; measure=ssf_perp(sys_inhom), tol=0.05) res = intensities(swt, path; energies, kernel) plot_intensities(res) # Now apply a magnetic field of magnitude 7.5 (energy units) along the global -# ``ẑ`` axis. This field fully polarizes the spins. Because gap opens, a larger -# tolerance of `tol = 0.1` can be used to accelerate the KPM calculation without -# sacrificing much accuracy. The resulting spin wave spectrum shows a sharp mode -# at the Γ-point (zone center) that broadens into a continuum along the K and M -# points (zone boundary). +# ``ẑ`` axis. This field fully polarizes the spins and opens a gap. The spin +# wave spectrum shows a sharp mode at the Γ-point (zone center) that broadens +# into a continuum along the K and M points (zone boundary). set_field!(sys_inhom, [0, 0, 7.5]) randomize_spins!(sys_inhom) minimize_energy!(sys_inhom) energies = range(0.0, 9.0, 150) -swt = SpinWaveTheoryKPM(sys_inhom; measure=ssf_perp(sys_inhom), tol=0.1) +swt = SpinWaveTheoryKPM(sys_inhom; measure=ssf_perp(sys_inhom), tol=0.05) res = intensities(swt, path; energies, kernel) plot_intensities(res) @@ -105,8 +100,10 @@ for site in eachsite(sys_inhom) noise = randn()/6 sys_inhom.gs[site] = [1 0 0; 0 1 0; 0 0 1+noise] end +randomize_spins!(sys_inhom) +minimize_energy!(sys_inhom) -swt = SpinWaveTheoryKPM(sys_inhom; measure=ssf_perp(sys_inhom), tol=0.1) +swt = SpinWaveTheoryKPM(sys_inhom; measure=ssf_perp(sys_inhom), tol=0.05) res = intensities(swt, path; energies, kernel) plot_intensities(res) diff --git a/src/KPM/Lanczos.jl b/src/KPM/Lanczos.jl index bb5ea2b67..a362e87dc 100644 --- a/src/KPM/Lanczos.jl +++ b/src/KPM/Lanczos.jl @@ -1,13 +1,40 @@ -# Reference implementation, without consideration of speed. +# Reference implementation of the generalized Lanczos method, without +# consideration of speed. The input A can be any Hermitian matrix. The optional +# argument S is an inner-product that must be both Hermitian and positive +# definite. Intuitively, Lanczos finds a low-rank approximant A S ≈ Q T Q† S, +# where T is tridiagonal and Q is orthogonal in the sense that Q† S Q = I. The +# first column of Q is the initial vector v, which must be normalized. The +# Lanczos method is useful in two ways. First, eigenvalues of T are +# representative of extremal eigenvalues of A (in an appropriate S-measure +# sense). Second, one obtains a very good approximation to the matrix-vector +# product, f(A S) v ≈ Q f(T) e₁, valid for any function f [1]. +# +# The generalization of Lanczos to an arbitrary measure S is implemented as +# follows: Each matrix-vector product A v is replaced by A S v, and each vector +# inner product w† v is replaced by w† S v. Similar generalizations of Lanczos +# have been considered in [2] and [3]. +# +# 1. N. Amsel, T. Chen, A. Greenbaum, C. Musco, C. Musco, Near-optimal +# approximation of matrix functions by the Lanczos method, (2013) +# https://arxiv.org/abs/2303.03358v2. +# 2. H. A. van der Vorst, Math. Comp. 39, 559-561 (1982), +# https://doi.org/10.1090/s0025-5718-1982-0669648-0 +# 3. M. Grüning, A. Marini, X. Gonze, Comput. Mater. Sci. 50, 2148-2156 (2011), +# https://doi.org/10.1016/j.commatsci.2011.02.021. function lanczos_ref(A, S, v; niters) αs = Float64[] βs = Float64[] + vs = typeof(v)[] + + c = sqrt(real(dot(v, S, v))) + @assert c ≈ 1 "Initial vector not normalized" + v /= c - v /= sqrt(real(dot(v, S, v))) w = A * S * v α = real(dot(w, S, v)) w = w - α * v push!(αs, α) + push!(vs, v) for _ in 2:niters β = sqrt(real(dot(w, S, w))) @@ -19,35 +46,54 @@ function lanczos_ref(A, S, v; niters) v = vp push!(αs, α) push!(βs, β) + push!(vs, v) end - return SymTridiagonal(αs, βs) + Q = reduce(hcat, vs) + T = SymTridiagonal(αs, βs) + return (; Q, T) end - -# Use Lanczos iterations to find a truncated tridiagonal representation of A S, -# up to similarity transformation. Here, A is any Hermitian matrix, while S is -# both Hermitian and positive definite. Traditional Lanczos uses the identity -# matrix, S = I. The extension to non-identity matrices S is as follows: Each -# matrix-vector product A v becomes A S v, and each vector inner product w† v -# becomes w† S v. The implementation below follows the notation of Wikipedia, -# and is the most stable of the four variants considered by Paige [1]. Each -# Lanczos iteration requires only one matrix-vector multiplication for A and S, -# respectively. - -# Similar generalizations of Lanczos have been considered in [2] and [3]. +# An optimized implementation of the generalized Lanczos method. See +# `lanczos_ref` for explanation, and a reference implementation. This optimized +# implementation follows "the most stable" of the four variants considered by +# Paige [1], as listed on Wikipedia. Here, however, we allow for an arbitary +# measure S. In practice, this means that each matrix-vector product A v is +# replaced by A S v, and each inner product w† v is replaced by w† S v. +# +# In this implementation, each Lanczos iteration requires only one matrix-vector +# multiplication involving A and S, respectively. +# +# Details: +# +# * The return value is a pair, (T, lhs† Q). The symbol `lhs` denotes "left-hand +# side" column vectors, packed horizontally into a matrix. If the `lhs` +# argument is ommitted, its default value will be a matrix of width 0. +# * If the initial vector `v` is not normalized, then this normalization will be +# performed automatically, and the scale `√v S v` will be absorbed into the +# return value `lhs† Q`. +# * The number of iterations will never exceed length(v). If this limit is hit +# then, mathematically, the Krylov subspace should be complete, and the matrix +# T should be exactly similar to the matrix A S. In practice, however, +# numerical error at finite precision may be severe. Further testing is +# required to determine whether the Lanczos method is appropriate in this +# case, where the matrices have modest dimension. (Direct diagonalization may +# be preferable.) +# * After `min_iters` iterations, this procedure will estimate the spectral +# bandwidth Δϵ between extreme eigenvalues of T. Then `Δϵ / resolution` will +# be an upper bound on the total number of iterations (which includes the +# initial `min_iters` iterations as well as subsequent ones). # # 1. C. C. Paige, IMA J. Appl. Math., 373-381 (1972), -# https://doi.org/10.1093%2Fimamat%2F10.3.373. -# 2. H. A. van der Vorst, Math. Comp. 39, 559-561 (1982), -# https://doi.org/10.1090/s0025-5718-1982-0669648-0 -# 3. M. Grüning, A. Marini, X. Gonze, Comput. Mater. Sci. 50, 2148-2156 (2011), -# https://doi.org/10.1016/j.commatsci.2011.02.021. -function lanczos(mulA!, mulS!, v; niters) +# https://doi.org/10.1093%2Fimamat%2F10.3.373. + +function lanczos(mulA!, mulS!, v; min_iters, resolution=Inf, lhs=zeros(length(v), 0), verbose=false) αs = Float64[] βs = Float64[] + lhs_adj_Q = Vector{ComplexF64}[] + v = copy(v) vp = zero(v) Sv = zero(v) Svp = zero(v) @@ -56,6 +102,7 @@ function lanczos(mulA!, mulS!, v; niters) mulS!(Sv, v) c = sqrt(real(dot(v, Sv))) + @assert c ≈ 1 "Initial vector not normalized" @. v /= c @. Sv /= c @@ -64,10 +111,36 @@ function lanczos(mulA!, mulS!, v; niters) @. w = w - α * v mulS!(Sw, w) push!(αs, α) - - for _ in 2:niters - β = sqrt(real(dot(w, Sw))) - iszero(β) && break + push!(lhs_adj_Q, lhs' * v) + + # The maximum number of iterations is length(v)-1, because that is the + # dimension of the full vector space. Break out early if niters has been + # reached, which will be set according to the spectral bounds Δϵ after + # iteration i == min_iters. + niters = typemax(Int) + for i in 1:length(v)-1 + if i == min_iters + T = SymTridiagonal(αs, βs) + Δϵ = eigmax(T) - eigmin(T) + niters = max(min_iters, fld(Δϵ, resolution)) + niters += mod(niters, 2) # Round up to nearest even integer + if verbose + println("Δϵ=$Δϵ, niters=$niters") + end + end + + i >= niters && break + + # Let β = w† S w. If β < 0 then S is not a valid positive definite + # measure. If β = 0, this would formally indicate that v is a linear + # combination of only a subset of the eigenvectors of A S. In this case, + # it is valid to break early for purposes of approximating the + # matrix-vector product f(A S) v. + β² = real(dot(w, Sw)) + iszero(β²) && break + β² < 0 && error("S is not a positive definite measure") + + β = sqrt(β²) @. vp = w / β @. Svp = Sw / β mulA!(w, Svp) @@ -77,9 +150,10 @@ function lanczos(mulA!, mulS!, v; niters) @. v = vp push!(αs, α) push!(βs, β) + push!(lhs_adj_Q, lhs' * v) end - return SymTridiagonal(αs, βs) + return SymTridiagonal(αs, βs), reduce(hcat, lhs_adj_Q) end @@ -107,6 +181,9 @@ function eigbounds(swt, q_reshaped, niters) end v = randn(ComplexF64, 2L) - tridiag = lanczos(mulA!, mulS!, v; niters) - return eigmin(tridiag), eigmax(tridiag) + Sv = zero(v) + mulS!(Sv, v) + v /= sqrt(v' * Sv) + T, _ = lanczos(mulA!, mulS!, v; min_iters=niters) + return eigmin(T), eigmax(T) end diff --git a/src/KPM/SpinWaveTheoryKPM.jl b/src/KPM/SpinWaveTheoryKPM.jl index e2e989ef7..31c51cb83 100644 --- a/src/KPM/SpinWaveTheoryKPM.jl +++ b/src/KPM/SpinWaveTheoryKPM.jl @@ -2,24 +2,16 @@ SpinWaveTheoryKPM(sys::System; measure, regularization=1e-8, tol) A variant of [`SpinWaveTheory`](@ref) that uses the kernel polynomial method -(KPM) to perform [`intensities`](@ref) calculations [1]. Instead of explicitly -diagonalizing the dynamical matrix, KPM approximates intensities using -polynomial expansion truncated at order ``M``. The reduces the computational -cost from ``𝒪(N^3)`` to ``𝒪(N M)``, which is favorable for large system sizes -``N``. +(KPM) to calculate [`intensities`](@ref) [1]. This method avoids direct matrix +diagonalization, which scales cubically in the system size ``N``. Instead, using +``M`` iterative matrix-vector multiplications, the cost becomes ``𝒪(N M + +M^2)``. This provides a very significant acceleration when ``N`` is large. -The polynomial order ``M`` will be determined from the line broadening kernel -and the specified error tolerance `tol`. Specifically, for each wavevector, -``M`` scales like the spectral bandwidth of excitations, divided by the energy -resolution of the broadening kernel, times the negative logarithm of `tol`. - -The error tolerance `tol` should be tuned empirically for each calculation. -Reasonable starting points are `1e-1` (more speed) or `1e-2` (more accuracy). - -!!! warning "Missing intensity at small quasi-particle energy" - The KPM-calculated intensities are unreliable at small energies ``ω``. In - particular, KPM may mask intensities that arise near the Goldstone modes of - an ordered state with continuous symmetry. +The number of iterations is typically not too large: `M ≈ -2 log10(tol) Δϵ / +fwhm` where `Δϵ` is the estimated spectral bandwidth of excitations, `fwhm` is +the full width at half maximum of the broadening kernel, and `tol` is a +dimensionless tolerance parameter. Good choices are `0.05` (more speed) or +`0.01` (more accuracy). ## References @@ -185,5 +177,137 @@ end function intensities(swt_kpm::SpinWaveTheoryKPM, qpts; energies, kernel::AbstractBroadening, kT=0.0, verbose=false) qpts = convert(AbstractQPoints, qpts) data = zeros(eltype(swt_kpm.swt.measure), length(energies), length(qpts.qs)) - return intensities!(data, swt_kpm, qpts; energies, kernel, kT, verbose) + # return intensities!(data, swt_kpm, qpts; energies, kernel, kT, verbose) + return intensities2!(data, swt_kpm, qpts; energies, kernel, kT, verbose) +end + + +function intensities2!(data, swt_kpm::SpinWaveTheoryKPM, qpts; energies, kernel::AbstractBroadening, kT=0.0, verbose=false) + qpts = convert(AbstractQPoints, qpts) + + (; swt, tol, lanczos_iters) = swt_kpm + (; sys, measure) = swt + cryst = orig_crystal(sys) + + isnothing(kernel.fwhm) && error("Cannot determine the kernel fwhm") + + @assert eltype(data) == eltype(measure) + @assert size(data) == (length(energies), length(qpts.qs)) + fill!(data, zero(eltype(data))) + + Na = nsites(sys) + Ncells = Na / natoms(cryst) + Nf = nflavors(swt) + L = Nf*Na + Avec_pref = zeros(ComplexF64, Na) + + Nobs = size(measure.observables, 1) + Ncorr = length(measure.corr_pairs) + corrbuf = zeros(ComplexF64, Ncorr) + + u = zeros(ComplexF64, 2L, Nobs) + v = zeros(ComplexF64, 2L) + Sv = zeros(ComplexF64, 2L) + + for (iq, q) in enumerate(qpts.qs) + q_reshaped = to_reshaped_rlu(sys, q) + q_global = cryst.recipvecs * q + + # Represent each local observable A(q) as a complex vector u(q) that + # denotes a linear combination of HP bosons. + + for i in 1:Na + r = sys.crystal.positions[i] + ff = get_swt_formfactor(measure, 1, i) + Avec_pref[i] = exp(2π*im * dot(q_reshaped, r)) + Avec_pref[i] *= compute_form_factor(ff, norm2(q_global)) + end + + if sys.mode == :SUN + (; observables_localized) = swt.data::SWTDataSUN + N = sys.Ns[1] + for μ in 1:Nobs, i in 1:Na + O = observables_localized[μ, i] + for f in 1:Nf + u[f + (i-1)*Nf, μ] = Avec_pref[i] * O[f, N] + u[f + (i-1)*Nf + L, μ] = Avec_pref[i] * O[N, f] + end + end + else + @assert sys.mode in (:dipole, :dipole_uncorrected) + (; sqrtS, observables_localized) = swt.data::SWTDataDipole + for μ in 1:Nobs, i in 1:Na + O = observables_localized[μ, i] + u[i, μ] = Avec_pref[i] * (sqrtS[i] / √2) * (O[1] + im*O[2]) + u[i+L, μ] = Avec_pref[i] * (sqrtS[i] / √2) * (O[1] - im*O[2]) + end + end + + # Perform Lanczos calculation + + # w = Ĩ v + function mulA!(w, v) + @views w[1:L] = +v[1:L] + @views w[L+1:2L] = -v[L+1:2L] + return w + end + + # w = D v + function mulS!(w, v) + mul_dynamical_matrix!(swt, reshape(w, 1, :), reshape(v, 1, :), [q_reshaped]) + return w + end + + resolution = (kernel.fwhm/2) / max(-log10(tol), 0) + + for ξ in 1:Nobs + # Don't accumulate observables that are zero + iszero(view(u, :, ξ)) && continue + + mulA!(v, view(u, :, ξ)) + mulS!(Sv, v) + c = sqrt(real(Sv' * v)) + v ./= c + tridiag, lhs_adj_Q = try + lanczos(mulA!, mulS!, v; lhs=u, min_iters=lanczos_iters, resolution, verbose) + catch e + if e.msg == "S is not a positive definite measure" + rethrow(ErrorException("Not an energy-minimum; wavevector q = $q unstable.")) + else + rethrow() + end + end + + (; values, vectors) = eigen(tridiag) + + for (iω, ω) in enumerate(energies) + f(x) = kernel(x, ω) * thermal_prefactor(x; kT) + + corr_ξ = c * lhs_adj_Q * vectors * Diagonal(f.(values)) * (vectors'[:, 1]) + + # This step assumes that each local observable in the + # correlation is Hermitian. In this case, bare correlations + # should be symmetric, C[μ, ν] = C[ν, μ]*. The Lanczos + # approximation C̃ breaks this symmetry. Restore it by looping + # over ξ in 1:Nobs and accumulate Lanczos data C̃[:, ξ] in a + # symmetric way. Accumulate C̃[μ, ξ] into C[μ, ν] if ξ = ν. Also + # accumulate C̃[ν, ξ]* into C[μ, ν] if ξ = μ. A factor of 1/2 + # avoids double counting. In the special case that μ = ν, this + # assigns real(C̃[μ, μ]) to C[μ, μ] only once. + corrbuf .= 0 + for (i, (μ, ν)) in enumerate(measure.corr_pairs) + ξ == ν && (corrbuf[i] += (1/2) * (corr_ξ[μ] / Ncells)) + ξ == μ && (corrbuf[i] += (1/2) * conj(corr_ξ[ν] / Ncells)) + end + + # This step assumes that combiner is linear, so that it is valid + # to move the ξ loop outside the data accumulation. One could + # relax this assumption by preallocating an array of size (Nω, + # Ncorr) to accumulate into corrbuf prior to calling combiner. + data[iω, iq] += measure.combiner(q_global, corrbuf) + end + end + end + + return Intensities(cryst, qpts, collect(energies), data) end diff --git a/src/SpinWaveTheory/DispersionAndIntensities.jl b/src/SpinWaveTheory/DispersionAndIntensities.jl index 643a750c8..b43dd9c35 100644 --- a/src/SpinWaveTheory/DispersionAndIntensities.jl +++ b/src/SpinWaveTheory/DispersionAndIntensities.jl @@ -97,7 +97,7 @@ function excitations!(T, tmp, swt::SpinWaveTheory, q) try return bogoliubov!(T, tmp) catch _ - error("Instability at wavevector q = $q") + rethrow(ErrorException("Not an energy-minimum; wavevector q = $q unstable.")) end end diff --git a/src/SpinWaveTheory/HamiltonianDipole.jl b/src/SpinWaveTheory/HamiltonianDipole.jl index aaf5581c5..f3e7c55b1 100644 --- a/src/SpinWaveTheory/HamiltonianDipole.jl +++ b/src/SpinWaveTheory/HamiltonianDipole.jl @@ -4,7 +4,7 @@ function swt_hamiltonian_dipole!(H::Matrix{ComplexF64}, swt::SpinWaveTheory, q_r (; local_rotations, stevens_coefs, sqrtS) = data (; extfield, gs) = sys - L = nbands(swt) + L = nbands(swt) @assert size(H) == (2L, 2L) # Initialize Hamiltonian buffer diff --git a/test/test_kpm.jl b/test/test_kpm.jl index 9ba1dd259..617c916f4 100644 --- a/test/test_kpm.jl +++ b/test/test_kpm.jl @@ -1,27 +1,35 @@ @testitem "Lanczos" begin using LinearAlgebra - N = 200 + N = 40 A = hermitianpart(randn(N, N)) - A = diagm(vcat(ones(N÷2), -ones(N÷2))) S = randn(N, N) S = S' * S S = S / eigmax(S) - S = S + 0.0I + S = S + 1e-2I + # Check that fast and slow Lanczos implementations match + + lhs = randn(N, 2) v = randn(N) - ev1 = eigvals(Sunny.lanczos_ref(A*S*A, S, v; niters=10)) + v /= sqrt(dot(v, S, v)) + (; Q, T) = Sunny.lanczos_ref(A, S, v; niters=10) + ev1 = eigvals(T) - mulA!(w, v) = (w .= A * S * A * v) + mulA!(w, v) = mul!(w, A, v) mulS!(w, v) = mul!(w, S, v) - ev2 = eigvals(Sunny.lanczos(mulA!, mulS!, copy(v); niters=10)) + T, lhs_adj_Q = Sunny.lanczos(mulA!, mulS!, copy(v); lhs, min_iters=10) + ev2 = eigvals(T) @test ev1 ≈ ev2 + @test lhs' * Q ≈ lhs_adj_Q + + # Check that extremal eigenvalues match to reasonable accuracy - ev3 = extrema(eigvals(Sunny.lanczos_ref(A*S*A, S, v; niters=100))) - ev4 = extrema(eigvals((A*S)^2)) - @test isapprox(collect(ev3), collect(ev4); atol=1e-3) + T, _ = Sunny.lanczos(mulA!, mulS!, copy(v); min_iters=20) + @test isapprox(eigmin(T), eigmin(A * S); atol=1e-3) + @test isapprox(eigmax(T), eigmax(A * S); atol=1e-3) end