diff --git a/Project.toml b/Project.toml index db5a705..9eaaec6 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Gaius" uuid = "bffe22d1-cb55-4f4e-ac2c-f4dd4bf58912" authors = ["MasonProtter "] -version = "0.6.2" +version = "0.6.3" [deps] LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" @@ -13,10 +13,10 @@ VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f" [compat] BenchmarkTools = "0.5" -LoopVectorization = "0.9.18" +LoopVectorization = "0.10" StructArrays = "0.4.4" UnsafeArrays = "1.0.1" -VectorizationBase = "0.15.2" +VectorizationBase = "0.16" julia = "1.5" [extras] diff --git a/src/Gaius.jl b/src/Gaius.jl index 71de1e2..75d9af3 100644 --- a/src/Gaius.jl +++ b/src/Gaius.jl @@ -12,7 +12,7 @@ using LinearAlgebra: Adjoint, Transpose using LoopVectorization: @avx using StructArrays: StructArray using UnsafeArrays: @uviews, UnsafeArray -using VectorizationBase: AVX512F, AbstractStridedPointer, gesp, vload, vstore! +using VectorizationBase: AbstractStridedPointer, gesp, vload, vstore! export t_blocked_mul export t_blocked_mul! diff --git a/src/choose_block_size.jl b/src/choose_block_size.jl index daecc6c..ac1b085 100644 --- a/src/choose_block_size.jl +++ b/src/choose_block_size.jl @@ -1,6 +1,6 @@ function choose_block_size(C, A, B, ::Nothing) - if (*)(length(C) |> Int128, length(A) |> Int128, length(B) |> Int128) >= ((3DEFAULT_BLOCK_SIZE) >>> 1)^6 - DEFAULT_BLOCK_SIZE + if (*)(length(C) |> Int128, length(A) |> Int128, length(B) |> Int128) >= ((3default_block_size()) >>> 1)^6 + default_block_size() else 32 end diff --git a/src/global_constants.jl b/src/global_constants.jl index 02e3def..b3ac299 100644 --- a/src/global_constants.jl +++ b/src/global_constants.jl @@ -1 +1 @@ -const DEFAULT_BLOCK_SIZE = AVX512F ? 96 : 64 +@generated default_block_size() = VectorizationBase.has_feature("x86_64_avx512f") ? 96 : 64 diff --git a/src/init.jl b/src/init.jl index 112a6f1..65e3cb6 100644 --- a/src/init.jl +++ b/src/init.jl @@ -4,7 +4,7 @@ function __init__() end function _print_num_threads_warning() - sys_nc = VectorizationBase.NUM_CORES + sys_nc = VectorizationBase.num_cores() jl_nt = Threads.nthreads() return _print_num_threads_warning(sys_nc, jl_nt) end diff --git a/src/public_mul.jl b/src/public_mul.jl index 41bc11c..3eee603 100644 --- a/src/public_mul.jl +++ b/src/public_mul.jl @@ -95,7 +95,7 @@ function mul_serial!(C::AbstractArray{T}, A::AbstractArray{T}, B::AbstractArray{ end function mul!(C::StructArray{Complex{T}}, A::StructArray{Complex{T}}, B::StructArray{Complex{T}}; - block_size = DEFAULT_BLOCK_SIZE, sizecheck=true) where {T <: Eltypes} + block_size = default_block_size(), sizecheck=true) where {T <: Eltypes} sizecheck && check_compatible_sizes(C.re, A.re, B.re) _block_size = choose_block_size(C, A, B, block_size) @@ -113,7 +113,7 @@ function mul!(C::StructArray{Complex{T}}, A::StructArray{Complex{T}}, B::StructA end function mul_serial!(C::StructArray{Complex{T}}, A::StructArray{Complex{T}}, B::StructArray{Complex{T}}; - block_size = DEFAULT_BLOCK_SIZE, sizecheck=true) where {T <: Eltypes} + block_size = default_block_size(), sizecheck=true) where {T <: Eltypes} sizecheck && check_compatible_sizes(C.re, A.re, B.re) _block_size = choose_block_size(C, A, B, block_size) @@ -133,7 +133,7 @@ end function mul!(C::Adjoint{Complex{T}, <:StructArray{Complex{T}}}, A::Adjoint{Complex{T}, <:StructArray{Complex{T}}}, B::StructArray{Complex{T}}; - block_size = DEFAULT_BLOCK_SIZE, sizecheck=true) where {T <: Eltypes} + block_size = default_block_size(), sizecheck=true) where {T <: Eltypes} sizecheck && check_compatible_sizes(C.parent.re', A.parent.re', B.re) _block_size = choose_block_size(C, A, B, block_size) @@ -155,7 +155,7 @@ end function mul_serial!(C::Adjoint{Complex{T}, <:StructArray{Complex{T}}}, A::Adjoint{Complex{T}, <:StructArray{Complex{T}}}, B::StructArray{Complex{T}}; - block_size = DEFAULT_BLOCK_SIZE, sizecheck=true) where {T <: Eltypes} + block_size = default_block_size(), sizecheck=true) where {T <: Eltypes} sizecheck && check_compatible_sizes(C.parent.re', A.parent.re', B.re) _block_size = choose_block_size(C, A, B, block_size) @@ -177,7 +177,7 @@ end function mul!(C::Transpose{Complex{T}, <:StructArray{Complex{T}}}, A::Transpose{Complex{T}, <:StructArray{Complex{T}}}, B::StructArray{Complex{T}}; - block_size = DEFAULT_BLOCK_SIZE, sizecheck=true) where {T <: Eltypes} + block_size = default_block_size(), sizecheck=true) where {T <: Eltypes} sizecheck && check_compatible_sizes(C.parent.re |> transpose, A.parent.re |> transpose, B.re) _block_size = choose_block_size(C, A, B, block_size) @@ -197,7 +197,7 @@ end function mul_serial!(C::Transpose{Complex{T}, <:StructArray{Complex{T}}}, A::Transpose{Complex{T}, <:StructArray{Complex{T}}}, B::StructArray{Complex{T}}; - block_size = DEFAULT_BLOCK_SIZE, sizecheck=true) where {T <: Eltypes} + block_size = default_block_size(), sizecheck=true) where {T <: Eltypes} sizecheck && check_compatible_sizes(C.parent.re |> transpose, A.parent.re |> transpose, B.re) _block_size = choose_block_size(C, A, B, block_size) diff --git a/test/runtests.jl b/test/runtests.jl index 8a80f56..90b0526 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -14,7 +14,7 @@ using Test: @testset, @test, @test_logs, @test_throws include("test_suite_preamble.jl") -@info("VectorizationBase.NUM_CORES is $(VectorizationBase.NUM_CORES)") +@info("VectorizationBase.num_cores() is $(VectorizationBase.num_cores())") include("block_operations.jl") include("public_mul_coverage.jl")