From 4977f8c623c83853c51ec70ff5a9775e2da18fe7 Mon Sep 17 00:00:00 2001
From: Valentin Churavy
Date: Tue, 8 Oct 2024 18:48:20 +0200
Subject: [PATCH] implement vload/vstore! and a primitive Vec type

---
 base/experimental.jl |   2 ++
 base/simd.jl         | 120 ++++++++++++++++++++++++++++++++++++++++++++
 test/choosetests.jl  |   2 +-
 test/simd.jl         |  44 ++++++++++++++++
 4 files changed, 167 insertions(+), 1 deletion(-)
 create mode 100644 base/simd.jl
 create mode 100644 test/simd.jl

diff --git a/base/experimental.jl b/base/experimental.jl
index 648b5da0ed9a1a..80e596494ae0ed 100644
--- a/base/experimental.jl
+++ b/base/experimental.jl
@@ -471,4 +471,6 @@ function entrypoint(@nospecialize(argt::Type))
     nothing
 end
 
+include("simd.jl")
+
 end
diff --git a/base/simd.jl b/base/simd.jl
new file mode 100644
index 00000000000000..eef40e5f21feab
--- /dev/null
+++ b/base/simd.jl
@@ -0,0 +1,120 @@
+module SIMD
+
+import Base: VecElement, Memory, MemoryRef
+import Base: @propagate_inbounds, @_propagate_inbounds_meta, @_boundscheck, @_noub_if_noinbounds_meta
+import Base: memoryrefget, memoryrefnew, memoryrefset!
+
+export Vec
+export vload, vstore!, natural_vecwidth
+
+# TODO: See C# and Co Vec type
+# TODO: Hardware portable vector types...
+
+"""
+    Vec{N, T}
+
+Fixed-width vector of `N` elements of type `T`, stored as an `NTuple{N, VecElement{T}}`.
+
+Construct one from a homogeneous tuple, `Vec((1.0, 2.0))`, or directly from the
+elements, `Vec(1.0, 2.0)`.
+"""
+struct Vec{N, T}
+    data::NTuple{N, VecElement{T}}
+end
+
+# Constructors
+@inline Vec(v::NTuple{N, T}) where {N, T} = Vec(VecElement.(v))
+@inline Vec(v::Vararg{T, N}) where {N, T} = Vec(v)
+@inline Vec(v::Vec) = v
+
+# Numbers defines this and it is needed in power_by_squaring...
+Base.copy(v::Vec) = v
+
+# Prints as `<N x T>[x1, x2, ...]`.
+function Base.show(io::IO, v::Vec{N, T}) where {N, T}
+    # `eltype` has no method for `Vec` and falls back to `Any`, so hand `T` to
+    # `:typeinfo` directly; the element type is already spelled out in the prefix.
+    io = IOContext(io, :typeinfo => T)
+    print(io, "<$N x $T>[")
+    join(io, [sprint(show, x.value; context=io) for x in v.data], ", ")
+    print(io, "]")
+end
+
+"""
+    natural_vecwidth(T)
+
+Return the number of elements of type `T` that this module treats as the natural
+vector width: 8 for `Float32` and 4 for `Float64`.
+"""
+function natural_vecwidth end
+
+# Breaks with multi-versioning
+natural_vecwidth(::Type{Float32}) = 8
+natural_vecwidth(::Type{Float64}) = 4
+
+import Base: +, -, *
+
+# Mocked vload/vstore! relying on SLP
+
+"""
+    vload(::Type{Vec{N, T}}, A::Array{T}, i::Int) -> Vec{N, T}
+
+Load the `N` elements `A[i], ..., A[i + N - 1]` (linear indexing) into a `Vec{N, T}`.
+
+Throws a `BoundsError` if that range is not within `A`, unless the check is elided by
+`@inbounds` at the call site.
+"""
+@inline function vload(::Type{Vec{N, T}}, A::Array{T}, i::Int) where {N, T}
+    @_noub_if_noinbounds_meta
+    # TODO: Alignment...; may need an intrinsic for vectorized loads.
+    # Checking the whole range once up front, since `@inbounds` doesn't propagate
+    # through `ntuple`.
+    @boundscheck begin
+        # Not `checkbounds(A, i:(i + N - 1))`: for `i` close to `typemax(Int)` the sum
+        # wraps around, the range becomes empty, and an empty range is always in bounds.
+        len = length(A)
+        (N == 0 || 1 <= i <= len - N + 1) ||
+            Base.throw_boundserror(A, (i:(i + N - 1),))
+    end
+    mem = A.ref
+    data = ntuple(Val(N)) do j
+        # why does `@inbounds ref = memoryrefnew(mem, i + j - 1, @_boundscheck)` not work?
+        ref = memoryrefnew(mem, i + j - 1, false)
+        VecElement{T}(memoryrefget(ref, :not_atomic, false))
+    end
+    return Vec(data)
+end
+
+"""
+    vstore!(A::Array{T}, v::Vec{N, T}, i::Int)
+
+Store the `N` elements of `v` into `A[i], ..., A[i + N - 1]` (linear indexing) and
+return `nothing`.
+
+Throws a `BoundsError` if that range is not within `A`, unless the check is elided by
+`@inbounds` at the call site.
+"""
+@inline function vstore!(A::Array{T}, v::Vec{N, T}, i::Int) where {N, T}
+    @_noub_if_noinbounds_meta
+    # TODO: Alignment...; may need an intrinsic for vectorized stores.
+    # Checking the whole range once up front, since `@inbounds` doesn't propagate
+    # through `ntuple`.
+    @boundscheck begin
+        # Not `checkbounds(A, i:(i + N - 1))`: for `i` close to `typemax(Int)` the sum
+        # wraps around, the range becomes empty, and an empty range is always in bounds.
+        len = length(A)
+        (N == 0 || 1 <= i <= len - N + 1) ||
+            Base.throw_boundserror(A, (i:(i + N - 1),))
+    end
+    mem = A.ref
+    data = v.data
+    ntuple(Val(N)) do j
+        # why does `@inbounds ref = memoryrefnew(mem, i + j - 1, @_boundscheck)` not work?
+        ref = memoryrefnew(mem, i + j - 1, false)
+        memoryrefset!(ref, data[j].value, :not_atomic, false)
+        return nothing
+    end
+    return nothing
+end
+
+end # module
\ No newline at end of file
diff --git a/test/choosetests.jl b/test/choosetests.jl
index 96d230d185c713..87e77e6cfe3d4c 100644
--- a/test/choosetests.jl
+++ b/test/choosetests.jl
@@ -11,7 +11,7 @@ const TESTNAMES = [
     "char", "strings", "triplequote", "unicode", "intrinsics",
     "dict", "hashing", "iobuffer", "staged", "offsetarray",
     "arrayops", "tuple", "reduce", "reducedim", "abstractarray",
-    "intfuncs", "simdloop", "vecelement", "rational",
+    "intfuncs", "simdloop", "vecelement", "rational", "simd",
     "bitarray", "copy", "math", "fastmath", "functional", "iterators",
     "operators", "ordering", "path", "ccall", "parse", "loading", "gmp",
     "sorting", "spawn", "backtrace", "exceptions",
diff --git a/test/simd.jl b/test/simd.jl
new file mode 100644
index 00000000000000..d35fadec23b9d7
--- /dev/null
+++ b/test/simd.jl
@@ -0,0 +1,44 @@
+using Base.Experimental.SIMD
+using Test
+using InteractiveUtils
+
+function vcopyto!(dest::Array{T}, src::Array{T}) where T
+    stride = natural_vecwidth(T)
+    VT = Vec{stride, T}
+    @assert length(dest) == length(src)
+    @assert length(src) % stride == 0
+    @inbounds for i in 1:stride:length(src)
+        vec = vload(VT, src, i)
+        vstore!(dest, vec, i)
+    end
+end
+
+@testset "load/store" begin
+    A = rand(64)
+    B = zeros(64)
+
+    vcopyto!(B, A)
+    @test A == B
+
+    @test_throws BoundsError vload(Vec{4, Float64}, A, 62)
+    vec = vload(Vec{4, Float64}, A, 1)
+    @test_throws BoundsError vstore!(A, vec, 62)
+
+    # `i + N - 1` overflows for these; the bounds check must still fire
+    @test_throws BoundsError vload(Vec{4, Float64}, A, typemax(Int))
+    @test_throws BoundsError vstore!(A, vec, typemax(Int))
+
+    load(A, i) = @inbounds vload(Vec{4, Float64}, A, i)
+    store(A,v,i) = @inbounds vstore!(A, v, i)
+
+    ir = sprint(io->code_llvm(io, vload, (Type{Vec{4, Float64}}, Vector{Float64}, Int)))
+    @test contains(ir, "call void @j_throw_boundserror")
+
+    ir = sprint(io->code_llvm(io, load, (Vector{Float64}, Int)))
+    @test contains(ir, "load <4 x double>")
+    @test !contains(ir, "call void @j_throw_boundserror")
+end
+
+@testset "show" begin
+    @test sprint(show, Vec(1.0f0, 2.0f0)) == "<2 x Float32>[1.0, 2.0]"
+end