-
-
Notifications
You must be signed in to change notification settings - Fork 5.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
implement vload/vstore! and a primitive Vec type
- Loading branch information
Showing
4 changed files
with
109 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -471,4 +471,6 @@ function entrypoint(@nospecialize(argt::Type)) | |
nothing | ||
end | ||
|
||
include("simd.jl") | ||
|
||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
module SIMD | ||
|
||
import Base: VecElement, Memory, MemoryRef | ||
import Base: @propagate_inbounds, @_propagate_inbounds_meta, @_boundscheck, @_noub_if_noinbounds_meta | ||
import Base: memoryrefget, memoryrefnew, memoryrefset! | ||
|
||
export Vec | ||
export vload, vstore!, natural_vecwidth | ||
|
||
# TODO: See C# and Co Vec type | ||
# TODO: Hardware portable vector types... | ||
|
||
"""
    Vec{N, T}

Primitive SIMD vector with `N` lanes of element type `T`. The lanes are stored in the
`data` field as an `NTuple{N, VecElement{T}}`.
"""
struct Vec{N, T}
    data::NTuple{N, VecElement{T}}
end

# Constructors

# Build from a homogeneous tuple of plain values by wrapping every lane in `VecElement`.
@inline function Vec(v::NTuple{N, T}) where {N, T}
    return Vec(map(VecElement, v))
end

# Build from individual lane values, e.g. `Vec(1.0, 2.0)`.
@inline function Vec(v::Vararg{T, N}) where {N, T}
    return Vec(v)
end

# An existing `Vec` is returned unchanged.
@inline function Vec(v::Vec)
    return v
end
|
||
# `Number` defines `copy`, and `power_by_squaring` needs it, so `Vec` provides one too.
# `Vec` is an immutable struct, so handing back the same value is sufficient.
function Base.copy(v::Vec)
    return v
end
|
||
# Print a `Vec` as `<N x T>[x1, x2, ...]`.
#
# The lane type is already part of the `<N x T>` header, so it is passed as `:typeinfo`
# to let each lane be shown without repeating type information.
function Base.show(io::IO, v::Vec{N, T}) where {N, T}
    # `eltype` has no method for `Vec` and would fall back to `Any`, which makes the
    # `:typeinfo` hint a no-op; use the lane type `T` directly.
    io = IOContext(io, :typeinfo => T)
    print(io, "<$N x $T>[")
    # Show each lane straight into `io` instead of building an array of strings first.
    for (j, x) in enumerate(v.data)
        j > 1 && print(io, ", ")
        show(io, x.value)
    end
    print(io, "]")
    return nothing
end
|
||
# Breaks with multi-versioning
# Number of lanes per vector for each supported element type; both widths correspond to
# 32 bytes per vector.
natural_vecwidth(::Type{Float32}) = 32 ÷ sizeof(Float32)
natural_vecwidth(::Type{Float64}) = 32 ÷ sizeof(Float64)
|
||
import Base: +, -, * | ||
|
||
# Mocked vload/vstore!: element-wise accesses that rely on LLVM's SLP vectorizer to fuse them into vector loads/stores | ||
|
||
"""
    vload(::Type{Vec{N, T}}, A::Array{T}, i::Int) -> Vec{N, T}

Load the `N` consecutive elements `A[i], ..., A[i + N - 1]` (linear indexing) into a
`Vec{N, T}`.

Throws a `BoundsError` unless the whole range `i:(i + N - 1)` is in bounds. The check is
wrapped in `@boundscheck`, so it is elided when the call is made under `@inbounds`.
"""
@inline function vload(::Type{Vec{N, T}}, A::Array{T}, i::Int) where {N, T}
    @_noub_if_noinbounds_meta
    # TODO: Alignment...; may need an intrinsic for vectorized loads.
    # Check the whole range once up front: `@inbounds` does not propagate through the
    # `ntuple` closure, so the element accesses below are always unchecked.
    @boundscheck begin
        hi = i + (N - 1)
        # If `i + N - 1` wraps around, `i:hi` is empty and would pass `checkbounds`
        # vacuously, letting the unchecked accesses below read out of bounds.
        (N > 0 && hi < i) && Base.throw_boundserror(A, (i:hi,))
        checkbounds(A, i:hi)
    end
    mem = A.ref
    data = ntuple(Val(N)) do j
        # NOTE: `@inbounds ref = memoryrefnew(mem, i + j - 1, @_boundscheck)` did not work
        # here (reason still unknown), hence the literal `false`.
        ref = memoryrefnew(mem, i + j - 1, false)
        VecElement{T}(memoryrefget(ref, :not_atomic, false))
    end
    return Vec(data)
end
|
||
"""
    vstore!(A::Array{T}, v::Vec{N, T}, i::Int) -> Nothing

Store the `N` lanes of `v` into the consecutive elements `A[i], ..., A[i + N - 1]`
(linear indexing).

Throws a `BoundsError` unless the whole range `i:(i + N - 1)` is in bounds. The check is
wrapped in `@boundscheck`, so it is elided when the call is made under `@inbounds`.
"""
@inline function vstore!(A::Array{T}, v::Vec{N, T}, i::Int) where {N, T}
    @_noub_if_noinbounds_meta
    # TODO: Alignment...; may need an intrinsic for vectorized stores.
    # Check the whole range once up front: `@inbounds` does not propagate through the
    # `ntuple` closure, so the element accesses below are always unchecked.
    @boundscheck begin
        hi = i + (N - 1)
        # If `i + N - 1` wraps around, `i:hi` is empty and would pass `checkbounds`
        # vacuously, letting the unchecked accesses below write out of bounds.
        (N > 0 && hi < i) && Base.throw_boundserror(A, (i:hi,))
        checkbounds(A, i:hi)
    end
    mem = A.ref
    data = v.data
    # `ntuple` is used only for its per-lane side effect; the tuple of `nothing`s it
    # produces is discarded.
    ntuple(Val(N)) do j
        # NOTE: `@inbounds ref = memoryrefnew(mem, i + j - 1, @_boundscheck)` did not work
        # here (reason still unknown), hence the literal `false`.
        ref = memoryrefnew(mem, i + j - 1, false)
        memoryrefset!(ref, data[j].value, :not_atomic, false)
        return nothing
    end
    return nothing
end
|
||
end # module | ||
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
using Base.Experimental.SIMD | ||
using Test | ||
using InteractiveUtils | ||
|
||
# Copy `a` into `b` one `Vec` at a time using `vload`/`vstore!`.
# Note the argument order: the source comes first, the reverse of `copyto!(dest, src)`.
# Both arrays must have the same length, and that length must be a multiple of
# `natural_vecwidth(T)`.
function vcopyto!(a::Array{T}, b::Array{T}) where T
    stride = natural_vecwidth(T)
    @assert length(a) == length(b)
    @assert length(a) % stride == 0
    V = Vec{stride, T}
    # The asserts above guarantee every `offset:(offset + stride - 1)` window is in bounds.
    @inbounds for offset in 1:stride:length(a)
        vstore!(b, vload(V, a, offset), offset)
    end
end
|
||
@testset "load/store" begin
    src = rand(64)
    dst = zeros(64)

    # A full vectorized copy must reproduce the source exactly.
    vcopyto!(src, dst)
    @test src == dst

    # Starting at index 62, a 4-lane access would run past the 64-element array.
    @test_throws BoundsError vload(Vec{4, Float64}, src, 62)
    lanes = vload(Vec{4, Float64}, src, 1)
    @test_throws BoundsError vstore!(src, lanes, 62)

    # `@inbounds` wrappers; kept capture-free so the generated IR is unaffected.
    load(arr, idx) = @inbounds vload(Vec{4, Float64}, arr, idx)
    # NOTE(review): `store` is defined but its IR is never inspected below.
    store(arr, v, idx) = @inbounds vstore!(arr, v, idx)

    # Without `@inbounds` the bounds-error path must be present in the IR.
    checked_ir = sprint(code_llvm, vload, (Type{Vec{4, Float64}}, Vector{Float64}, Int))
    @test occursin("call void @j_throw_boundserror", checked_ir)

    # With `@inbounds` the load must be vectorized and the bounds-error path removed.
    unchecked_ir = sprint(code_llvm, load, (Vector{Float64}, Int))
    @test occursin("load <4 x double>", unchecked_ir)
    @test !occursin("call void @j_throw_boundserror", unchecked_ir)
end