Skip to content

Commit

Permalink
add TLS.jl dep
Browse files Browse the repository at this point in the history
  • Loading branch information
carstenbauer committed Feb 5, 2024
1 parent 826506e commit e346d5c
Show file tree
Hide file tree
Showing 6 changed files with 185 additions and 1 deletion.
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@ version = "0.2.1"
BangBang = "198e06fe-97b7-11e9-32a5-e1d131e6ad66"
ChunkSplitters = "ae650224-84b6-46f8-82ea-d812ca08434e"
StableTasks = "91464d47-22a1-43fe-8b7f-2d57ee82463f"
TaskLocalValues = "ed4db957-447d-4319-bfb6-7fa9ae7ecf34"

[compat]
BangBang = "0.4"
ChunkSplitters = "2.1"
StableTasks = "0.1.4"
TaskLocalValues = "0.1"
julia = "1.6"

[extras]
Expand Down
3 changes: 2 additions & 1 deletion docs/make.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ makedocs(;
doctest = false,
pages = [
"OhMyThreads" => "index.md",
# "Getting Started" => "examples/getting_started.md",
# "Getting Started" => "getting_started.md",
"Examples" => [
"Parallel Monte Carlo" => "examples/mc/mc.md",
"Julia Set" => "examples/juliaset/juliaset.md",
Expand All @@ -21,6 +21,7 @@ makedocs(;
# "B" => "explanations/B.md",
# ],
"Translation Guide" => "translation.md",
"Task-Local Storage" => "examples/tls/tls.md",
"References" => [
"Public API" => "refs/api.md",
"Internal" => "refs/internal.md",
Expand Down
3 changes: 3 additions & 0 deletions docs/src/examples/tls/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[deps]
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
OhMyThreads = "67456a42-1dca-4109-a031-0a68de7e3ad5"
79 changes: 79 additions & 0 deletions docs/src/examples/tls/tls.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
using OhMyThreads: TaskLocalValue, tmap, chunks
using LinearAlgebra: mul!, BLAS
using Base.Threads: nthreads, @spawn

"""
    matmulsums(As, Bs)

Sequential reference implementation. For every pair `(A, B)` taken in lockstep
from `As` and `Bs`, compute the product `A * B` into one reused `Float64`
buffer and return the vector of the sums of these products.

The buffer is sized `N x N`, where `N` is the number of rows of the first
matrix in `As`.
"""
function matmulsums(As, Bs)
    dim = size(first(As), 1)
    buffer = Matrix{Float64}(undef, dim, dim)
    summed_product(A, B) = sum(mul!(buffer, A, B)) # mul! returns the buffer it filled
    return map(summed_product, As, Bs)
end

"""
    matmulsums_race(As, Bs)

Intentionally flawed variant of `matmulsums`: the loop is handed to `tmap`,
but a single output matrix is still captured by the closure, so every
invocation writes to the same buffer. If `tmap` runs invocations concurrently
(as the name of this function suggests), the results are unreliable.

Kept only to demonstrate the problem; do not use it as a template.
"""
function matmulsums_race(As, Bs)
    dim = size(first(As), 1)
    shared = Matrix{Float64}(undef, dim, dim) # one buffer for all invocations
    return tmap(As, Bs) do A, B
        mul!(shared, A, B)
        sum(shared)
    end
end

"""
    matmulsums_naive(As, Bs)

Variant of `matmulsums` built on `tmap` that allocates a fresh `N x N`
`Float64` output matrix inside every invocation, so no buffer is shared
between invocations. Returns the sums of the products `A * B`.
"""
function matmulsums_naive(As, Bs)
    dim = size(first(As), 1)
    return tmap(As, Bs) do A, B
        fresh = Matrix{Float64}(undef, dim, dim) # new allocation per pair
        sum(mul!(fresh, A, B))
    end
end

"""
    matmulsums_tls(As, Bs)

Variant of `matmulsums` built on `tmap` that obtains its `N x N` `Float64`
output matrix from a `TaskLocalValue`. The initializer passed to the
`TaskLocalValue` allocates the matrix; `buffers[]` retrieves the value for the
current task.
"""
function matmulsums_tls(As, Bs)
    dim = size(first(As), 1)
    make_buffer() = Matrix{Float64}(undef, dim, dim)
    buffers = TaskLocalValue{Matrix{Float64}}(make_buffer)
    return tmap(As, Bs) do A, B
        sum(mul!(buffers[], A, B))
    end
end

"""
    matmulsums_manual(As, Bs)

Hand-written parallel variant of `matmulsums`. The indices of `As` are split
with `chunks` into `nthreads()` pieces and one task is spawned per piece. Each
task allocates its own `N x N` `Float64` output matrix and its own result
vector, so the tasks share no mutable state. The per-task result vectors are
concatenated in task order.

Indexing is bounds-checked on purpose: the indices come from `As` only, so a
`Bs` that is shorter than `As` raises a `BoundsError` instead of reading out
of bounds.
"""
function matmulsums_manual(As, Bs)
    N = size(first(As), 1)
    tasks = map(chunks(As; n = nthreads())) do idcs
        @spawn begin
            local C = Matrix{Float64}(undef, N, N)
            local results = Vector{Float64}(undef, length(idcs))
            for (i, idx) in enumerate(idcs)
                mul!(C, As[idx], Bs[idx])
                results[i] = sum(C)
            end
            results
        end
    end
    return reduce(vcat, fetch.(tasks))
end

BLAS.set_num_threads(1) # to avoid potential oversubscription

As = [rand(1024, 1024) for _ in 1:64]
Bs = [rand(1024, 1024) for _ in 1:64]

res = matmulsums(As, Bs)
res_race = matmulsums_race(As, Bs)
res_naive = matmulsums_naive(As, Bs)
res_tls = matmulsums_tls(As, Bs)
res_manual = matmulsums_manual(As, Bs)

res ≈ res_race # may be false: matmulsums_race shares one buffer
res ≈ res_naive
res ≈ res_tls
res ≈ res_manual

using BenchmarkTools

@btime matmulsums($As, $Bs);
@btime matmulsums_naive($As, $Bs);
@btime matmulsums_tls($As, $Bs);
@btime matmulsums_manual($As, $Bs);
98 changes: 98 additions & 0 deletions docs/src/examples/tls/tls.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
```@meta
EditURL = "tls.jl"
```

````julia
using OhMyThreads: TaskLocalValue, tmap, chunks
using LinearAlgebra: mul!, BLAS
using Base.Threads: nthreads, @spawn

"""
    matmulsums(As, Bs)

Sequential reference implementation. For every pair `(A, B)` taken in lockstep
from `As` and `Bs`, compute the product `A * B` into one reused `Float64`
buffer and return the vector of the sums of these products.

The buffer is sized `N x N`, where `N` is the number of rows of the first
matrix in `As`.
"""
function matmulsums(As, Bs)
    dim = size(first(As), 1)
    buffer = Matrix{Float64}(undef, dim, dim)
    summed_product(A, B) = sum(mul!(buffer, A, B)) # mul! returns the buffer it filled
    return map(summed_product, As, Bs)
end

"""
    matmulsums_race(As, Bs)

Intentionally flawed variant of `matmulsums`: the loop is handed to `tmap`,
but a single output matrix is still captured by the closure, so every
invocation writes to the same buffer. If `tmap` runs invocations concurrently
(as the name of this function suggests), the results are unreliable.

Kept only to demonstrate the problem; do not use it as a template.
"""
function matmulsums_race(As, Bs)
    dim = size(first(As), 1)
    shared = Matrix{Float64}(undef, dim, dim) # one buffer for all invocations
    return tmap(As, Bs) do A, B
        mul!(shared, A, B)
        sum(shared)
    end
end

"""
    matmulsums_naive(As, Bs)

Variant of `matmulsums` built on `tmap` that allocates a fresh `N x N`
`Float64` output matrix inside every invocation, so no buffer is shared
between invocations. Returns the sums of the products `A * B`.
"""
function matmulsums_naive(As, Bs)
    dim = size(first(As), 1)
    return tmap(As, Bs) do A, B
        fresh = Matrix{Float64}(undef, dim, dim) # new allocation per pair
        sum(mul!(fresh, A, B))
    end
end

"""
    matmulsums_tls(As, Bs)

Variant of `matmulsums` built on `tmap` that obtains its `N x N` `Float64`
output matrix from a `TaskLocalValue`. The initializer passed to the
`TaskLocalValue` allocates the matrix; `buffers[]` retrieves the value for the
current task.
"""
function matmulsums_tls(As, Bs)
    dim = size(first(As), 1)
    make_buffer() = Matrix{Float64}(undef, dim, dim)
    buffers = TaskLocalValue{Matrix{Float64}}(make_buffer)
    return tmap(As, Bs) do A, B
        sum(mul!(buffers[], A, B))
    end
end

"""
    matmulsums_manual(As, Bs)

Hand-written parallel variant of `matmulsums`. The indices of `As` are split
with `chunks` into `nthreads()` pieces and one task is spawned per piece. Each
task allocates its own `N x N` `Float64` output matrix and its own result
vector, so the tasks share no mutable state. The per-task result vectors are
concatenated in task order.

Indexing is bounds-checked on purpose: the indices come from `As` only, so a
`Bs` that is shorter than `As` raises a `BoundsError` instead of reading out
of bounds.
"""
function matmulsums_manual(As, Bs)
    N = size(first(As), 1)
    tasks = map(chunks(As; n = nthreads())) do idcs
        @spawn begin
            local C = Matrix{Float64}(undef, N, N)
            local results = Vector{Float64}(undef, length(idcs))
            for (i, idx) in enumerate(idcs)
                mul!(C, As[idx], Bs[idx])
                results[i] = sum(C)
            end
            results
        end
    end
    return reduce(vcat, fetch.(tasks))
end

BLAS.set_num_threads(1) # to avoid potential oversubscription

As = [rand(1024, 1024) for _ in 1:64]
Bs = [rand(1024, 1024) for _ in 1:64]

res = matmulsums(As, Bs)
res_race = matmulsums_race(As, Bs)
res_naive = matmulsums_naive(As, Bs)
res_tls = matmulsums_tls(As, Bs)
res_manual = matmulsums_manual(As, Bs)

res ≈ res_race # may be false: matmulsums_race shares one buffer
res ≈ res_naive
res ≈ res_tls
res ≈ res_manual

using BenchmarkTools

@btime matmulsums($As, $Bs);
@btime matmulsums_naive($As, $Bs);
@btime matmulsums_tls($As, $Bs);
@btime matmulsums_manual($As, $Bs);
````

````
3.107 s (3 allocations: 8.00 MiB)
686.432 ms (174 allocations: 512.01 MiB)
792.403 ms (67 allocations: 40.01 MiB)
684.626 ms (51 allocations: 40.00 MiB)
````

---

*This page was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).*

1 change: 1 addition & 0 deletions src/OhMyThreads.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module OhMyThreads

using StableTasks: StableTasks, @spawn, @spawnat
using ChunkSplitters: ChunkSplitters, chunks
using TaskLocalValues: TaskLocalValue

export chunks, treduce, tmapreduce, treducemap, tmap, tmap!, tforeach, tcollect

Expand Down

0 comments on commit e346d5c

Please sign in to comment.