diff --git a/docs/src/examples/tls/tls.jl b/docs/src/examples/tls/tls.jl
index f3e6d0af..7b96dced 100644
--- a/docs/src/examples/tls/tls.jl
+++ b/docs/src/examples/tls/tls.jl
@@ -65,6 +65,7 @@ res ≈ res_race
 #
 # A simple solution for the race condition issue above is to move the allocation of `C`
 # into the body of the parallel `tmap`:
+
 function matmulsums_naive(As, Bs)
     N = size(first(As), 1)
     tmap(As, Bs) do A, B
@@ -138,6 +139,23 @@ function matmulsums_manual(As, Bs)
     mapreduce(fetch, vcat, tasks)
 end
 
+## Or alternatively:
+##
+## using OhMyThreads: DynamicScheduler, tmapreduce
+##
+## function matmulsums_manual2(As, Bs)
+##     N = size(first(As), 1)
+##     tmapreduce(vcat, chunks(As; n = 2 * nthreads()); scheduler=DynamicScheduler(; nchunks=0)) do idcs
+##         local C = Matrix{Float64}(undef, N, N)
+##         local results = Vector{Float64}(undef, length(idcs))
+##         for (i, idx) in enumerate(idcs)
+##             mul!(C, As[idx], Bs[idx])
+##             results[i] = sum(C)
+##         end
+##         results
+##     end
+## end
+
 res_manual = matmulsums_manual(As, Bs)
 res ≈ res_manual
 
diff --git a/docs/src/examples/tls/tls.md b/docs/src/examples/tls/tls.md
index 78ff7fc1..a477b1bd 100644
--- a/docs/src/examples/tls/tls.md
+++ b/docs/src/examples/tls/tls.md
@@ -186,6 +186,23 @@ function matmulsums_manual(As, Bs)
     mapreduce(fetch, vcat, tasks)
 end
 
+# Or alternatively:
+#
+# using OhMyThreads: DynamicScheduler, tmapreduce
+#
+# function matmulsums_manual2(As, Bs)
+#     N = size(first(As), 1)
+#     tmapreduce(vcat, chunks(As; n = 2 * nthreads()); scheduler=DynamicScheduler(; nchunks=0)) do idcs
+#         local C = Matrix{Float64}(undef, N, N)
+#         local results = Vector{Float64}(undef, length(idcs))
+#         for (i, idx) in enumerate(idcs)
+#             mul!(C, As[idx], Bs[idx])
+#             results[i] = sum(C)
+#         end
+#         results
+#     end
+# end
+
 res_manual = matmulsums_manual(As, Bs)
 res ≈ res_manual
 ````
@@ -254,7 +271,7 @@ end
 ````
 
 ````
-  576.448 ms (67 allocations: 40.01 MiB)
+  576.448 ms (69 allocations: 40.01 MiB)
   574.186 ms (67 allocations: 40.01 MiB)
 ````
 