diff --git a/docs/src/literate/integration/integration.jl b/docs/src/literate/integration/integration.jl index 4d37c2e4..0ec74dcb 100644 --- a/docs/src/literate/integration/integration.jl +++ b/docs/src/literate/integration/integration.jl @@ -58,6 +58,19 @@ function trapezoidal_parallel(a, b, N) end end +## or equivalently +## +## function trapezoidal_parallel(a, b, N) +## n = N ÷ nthreads() +## h = (b - a) / N +## @tasks for i in 1:nthreads() +## @set reducer=+ +## local α = a + (i - 1) * n * h +## local β = α + n * h +## trapezoidal(α, β, n; h) +## end +## end + # First, we check the correctness of our parallel implementation. trapezoidal_parallel(0, 1, N) ≈ π diff --git a/docs/src/literate/integration/integration.md b/docs/src/literate/integration/integration.md index 247ea841..6d979b34 100644 --- a/docs/src/literate/integration/integration.md +++ b/docs/src/literate/integration/integration.md @@ -86,10 +86,19 @@ function trapezoidal_parallel(a, b, N) trapezoidal(α, β, n; h) end end -```` -```` -trapezoidal_parallel (generic function with 1 method) +# or equivalently +# +# function trapezoidal_parallel(a, b, N) +# n = N ÷ nthreads() +# h = (b - a) / N +# @tasks for i in 1:nthreads() +# @set reducer=+ +# local α = a + (i - 1) * n * h +# local β = α + n * h +# trapezoidal(α, β, n; h) +# end +# end ```` First, we check the correctness of our parallel implementation. diff --git a/docs/src/literate/juliaset/juliaset.jl b/docs/src/literate/juliaset/juliaset.jl index 17600aca..33b5c3ee 100644 --- a/docs/src/literate/juliaset/juliaset.jl +++ b/docs/src/literate/juliaset/juliaset.jl @@ -70,6 +70,18 @@ function compute_juliaset_parallel!(img; kwargs...) return img end +## or alternatively +## +## function compute_juliaset_parallel!(img; kwargs...) 
+## N = size(img, 1) +## cart = CartesianIndices(img) +## @tasks for idx in eachindex(img) +## c = cart[idx] +## img[idx] = _compute_pixel(c[1], c[2], N) +## end +## return img +## end + N = 2000 img = zeros(Int, N, N) compute_juliaset_parallel!(img); diff --git a/docs/src/literate/juliaset/juliaset.md b/docs/src/literate/juliaset/juliaset.md index 1cf15ea9..1ffb385b 100644 --- a/docs/src/literate/juliaset/juliaset.md +++ b/docs/src/literate/juliaset/juliaset.md @@ -84,6 +84,18 @@ function compute_juliaset_parallel!(img; kwargs...) return img end +# or alternatively +# +# function compute_juliaset_parallel!(img; kwargs...) +# N = size(img, 1) +# cart = CartesianIndices(img) +# @tasks for idx in eachindex(img) +# c = cart[idx] +# img[idx] = _compute_pixel(c[1], c[2], N) +# end +# return img +# end + N = 2000 img = zeros(Int, N, N) compute_juliaset_parallel!(img); diff --git a/docs/src/literate/mc/mc.jl b/docs/src/literate/mc/mc.jl index 0f0274c1..89bea82d 100644 --- a/docs/src/literate/mc/mc.jl +++ b/docs/src/literate/mc/mc.jl @@ -43,6 +43,17 @@ function mc_parallel(N; kwargs...) return pi end +## or alternatively +## +## function mc_parallel(N) +## M = @tasks for _ in 1:N +## @set reducer = + +## rand()^2 + rand()^2 < 1.0 +## end +## pi = 4 * M / N +## return pi +## end + mc_parallel(N) # Let's run a quick benchmark. 
@@ -64,7 +75,7 @@ using Base.Threads: nthreads using OhMyThreads: StaticScheduler @btime mc_parallel($N) samples=10 evals=3; -@btime mc_parallel($N; scheduler=StaticScheduler()) samples=10 evals=3; +@btime mc_parallel($N; scheduler = StaticScheduler()) samples=10 evals=3; # ## Manual parallelization # @@ -76,7 +87,7 @@ using OhMyThreads: StaticScheduler using OhMyThreads: @spawn, chunks function mc_parallel_manual(N; nchunks = nthreads()) - tasks = map(chunks(1:N; n = nchunks)) do idcs # TODO: replace by `tmap` once ready + tasks = map(chunks(1:N; n = nchunks)) do idcs @spawn mc(length(idcs)) end pi = sum(fetch, tasks) / nchunks diff --git a/docs/src/literate/mc/mc.md b/docs/src/literate/mc/mc.md index ac592808..5696e5d9 100644 --- a/docs/src/literate/mc/mc.md +++ b/docs/src/literate/mc/mc.md @@ -54,6 +54,17 @@ function mc_parallel(N; kwargs...) return pi end +# or alternatively +# +# function mc_parallel(N) +# M = @tasks for _ in 1:N +# @set reducer = + +# rand()^2 + rand()^2 < 1.0 +# end +# pi = 4 * M / N +# return pi +# end + mc_parallel(N) ```` diff --git a/docs/src/translation.md b/docs/src/translation.md index a6316f32..f4775576 100644 --- a/docs/src/translation.md +++ b/docs/src/translation.md @@ -2,6 +2,7 @@ This page tries to give a general overview of how to translate patterns written with the built-in tools of [Base.Threads](https://docs.julialang.org/en/v1/base/multi-threading/) using the [OhMyThreads.jl API](@ref API). Note that this should be seen as a rough guide and (intentionally) isn't supposed to replace a systematic introduction into OhMyThreads.jl. 
+ ## Basics ### `@threads` @@ -15,6 +16,12 @@ end ```julia # OhMyThreads +@tasks for i in 1:10 + println(i) +end + +# or + tforeach(1:10) do i println(i) end @@ -31,6 +38,13 @@ end ```julia # OhMyThreads +@tasks for i in 1:10 + @set scheduler=:static + println(i) +end + +# or + tforeach(1:10; scheduler=StaticScheduler()) do i println(i) end @@ -47,6 +61,13 @@ end ```julia # OhMyThreads +@tasks for i in 1:10 + @set scheduler=DynamicScheduler(; nchunks=0) # turn off chunking + println(i) +end + +# or + tforeach(1:10; scheduler=DynamicScheduler(; nchunks=0)) do i println(i) end @@ -69,13 +90,21 @@ reduce(+, fetch.(tasks)) ```julia # OhMyThreads data = rand(10) + +@tasks for x in data + @set reducer=+ + x +end + +# or + treduce(+, data) ``` ## Mutation !!! warning - Parallel mutation of non-local state, like writing to a shared array, can be the source of correctness errors (e.g. race conditions) and big performance issues (e.g. [false sharing](https://en.wikipedia.org/wiki/False_sharing#:~:text=False%20sharing%20is%20an%20inherent,is%20limited%20to%20RAM%20caches.)). You should carefully consider whether this is necessary or whether the use of [thread-safe storage](@ref TSS) is the better option. + Parallel mutation of non-local state, like writing to a shared array, can be the source of correctness errors (e.g. race conditions) and big performance issues (e.g. [false sharing](https://en.wikipedia.org/wiki/False_sharing#:~:text=False%20sharing%20is%20an%20inherent,is%20limited%20to%20RAM%20caches.)). You should carefully consider whether this is necessary or whether the use of [thread-safe storage](@ref TSS) is the better option. 
**We don't recommend using the examples in this section for anything serious!** ```julia # Base.Threads @@ -86,16 +115,21 @@ end ``` ```julia -# OhMyThreads: Variant 1 +# OhMyThreads data = rand(10) + +@tasks for i in 1:10 + data[i] = calc(i) +end + +# or + tforeach(data) do i data[i] = calc(i) end -``` -```julia -# OhMyThreads: Variant 2 -data = rand(10) +# or + tmap!(data, data) do i # this kind of aliasing is fine calc(i) end @@ -103,6 +137,9 @@ end ## Parallel initialization +!!! warning + Parallel mutation of non-local state, like writing to a shared array, can be the source of correctness errors (e.g. race conditions) and big performance issues (e.g. [false sharing](https://en.wikipedia.org/wiki/False_sharing#:~:text=False%20sharing%20is%20an%20inherent,is%20limited%20to%20RAM%20caches.)). You should carefully consider whether this is necessary or whether the use of [thread-safe storage](@ref TSS) is the better option. **We don't recommend using the examples in this section for anything serious!** + ```julia # Base.Threads data = Vector{Float64}(undef, 10) @@ -112,11 +149,17 @@ end ``` ```julia -# OhMyThreads: Variant 1 +# OhMyThreads +data = @tasks for i in 1:10 + @set collect=true + calc(i) +end + +# or + data = tmap(i->calc(i), 1:10) -``` -```julia -# OhMyThreads: Variant 2 +# or + data = tcollect(calc(i) for i in 1:10) ```