From b7d6e27793c264c90dfc591884cd24bafc9c7e33 Mon Sep 17 00:00:00 2001 From: Carsten Bauer Date: Fri, 2 Feb 2024 11:42:17 +0100 Subject: [PATCH 1/2] tmp --- docs/src/examples/integration/integration.jl | 49 ++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 docs/src/examples/integration/integration.jl diff --git a/docs/src/examples/integration/integration.jl b/docs/src/examples/integration/integration.jl new file mode 100644 index 00000000..5e5b0b20 --- /dev/null +++ b/docs/src/examples/integration/integration.jl @@ -0,0 +1,49 @@ +# # Trapezoidal Integration +# + +# Function to be integrated (from 0 to 1). The analytic result is π. + +f(x) = 4 * √(1 - x^2) + +# Integration routine + +"Evaluate definite integral (from `a` to `b`) by using the trapezoidal rule." +function trapezoidal(a, b, n, h) + y = (f(a) + f(b)) / 2.0 + for i in 1:n-1 + x = a + i * h + y = y + f(x) + end + return y * h +end + +using Base.Threads: nthreads + +N = 10_000 + +function integrate_parallel(N) + # compute local integration interval etc. + h = 1.0 / N + a_loc = [i * n_loc * h for i in 0:nthreads()-1] + b_loc = a_loc + n_loc * h + + result = tmapreduce(+, ...) do + trapezoidal(a_loc, b_loc, n, h) + end + + res_loc = trapezoidal(a_loc, b_loc, n_loc, h) + + println("π (numerical integration) ≈ ", res) +end + + +# perform local integration +res_loc = trapezoidal(a_loc, b_loc, n_loc, h) + +# parallel reduction +res = MPI.Reduce(res_loc, +, comm) + +# print result +if 0 == rank + @printf("π (numerical integration) ≈ %20.16f\\n", res) +end From 635a50f8d3a6b6ad1aedb0b2bb09e9192681bf96 Mon Sep 17 00:00:00 2001 From: Carsten Bauer Date: Mon, 5 Feb 2024 17:54:33 +0100 Subject: [PATCH 2/2] done --- docs/make.jl | 1 + docs/src/examples/integration/Project.toml | 3 + docs/src/examples/integration/integration.jl | 71 +++++++---- docs/src/examples/integration/integration.md | 124 +++++++++++++++++++ 4 files changed, 174 insertions(+), 25 deletions(-) create mode 100644 docs/src/examples/integration/Project.toml create mode 100644 docs/src/examples/integration/integration.md diff --git a/docs/make.jl b/docs/make.jl index 91f343b9..9e569b5b 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -16,6 +16,7 @@ makedocs(; "Examples" => [ "Parallel Monte Carlo" => "examples/mc/mc.md", "Julia Set" => "examples/juliaset/juliaset.md", + "Trapezoidal Integration" => "examples/integration/integration.md", ], # "Explanations" => [ # "B" => "explanations/B.md", diff --git a/docs/src/examples/integration/Project.toml b/docs/src/examples/integration/Project.toml new file mode 100644 index 00000000..e3884437 --- /dev/null +++ b/docs/src/examples/integration/Project.toml @@ -0,0 +1,3 @@ +[deps] +BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" +OhMyThreads = "67456a42-1dca-4109-a031-0a68de7e3ad5" diff --git a/docs/src/examples/integration/integration.jl b/docs/src/examples/integration/integration.jl index 5e5b0b20..e9c0c59c 100644 --- a/docs/src/examples/integration/integration.jl +++ b/docs/src/examples/integration/integration.jl @@ -1,49 +1,70 @@ # # Trapezoidal Integration # - -# Function to be integrated (from 0 to 1). The analytic result is π. +# In this example, we want to parallelize the computation of a simple numerical integral +# via the trapezoidal rule. The latter is given by +# +# $\int_{a}^{b}f(x)\,dx \approx h \sum_{i=1}^{N}\frac{f(x_{i-1})+f(x_{i})}{2}.$ +# +# The function to be integrated is the following. f(x) = 4 * √(1 - x^2) -# Integration routine +# The analytic result of the definite integral (from 0 to 1) is known to be $\pi$. +# +# ## Sequential +# +# Naturally, we implement the trapezoidal rule as a straightforward, sequential `for` loop. -"Evaluate definite integral (from `a` to `b`) by using the trapezoidal rule." -function trapezoidal(a, b, n, h) +function trapezoidal(a, b, n; h = (b - a) / n) y = (f(a) + f(b)) / 2.0 - for i in 1:n-1 + for i in 1:(n - 1) x = a + i * h y = y + f(x) end return y * h end +# Let's compute the integral of `f` above and see if we get the expected result. +# For simplicity, we choose `N`, the number of panels used to discretize the integration +# interval, as a multiple of the number of available Julia threads. + using Base.Threads: nthreads +@show nthreads() +N = nthreads() * 1_000_000 -N = 10_000 +# Calling `trapezoidal` we do indeed find the (approximate) value of $\pi$. -function integrate_parallel(N) - # compute local integration interval etc. - h = 1.0 / N - a_loc = [i * n_loc * h for i in 0:nthreads()-1] - b_loc = a_loc + n_loc * h +trapezoidal(0, 1, N) ≈ π - result = tmapreduce(+, ...) do - trapezoidal(a_loc, b_loc, n, h) - end +# ## Parallel +# +# Our strategy is the following: Divide the integration interval among the available +# Julia threads. On each thread, use the sequential trapezoidal rule to compute the partial +# integral. +# It is straightforward to implement this strategy with `tmapreduce`. The `map` part +# is, essentially, the application of `trapezoidal` and the reduction operator is chosen to +# be `+` to sum up the local integrals. - res_loc = trapezoidal(a_loc, b_loc, n_loc, h) +using OhMyThreads - println("π (numerical integration) ≈ ", res) +function trapezoidal_parallel(a, b, N) + n = N ÷ nthreads() + h = (b - a) / N + return tmapreduce(+, 1:nthreads()) do i + local α = a + (i - 1) * n * h + local β = α + n * h + trapezoidal(α, β, n; h) + end end +# First, we check the correctness of our parallel implementation. +trapezoidal_parallel(0, 1, N) ≈ π -# perform local integration -res_loc = trapezoidal(a_loc, b_loc, n_loc, h) +# Then, we benchmark and compare the performance of the sequential and parallel versions. -# parallel reduction -res = MPI.Reduce(res_loc, +, comm) +using BenchmarkTools +@btime trapezoidal(0, 1, $N); +@btime trapezoidal_parallel(0, 1, $N); -# print result -if 0 == rank - @printf("π (numerical integration) ≈ %20.16f\\n", res) -end +# Because the problem is trivially parallel - all threads to the same thing and don't need +# to communicate - we expect an ideal speedup of (close to) the number of available threads. diff --git a/docs/src/examples/integration/integration.md b/docs/src/examples/integration/integration.md new file mode 100644 index 00000000..67b70584 --- /dev/null +++ b/docs/src/examples/integration/integration.md @@ -0,0 +1,124 @@ +```@meta +EditURL = "integration.jl" +``` + +# Trapezoidal Integration + +In this example, we want to parallelize the computation of a simple numerical integral +via the trapezoidal rule. The latter is given by + +$\int_{a}^{b}f(x)\,dx \approx h \sum_{i=1}^{N}\frac{f(x_{i-1})+f(x_{i})}{2}.$ + +The function to be integrated is the following. + +````julia +f(x) = 4 * √(1 - x^2) +```` + +```` +f (generic function with 1 method) +```` + +The analytic result of the definite integral (from 0 to 1) is known to be $\pi$. + +## Sequential + +Naturally, we implement the trapezoidal rule as a straightforward, sequential `for` loop. + +````julia +function trapezoidal(a, b, n; h = (b - a) / n) + y = (f(a) + f(b)) / 2.0 + for i in 1:(n - 1) + x = a + i * h + y = y + f(x) + end + return y * h +end +```` + +```` +trapezoidal (generic function with 1 method) +```` + +Let's compute the integral of `f` above and see if we get the expected result. +For simplicity, we choose `N`, the number of panels used to discretize the integration +interval, as a multiple of the number of available Julia threads. + +````julia +using Base.Threads: nthreads +@show nthreads() +N = nthreads() * 1_000_000 +```` + +```` +5000000 +```` + +Calling `trapezoidal` we do indeed find the (approximate) value of $\pi$. + +````julia +trapezoidal(0, 1, N) ≈ π +```` + +```` +true +```` + +## Parallel + +Our strategy is the following: Divide the integration interval among the available +Julia threads. On each thread, use the sequential trapezoidal rule to compute the partial +integral. +It is straightforward to implement this strategy with `tmapreduce`. The `map` part +is, essentially, the application of `trapezoidal` and the reduction operator is chosen to +be `+` to sum up the local integrals. + +````julia +using OhMyThreads + +function trapezoidal_parallel(a, b, N) + n = N ÷ nthreads() + h = (b - a) / N + return tmapreduce(+, 1:nthreads()) do i + local α = a + (i - 1) * n * h + local β = α + n * h + trapezoidal(α, β, n; h) + end +end +```` + +```` +trapezoidal_parallel (generic function with 1 method) +```` + +First, we check the correctness of our parallel implementation. + +````julia +trapezoidal_parallel(0, 1, N) ≈ π +```` + +```` +true +```` + +Then, we benchmark and compare the performance of the sequential and parallel versions. + +````julia +using BenchmarkTools +@btime trapezoidal(0, 1, $N); +@btime trapezoidal_parallel(0, 1, $N); +```` + +```` + 13.871 ms (0 allocations: 0 bytes) + 2.781 ms (38 allocations: 3.19 KiB) + +```` + +Because the problem is trivially parallel - all threads to the same thing and don't need +to communicate - we expect an ideal speedup of (close to) the number of available threads. + +--- + +*This page was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).* +