Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add/float64 benchmarks #3

Merged
merged 4 commits into from
Dec 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,24 @@ benchmark-mojo:
mojo mojo_impl/naive.mojo 10000
mojo mojo_impl/naive.mojo 100000
mojo mojo_impl/naive.mojo 1000000
mojo mojo_impl/naive.mojo 10000000

# Disabled: the 10000000-point run below segfaults :(
# mojo mojo_impl/naive.mojo 10000000

mojo mojo_impl/optimized_a.mojo 100
mojo mojo_impl/optimized_a.mojo 1000
mojo mojo_impl/optimized_a.mojo 10000
mojo mojo_impl/optimized_a.mojo 100000
mojo mojo_impl/optimized_a.mojo 1000000
mojo mojo_impl/optimized_a.mojo 10000000

# Disabled: the 10000000-point run below segfaults :(
# mojo mojo_impl/optimized_a.mojo 10000000

mojo mojo_impl/optimized_b.mojo 100
mojo mojo_impl/optimized_b.mojo 1000
mojo mojo_impl/optimized_b.mojo 10000
mojo mojo_impl/optimized_b.mojo 100000
mojo mojo_impl/optimized_b.mojo 1000000
mojo mojo_impl/optimized_b.mojo 10000000

# Disabled: the 10000000-point run below segfaults :(
# mojo mojo_impl/optimized_b.mojo 10000000
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ sizes.

3. In addition to being performance winners, the Mojo variants are
parameterized by the number of dimensions (`dims`) and by data type (`dtype`).
In other words, the same generic code can run, for example, `float16`,
`float64` or with 3, 4 or more dimensions. In GIS systems the number of
In other words, the same generic code can run with, for example, `int16`, `float16`,
or `float64` data, and with 3, 4 or more dimensions. In GIS systems the number of
dimensions is sometimes referred to as XY, XYZ, or XYZM, where Z is "height",
and M is "measure".

Expand Down
Binary file modified docs/benchmark-results.ods
Binary file not shown.
3 changes: 2 additions & 1 deletion mojo_impl/naive.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ fn envelope[dtype: DType, dims: Int](tensor: Tensor[dtype]) -> SIMD[dtype, 2 * d

return result

alias dtype = DType.float32
alias dtype = DType.float64
alias dims = 2

fn main() raises:
Expand Down Expand Up @@ -76,3 +76,4 @@ fn main() raises:
print("microsecs:", secs * 10 ** 6)
print("ms:", ms)
print("s:", secs)
print()
3 changes: 2 additions & 1 deletion mojo_impl/optimized_a.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ fn envelope[dtype: DType, dims: Int](tensor: Tensor[dtype]) -> SIMD[dtype, 2 * d
return result


alias dtype = DType.float32
alias dtype = DType.float64
alias dims = 2


Expand Down Expand Up @@ -87,3 +87,4 @@ fn main() raises:
print("microsecs:", secs * 10 ** 6)
print("ms:", ms)
print("s:", secs)
print()
2 changes: 1 addition & 1 deletion mojo_impl/optimized_b.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ fn envelope[dtype: DType, dims: Int](tensor: Tensor[dtype]) -> SIMD[dtype, 2 * d

return result.simd_load[2 * dims]()

alias dtype = DType.float32
alias dtype = DType.float64
alias dims = 2

fn main() raises:
Expand Down
34 changes: 21 additions & 13 deletions py_impl/naive_benchmark.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,14 @@
"metadata": {},
"outputs": [],
"source": [
"multipoint_10_2 = np.array(np.random.rand(2, 10 ** 2), dtype=np.float32)\n",
"multipoint_10_3 = np.array(np.random.rand(2, 10 ** 3), dtype=np.float32)\n",
"multipoint_10_4 = np.array(np.random.rand(2, 10 ** 4), dtype=np.float32)\n",
"multipoint_10_5= np.array(np.random.rand(2, 10 ** 5), dtype=np.float32)\n",
"multipoint_10_6 = np.array(np.random.rand(2, 10 ** 6), dtype=np.float32)\n",
"multipoint_10_7 = np.array(np.random.rand(2, 10 ** 7), dtype=np.float32)"
"dtype=np.float64\n",
"\n",
"multipoint_10_2 = np.array(np.random.rand(2, 10 ** 2), dtype)\n",
"multipoint_10_3 = np.array(np.random.rand(2, 10 ** 3), dtype)\n",
"multipoint_10_4 = np.array(np.random.rand(2, 10 ** 4), dtype)\n",
"multipoint_10_5= np.array(np.random.rand(2, 10 ** 5), dtype)\n",
"multipoint_10_6 = np.array(np.random.rand(2, 10 ** 6), dtype)\n",
"multipoint_10_7 = np.array(np.random.rand(2, 10 ** 7), dtype)"
]
},
{
Expand All @@ -33,7 +35,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"13.5 µs ± 183 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n"
"11.1 µs ± 16.4 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n"
]
}
],
Expand All @@ -52,7 +54,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"103 µs ± 95 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n"
"101 µs ± 99.5 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n"
]
}
],
Expand All @@ -71,7 +73,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"1 ms ± 6.2 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
"1.01 ms ± 2.05 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
]
}
],
Expand All @@ -90,7 +92,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"10.5 ms ± 132 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
"10.4 ms ± 10.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
]
}
],
Expand All @@ -109,7 +111,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"109 ms ± 399 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
"109 ms ± 532 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
Expand All @@ -128,15 +130,21 @@
"name": "stdout",
"output_type": "stream",
"text": [
"1.09 s ± 2.14 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
"1.09 s ± 1.84 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%%timeit\n",
"\n",
"envelope(x_coords=list(multipoint_10_7[0]), y_coords=list(multipoint_10_7[1]))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
26 changes: 14 additions & 12 deletions py_impl/optimized_benchmark.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,14 @@
"metadata": {},
"outputs": [],
"source": [
"multipoint_10_2 = np.array(np.random.rand(2, 10 ** 2), dtype=np.float32)\n",
"multipoint_10_3 = np.array(np.random.rand(2, 10 ** 3), dtype=np.float32)\n",
"multipoint_10_4 = np.array(np.random.rand(2, 10 ** 4), dtype=np.float32)\n",
"multipoint_10_5= np.array(np.random.rand(2, 10 ** 5), dtype=np.float32)\n",
"multipoint_10_6 = np.array(np.random.rand(2, 10 ** 6), dtype=np.float32)\n",
"multipoint_10_7 = np.array(np.random.rand(2, 10 ** 7), dtype=np.float32)"
"dtype = np.float64\n",
"\n",
"multipoint_10_2 = np.array(np.random.rand(2, 10 ** 2), dtype)\n",
"multipoint_10_3 = np.array(np.random.rand(2, 10 ** 3), dtype)\n",
"multipoint_10_4 = np.array(np.random.rand(2, 10 ** 4), dtype)\n",
"multipoint_10_5= np.array(np.random.rand(2, 10 ** 5), dtype)\n",
"multipoint_10_6 = np.array(np.random.rand(2, 10 ** 6), dtype)\n",
"multipoint_10_7 = np.array(np.random.rand(2, 10 ** 7), dtype)"
]
},
{
Expand All @@ -33,7 +35,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"2.84 µs ± 21.6 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n"
"2.75 µs ± 9.27 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n"
]
}
],
Expand All @@ -52,7 +54,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"3.09 µs ± 57.9 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n"
"3.07 µs ± 12.3 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n"
]
}
],
Expand All @@ -71,7 +73,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"4.14 µs ± 10.2 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n"
"6.08 µs ± 21.2 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n"
]
}
],
Expand All @@ -90,7 +92,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"22.5 µs ± 62.3 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n"
"40.1 µs ± 137 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n"
]
}
],
Expand All @@ -109,7 +111,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"195 µs ± 229 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n"
"392 µs ± 643 ns per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
]
}
],
Expand All @@ -128,7 +130,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"2.64 ms ± 16.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
"5.2 ms ± 17.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
]
}
],
Expand Down