From 2a266d24ba764951e1b2bc3ee91966774e081442 Mon Sep 17 00:00:00 2001 From: MajidAbdelilah Date: Tue, 5 Nov 2024 23:27:28 +0100 Subject: [PATCH] vectorizing the vec.h math header with std::simd, nice :) --- CMakeLists.txt | 4 +- Physics3D/CMakeLists.txt | 6 +- Physics3D/geometry/triangleMesh.cpp | 2 +- Physics3D/math/linalg/vec.h | 306 ++++++++++++++++++++++++++-- 4 files changed, 297 insertions(+), 21 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4ed8e530..cafb4495 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,7 +5,7 @@ project(Physics3D-application VERSION 1.0) message(STATUS "Build type: ${CMAKE_BUILD_TYPE}") message(STATUS "Building with: ${CMAKE_CXX_COMPILER_ID}") -set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED True) if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") @@ -13,7 +13,7 @@ if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /O2 /Oi /ot /GL") else() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native -mtune=native") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g -fsanitize=address") diff --git a/Physics3D/CMakeLists.txt b/Physics3D/CMakeLists.txt index abd03ce9..56212a41 100644 --- a/Physics3D/CMakeLists.txt +++ b/Physics3D/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.10) project(Physics3D VERSION 0.9) -set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD 23) set(CMAKE_CXX_STANDARD_REQUIRED True) if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") @@ -11,9 +11,9 @@ if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /O2 /Oi /ot /GL") else() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native -fno-math-errno") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native -mtune=native -fno-math-errno") - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3") + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Ofast") #running benchmarks showed this to be a pessimization #set(CMAKE_INTERPROCEDURAL_OPTIMIZATION True) diff --git a/Physics3D/geometry/triangleMesh.cpp b/Physics3D/geometry/triangleMesh.cpp index 2c7e8577..a6333748 100644 --- a/Physics3D/geometry/triangleMesh.cpp +++ b/Physics3D/geometry/triangleMesh.cpp @@ -123,7 +123,7 @@ MeshPrototype::MeshPrototype(int vertexCount, int triangleCount, UniqueAlignedPo triangleCount(triangleCount) {} Vec3f MeshPrototype::getVertex(int index) const { - assert(index >= 0 && index < vertexCount); + // assert(index >= 0 && index < vertexCount); size_t currect_index = (index / BLOCK_WIDTH) * BLOCK_WIDTH * 2 + index; return Vec3f(this->vertices[currect_index], this->vertices[currect_index + BLOCK_WIDTH], this->vertices[currect_index + BLOCK_WIDTH * 2]); } diff --git a/Physics3D/math/linalg/vec.h b/Physics3D/math/linalg/vec.h index 95764e02..c1ec0c19 100644 --- a/Physics3D/math/linalg/vec.h +++ b/Physics3D/math/linalg/vec.h @@ -3,7 +3,8 @@ #include #include #include - +#include +#include namespace P3D { template struct Vector { @@ -241,9 +242,36 @@ typedef Vector Vec6l; typedef Vector Vec6i; +// template +// constexpr auto operator*(const Vector& a, const Vector& b) noexcept -> decltype(a[0] * b[0] + a[1] * b[1]) { +// decltype(a[0] * b[0] + a[1] * b[1]) result = a[0] * b[0]; +// for(size_t i = 1; i < Size; i++) { +// result += a[i] * b[i]; +// } +// return result; +// } + +namespace stdx = std::experimental; + + +template +requires std::is_arithmetic_v && std::is_arithmetic_v +constexpr auto operator*(const Vector& a, const Vector& b) noexcept -> decltype(a[0] * b[0]) { + stdx::fixed_size_simd result; + stdx::fixed_size_simd a_simd(&a[0], stdx::element_aligned); + stdx::fixed_size_simd b_simd(&b[0], stdx::element_aligned); + result = a_simd * b_simd; + return stdx::reduce(result); + // decltype(a[0] * b[0] + a[1] * b[1]) result = a[0] * b[0]; + // for(size_t i = 1; i < Size; i++) { + // result += a[i] * b[i]; + // } + // return result; +} template -constexpr auto operator*(const Vector& a, const Vector& b) noexcept -> decltype(a[0] * b[0] + a[1] * b[1]) { - decltype(a[0] * b[0] + a[1] * b[1]) result = a[0] * b[0]; +requires (!(std::is_arithmetic_v && std::is_arithmetic_v)) +constexpr auto operator*(const Vector& a, const Vector& b) noexcept -> decltype(a[0] * b[0]) { + decltype(a[0] * b[0]) result = a[0] * b[0]; for(size_t i = 1; i < Size; i++) { result += a[i] * b[i]; } @@ -251,16 +279,43 @@ constexpr auto operator*(const Vector& a, const Vector& b) n } template -constexpr auto dot(const Vector& a, const Vector& b) noexcept -> decltype(a[0] * b[0] + a[1] * b[1]) { +constexpr auto dot(const Vector& a, const Vector& b) noexcept -> decltype(a[0] * b[0]) { return a * b; } template -constexpr auto dot(const Vector& vec) noexcept -> decltype(vec[0] * vec[0] + vec[1] * vec[1]) { +constexpr auto dot(const Vector& vec) noexcept -> decltype(vec[0] * vec[0]) { return vec * vec; } +// template +// constexpr auto operator+(const Vector& a, const Vector& b) noexcept -> Vector { +// Vector result; +// for(size_t i = 0; i < Size; i++) { +// result[i] = a[i] + b[i]; +// } +// return result; +// } + +template +requires std::is_arithmetic_v && std::is_arithmetic_v +constexpr auto operator+(const Vector& a, const Vector& b) noexcept -> Vector { + stdx::fixed_size_simd result; + stdx::fixed_size_simd a_simd(&a[0], stdx::element_aligned); + stdx::fixed_size_simd b_simd(&b[0], stdx::element_aligned); + result = a_simd + b_simd; + Vector result_vec; + result.copy_to(result_vec.data, stdx::element_aligned); + return result_vec; + // Vector result; + // for(size_t i = 0; i < Size; i++) { + // result[i] = a[i] + b[i]; + // } + // return result; +} + template +requires (!(std::is_arithmetic_v && std::is_arithmetic_v)) constexpr auto operator+(const Vector& a, const Vector& b) noexcept -> Vector { Vector result; for(size_t i = 0; i < Size; i++) { @@ -270,6 +325,7 @@ constexpr auto operator+(const Vector& a, const Vector& b) n } template +requires (!(std::is_arithmetic_v && std::is_arithmetic_v)) constexpr auto operator-(const Vector& a, const Vector& b) noexcept -> Vector { Vector result; for(size_t i = 0; i < Size; i++) { @@ -279,6 +335,25 @@ constexpr auto operator-(const Vector& a, const Vector& b) n } template +requires std::is_arithmetic_v && std::is_arithmetic_v +constexpr auto operator-(const Vector& a, const Vector& b) noexcept -> Vector { + stdx::fixed_size_simd result; + stdx::fixed_size_simd a_simd(&a[0], stdx::element_aligned); + stdx::fixed_size_simd b_simd(&b[0], stdx::element_aligned); + result = a_simd - b_simd; + Vector result_vec; + result.copy_to(result_vec.data, stdx::element_aligned); + return result_vec; + + // Vector result; + // for(size_t i = 0; i < Size; i++) { + // result[i] = a[i] - b[i]; + // } + // return result; +} + +template +requires (!(std::is_arithmetic_v && std::is_arithmetic_v)) constexpr auto operator*(const Vector& vec, const T2& factor) noexcept -> Vector { Vector result; for(size_t i = 0; i < Size; i++) { @@ -288,6 +363,26 @@ constexpr auto operator*(const Vector& vec, const T2& factor) noexcept } template +requires std::is_arithmetic_v && std::is_arithmetic_v +constexpr auto operator*(const Vector& vec, const T2& factor) noexcept -> Vector { + // store decltype(vec[0] * factor) as a data type for use later + decltype(vec[0] * factor) type; + stdx::fixed_size_simd result; + stdx::fixed_size_simd vec_simd(&vec[0], stdx::element_aligned); + stdx::fixed_size_simd factor_simd = factor; + result = vec_simd * factor_simd; + Vector result_vec; + result.copy_to(result_vec.data, stdx::element_aligned); + return result_vec; + // Vector result; + // for(size_t i = 0; i < Size; i++) { + // result[i] = vec[i] * factor; + // } + // return result; +} + +template +requires (!(std::is_arithmetic_v && std::is_arithmetic_v)) constexpr auto operator*(const T1& factor, const Vector& vec) noexcept -> Vector { Vector result; for(size_t i = 0; i < Size; i++) { @@ -297,6 +392,26 @@ constexpr auto operator*(const T1& factor, const Vector& vec) noexcept } template +requires std::is_arithmetic_v && std::is_arithmetic_v +constexpr auto operator*(const T1& factor, const Vector& vec) noexcept -> Vector { + decltype(factor* vec[0]) type; + stdx::fixed_size_simd result; + stdx::fixed_size_simd factor_simd = factor; + stdx::fixed_size_simd vec_simd(&vec[0], stdx::element_aligned); + result = factor_simd * vec_simd; + Vector result_vec; + result.copy_to(result_vec.data, stdx::element_aligned); + return result_vec; + + // Vector result; + // for(size_t i = 0; i < Size; i++) { + // result[i] = factor * vec[i]; + // } + // return result; +} + +template +requires (!(std::is_arithmetic_v && std::is_arithmetic_v)) constexpr auto operator/(const Vector& vec, const T2& factor) noexcept -> Vector { Vector result; for(size_t i = 0; i < Size; i++) { @@ -305,7 +420,27 @@ constexpr auto operator/(const Vector& vec, const T2& factor) noexcept return result; } +template +requires std::is_arithmetic_v && std::is_arithmetic_v +constexpr auto operator/(const Vector& vec, const T2& factor) noexcept -> Vector { + decltype(vec[0] / factor) type; + stdx::fixed_size_simd result; + stdx::fixed_size_simd vec_simd(&vec[0], stdx::element_aligned); + stdx::fixed_size_simd factor_simd = factor; + result = vec_simd / factor_simd; + Vector result_vec; + result.copy_to(result_vec.data, stdx::element_aligned); + return result_vec; + + // Vector result; + // for(size_t i = 0; i < Size; i++) { + // result[i] = vec[i] / factor; + // } + // return result; +} + template +requires (!(std::is_arithmetic_v)) constexpr Vector operator-(const Vector& vec) noexcept { Vector result; for(size_t i = 0; i < Size; i++) { @@ -314,7 +449,24 @@ constexpr Vector operator-(const Vector& vec) noexcept { return result; } +template +requires std::is_arithmetic_v +constexpr Vector operator-(const Vector& vec) noexcept { + stdx::fixed_size_simd result(&vec[0], stdx::element_aligned); + result = -result; + Vector result_vec; + result.copy_to(result_vec.data, stdx::element_aligned); + return result_vec; + + // Vector result; + // for(size_t i = 0; i < Size; i++) { + // result[i] = -vec[i]; + // } + // return result; +} + template +requires (!(std::is_arithmetic_v && std::is_arithmetic_v)) constexpr Vector& operator+=(Vector& vec, const Vector& other) noexcept { for(size_t i = 0; i < Size; i++) { vec[i] += other[i]; @@ -323,6 +475,22 @@ constexpr Vector& operator+=(Vector& vec, const Vector +requires std::is_arithmetic_v && std::is_arithmetic_v +constexpr Vector& operator+=(Vector& vec, const Vector& other) noexcept { + stdx::fixed_size_simd vec_simd(&vec[0], stdx::element_aligned); + stdx::fixed_size_simd other_simd(&other[0], stdx::element_aligned); + vec_simd += other_simd; + vec_simd.copy_to(vec.data, stdx::element_aligned); + return vec; + + // for(size_t i = 0; i < Size; i++) { + // vec[i] += other[i]; + // } + // return vec; +} + +template +requires (!(std::is_arithmetic_v && std::is_arithmetic_v)) constexpr Vector& operator-=(Vector& vec, const Vector& other) noexcept { for(size_t i = 0; i < Size; i++) { vec[i] -= other[i]; @@ -331,6 +499,22 @@ constexpr Vector& operator-=(Vector& vec, const Vector +requires std::is_arithmetic_v && std::is_arithmetic_v +constexpr Vector& operator-=(Vector& vec, const Vector& other) noexcept { + stdx::fixed_size_simd vec_simd(&vec[0], stdx::element_aligned); + stdx::fixed_size_simd other_simd(&other[0], stdx::element_aligned); + vec_simd -= other_simd; + vec_simd.copy_to(vec.data, stdx::element_aligned); + return vec; + + // for(size_t i = 0; i < Size; i++) { + // vec[i] -= other[i]; + // } + // return vec; +} + +template +requires (!(std::is_arithmetic_v && std::is_arithmetic_v)) constexpr Vector& operator*=(Vector& vec, const T2& factor) noexcept { for(size_t i = 0; i < Size; i++) { vec[i] *= factor; @@ -339,6 +523,22 @@ constexpr Vector& operator*=(Vector& vec, const T2& factor) } template +requires std::is_arithmetic_v && std::is_arithmetic_v +constexpr Vector& operator*=(Vector& vec, const T2& factor) noexcept { + stdx::fixed_size_simd vec_simd(&vec[0], stdx::element_aligned); + stdx::fixed_size_simd factor_simd = factor; + vec_simd *= factor_simd; + vec_simd.copy_to(vec.data, stdx::element_aligned); + return vec; + + // for(size_t i = 0; i < Size; i++) { + // vec[i] *= factor; + // } + // return vec; +} + +template +requires (!(std::is_arithmetic_v && std::is_arithmetic_v)) constexpr Vector& operator/=(Vector& vec, const T2& factor) noexcept { for(size_t i = 0; i < Size; i++) { vec[i] /= factor; @@ -346,6 +546,21 @@ constexpr Vector& operator/=(Vector& vec, const T2& factor) return vec; } +template +requires std::is_arithmetic_v && std::is_arithmetic_v +constexpr Vector& operator/=(Vector& vec, const T2& factor) noexcept { + stdx::fixed_size_simd vec_simd(&vec[0], stdx::element_aligned); + stdx::fixed_size_simd factor_simd = factor; + vec_simd /= factor_simd; + vec_simd.copy_to(vec.data, stdx::element_aligned); + return vec; + + // for(size_t i = 0; i < Size; i++) { + // vec[i] /= factor; + // } + // return vec; +} + template constexpr auto operator%(const Vector& first, const Vector& second) noexcept -> decltype(first[0] * second[1] - first[1] * second[0]) { return first[0] * second[1] - first[1] * second[0]; @@ -431,12 +646,7 @@ constexpr Vector withoutIndex(const Vector& vec, size_t in template constexpr T lengthSquared(const Vector& vec) noexcept { - T sum = vec[0] * vec[0]; - - for(size_t i = 1; i < Size; i++) { - sum += vec[i] * vec[i]; - } - return sum; + return vec * vec; } template @@ -486,12 +696,26 @@ constexpr Vector normalize(const Vector& vec) noexcept { return vec / length(vec); } +// template +// constexpr Vector abs(const Vector& vec) noexcept { +// Vector result; +// for(size_t i = 0; i < Size; i++) +// result[i] = std::abs(vec[i]); +// return result; +// } + template constexpr Vector abs(const Vector& vec) noexcept { - Vector result; - for(size_t i = 0; i < Size; i++) - result[i] = std::abs(vec[i]); - return result; + stdx::fixed_size_simd vec_simd(&vec[0], stdx::element_aligned); + vec_simd = stdx::abs(vec_simd); + Vector result_vec; + vec_simd.copy_to(result_vec.data, stdx::element_aligned); + return result_vec; + + // Vector result; + // for(size_t i = 0; i < Size; i++) + // result[i] = std::abs(vec[i]); + // return result; } /** @@ -547,15 +771,34 @@ constexpr T pointToLineDistanceSquared(const Vector& line, const Vector return lengthSquared(point - project(point, line)); } + template +requires (!(std::is_arithmetic_v)) constexpr Vector elementWiseMul(const Vector& first, const Vector& second) noexcept { Vector result; for(size_t i = 0; i < Size; i++) result[i] = first[i] * second[i]; return result; } +template +requires std::is_arithmetic_v +constexpr Vector elementWiseMul(const Vector& first, const Vector& second) noexcept { + stdx::fixed_size_simd result; + stdx::fixed_size_simd first_simd(&first[0], stdx::element_aligned); + stdx::fixed_size_simd second_simd(&second[0], stdx::element_aligned); + result = first_simd * second_simd; + Vector result_vec; + result.copy_to(result_vec.data, stdx::element_aligned); + return result_vec; + + // Vector result; + // for(size_t i = 0; i < Size; i++) + // result[i] = first[i] * second[i]; + // return result; +} template +requires (!(std::is_arithmetic_v)) constexpr Vector elementWiseSquare(const Vector& vec) noexcept { Vector result; for(size_t i = 0; i < Size; i++) @@ -564,6 +807,23 @@ constexpr Vector elementWiseSquare(const Vector& vec) noexcept } template +requires std::is_arithmetic_v +constexpr Vector elementWiseSquare(const Vector& vec) noexcept { + stdx::fixed_size_simd result; + stdx::fixed_size_simd vec_simd(&vec[0], stdx::element_aligned); + result = vec_simd * vec_simd; + Vector result_vec; + result.copy_to(result_vec.data, stdx::element_aligned); + return result_vec; + + // Vector result; + // for(size_t i = 0; i < Size; i++) + // result[i] = vec[i] * vec[i]; + // return result; +} + +template +requires (!(std::is_arithmetic_v)) constexpr Vector elementWiseCube(const Vector& vec) noexcept { Vector result; for(size_t i = 0; i < Size; i++) @@ -571,6 +831,22 @@ constexpr Vector elementWiseCube(const Vector& vec) noexcept { return result; } +template +requires std::is_arithmetic_v +constexpr Vector elementWiseCube(const Vector& vec) noexcept { + stdx::fixed_size_simd result; + stdx::fixed_size_simd vec_simd(&vec[0], stdx::element_aligned); + result = vec_simd * vec_simd * vec_simd; + Vector result_vec; + result.copy_to(result_vec.data, stdx::element_aligned); + return result_vec; + + // Vector result; + // for(size_t i = 0; i < Size; i++) + // result[i] = vec[i] * vec[i] * vec[i]; + // return result; +} + /* computes ( a.y * b.z + a.z * b.y, a.z * b.x + a.x * b.z,