From 2347a4adb15eee24ce8a69aec71ab2921bafa790 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Sat, 23 Nov 2024 14:50:06 +0100 Subject: [PATCH 01/36] Payload: enable functionality to print the generated assembler --- include/firestarter/Environment/Payload/Payload.hpp | 5 +++-- .../Environment/X86/Payload/AVX512Payload.hpp | 3 ++- .../firestarter/Environment/X86/Payload/AVXPayload.hpp | 3 ++- .../Environment/X86/Payload/FMA4Payload.hpp | 3 ++- .../firestarter/Environment/X86/Payload/FMAPayload.hpp | 3 ++- .../Environment/X86/Payload/SSE2Payload.hpp | 3 ++- .../firestarter/Environment/X86/Payload/X86Payload.hpp | 10 ++++++++++ .../Environment/X86/Payload/ZENFMAPayload.hpp | 3 ++- .../Environment/X86/Payload/AVX512Payload.cpp | 7 ++++++- src/firestarter/Environment/X86/Payload/AVXPayload.cpp | 7 ++++++- .../Environment/X86/Payload/FMA4Payload.cpp | 7 ++++++- src/firestarter/Environment/X86/Payload/FMAPayload.cpp | 7 ++++++- .../Environment/X86/Payload/SSE2Payload.cpp | 7 ++++++- .../Environment/X86/Payload/ZENFMAPayload.cpp | 7 ++++++- src/firestarter/LoadWorker.cpp | 8 ++++---- 15 files changed, 65 insertions(+), 18 deletions(-) diff --git a/include/firestarter/Environment/Payload/Payload.hpp b/include/firestarter/Environment/Payload/Payload.hpp index b5b17199..df7d08a7 100644 --- a/include/firestarter/Environment/Payload/Payload.hpp +++ b/include/firestarter/Environment/Payload/Payload.hpp @@ -94,9 +94,10 @@ class Payload { /// compiled payload. /// \arg ErrorDetection Should the code to support error detection between thread be baked into the high load routine /// of the compiled payload. + /// \arg PrintAssembler Should the generated assembler code be logged. /// \returns The compiled payload that provides access to the init and load functions. 
- [[nodiscard]] virtual auto compilePayload(const PayloadSettings& Settings, bool DumpRegisters, - bool ErrorDetection) const -> CompiledPayload::UniquePtr = 0; + [[nodiscard]] virtual auto compilePayload(const PayloadSettings& Settings, bool DumpRegisters, bool ErrorDetection, + bool PrintAssembler) const -> CompiledPayload::UniquePtr = 0; /// Get the available instruction items that are supported by this payload. /// \returns The available instruction items that are supported by this payload. diff --git a/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp b/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp index 20bfc491..d783b984 100644 --- a/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp +++ b/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp @@ -56,9 +56,10 @@ class AVX512Payload final : public X86Payload { /// compiled payload. /// \arg ErrorDetection Should the code to support error detection between thread be baked into the high load routine /// of the compiled payload. + /// \arg PrintAssembler Should the generated assembler code be logged. /// \returns The compiled payload that provides access to the init and load functions. [[nodiscard]] auto compilePayload(const environment::payload::PayloadSettings& Settings, bool DumpRegisters, - bool ErrorDetection) const + bool ErrorDetection, bool PrintAssembler) const -> environment::payload::CompiledPayload::UniquePtr override; private: diff --git a/include/firestarter/Environment/X86/Payload/AVXPayload.hpp b/include/firestarter/Environment/X86/Payload/AVXPayload.hpp index 24ef7a15..db9c1a42 100644 --- a/include/firestarter/Environment/X86/Payload/AVXPayload.hpp +++ b/include/firestarter/Environment/X86/Payload/AVXPayload.hpp @@ -55,9 +55,10 @@ class AVXPayload final : public X86Payload { /// compiled payload. /// \arg ErrorDetection Should the code to support error detection between thread be baked into the high load routine /// of the compiled payload. 
+ /// \arg PrintAssembler Should the generated assembler code be logged. /// \returns The compiled payload that provides access to the init and load functions. [[nodiscard]] auto compilePayload(const environment::payload::PayloadSettings& Settings, bool DumpRegisters, - bool ErrorDetection) const + bool ErrorDetection, bool PrintAssembler) const -> environment::payload::CompiledPayload::UniquePtr override; private: diff --git a/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp b/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp index f0e711f6..13e66550 100644 --- a/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp +++ b/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp @@ -55,9 +55,10 @@ class FMA4Payload final : public X86Payload { /// compiled payload. /// \arg ErrorDetection Should the code to support error detection between thread be baked into the high load routine /// of the compiled payload. + /// \arg PrintAssembler Should the generated assembler code be logged. /// \returns The compiled payload that provides access to the init and load functions. [[nodiscard]] auto compilePayload(const environment::payload::PayloadSettings& Settings, bool DumpRegisters, - bool ErrorDetection) const + bool ErrorDetection, bool PrintAssembler) const -> environment::payload::CompiledPayload::UniquePtr override; private: diff --git a/include/firestarter/Environment/X86/Payload/FMAPayload.hpp b/include/firestarter/Environment/X86/Payload/FMAPayload.hpp index 8280a5b2..1bfe361b 100644 --- a/include/firestarter/Environment/X86/Payload/FMAPayload.hpp +++ b/include/firestarter/Environment/X86/Payload/FMAPayload.hpp @@ -45,9 +45,10 @@ class FMAPayload final : public X86Payload { /// compiled payload. /// \arg ErrorDetection Should the code to support error detection between thread be baked into the high load routine /// of the compiled payload. + /// \arg PrintAssembler Should the generated assembler code be logged. 
/// \returns The compiled payload that provides access to the init and load functions. [[nodiscard]] auto compilePayload(const environment::payload::PayloadSettings& Settings, bool DumpRegisters, - bool ErrorDetection) const + bool ErrorDetection, bool PrintAssembler) const -> environment::payload::CompiledPayload::UniquePtr override; private: diff --git a/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp b/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp index 557af0d4..f6d667be 100644 --- a/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp +++ b/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp @@ -55,9 +55,10 @@ class SSE2Payload final : public X86Payload { /// compiled payload. /// \arg ErrorDetection Should the code to support error detection between thread be baked into the high load routine /// of the compiled payload. + /// \arg PrintAssembler Should the generated assembler code be logged. /// \returns The compiled payload that provides access to the init and load functions. [[nodiscard]] auto compilePayload(const environment::payload::PayloadSettings& Settings, bool DumpRegisters, - bool ErrorDetection) const + bool ErrorDetection, bool PrintAssembler) const -> environment::payload::CompiledPayload::UniquePtr override; private: diff --git a/include/firestarter/Environment/X86/Payload/X86Payload.hpp b/include/firestarter/Environment/X86/Payload/X86Payload.hpp index 44d5bd4f..87ba8e6b 100644 --- a/include/firestarter/Environment/X86/Payload/X86Payload.hpp +++ b/include/firestarter/Environment/X86/Payload/X86Payload.hpp @@ -90,6 +90,16 @@ class X86Payload : public environment::payload::Payload { }; protected: + /// Print the generated assembler Code of asmjit + /// \arg Builder The builder that contains the assembler code. 
+ static void printAssembler(asmjit::BaseBuilder& Builder) { + asmjit::String Sb; + asmjit::FormatOptions FormatOptions{}; + + asmjit::Formatter::formatNodeList(Sb, FormatOptions, &Builder); + log::info() << Sb.data(); + } + /// Emit the code to dump the xmm, ymm or zmm registers into memory for the dump registers feature. /// \tparam Vec the type of the vector register used. /// \arg Cb The asmjit code builder that is used to emit the assembler code. diff --git a/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp b/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp index 5d624725..777e6587 100644 --- a/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp +++ b/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp @@ -40,9 +40,10 @@ class ZENFMAPayload final : public X86Payload { /// compiled payload. /// \arg ErrorDetection Should the code to support error detection between thread be baked into the high load routine /// of the compiled payload. + /// \arg PrintAssembler Should the generated assembler code be logged. /// \returns The compiled payload that provides access to the init and load functions. 
[[nodiscard]] auto compilePayload(const environment::payload::PayloadSettings& Settings, bool DumpRegisters, - bool ErrorDetection) const + bool ErrorDetection, bool PrintAssembler) const -> environment::payload::CompiledPayload::UniquePtr override; private: diff --git a/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp b/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp index f52a5410..88d02440 100644 --- a/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp +++ b/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp @@ -25,7 +25,8 @@ namespace firestarter::environment::x86::payload { auto AVX512Payload::compilePayload(const environment::payload::PayloadSettings& Settings, bool DumpRegisters, - bool ErrorDetection) const -> environment::payload::CompiledPayload::UniquePtr { + bool ErrorDetection, bool PrintAssembler) const + -> environment::payload::CompiledPayload::UniquePtr { using Imm = asmjit::Imm; using Zmm = asmjit::x86::Zmm; // NOLINTBEGIN(readability-identifier-naming) @@ -364,6 +365,10 @@ auto AVX512Payload::compilePayload(const environment::payload::PayloadSettings& Cb.finalize(); + if (PrintAssembler) { + printAssembler(Cb); + } + auto CompiledPayloadPtr = CompiledX86Payload::create(Stats, Code); // skip if we could not determine cache size diff --git a/src/firestarter/Environment/X86/Payload/AVXPayload.cpp b/src/firestarter/Environment/X86/Payload/AVXPayload.cpp index b20a85f7..82ded951 100644 --- a/src/firestarter/Environment/X86/Payload/AVXPayload.cpp +++ b/src/firestarter/Environment/X86/Payload/AVXPayload.cpp @@ -25,7 +25,8 @@ namespace firestarter::environment::x86::payload { auto AVXPayload::compilePayload(const environment::payload::PayloadSettings& Settings, bool DumpRegisters, - bool ErrorDetection) const -> environment::payload::CompiledPayload::UniquePtr { + bool ErrorDetection, bool PrintAssembler) const + -> environment::payload::CompiledPayload::UniquePtr { using Imm = asmjit::Imm; using Mm = asmjit::x86::Mm; 
using Xmm = asmjit::x86::Xmm; @@ -394,6 +395,10 @@ auto AVXPayload::compilePayload(const environment::payload::PayloadSettings& Set Cb.finalize(); + if (PrintAssembler) { + printAssembler(Cb); + } + auto CompiledPayloadPtr = CompiledX86Payload::create(Stats, Code); // skip if we could not determine cache size diff --git a/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp b/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp index 202d34c7..91f3479a 100644 --- a/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp +++ b/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp @@ -25,7 +25,8 @@ namespace firestarter::environment::x86::payload { auto FMA4Payload::compilePayload(const environment::payload::PayloadSettings& Settings, bool DumpRegisters, - bool ErrorDetection) const -> environment::payload::CompiledPayload::UniquePtr { + bool ErrorDetection, bool PrintAssembler) const + -> environment::payload::CompiledPayload::UniquePtr { using Imm = asmjit::Imm; using Xmm = asmjit::x86::Xmm; // NOLINTBEGIN(readability-identifier-naming) @@ -367,6 +368,10 @@ auto FMA4Payload::compilePayload(const environment::payload::PayloadSettings& Se Cb.finalize(); + if (PrintAssembler) { + printAssembler(Cb); + } + auto CompiledPayloadPtr = CompiledX86Payload::create(Stats, Code); // skip if we could not determine cache size diff --git a/src/firestarter/Environment/X86/Payload/FMAPayload.cpp b/src/firestarter/Environment/X86/Payload/FMAPayload.cpp index cec0021a..03b8995c 100644 --- a/src/firestarter/Environment/X86/Payload/FMAPayload.cpp +++ b/src/firestarter/Environment/X86/Payload/FMAPayload.cpp @@ -25,7 +25,8 @@ namespace firestarter::environment::x86::payload { auto FMAPayload::compilePayload(const environment::payload::PayloadSettings& Settings, bool DumpRegisters, - bool ErrorDetection) const -> environment::payload::CompiledPayload::UniquePtr { + bool ErrorDetection, bool PrintAssembler) const + -> environment::payload::CompiledPayload::UniquePtr { using Imm = 
asmjit::Imm; using Xmm = asmjit::x86::Xmm; using Ymm = asmjit::x86::Ymm; @@ -402,6 +403,10 @@ auto FMAPayload::compilePayload(const environment::payload::PayloadSettings& Set Cb.finalize(); + if (PrintAssembler) { + printAssembler(Cb); + } + auto CompiledPayloadPtr = CompiledX86Payload::create(Stats, Code); // skip if we could not determine cache size diff --git a/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp b/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp index fc77c8e1..8f443781 100644 --- a/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp +++ b/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp @@ -25,7 +25,8 @@ namespace firestarter::environment::x86::payload { auto SSE2Payload::compilePayload(const environment::payload::PayloadSettings& Settings, bool DumpRegisters, - bool ErrorDetection) const -> environment::payload::CompiledPayload::UniquePtr { + bool ErrorDetection, bool PrintAssembler) const + -> environment::payload::CompiledPayload::UniquePtr { using Imm = asmjit::Imm; using Mm = asmjit::x86::Mm; using Xmm = asmjit::x86::Xmm; @@ -385,6 +386,10 @@ auto SSE2Payload::compilePayload(const environment::payload::PayloadSettings& Se Cb.finalize(); + if (PrintAssembler) { + printAssembler(Cb); + } + auto CompiledPayloadPtr = CompiledX86Payload::create(Stats, Code); // skip if we could not determine cache size diff --git a/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp b/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp index 4857f82d..f12dca1d 100644 --- a/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp +++ b/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp @@ -25,7 +25,8 @@ namespace firestarter::environment::x86::payload { auto ZENFMAPayload::compilePayload(const environment::payload::PayloadSettings& Settings, bool DumpRegisters, - bool ErrorDetection) const -> environment::payload::CompiledPayload::UniquePtr { + bool ErrorDetection, bool PrintAssembler) const + -> 
environment::payload::CompiledPayload::UniquePtr { using Imm = asmjit::Imm; using Xmm = asmjit::x86::Xmm; using Ymm = asmjit::x86::Ymm; @@ -352,6 +353,10 @@ auto ZENFMAPayload::compilePayload(const environment::payload::PayloadSettings& Cb.finalize(); + if (PrintAssembler) { + printAssembler(Cb); + } + auto CompiledPayloadPtr = CompiledX86Payload::create(Stats, Code); // skip if we could not determine cache size diff --git a/src/firestarter/LoadWorker.cpp b/src/firestarter/LoadWorker.cpp index 4d473832..238dace8 100644 --- a/src/firestarter/LoadWorker.cpp +++ b/src/firestarter/LoadWorker.cpp @@ -267,8 +267,8 @@ void Firestarter::loadThreadWorker(const std::shared_ptr& Td) { Td->environment().setCpuAffinity(Td->id()); // compile payload - Td->CompiledPayloadPtr = - Td->config().payload()->compilePayload(Td->config().settings(), Td->DumpRegisters, Td->ErrorDetection); + Td->CompiledPayloadPtr = Td->config().payload()->compilePayload(Td->config().settings(), Td->DumpRegisters, + Td->ErrorDetection, /*PrintAssembler=*/false); // allocate memory // if we should dump some registers, we use the first part of the memory @@ -354,8 +354,8 @@ void Firestarter::loadThreadWorker(const std::shared_ptr& Td) { break; case LoadThreadState::ThreadSwitch: // compile payload - Td->CompiledPayloadPtr = - Td->config().payload()->compilePayload(Td->config().settings(), Td->DumpRegisters, Td->ErrorDetection); + Td->CompiledPayloadPtr = Td->config().payload()->compilePayload(Td->config().settings(), Td->DumpRegisters, + Td->ErrorDetection, /*PrintAssembler=*/false); // call init function Td->CompiledPayloadPtr->init(Td->Memory->getMemoryAddress(), Td->BuffersizeMem); From cbf8dd5af65a82101b80c8718206d1827bbb3fe0 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Sat, 23 Nov 2024 16:45:27 +0100 Subject: [PATCH 02/36] CMake:: make two static libraries available that are used in the build --- cmake/InstallHwloc.cmake | 4 + src/CMakeLists.txt | 168 ++++++++++++++++++++++----------------- 2 
files changed, 99 insertions(+), 73 deletions(-) diff --git a/cmake/InstallHwloc.cmake b/cmake/InstallHwloc.cmake index ef50c736..a292d8fb 100644 --- a/cmake/InstallHwloc.cmake +++ b/cmake/InstallHwloc.cmake @@ -17,6 +17,7 @@ if (FIRESTARTER_BUILD_HWLOC) BUILD_IN_SOURCE 1 BUILD_COMMAND make -j INSTALL_COMMAND make install + BUILD_BYPRODUCTS /lib/libhwloc.a ) SET(HWLOC_INCLUDE_DIR "${PROJECT_SOURCE_DIR}/lib/Hwloc/install") @@ -36,6 +37,7 @@ if (FIRESTARTER_BUILD_HWLOC) CONFIGURE_COMMAND "" BUILD_COMMAND cd \\contrib\\windows && MSBuild /p:Configuration=Release /p:Platform=x64 hwloc.sln INSTALL_COMMAND "" + BUILD_BYPRODUCTS /contrib/windows/x64/Release/libhwloc.lib ) SET(HWLOC_INCLUDE_DIR "${PROJECT_SOURCE_DIR}/lib/Hwloc/sources") @@ -55,6 +57,7 @@ if (FIRESTARTER_BUILD_HWLOC) CONFIGURE_COMMAND "" BUILD_COMMAND "" INSTALL_COMMAND "" + BUILD_BYPRODUCTS /lib/libhwloc.a ) SET(HWLOC_INCLUDE_DIR "${PROJECT_SOURCE_DIR}/lib/Hwloc/sources") @@ -66,4 +69,5 @@ if (FIRESTARTER_BUILD_HWLOC) endif() include_directories(${HWLOC_INCLUDE_DIR}/include) + add_dependencies(hwloc HwlocInstall) endif() diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c0355fa0..1bfae466 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,8 +1,17 @@ -SET(FIRESTARTER_FILES +# General linker flags +if (FIRESTARTER_THREAD_AFFINITY) + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DFIRESTARTER_THREAD_AFFINITY") +endif() + +if((NOT CMAKE_SYSTEM_NAME STREQUAL "Darwin") AND FIRESTARTER_LINK_STATIC) + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DFIRESTARTER_LINK_STATIC") +endif() + + +# Create the core firestarter library that is used in all builds and tests +add_library(firestartercore STATIC firestarter/Config.cpp - firestarter/Main.cpp firestarter/Firestarter.cpp - firestarter/LoadWorker.cpp firestarter/SafeExit.cpp firestarter/WatchdogWorker.cpp firestarter/DumpRegisterWorker.cpp @@ -27,10 +36,18 @@ SET(FIRESTARTER_FILES firestarter/Environment/X86/Payload/SSE2Payload.cpp ) -if(CMAKE_SYSTEM_NAME 
STREQUAL "Linux") - SET(FIRESTARTER_FILES - ${FIRESTARTER_FILES} +target_compile_features(firestartercore PRIVATE cxx_std_17) + +target_link_libraries(firestartercore + hwloc + AsmJit::AsmJit + Nitro::log + ) + +# Create the linux firestarter library that is used for specific linux only features +if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + add_library(firestarterlinux STATIC # measurement stuff firestarter/Measurement/MeasurementWorker.cpp firestarter/Measurement/Summary.cpp @@ -44,15 +61,23 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Linux") firestarter/Optimizer/Util/MultiObjective.cpp firestarter/Optimizer/Algorithm/NSGA2.cpp ) -endif() -SET(FIRESTARTER_LINK_LIBRARIES - ) + target_compile_features(firestarterlinux PRIVATE cxx_std_17) -if (FIRESTARTER_THREAD_AFFINITY) - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DFIRESTARTER_THREAD_AFFINITY") + target_link_libraries(firestarterlinux + Nitro::log + ) endif() + +SET(FIRESTARTER_FILES + firestarter/Main.cpp + + # IpcEstimateMetricData::insertValue is accesses which is part of the firestarterlinux library. + # This reference should be removed there and the file moved back to the firestartercore library. 
+ firestarter/LoadWorker.cpp + ) + if ("${FIRESTARTER_BUILD_TYPE}" STREQUAL "FIRESTARTER_CUDA") find_package(CUDAToolkit REQUIRED) include_directories(${CUDAToolkit_INCLUDE_DIRS}) @@ -65,18 +90,17 @@ if ("${FIRESTARTER_BUILD_TYPE}" STREQUAL "FIRESTARTER_CUDA") ) target_compile_features(FIRESTARTER_CUDA PRIVATE cxx_std_17) - if(FIRESTARTER_BUILD_HWLOC) - add_dependencies(FIRESTARTER_CUDA - HwlocInstall - ) + target_link_libraries(FIRESTARTER_CUDA + firestartercore + ) + + if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + target_link_libraries(FIRESTARTER_CUDA + firestarterlinux + ) endif() target_link_libraries(FIRESTARTER_CUDA - hwloc - AsmJit::AsmJit - Nitro::log - nlohmann_json::nlohmann_json - Threads::Threads CUDA::cuda_driver CUDA::cudart CUDA::curand @@ -114,19 +138,16 @@ elseif ("${FIRESTARTER_BUILD_TYPE}" STREQUAL "FIRESTARTER_ONEAPI") ) target_compile_features(FIRESTARTER_ONEAPI PRIVATE cxx_std_17) - if(FIRESTARTER_BUILD_HWLOC) - add_dependencies(FIRESTARTER_ONEAPI - HwlocInstall - ) - endif() - target_link_libraries(FIRESTARTER_ONEAPI - hwloc - AsmJit::AsmJit - Nitro::log - nlohmann_json::nlohmann_json - Threads::Threads + firestartercore + ) + + if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + target_link_libraries(FIRESTARTER_ONEAPI + firestarterlinux ) + endif() + target_link_libraries(FIRESTARTER_ONEAPI mkl_sycl mkl_intel_ilp64 @@ -160,18 +181,17 @@ elseif("${FIRESTARTER_BUILD_TYPE}" STREQUAL "FIRESTARTER_HIP") ) target_compile_features(FIRESTARTER_HIP PRIVATE cxx_std_17) - if(FIRESTARTER_BUILD_HWLOC) - add_dependencies(FIRESTARTER_HIP - HwlocInstall - ) + target_link_libraries(FIRESTARTER_HIP + firestartercore + ) + + if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + target_link_libraries(FIRESTARTER_HIP + firestarterlinux + ) endif() target_link_libraries(FIRESTARTER_HIP - hwloc - AsmJit::AsmJit - Nitro::log - nlohmann_json::nlohmann_json - Threads::Threads hip::host hip::hiprand roc::hipblas @@ -190,49 +210,51 @@ elseif(${FIRESTARTER_BUILD_TYPE} STREQUAL "FIRESTARTER") ) 
target_compile_features(FIRESTARTER PRIVATE cxx_std_17) + target_link_libraries(FIRESTARTER + firestartercore + ) + + if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + target_link_libraries(FIRESTARTER + firestarterlinux + ) + endif() + # static linking is not supported on Darwin, see Apple Technical QA1118 - if(CMAKE_SYSTEM_NAME STREQUAL "Darwin") - find_library( COREFOUNDATION_LIBRARY CoreFoundation ) - find_library( IOKIT_LIBRARY IOKit ) + if((NOT CMAKE_SYSTEM_NAME STREQUAL "Darwin") AND FIRESTARTER_LINK_STATIC) target_link_libraries(FIRESTARTER - Threads::Threads - ${COREFOUNDATION_LIBRARY} - ${IOKIT_LIBRARY} + -static ) - else() - if (FIRESTARTER_LINK_STATIC) - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -static -DFIRESTARTER_LINK_STATIC") - # we are using pthreads - # static linking with pthreads and std::condition_variabale is evil and will cause segfaults - # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58909#c1 - if(CMAKE_THREAD_LIBS_INIT MATCHES "^.*pthread.*$") - target_link_libraries(FIRESTARTER - "-Wl,--whole-archive -lpthread -Wl,--no-whole-archive" - ) - endif() - else() - if(CMAKE_SYSTEM_NAME STREQUAL "Linux") - target_link_libraries(FIRESTARTER - -ldl - ) - endif() + # we are using pthreads + # static linking with pthreads and std::condition_variabale is evil and will cause segfaults + # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58909#c1 + if(CMAKE_THREAD_LIBS_INIT MATCHES "^.*pthread.*$") target_link_libraries(FIRESTARTER - Threads::Threads + "-Wl,--whole-archive -lpthread -Wl,--no-whole-archive" ) endif() endif() - if(FIRESTARTER_BUILD_HWLOC) - add_dependencies(FIRESTARTER - HwlocInstall + if(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND NOT FIRESTARTER_LINK_STATIC) + target_link_libraries(FIRESTARTER + -ldl ) endif() - target_link_libraries(FIRESTARTER - hwloc - AsmJit::AsmJit - Nitro::log - nlohmann_json::nlohmann_json - ) + if (NOT FIRESTARTER_LINK_STATIC) + target_link_libraries(FIRESTARTER + Threads::Threads + ) + endif() + + # static linking is not 
supported on Darwin, see Apple Technical QA1118 + if(CMAKE_SYSTEM_NAME STREQUAL "Darwin") + find_library( COREFOUNDATION_LIBRARY CoreFoundation ) + find_library( IOKIT_LIBRARY IOKit ) + target_link_libraries(FIRESTARTER + ${COREFOUNDATION_LIBRARY} + ${IOKIT_LIBRARY} + ) + endif() endif() From 121ac174e9caee820b259e6325a66b3be9a66779 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Sun, 24 Nov 2024 11:22:35 +0100 Subject: [PATCH 03/36] add ref test infrastructure and add first ref test --- CMakeLists.txt | 3 + .../Environment/X86/Payload/X86Payload.hpp | 1 + test/CMakeLists.txt | 14 + test/DumpPayloads/CMakeLists.txt | 4 + test/DumpPayloads/Main.cpp | 74 + test/refs/DumpPayloads.log | 1476 +++++++++++++++++ tooling/ref-test.py | 34 + 7 files changed, 1606 insertions(+) create mode 100644 test/CMakeLists.txt create mode 100644 test/DumpPayloads/CMakeLists.txt create mode 100644 test/DumpPayloads/Main.cpp create mode 100644 test/refs/DumpPayloads.log create mode 100644 tooling/ref-test.py diff --git a/CMakeLists.txt b/CMakeLists.txt index c8f580e4..05735444 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,6 @@ cmake_minimum_required(VERSION 3.22) project(FIRESTARTER) +enable_testing() set(CMAKE_EXPORT_COMPILE_COMMANDS ON) @@ -111,3 +112,5 @@ find_package(Threads REQUIRED) include(cmake/InstallHwloc.cmake) add_subdirectory(src) + +add_subdirectory(test) \ No newline at end of file diff --git a/include/firestarter/Environment/X86/Payload/X86Payload.hpp b/include/firestarter/Environment/X86/Payload/X86Payload.hpp index 87ba8e6b..2608c8f4 100644 --- a/include/firestarter/Environment/X86/Payload/X86Payload.hpp +++ b/include/firestarter/Environment/X86/Payload/X86Payload.hpp @@ -558,6 +558,7 @@ class X86Payload : public environment::payload::Payload { /// LoadVar changed. void lowLoadFunction(volatile LoadThreadWorkType& LoadVar, std::chrono::microseconds Period) const final; +public: /// Get the available instruction items that are supported by this payload. 
/// \returns The available instruction items that are supported by this payload. [[nodiscard]] auto getAvailableInstructions() const -> std::list final; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt new file mode 100644 index 00000000..9a40045a --- /dev/null +++ b/test/CMakeLists.txt @@ -0,0 +1,14 @@ +find_package(Python REQUIRED) + +# Function to add reference tests +function(add_ref_test) + set(oneValueArgs NAME) + cmake_parse_arguments(TEST "" "${oneValueArgs}" + "" ${ARGN} ) + + add_test(NAME ${TEST_NAME} + COMMAND ${Python_EXECUTABLE} ${PROJECT_SOURCE_DIR}/tooling/ref-test.py $ ${PROJECT_SOURCE_DIR}/test/refs/${TEST_NAME}.log + ) +endfunction() + +add_subdirectory(DumpPayloads) \ No newline at end of file diff --git a/test/DumpPayloads/CMakeLists.txt b/test/DumpPayloads/CMakeLists.txt new file mode 100644 index 00000000..c42fd887 --- /dev/null +++ b/test/DumpPayloads/CMakeLists.txt @@ -0,0 +1,4 @@ +add_executable(DumpPayloads Main.cpp) +target_link_libraries(DumpPayloads firestartercore) + +add_ref_test(NAME DumpPayloads) \ No newline at end of file diff --git a/test/DumpPayloads/Main.cpp b/test/DumpPayloads/Main.cpp new file mode 100644 index 00000000..7f7986f3 --- /dev/null +++ b/test/DumpPayloads/Main.cpp @@ -0,0 +1,74 @@ +/****************************************************************************** + * FIRESTARTER - A Processor Stress Test Utility + * Copyright (C) 2024 TU Dresden, Center for Information Services and High + * Performance Computing + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Contact: daniel.hackenberg@tu-dresden.de + *****************************************************************************/ + +#include "firestarter/Environment/Payload/Payload.hpp" +#include "firestarter/Environment/X86/Payload/AVX512Payload.hpp" +#include "firestarter/Environment/X86/Payload/AVXPayload.hpp" +#include "firestarter/Environment/X86/Payload/FMA4Payload.hpp" +#include "firestarter/Environment/X86/Payload/FMAPayload.hpp" +#include "firestarter/Environment/X86/Payload/SSE2Payload.hpp" +#include "firestarter/Environment/X86/Payload/ZENFMAPayload.hpp" + +namespace { + +/// Take a list of instructions and return a list with a pair containing the each instruction in the first element of +/// the pair and a one in the second. +auto oneEach(const std::list& Instructions) + -> std::vector { + std::vector OneEach; + for (const auto& Instruction : Instructions) { + OneEach.emplace_back(Instruction, 1); + } + return OneEach; +} + +/// Dump the generated assembler code of the payload with some given settings. Each item is printed once. 
+void dumpPayload(firestarter::environment::payload::Payload& PayloadPtr) { + const auto& Instuctions = PayloadPtr.getAvailableInstructions(); + + firestarter::environment::payload::PayloadSettings Settings(/*Threads=*/{1}, + /*DataCacheBufferSize=*/{32768, 1048576, 1441792}, + /*RamBufferSize=*/1048576000, + /*Lines=*/3 * Instuctions.size(), + /*InstructionGroups=*/oneEach(Instuctions)); + + (void)PayloadPtr.compilePayload(Settings, /*DumpRegisters=*/false, /*ErrorDetection=*/false, + /*PrintAssembler=*/true); +} + +} // namespace + +auto main(int /*argc*/, const char** /*argv*/) -> int { + const std::vector> PayloadPtrs = { + std::make_unique(), + std::make_unique(), + std::make_unique(), + std::make_unique(), + std::make_unique(), + std::make_unique()}; + + for (const auto& PayloadPtr : PayloadPtrs) { + firestarter::log::info() << "Payload " << PayloadPtr->name(); + dumpPayload(*PayloadPtr); + } + + return EXIT_SUCCESS; +} \ No newline at end of file diff --git a/test/refs/DumpPayloads.log b/test/refs/DumpPayloads.log new file mode 100644 index 00000000..64d7faaf --- /dev/null +++ b/test/refs/DumpPayloads.log @@ -0,0 +1,1476 @@ +Payload AVX512 +.section .text +push rbx +push rbp +push r12 +push r13 +push r14 +push r15 +mov rax, rdi +mov r15, rsi +mov r13, rdx +movq mm0, r13 +mov r13, qword ptr [r15] +test r13, r13 +jz L0 +mov r14, 64 +mov edi, 2863311530 +mov esi, 2863311530 +mov edx, 2863311530 +vmovapd zmm0, zmmword ptr [rax] +vmovapd zmm1, zmmword ptr [rax+64] +vmovapd zmm2, zmmword ptr [rax+128] +vmovapd zmm3, zmmword ptr [rax+448] +vmovapd zmm4, zmmword ptr [rax+512] +vmovapd zmm5, zmmword ptr [rax+576] +vmovapd zmm6, zmmword ptr [rax+640] +vmovapd zmm7, zmmword ptr [rax+704] +vmovapd zmm8, zmmword ptr [rax+768] +vmovapd zmm9, zmmword ptr [rax+832] +vmovapd zmm10, zmmword ptr [rax+896] +vmovapd zmm11, zmmword ptr [rax+960] +vmovapd zmm12, zmmword ptr [rax+1024] +vmovapd zmm13, zmmword ptr [rax+1088] +vmovapd zmm14, zmmword ptr [rax+1152] +vmovapd zmm15, 
zmmword ptr [rax+1216] +vmovapd zmm16, zmmword ptr [rax+1280] +vmovapd zmm17, zmmword ptr [rax+1344] +vmovapd zmm18, zmmword ptr [rax+1408] +vmovapd zmm19, zmmword ptr [rax+1472] +vmovapd zmm20, zmmword ptr [rax+1536] +vmovapd zmm21, zmmword ptr [rax+1600] +vmovapd zmm22, zmmword ptr [rax+1664] +vmovapd zmm23, zmmword ptr [rax+1728] +vmovapd zmm24, zmmword ptr [rax+1792] +vmovapd zmm25, zmmword ptr [rax+1856] +vmovapd zmm26, zmmword ptr [rax+1920] +vmovapd zmm27, zmmword ptr [rax+1984] +vmovapd zmm28, zmmword ptr [rax+2048] +vmovapd zmm29, zmmword ptr [rax+2112] +mov rbx, rax +mov rcx, rax +add rcx, 32768 +mov r8, rax +add r8, 1048576 +mov r9, rax +add r9, 1441792 +mov r10, 1456 +mov r11, 1501 +mov r12, 1365333 +.align 64 (code) +L1: +vfmadd231pd zmm4, zmm0, zmm2 +vbroadcastsd zmm4, qword ptr [rbx+64] +add rbx, r14 +shl edi, 1 +vfmadd231pd zmm5, zmm0, zmm2 +vfmadd231pd zmm25, zmm2, zmm1 +xor rdi, r13 +shl esi, 1 +vmovapd zmmword ptr [r9+64], zmm6 +vfmadd231pd zmm6, zmm0, zmm2 +add r9, r14 +shl edx, 1 +vfmadd231pd zmm7, zmm0, zmmword ptr [rbx+64] +prefetcht2 [r9] +add r9, r14 +shr edi, 1 +vmovapd zmmword ptr [r9+64], zmm8 +vfmadd231pd zmm8, zmm0, zmmword ptr [r9+128] +add r9, r14 +shr esi, 1 +vfmadd231pd zmm9, zmm0, zmm2 +vfmadd231pd zmm30, zmm1, zmmword ptr [r9+64] +add r9, r14 +shr edx, 1 +vmovapd zmmword ptr [r8+64], zmm10 +vfmadd231pd zmm10, zmm0, zmm2 +add r8, r14 +shl edi, 1 +vfmadd231pd zmm11, zmm0, zmmword ptr [rbx+64] +prefetcht2 [r8] +add r8, r14 +shl esi, 1 +vmovapd zmmword ptr [r8+64], zmm12 +vfmadd231pd zmm12, zmm0, zmmword ptr [r8+128] +add r8, r14 +shl edx, 1 +vfmadd231pd zmm13, zmm0, zmm2 +vfmadd231pd zmm13, zmm1, zmmword ptr [r8+64] +add r8, r14 +shr edi, 1 +vmovapd zmmword ptr [rcx+64], zmm14 +vfmadd231pd zmm14, zmm0, zmm2 +add rcx, r14 +shr esi, 1 +vmovapd zmmword ptr [rcx+64], zmm15 +vfmadd231pd zmm15, zmm0, zmmword ptr [rcx+128] +add rcx, r14 +shr edx, 1 +vfmadd231pd zmm16, zmm0, zmm2 +vfmadd231pd zmm16, zmm1, zmmword ptr [rcx+64] +add rcx, r14 
+shl edi, 1 +vmovapd zmmword ptr [rbx+64], zmm17 +vfmadd231pd zmm17, zmm0, zmm2 +add rbx, r14 +shl esi, 1 +vmovapd zmmword ptr [rbx+64], zmm18 +vfmadd231pd zmm18, zmm0, zmmword ptr [rbx+128] +add rbx, r14 +shl edx, 1 +vfmadd231pd zmm19, zmm0, zmm2 +vfmadd231pd zmm19, zmm1, zmmword ptr [rbx+64] +add rbx, r14 +shr edi, 1 +vfmadd231pd zmm20, zmm0, zmm2 +vbroadcastsd zmm20, qword ptr [rbx+64] +add rbx, r14 +shr esi, 1 +vfmadd231pd zmm21, zmm0, zmm2 +vfmadd231pd zmm26, zmm2, zmm1 +xor rsi, r13 +shr edx, 1 +vmovapd zmmword ptr [r9+64], zmm22 +vfmadd231pd zmm22, zmm0, zmm2 +add r9, r14 +shl edi, 1 +vfmadd231pd zmm23, zmm0, zmmword ptr [rbx+64] +prefetcht2 [r9] +add r9, r14 +shl esi, 1 +vmovapd zmmword ptr [r9+64], zmm24 +vfmadd231pd zmm24, zmm0, zmmword ptr [r9+128] +add r9, r14 +shl edx, 1 +vfmadd231pd zmm3, zmm0, zmm2 +vfmadd231pd zmm30, zmm1, zmmword ptr [r9+64] +add r9, r14 +shr edi, 1 +vmovapd zmmword ptr [r8+64], zmm4 +vfmadd231pd zmm4, zmm0, zmm2 +add r8, r14 +shr esi, 1 +vfmadd231pd zmm5, zmm0, zmmword ptr [rbx+64] +prefetcht2 [r8] +add r8, r14 +shr edx, 1 +vmovapd zmmword ptr [r8+64], zmm6 +vfmadd231pd zmm6, zmm0, zmmword ptr [r8+128] +add r8, r14 +shl edi, 1 +vfmadd231pd zmm7, zmm0, zmm2 +vfmadd231pd zmm7, zmm1, zmmword ptr [r8+64] +add r8, r14 +shl esi, 1 +vmovapd zmmword ptr [rcx+64], zmm8 +vfmadd231pd zmm8, zmm0, zmm2 +add rcx, r14 +shl edx, 1 +vmovapd zmmword ptr [rcx+64], zmm9 +vfmadd231pd zmm9, zmm0, zmmword ptr [rcx+128] +add rcx, r14 +shr edi, 1 +vfmadd231pd zmm10, zmm0, zmm2 +vfmadd231pd zmm10, zmm1, zmmword ptr [rcx+64] +add rcx, r14 +shr esi, 1 +vmovapd zmmword ptr [rbx+64], zmm11 +vfmadd231pd zmm11, zmm0, zmm2 +add rbx, r14 +shr edx, 1 +vmovapd zmmword ptr [rbx+64], zmm12 +vfmadd231pd zmm12, zmm0, zmmword ptr [rbx+128] +add rbx, r14 +shl edi, 1 +vfmadd231pd zmm13, zmm0, zmm2 +vfmadd231pd zmm13, zmm1, zmmword ptr [rbx+64] +add rbx, r14 +shl esi, 1 +vfmadd231pd zmm14, zmm0, zmm2 +vbroadcastsd zmm14, qword ptr [rbx+64] +add rbx, r14 +shl edx, 1 
+vfmadd231pd zmm15, zmm0, zmm2 +vfmadd231pd zmm27, zmm2, zmm1 +xor rdx, r13 +shr edi, 1 +vmovapd zmmword ptr [r9+64], zmm16 +vfmadd231pd zmm16, zmm0, zmm2 +add r9, r14 +shr esi, 1 +vfmadd231pd zmm17, zmm0, zmmword ptr [rbx+64] +prefetcht2 [r9] +add r9, r14 +shr edx, 1 +vmovapd zmmword ptr [r9+64], zmm18 +vfmadd231pd zmm18, zmm0, zmmword ptr [r9+128] +add r9, r14 +shl edi, 1 +vfmadd231pd zmm19, zmm0, zmm2 +vfmadd231pd zmm30, zmm1, zmmword ptr [r9+64] +add r9, r14 +shl esi, 1 +vmovapd zmmword ptr [r8+64], zmm20 +vfmadd231pd zmm20, zmm0, zmm2 +add r8, r14 +shl edx, 1 +vfmadd231pd zmm21, zmm0, zmmword ptr [rbx+64] +prefetcht2 [r8] +add r8, r14 +shr edi, 1 +vmovapd zmmword ptr [r8+64], zmm22 +vfmadd231pd zmm22, zmm0, zmmword ptr [r8+128] +add r8, r14 +shr esi, 1 +vfmadd231pd zmm23, zmm0, zmm2 +vfmadd231pd zmm23, zmm1, zmmword ptr [r8+64] +add r8, r14 +shr edx, 1 +vmovapd zmmword ptr [rcx+64], zmm24 +vfmadd231pd zmm24, zmm0, zmm2 +add rcx, r14 +shl edi, 1 +vmovapd zmmword ptr [rcx+64], zmm3 +vfmadd231pd zmm3, zmm0, zmmword ptr [rcx+128] +add rcx, r14 +shl esi, 1 +vfmadd231pd zmm4, zmm0, zmm2 +vfmadd231pd zmm4, zmm1, zmmword ptr [rcx+64] +add rcx, r14 +shl edx, 1 +vmovapd zmmword ptr [rbx+64], zmm5 +vfmadd231pd zmm5, zmm0, zmm2 +add rbx, r14 +shr edi, 1 +vmovapd zmmword ptr [rbx+64], zmm6 +vfmadd231pd zmm6, zmm0, zmmword ptr [rbx+128] +add rbx, r14 +shr esi, 1 +vfmadd231pd zmm7, zmm0, zmm2 +vfmadd231pd zmm7, zmm1, zmmword ptr [rbx+64] +add rbx, r14 +shr edx, 1 +movq r13, mm0 +sub r12, 1 +jnz L2 +mov r12, 1365333 +mov r9, rax +add r9, 1441792 +L2: +inc r13 +sub r10, 1 +jnz L3 +mov r10, 1456 +mov rcx, rax +add rcx, 32768 +L3: +movq mm0, r13 +sub r11, 1 +jnz L4 +mov r11, 1501 +mov r8, rax +add r8, 1048576 +L4: +mov rbx, rax +test qword ptr [r15], 1 +jnz L1 +L0: +movq rax, mm0 +pop r15 +pop r14 +pop r13 +pop r12 +pop rbp +pop rbx +ret + +Payload FMA +.section .text +push rbx +push rbp +push r12 +push r13 +push r14 +push r15 +mov rax, rdi +mov r15, rsi +mov r13, rdx +movq mm0, 
r13 +mov r13, qword ptr [r15] +test r13, r13 +jz L0 +mov r14, 64 +mov edi, 2863311530 +mov esi, 2863311530 +mov edx, 2863311530 +vmovapd ymm0, ymmword ptr [rax] +vmovapd ymm1, ymmword ptr [rax+32] +vmovapd ymm2, ymmword ptr [rax+64] +vmovapd ymm3, ymmword ptr [rax+352] +vmovapd ymm4, ymmword ptr [rax+384] +vmovapd ymm5, ymmword ptr [rax+416] +vmovapd ymm6, ymmword ptr [rax+448] +vmovapd ymm7, ymmword ptr [rax+480] +vmovapd ymm8, ymmword ptr [rax+512] +vmovapd ymm9, ymmword ptr [rax+544] +vmovapd ymm10, ymmword ptr [rax+576] +vmovapd ymm11, ymmword ptr [rax+608] +vmovapd ymm12, ymmword ptr [rax+640] +vmovapd ymm13, ymmword ptr [rax+672] +vmovapd ymm14, ymmword ptr [rax+704] +mov rbx, rax +mov rcx, rax +add rcx, 32768 +mov r8, rax +add r8, 1048576 +mov r9, rax +add r9, 1441792 +mov r10, 873 +mov r11, 1201 +mov r12, 1365333 +.align 64 (code) +L1: +vfmadd231pd ymm4, ymm0, ymmword ptr [rbx+32] +vfmadd231pd ymm12, ymm1, ymmword ptr [rbx+64] +add rbx, r14 +shl edi, 1 +vfmadd231pd ymm5, ymm0, ymm2 +vfmadd231pd ymm12, ymm2, ymm1 +xor rdi, r13 +shl esi, 1 +vmovapd xmmword ptr [r9+64], xmm6 +vfmadd231pd ymm6, ymm0, ymm2 +add r9, r14 +shl edx, 1 +vfmadd231pd ymm7, ymm0, ymmword ptr [rbx+32] +prefetcht2 [r9] +add r9, r14 +shr edi, 1 +vmovapd xmmword ptr [r9+64], xmm8 +vfmadd231pd ymm8, ymm0, ymmword ptr [r9+32] +add r9, r14 +shr esi, 1 +vfmadd231pd ymm9, ymm0, ymm2 +vfmadd231pd ymm15, ymm1, ymmword ptr [r9+64] +add r9, r14 +shr edx, 1 +vmovapd xmmword ptr [r8+96], xmm10 +vfmadd231pd ymm10, ymm0, ymm2 +add r8, r14 +shl edi, 1 +vfmadd231pd ymm11, ymm0, ymmword ptr [rbx+32] +prefetcht2 [r8] +add r8, r14 +shl esi, 1 +vmovapd ymmword ptr [r8+96], ymm3 +vfmadd231pd ymm3, ymm0, ymmword ptr [r8+64] +add r8, r14 +shl edx, 1 +vmovapd xmmword ptr [r8+96], xmm4 +vfmadd231pd ymm4, ymm0, ymmword ptr [r8+64] +add r8, r14 +shr edi, 1 +vfmadd231pd ymm5, ymm0, ymm2 +vfmadd231pd ymm5, ymm1, ymmword ptr [r8+64] +add r8, r14 +shr esi, 1 +vmovapd xmmword ptr [rcx+64], xmm6 +vfmadd231pd ymm6, ymm0, 
ymm2 +add rcx, r14 +shr edx, 1 +vmovapd ymmword ptr [rcx+96], ymm7 +vfmadd231pd ymm7, ymm0, [rcx+64] +add rcx, r14 +shl edi, 1 +vmovapd xmmword ptr [rcx+96], xmm8 +vfmadd231pd ymm8, ymm0, ymmword ptr [rcx+64] +add rcx, r14 +shl esi, 1 +vfmadd231pd ymm9, ymm0, ymm2 +vfmadd231pd ymm9, ymm1, ymmword ptr [rcx+64] +add rcx, r14 +shl edx, 1 +vfmadd231pd ymm10, ymm0, [rcx+64] +vfmadd231pd ymm13, ymm1, [rcx+96] +vmovapd ymmword ptr [rcx+32], ymm10 +add rcx, 128 +vmovapd xmmword ptr [rbx+32], xmm11 +vfmadd231pd ymm11, ymm0, ymm2 +add rbx, r14 +shr esi, 1 +vfmadd231pd ymm3, ymm0, ymmword ptr [rbx+64] +vmovapd ymmword ptr [rbx+32], ymm3 +add rbx, r14 +shr edx, 1 +vmovapd xmmword ptr [rbx+64], xmm4 +vfmadd231pd ymm4, ymm0, ymmword ptr [rbx+32] +add rbx, r14 +shl edi, 1 +vfmadd231pd ymm5, ymm0, ymm2 +vfmadd231pd ymm5, ymm1, ymmword ptr [rbx+32] +add rbx, r14 +shl esi, 1 +vfmadd231pd ymm6, ymm0, ymmword ptr [rbx+64] +vfmadd231pd ymm13, ymm1, ymmword ptr [rbx+96] +vmovapd ymmword ptr [rbx+32], ymm6 +add rbx, r14 +vfmadd231pd ymm7, ymm0, ymmword ptr [rbx+32] +vfmadd231pd ymm13, ymm1, ymmword ptr [rbx+64] +add rbx, r14 +shr edi, 1 +vfmadd231pd ymm8, ymm0, ymm2 +vfmadd231pd ymm13, ymm2, ymm1 +xor rdi, r13 +shr esi, 1 +vmovapd xmmword ptr [r9+64], xmm9 +vfmadd231pd ymm9, ymm0, ymm2 +add r9, r14 +shr edx, 1 +vfmadd231pd ymm10, ymm0, ymmword ptr [rbx+32] +prefetcht2 [r9] +add r9, r14 +shl edi, 1 +vmovapd xmmword ptr [r9+64], xmm11 +vfmadd231pd ymm11, ymm0, ymmword ptr [r9+32] +add r9, r14 +shl esi, 1 +vfmadd231pd ymm3, ymm0, ymm2 +vfmadd231pd ymm15, ymm1, ymmword ptr [r9+64] +add r9, r14 +shl edx, 1 +vmovapd xmmword ptr [r8+96], xmm4 +vfmadd231pd ymm4, ymm0, ymm2 +add r8, r14 +shr edi, 1 +vfmadd231pd ymm5, ymm0, ymmword ptr [rbx+32] +prefetcht2 [r8] +add r8, r14 +shr esi, 1 +vmovapd ymmword ptr [r8+96], ymm6 +vfmadd231pd ymm6, ymm0, ymmword ptr [r8+64] +add r8, r14 +shr edx, 1 +vmovapd xmmword ptr [r8+96], xmm7 +vfmadd231pd ymm7, ymm0, ymmword ptr [r8+64] +add r8, r14 +shl edi, 1 
+vfmadd231pd ymm8, ymm0, ymm2 +vfmadd231pd ymm8, ymm1, ymmword ptr [r8+64] +add r8, r14 +shl esi, 1 +vmovapd xmmword ptr [rcx+64], xmm9 +vfmadd231pd ymm9, ymm0, ymm2 +add rcx, r14 +shl edx, 1 +vmovapd ymmword ptr [rcx+96], ymm10 +vfmadd231pd ymm10, ymm0, [rcx+64] +add rcx, r14 +shr edi, 1 +vmovapd xmmword ptr [rcx+96], xmm11 +vfmadd231pd ymm11, ymm0, ymmword ptr [rcx+64] +add rcx, r14 +shr esi, 1 +vfmadd231pd ymm3, ymm0, ymm2 +vfmadd231pd ymm3, ymm1, ymmword ptr [rcx+64] +add rcx, r14 +shr edx, 1 +vfmadd231pd ymm4, ymm0, [rcx+64] +vfmadd231pd ymm14, ymm1, [rcx+96] +vmovapd ymmword ptr [rcx+32], ymm4 +add rcx, 128 +vmovapd xmmword ptr [rbx+32], xmm5 +vfmadd231pd ymm5, ymm0, ymm2 +add rbx, r14 +shl esi, 1 +vfmadd231pd ymm6, ymm0, ymmword ptr [rbx+64] +vmovapd ymmword ptr [rbx+32], ymm6 +add rbx, r14 +shl edx, 1 +vmovapd xmmword ptr [rbx+64], xmm7 +vfmadd231pd ymm7, ymm0, ymmword ptr [rbx+32] +add rbx, r14 +shr edi, 1 +vfmadd231pd ymm8, ymm0, ymm2 +vfmadd231pd ymm8, ymm1, ymmword ptr [rbx+32] +add rbx, r14 +shr esi, 1 +vfmadd231pd ymm9, ymm0, ymmword ptr [rbx+64] +vfmadd231pd ymm14, ymm1, ymmword ptr [rbx+96] +vmovapd ymmword ptr [rbx+32], ymm9 +add rbx, r14 +vfmadd231pd ymm10, ymm0, ymmword ptr [rbx+32] +vfmadd231pd ymm14, ymm1, ymmword ptr [rbx+64] +add rbx, r14 +shl edi, 1 +vfmadd231pd ymm11, ymm0, ymm2 +vfmadd231pd ymm14, ymm2, ymm1 +xor rdi, r13 +shl esi, 1 +vmovapd xmmword ptr [r9+64], xmm3 +vfmadd231pd ymm3, ymm0, ymm2 +add r9, r14 +shl edx, 1 +vfmadd231pd ymm4, ymm0, ymmword ptr [rbx+32] +prefetcht2 [r9] +add r9, r14 +shr edi, 1 +vmovapd xmmword ptr [r9+64], xmm5 +vfmadd231pd ymm5, ymm0, ymmword ptr [r9+32] +add r9, r14 +shr esi, 1 +vfmadd231pd ymm6, ymm0, ymm2 +vfmadd231pd ymm15, ymm1, ymmword ptr [r9+64] +add r9, r14 +shr edx, 1 +vmovapd xmmword ptr [r8+96], xmm7 +vfmadd231pd ymm7, ymm0, ymm2 +add r8, r14 +shl edi, 1 +vfmadd231pd ymm8, ymm0, ymmword ptr [rbx+32] +prefetcht2 [r8] +add r8, r14 +shl esi, 1 +vmovapd ymmword ptr [r8+96], ymm9 +vfmadd231pd ymm9, 
ymm0, ymmword ptr [r8+64] +add r8, r14 +shl edx, 1 +vmovapd xmmword ptr [r8+96], xmm10 +vfmadd231pd ymm10, ymm0, ymmword ptr [r8+64] +add r8, r14 +shr edi, 1 +vfmadd231pd ymm11, ymm0, ymm2 +vfmadd231pd ymm11, ymm1, ymmword ptr [r8+64] +add r8, r14 +shr esi, 1 +vmovapd xmmword ptr [rcx+64], xmm3 +vfmadd231pd ymm3, ymm0, ymm2 +add rcx, r14 +shr edx, 1 +vmovapd ymmword ptr [rcx+96], ymm4 +vfmadd231pd ymm4, ymm0, [rcx+64] +add rcx, r14 +shl edi, 1 +vmovapd xmmword ptr [rcx+96], xmm5 +vfmadd231pd ymm5, ymm0, ymmword ptr [rcx+64] +add rcx, r14 +shl esi, 1 +vfmadd231pd ymm6, ymm0, ymm2 +vfmadd231pd ymm6, ymm1, ymmword ptr [rcx+64] +add rcx, r14 +shl edx, 1 +vfmadd231pd ymm7, ymm0, [rcx+64] +vfmadd231pd ymm12, ymm1, [rcx+96] +vmovapd ymmword ptr [rcx+32], ymm7 +add rcx, 128 +vmovapd xmmword ptr [rbx+32], xmm8 +vfmadd231pd ymm8, ymm0, ymm2 +add rbx, r14 +shr esi, 1 +vfmadd231pd ymm9, ymm0, ymmword ptr [rbx+64] +vmovapd ymmword ptr [rbx+32], ymm9 +add rbx, r14 +shr edx, 1 +vmovapd xmmword ptr [rbx+64], xmm10 +vfmadd231pd ymm10, ymm0, ymmword ptr [rbx+32] +add rbx, r14 +shl edi, 1 +vfmadd231pd ymm11, ymm0, ymm2 +vfmadd231pd ymm11, ymm1, ymmword ptr [rbx+32] +add rbx, r14 +shl esi, 1 +vfmadd231pd ymm3, ymm0, ymmword ptr [rbx+64] +vfmadd231pd ymm12, ymm1, ymmword ptr [rbx+96] +vmovapd ymmword ptr [rbx+32], ymm3 +add rbx, r14 +movq r13, mm0 +sub r12, 1 +jnz L2 +mov r12, 1365333 +mov r9, rax +add r9, 1441792 +L2: +inc r13 +sub r10, 1 +jnz L3 +mov r10, 873 +mov rcx, rax +add rcx, 32768 +L3: +movq mm0, r13 +sub r11, 1 +jnz L4 +mov r11, 1201 +mov r8, rax +add r8, 1048576 +L4: +mov rbx, rax +test qword ptr [r15], 1 +jnz L1 +L0: +movq rax, mm0 +pop r15 +pop r14 +pop r13 +pop r12 +pop rbp +pop rbx +ret + +Payload ZENFMA +.section .text +push rbx +push rbp +push r12 +push r13 +push r14 +push r15 +mov rax, rdi +mov r15, rsi +mov r13, rdx +movq mm0, r13 +mov r13, qword ptr [r15] +test r13, r13 +jz L0 +mov r14, 64 +mov rdi, -6148914691236517206 +mov rsi, -6148914691236517206 +mov rdx, 
-6148914691236517206 +vmovapd ymm0, ymmword ptr [rax] +vmovapd ymm1, ymmword ptr [rax+32] +vmovapd ymm2, ymmword ptr [rax+320] +vmovapd ymm3, ymmword ptr [rax+352] +vmovapd ymm4, ymmword ptr [rax+384] +vmovapd ymm5, ymmword ptr [rax+416] +vmovapd ymm6, ymmword ptr [rax+448] +vmovapd ymm7, ymmword ptr [rax+480] +vmovapd ymm8, ymmword ptr [rax+512] +vmovapd ymm9, ymmword ptr [rax+544] +vmovapd ymm10, ymmword ptr [rax+576] +vmovapd ymm11, ymmword ptr [rax+608] +vmovapd ymm12, ymmword ptr [rax+640] +vbroadcastss xmm13, xmm13 +vmovapd xmm14, xmm13 +vpsrlq xmm14, xmm14, 1 +mov rbx, rax +mov rcx, rax +add rcx, 32768 +mov r8, rax +add r8, 1048576 +mov r9, rax +add r9, 1441792 +mov r10, 4369 +mov r11, 6007 +mov r12, 5461333 +.align 64 (code) +L1: +vfmadd231pd ymm2, ymm0, ymmword ptr [rbx+32] +vmovapd xmmword ptr [rbx+64], xmm2 +add rbx, r14 +vpsrlq xmm13, xmm13, 1 +vfmadd231pd ymm3, ymm1, ymm0 +xor r13, rdi +shl rsi, 1 +vpsllq xmm14, xmm14, 1 +vfmadd231pd ymm15, ymm0, ymmword ptr [r9+32] +xor r13, rsi +add r9, r14 +vpsllq xmm13, xmm13, 1 +vfmadd231pd ymm5, ymm1, ymmword ptr [r8+64] +xor r13, rdx +add r8, r14 +vpsrlq xmm14, xmm14, 1 +vfmadd231pd ymm6, ymm0, ymmword ptr [rcx+64] +xor r13, rdi +add rcx, r14 +vpsrlq xmm13, xmm13, 1 +vfmadd231pd ymm7, ymm1, ymmword ptr [rbx+32] +vmovapd xmmword ptr [rbx+64], xmm7 +add rbx, r14 +vpsllq xmm14, xmm14, 1 +vfmadd231pd ymm8, ymm0, ymm1 +xor r13, rdx +shl rdi, 1 +vpsllq xmm13, xmm13, 1 +vfmadd231pd ymm15, ymm1, ymmword ptr [r9+32] +xor r13, rdi +add r9, r14 +vpsrlq xmm14, xmm14, 1 +vfmadd231pd ymm10, ymm0, ymmword ptr [r8+64] +xor r13, rsi +add r8, r14 +vpsrlq xmm13, xmm13, 1 +vfmadd231pd ymm11, ymm1, ymmword ptr [rcx+64] +xor r13, rdx +add rcx, r14 +vpsllq xmm14, xmm14, 1 +vfmadd231pd ymm12, ymm0, ymmword ptr [rbx+32] +vmovapd xmmword ptr [rbx+64], xmm12 +add rbx, r14 +vpsllq xmm13, xmm13, 1 +vfmadd231pd ymm2, ymm1, ymm0 +xor r13, rsi +shr rdx, 1 +vpsrlq xmm14, xmm14, 1 +vfmadd231pd ymm15, ymm0, ymmword ptr [r9+32] +xor r13, rdx +add 
r9, r14 +vfmadd231pd ymm4, ymm1, ymmword ptr [r8+64] +xor r13, rdi +add r8, r14 +vfmadd231pd ymm5, ymm0, ymmword ptr [rcx+64] +xor r13, rsi +add rcx, r14 +movq r13, mm0 +sub r12, 1 +jnz L2 +mov r12, 5461333 +mov r9, rax +add r9, 1441792 +L2: +inc r13 +sub r10, 1 +jnz L3 +mov r10, 4369 +mov rcx, rax +add rcx, 32768 +L3: +movq mm0, r13 +sub r11, 1 +jnz L4 +mov r11, 6007 +mov r8, rax +add r8, 1048576 +L4: +mov rbx, rax +test qword ptr [r15], 1 +jnz L1 +L0: +movq rax, mm0 +pop r15 +pop r14 +pop r13 +pop r12 +pop rbp +pop rbx +ret + +Payload FMA4 +.section .text +push rbx +push rbp +push r12 +push r13 +push r14 +push r15 +mov rax, rdi +mov r15, rsi +mov r13, rdx +movq mm0, r13 +mov r13, qword ptr [r15] +test r13, r13 +jz L0 +mov r14, 64 +mov edi, 2863311530 +mov esi, 2863311530 +mov edx, 2863311530 +vmovapd ymm0, ymmword ptr [rax] +vmovapd ymm1, ymmword ptr [rax] +vmovapd ymm2, ymmword ptr [rax+320] +vmovapd ymm3, ymmword ptr [rax+352] +vmovapd ymm4, ymmword ptr [rax+384] +vmovapd ymm5, ymmword ptr [rax+416] +vmovapd ymm6, ymmword ptr [rax+448] +vmovapd ymm7, ymmword ptr [rax+480] +vmovapd ymm8, ymmword ptr [rax+512] +vmovapd ymm9, ymmword ptr [rax+544] +vmovapd ymm10, ymmword ptr [rax+576] +vmovapd ymm11, ymmword ptr [rax+608] +vmovapd ymm12, ymmword ptr [rax+640] +vmovapd ymm13, ymmword ptr [rax+672] +mov rbx, rax +mov rcx, rax +add rcx, 32768 +mov r8, rax +add r8, 1048576 +mov r9, rax +add r9, 1441792 +mov r10, 1456 +mov r11, 1501 +mov r12, 1365333 +.align 64 (code) +L1: +vfmaddpd xmm3, xmm3, xmm0, xmm4 +vfmaddpd ymm3, ymm3, ymm1, ymmword ptr [rbx+32] +add rbx, r14 +shl edi, 1 +vfmaddpd xmm4, xmm4, xmm0, xmm5 +vfmaddpd xmm11, xmm11, xmm1, xmm6 +xor rdi, r13 +shl esi, 1 +vmovapd xmmword ptr [r9+64], xmm5 +vfmaddpd xmm5, xmm5, xmm0, xmm6 +add r9, r14 +shl edx, 1 +vfmaddpd xmm6, xmm6, xmm0, xmmword ptr [rbx+32] +prefetcht2 [r9] +add r9, r14 +shr edi, 1 +vmovapd xmmword ptr [r9+64], xmm7 +vfmaddpd xmm7, xmm7, xmm0, xmmword ptr [r9+32] +add r9, r14 +shr esi, 1 +vfmaddpd 
xmm8, xmm8, xmm0, xmm9 +vfmaddpd xmm15, xmm15, xmm1, xmmword ptr [r9+64] +add r9, r14 +shr edx, 1 +vmovapd xmmword ptr [r8+96], xmm9 +vfmaddpd xmm9, xmm9, xmm0, xmm10 +add r8, r14 +shl edi, 1 +vfmaddpd xmm10, xmm10, xmm0, xmmword ptr [rbx+32] +prefetcht2 [r8] +add r8, r14 +shl esi, 1 +vmovapd xmmword ptr [r8+96], xmm2 +vfmaddpd xmm2, xmm2, xmm0, xmmword ptr [r8+64] +add r8, r14 +shl edx, 1 +vfmaddpd xmm3, xmm3, xmm0, xmm4 +vfmaddpd xmm3, xmm3, xmm1, xmmword ptr [r8+64] +add r8, r14 +shr edi, 1 +vmovapd xmmword ptr [rcx+64], xmm4 +vfmaddpd xmm4, xmm4, xmm0, xmm5 +add rcx, r14 +shr esi, 1 +vmovapd xmmword ptr [rcx+96], xmm5 +vfmaddpd xmm5, xmm5, xmm0, xmmword ptr [rcx+64] +add rcx, r14 +shr edx, 1 +vfmaddpd xmm6, xmm6, xmm0, xmm7 +vfmaddpd xmm6, xmm6, xmm1, xmmword ptr [rcx+64] +add rcx, r14 +shl edi, 1 +vmovapd xmmword ptr [rbx+32], xmm7 +vfmaddpd ymm7, ymm7, ymm0, ymm8 +add rbx, r14 +shl esi, 1 +vmovapd xmmword ptr [rbx+64], xmm8 +vfmaddpd ymm8, ymm8, ymm0, ymmword ptr [rbx+32] +add rbx, r14 +shl edx, 1 +vfmaddpd xmm9, xmm9, xmm0, xmm10 +vfmaddpd ymm9, ymm9, ymm1, ymmword ptr [rbx+32] +add rbx, r14 +shr edi, 1 +vfmaddpd xmm10, xmm10, xmm0, xmm2 +vfmaddpd xmm12, xmm12, xmm1, xmm3 +xor rdi, r13 +shr esi, 1 +vmovapd xmmword ptr [r9+64], xmm2 +vfmaddpd xmm2, xmm2, xmm0, xmm3 +add r9, r14 +shr edx, 1 +vfmaddpd xmm3, xmm3, xmm0, xmmword ptr [rbx+32] +prefetcht2 [r9] +add r9, r14 +shl edi, 1 +vmovapd xmmword ptr [r9+64], xmm4 +vfmaddpd xmm4, xmm4, xmm0, xmmword ptr [r9+32] +add r9, r14 +shl esi, 1 +vfmaddpd xmm5, xmm5, xmm0, xmm6 +vfmaddpd xmm15, xmm15, xmm1, xmmword ptr [r9+64] +add r9, r14 +shl edx, 1 +vmovapd xmmword ptr [r8+96], xmm6 +vfmaddpd xmm6, xmm6, xmm0, xmm7 +add r8, r14 +shr edi, 1 +vfmaddpd xmm7, xmm7, xmm0, xmmword ptr [rbx+32] +prefetcht2 [r8] +add r8, r14 +shr esi, 1 +vmovapd xmmword ptr [r8+96], xmm8 +vfmaddpd xmm8, xmm8, xmm0, xmmword ptr [r8+64] +add r8, r14 +shr edx, 1 +vfmaddpd xmm9, xmm9, xmm0, xmm10 +vfmaddpd xmm9, xmm9, xmm1, xmmword ptr [r8+64] 
+add r8, r14 +shl edi, 1 +vmovapd xmmword ptr [rcx+64], xmm10 +vfmaddpd xmm10, xmm10, xmm0, xmm2 +add rcx, r14 +shl esi, 1 +vmovapd xmmword ptr [rcx+96], xmm2 +vfmaddpd xmm2, xmm2, xmm0, xmmword ptr [rcx+64] +add rcx, r14 +shl edx, 1 +vfmaddpd xmm3, xmm3, xmm0, xmm4 +vfmaddpd xmm3, xmm3, xmm1, xmmword ptr [rcx+64] +add rcx, r14 +shr edi, 1 +vmovapd xmmword ptr [rbx+32], xmm4 +vfmaddpd ymm4, ymm4, ymm0, ymm5 +add rbx, r14 +shr esi, 1 +vmovapd xmmword ptr [rbx+64], xmm5 +vfmaddpd ymm5, ymm5, ymm0, ymmword ptr [rbx+32] +add rbx, r14 +shr edx, 1 +vfmaddpd xmm6, xmm6, xmm0, xmm7 +vfmaddpd ymm6, ymm6, ymm1, ymmword ptr [rbx+32] +add rbx, r14 +shl edi, 1 +vfmaddpd xmm7, xmm7, xmm0, xmm8 +vfmaddpd xmm13, xmm13, xmm1, xmm9 +xor rdi, r13 +shl esi, 1 +vmovapd xmmword ptr [r9+64], xmm8 +vfmaddpd xmm8, xmm8, xmm0, xmm9 +add r9, r14 +shl edx, 1 +vfmaddpd xmm9, xmm9, xmm0, xmmword ptr [rbx+32] +prefetcht2 [r9] +add r9, r14 +shr edi, 1 +vmovapd xmmword ptr [r9+64], xmm10 +vfmaddpd xmm10, xmm10, xmm0, xmmword ptr [r9+32] +add r9, r14 +shr esi, 1 +vfmaddpd xmm2, xmm2, xmm0, xmm3 +vfmaddpd xmm15, xmm15, xmm1, xmmword ptr [r9+64] +add r9, r14 +shr edx, 1 +vmovapd xmmword ptr [r8+96], xmm3 +vfmaddpd xmm3, xmm3, xmm0, xmm4 +add r8, r14 +shl edi, 1 +vfmaddpd xmm4, xmm4, xmm0, xmmword ptr [rbx+32] +prefetcht2 [r8] +add r8, r14 +shl esi, 1 +vmovapd xmmword ptr [r8+96], xmm5 +vfmaddpd xmm5, xmm5, xmm0, xmmword ptr [r8+64] +add r8, r14 +shl edx, 1 +vfmaddpd xmm6, xmm6, xmm0, xmm7 +vfmaddpd xmm6, xmm6, xmm1, xmmword ptr [r8+64] +add r8, r14 +shr edi, 1 +vmovapd xmmword ptr [rcx+64], xmm7 +vfmaddpd xmm7, xmm7, xmm0, xmm8 +add rcx, r14 +shr esi, 1 +vmovapd xmmword ptr [rcx+96], xmm8 +vfmaddpd xmm8, xmm8, xmm0, xmmword ptr [rcx+64] +add rcx, r14 +shr edx, 1 +vfmaddpd xmm9, xmm9, xmm0, xmm10 +vfmaddpd xmm9, xmm9, xmm1, xmmword ptr [rcx+64] +add rcx, r14 +shl edi, 1 +vmovapd xmmword ptr [rbx+32], xmm10 +vfmaddpd ymm10, ymm10, ymm0, ymm2 +add rbx, r14 +shl esi, 1 +vmovapd xmmword ptr [rbx+64], xmm2 
+vfmaddpd ymm2, ymm2, ymm0, ymmword ptr [rbx+32] +add rbx, r14 +shl edx, 1 +movq r13, mm0 +sub r12, 1 +jnz L2 +mov r12, 1365333 +mov r9, rax +add r9, 1441792 +L2: +inc r13 +sub r10, 1 +jnz L3 +mov r10, 1456 +mov rcx, rax +add rcx, 32768 +L3: +movq mm0, r13 +sub r11, 1 +jnz L4 +mov r11, 1501 +mov r8, rax +add r8, 1048576 +L4: +mov rbx, rax +test qword ptr [r15], 1 +jnz L1 +L0: +movq rax, mm0 +pop r15 +pop r14 +pop r13 +pop r12 +pop rbp +pop rbx +ret + +Payload AVX +.section .text +push rbx +push rbp +push r12 +push r13 +push r14 +mov rax, rdi +mov r13, rsi +mov r14, rdx +mov r11, qword ptr [r13] +test r11, r11 +jz L0 +mov r12, 64 +vmovapd ymm0, ymmword ptr [rax] +vmovapd ymm1, ymmword ptr [rax+32] +vmovapd ymm2, ymmword ptr [rax+64] +vmovapd ymm3, ymmword ptr [rax+96] +vmovapd ymm4, ymmword ptr [rax+128] +vmovapd ymm5, ymmword ptr [rax+160] +vmovapd ymm6, ymmword ptr [rax+192] +vmovapd ymm7, ymmword ptr [rax+224] +vmovapd ymm8, ymmword ptr [rax+256] +vmovapd ymm9, ymmword ptr [rax+288] +mov r11, 6148914691236517205 +movq mm0, r11 +movq mm1, mm0 +movq mm2, mm0 +movq mm3, mm0 +movq mm4, mm0 +movq mm5, mm0 +mov r11, 1085102592571150095 +pinsrq xmm10, r11, 0 +pinsrq xmm10, r11, 1 +vinsertf128 ymm10, ymm10, xmm10, 1 +shl r11, 4 +pinsrq xmm11, r11, 0 +pinsrq xmm11, r11, 1 +vinsertf128 ymm11, ymm11, xmm11, 1 +shr r11, 4 +pinsrq xmm12, r11, 0 +pinsrq xmm12, r11, 1 +vinsertf128 ymm12, ymm12, xmm12, 1 +shl r11, 4 +pinsrq xmm13, r11, 0 +pinsrq xmm13, r11, 1 +vinsertf128 ymm13, ymm13, xmm13, 1 +shr r11, 4 +pinsrq xmm14, r11, 0 +pinsrq xmm14, r11, 1 +vinsertf128 ymm14, ymm14, xmm14, 1 +shl r11, 4 +pinsrq xmm15, r11, 0 +pinsrq xmm15, r11, 1 +vinsertf128 ymm15, ymm15, xmm15, 1 +mov rbx, rax +mov rcx, rax +add rcx, 32768 +mov rdx, rax +add rdx, 1048576 +mov rdi, rax +add rdi, 1441792 +mov r8, 1456 +mov r9, 1501 +mov r10, 1365333 +.align 64 (code) +L1: +vaddpd ymm1, ymm1, ymmword ptr [rbx+32] +add rbx, r12 +psllw mm3, mm0 +vaddpd ymm2, ymm2, ymm3 +vmovdqa ymm11, ymm12 +psllw mm4, 
mm1 +vaddpd ymm3, ymm3, ymm2 +vmovapd xmmword ptr [rdi+64], xmm3 +add rdi, r12 +psllw mm5, mm2 +vaddpd ymm4, ymm4, ymmword ptr [rbx+32] +prefetcht2 [rdi] +add rdi, r12 +psllw mm0, mm3 +vaddpd ymm5, ymm5, ymmword ptr [rdx+64] +vmovapd xmmword ptr [rdi+64], xmm5 +add rdi, r12 +psllw mm1, mm4 +vaddpd ymm6, ymm6, ymmword ptr [rdi+64] +add rdi, r12 +psllw mm2, mm5 +vaddpd ymm7, ymm7, ymm6 +vmovapd xmmword ptr [rdx+96], xmm7 +add rdx, r12 +psrlw mm3, mm0 +vaddpd ymm8, ymm8, ymmword ptr [rbx+32] +prefetcht0 [rdx] +add rdx, r12 +psrlw mm4, mm1 +vaddpd ymm9, ymm9, ymmword ptr [rdx+64] +vmovapd xmmword ptr [rdx+96], xmm9 +add rdx, r12 +psrlw mm5, mm2 +vaddpd ymm1, ymm1, ymmword ptr [rdx+64] +add rdx, r12 +psrlw mm0, mm3 +vaddpd ymm2, ymm2, ymm1 +vmovapd xmmword ptr [rcx+64], xmm2 +add rcx, r12 +psrlw mm1, mm4 +vaddpd ymm3, ymm3, ymmword ptr [rcx+64] +vmovapd xmmword ptr [rcx+96], xmm3 +add rcx, r12 +psrlw mm2, mm5 +vaddpd ymm4, ymm4, ymmword ptr [rcx+64] +add rcx, r12 +psllw mm3, mm0 +vaddpd ymm5, ymm5, ymm4 +vmovapd xmmword ptr [rbx+32], xmm5 +add rbx, r12 +psllw mm4, mm1 +vaddpd ymm6, ymm6, ymmword ptr [rbx+32] +vmovapd xmmword ptr [rbx+64], xmm6 +add rbx, r12 +psllw mm5, mm2 +vaddpd ymm7, ymm7, ymmword ptr [rbx+32] +add rbx, r12 +psllw mm0, mm3 +vaddpd ymm8, ymm8, ymm9 +vmovdqa ymm14, ymm15 +psllw mm1, mm4 +vaddpd ymm9, ymm9, ymm8 +vmovapd xmmword ptr [rdi+64], xmm9 +add rdi, r12 +psllw mm2, mm5 +vaddpd ymm1, ymm1, ymmword ptr [rbx+32] +prefetcht2 [rdi] +add rdi, r12 +psrlw mm3, mm0 +vaddpd ymm2, ymm2, ymmword ptr [rdx+64] +vmovapd xmmword ptr [rdi+64], xmm2 +add rdi, r12 +psrlw mm4, mm1 +vaddpd ymm3, ymm3, ymmword ptr [rdi+64] +add rdi, r12 +psrlw mm5, mm2 +vaddpd ymm4, ymm4, ymm3 +vmovapd xmmword ptr [rdx+96], xmm4 +add rdx, r12 +psrlw mm0, mm3 +vaddpd ymm5, ymm5, ymmword ptr [rbx+32] +prefetcht0 [rdx] +add rdx, r12 +psrlw mm1, mm4 +vaddpd ymm6, ymm6, ymmword ptr [rdx+64] +vmovapd xmmword ptr [rdx+96], xmm6 +add rdx, r12 +psrlw mm2, mm5 +vaddpd ymm7, ymm7, ymmword ptr 
[rdx+64] +add rdx, r12 +psllw mm3, mm0 +vaddpd ymm8, ymm8, ymm7 +vmovapd xmmword ptr [rcx+64], xmm8 +add rcx, r12 +psllw mm4, mm1 +vaddpd ymm9, ymm9, ymmword ptr [rcx+64] +vmovapd xmmword ptr [rcx+96], xmm9 +add rcx, r12 +psllw mm5, mm2 +vaddpd ymm1, ymm1, ymmword ptr [rcx+64] +add rcx, r12 +psllw mm0, mm3 +vaddpd ymm2, ymm2, ymm1 +vmovapd xmmword ptr [rbx+32], xmm2 +add rbx, r12 +psllw mm1, mm4 +vaddpd ymm3, ymm3, ymmword ptr [rbx+32] +vmovapd xmmword ptr [rbx+64], xmm3 +add rbx, r12 +psllw mm2, mm5 +vaddpd ymm4, ymm4, ymmword ptr [rbx+32] +add rbx, r12 +psrlw mm3, mm0 +vaddpd ymm5, ymm5, ymm6 +vmovdqa ymm11, ymm12 +psrlw mm4, mm1 +vaddpd ymm6, ymm6, ymm5 +vmovapd xmmword ptr [rdi+64], xmm6 +add rdi, r12 +psrlw mm5, mm2 +vaddpd ymm7, ymm7, ymmword ptr [rbx+32] +prefetcht2 [rdi] +add rdi, r12 +psrlw mm0, mm3 +vaddpd ymm8, ymm8, ymmword ptr [rdx+64] +vmovapd xmmword ptr [rdi+64], xmm8 +add rdi, r12 +psrlw mm1, mm4 +vaddpd ymm9, ymm9, ymmword ptr [rdi+64] +add rdi, r12 +psrlw mm2, mm5 +vaddpd ymm1, ymm1, ymm0 +vmovapd xmmword ptr [rdx+96], xmm1 +add rdx, r12 +psllw mm3, mm0 +vaddpd ymm2, ymm2, ymmword ptr [rbx+32] +prefetcht0 [rdx] +add rdx, r12 +psllw mm4, mm1 +vaddpd ymm3, ymm3, ymmword ptr [rdx+64] +vmovapd xmmword ptr [rdx+96], xmm3 +add rdx, r12 +psllw mm5, mm2 +vaddpd ymm4, ymm4, ymmword ptr [rdx+64] +add rdx, r12 +psllw mm0, mm3 +vaddpd ymm5, ymm5, ymm4 +vmovapd xmmword ptr [rcx+64], xmm5 +add rcx, r12 +psllw mm1, mm4 +vaddpd ymm6, ymm6, ymmword ptr [rcx+64] +vmovapd xmmword ptr [rcx+96], xmm6 +add rcx, r12 +psllw mm2, mm5 +vaddpd ymm7, ymm7, ymmword ptr [rcx+64] +add rcx, r12 +psrlw mm3, mm0 +vaddpd ymm8, ymm8, ymm7 +vmovapd xmmword ptr [rbx+32], xmm8 +add rbx, r12 +psrlw mm4, mm1 +vaddpd ymm9, ymm9, ymmword ptr [rbx+32] +vmovapd xmmword ptr [rbx+64], xmm9 +add rbx, r12 +psrlw mm5, mm2 +sub r10, 1 +jnz L2 +mov r10, 1365333 +mov rdi, rax +add rdi, 1441792 +L2: +sub r8, 1 +jnz L3 +mov r8, 1456 +mov rcx, rax +add rcx, 32768 +L3: +sub r9, 1 +jnz L4 +mov r9, 1501 
+mov rdx, rax +add rdx, 1048576 +L4: +inc r14 +mov rbx, rax +test qword ptr [r13], 1 +jnz L1 +L0: +mov rax, r14 +pop r14 +pop r13 +pop r12 +pop rbp +pop rbx +ret + +Payload SSE2 +.section .text +push rbx +push rbp +push r12 +push r13 +push r14 +mov rax, rdi +mov r13, rsi +mov r14, rdx +mov r11, qword ptr [r13] +test r11, r11 +jz L0 +mov r12, 64 +movapd xmm0, xmmword ptr [rax] +movapd xmm1, xmmword ptr [rax+32] +movapd xmm2, xmmword ptr [rax+64] +movapd xmm3, xmmword ptr [rax+96] +movapd xmm4, xmmword ptr [rax+128] +movapd xmm5, xmmword ptr [rax+160] +movapd xmm6, xmmword ptr [rax+192] +movapd xmm7, xmmword ptr [rax+224] +movapd xmm8, xmmword ptr [rax+256] +movapd xmm9, xmmword ptr [rax+288] +movapd xmm10, xmmword ptr [rax+320] +movapd xmm11, xmmword ptr [rax+352] +movapd xmm12, xmmword ptr [rax+384] +movapd xmm13, xmmword ptr [rax+416] +mov r11, 1085102592571150095 +pinsrq xmm14, r11, 0 +pinsrq xmm14, r11, 1 +shl r11, 4 +pinsrq xmm15, r11, 0 +pinsrq xmm15, r11, 1 +mov rbx, rax +mov rcx, rax +add rcx, 32768 +mov rdx, rax +add rdx, 1048576 +mov rdi, rax +add rdi, 1441792 +mov r8, 1456 +mov r9, 1501 +mov r10, 1365333 +.align 64 (code) +L1: +addpd xmm1, xmmword ptr [rbx+32] +add rbx, r12 +addpd xmm2, xmm3 +movdqa xmm15, xmm14 +addpd xmm3, xmm2 +movapd xmmword ptr [rdi+64], xmm3 +add rdi, r12 +addpd xmm4, xmmword ptr [rbx+32] +prefetcht2 [rdi] +add rdi, r12 +addpd xmm5, xmmword ptr [rdx+64] +movapd xmmword ptr [rdi+64], xmm5 +add rdi, r12 +addpd xmm6, xmmword ptr [rdi+64] +add rdi, r12 +addpd xmm7, xmm6 +movapd xmmword ptr [rdx+96], xmm7 +add rdx, r12 +addpd xmm8, xmmword ptr [rbx+32] +prefetcht0 [rdx] +add rdx, r12 +addpd xmm9, xmmword ptr [rdx+64] +movapd xmmword ptr [rdx+96], xmm9 +add rdx, r12 +addpd xmm10, xmmword ptr [rdx+64] +add rdx, r12 +addpd xmm11, xmm10 +movapd xmmword ptr [rcx+64], xmm11 +add rcx, r12 +addpd xmm12, xmmword ptr [rcx+64] +movapd xmmword ptr [rcx+96], xmm12 +add rcx, r12 +addpd xmm13, xmmword ptr [rcx+64] +add rcx, r12 +addpd xmm1, xmm0 
+movapd xmmword ptr [rbx+32], xmm1 +add rbx, r12 +addpd xmm2, xmmword ptr [rbx+32] +movapd xmmword ptr [rbx+64], xmm2 +add rbx, r12 +addpd xmm3, xmmword ptr [rbx+32] +add rbx, r12 +addpd xmm4, xmm5 +movdqa xmm14, xmm15 +addpd xmm5, xmm4 +movapd xmmword ptr [rdi+64], xmm5 +add rdi, r12 +addpd xmm6, xmmword ptr [rbx+32] +prefetcht2 [rdi] +add rdi, r12 +addpd xmm7, xmmword ptr [rdx+64] +movapd xmmword ptr [rdi+64], xmm7 +add rdi, r12 +addpd xmm8, xmmword ptr [rdi+64] +add rdi, r12 +addpd xmm9, xmm8 +movapd xmmword ptr [rdx+96], xmm9 +add rdx, r12 +addpd xmm10, xmmword ptr [rbx+32] +prefetcht0 [rdx] +add rdx, r12 +addpd xmm11, xmmword ptr [rdx+64] +movapd xmmword ptr [rdx+96], xmm11 +add rdx, r12 +addpd xmm12, xmmword ptr [rdx+64] +add rdx, r12 +addpd xmm13, xmm12 +movapd xmmword ptr [rcx+64], xmm13 +add rcx, r12 +addpd xmm1, xmmword ptr [rcx+64] +movapd xmmword ptr [rcx+96], xmm1 +add rcx, r12 +addpd xmm2, xmmword ptr [rcx+64] +add rcx, r12 +addpd xmm3, xmm2 +movapd xmmword ptr [rbx+32], xmm3 +add rbx, r12 +addpd xmm4, xmmword ptr [rbx+32] +movapd xmmword ptr [rbx+64], xmm4 +add rbx, r12 +addpd xmm5, xmmword ptr [rbx+32] +add rbx, r12 +addpd xmm6, xmm7 +movdqa xmm15, xmm14 +addpd xmm7, xmm6 +movapd xmmword ptr [rdi+64], xmm7 +add rdi, r12 +addpd xmm8, xmmword ptr [rbx+32] +prefetcht2 [rdi] +add rdi, r12 +addpd xmm9, xmmword ptr [rdx+64] +movapd xmmword ptr [rdi+64], xmm9 +add rdi, r12 +addpd xmm10, xmmword ptr [rdi+64] +add rdi, r12 +addpd xmm11, xmm10 +movapd xmmword ptr [rdx+96], xmm11 +add rdx, r12 +addpd xmm12, xmmword ptr [rbx+32] +prefetcht0 [rdx] +add rdx, r12 +addpd xmm13, xmmword ptr [rdx+64] +movapd xmmword ptr [rdx+96], xmm13 +add rdx, r12 +addpd xmm1, xmmword ptr [rdx+64] +add rdx, r12 +addpd xmm2, xmm1 +movapd xmmword ptr [rcx+64], xmm2 +add rcx, r12 +addpd xmm3, xmmword ptr [rcx+64] +movapd xmmword ptr [rcx+96], xmm3 +add rcx, r12 +addpd xmm4, xmmword ptr [rcx+64] +add rcx, r12 +addpd xmm5, xmm4 +movapd xmmword ptr [rbx+32], xmm5 +add rbx, r12 +addpd 
xmm6, xmmword ptr [rbx+32] +movapd xmmword ptr [rbx+64], xmm6 +add rbx, r12 +sub r10, 1 +jnz L2 +mov r10, 1365333 +mov rdi, rax +add rdi, 1441792 +L2: +sub r8, 1 +jnz L3 +mov r8, 1456 +mov rcx, rax +add rcx, 32768 +L3: +sub r9, 1 +jnz L4 +mov r9, 1501 +mov rdx, rax +add rdx, 1048576 +L4: +inc r14 +mov rbx, rax +test qword ptr [r13], 1 +jnz L1 +L0: +mov rax, r14 +pop r14 +pop r13 +pop r12 +pop rbp +pop rbx +ret + diff --git a/tooling/ref-test.py b/tooling/ref-test.py new file mode 100644 index 00000000..aab40526 --- /dev/null +++ b/tooling/ref-test.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import sys +import os +import subprocess + +def run_and_update(executable, ref_file, update_refs): + p = subprocess.Popen([ executable ], stdout=subprocess.PIPE) + p.wait() + stdout, _ = p.communicate() + + reference = open(ref_file, 'rb').read() + + if stdout != reference: + # Update the reference if applicable + if update_refs: + open(ref_file, 'wb').write(stdout) + return + + sys.exit(1) + +# Run the first argument and compare it to the file provided in the second argument +if __name__ == "__main__": + if len(sys.argv) != 3: + print(f"Usage: {sys.argv[0]} EXECUTABLE REFERENCE_FILE") + print("Run with env variable UPDATE_REFERENCES set to update the reference files.") + sys.exit(1) + + executable = sys.argv[1] + ref_file = sys.argv[2] + update_refs = "UPDATE_REFERENCES" in os.environ + + run_and_update(executable, ref_file, update_refs) \ No newline at end of file From e6b7166b8bf48ce36175b0f31cf7e889e12915a5 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Sun, 24 Nov 2024 12:37:14 +0100 Subject: [PATCH 04/36] Move compile flag that is used in header file to toplevel cmake --- CMakeLists.txt | 5 +++++ src/CMakeLists.txt | 4 ---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 05735444..b48d4e99 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -111,6 +111,11 @@ find_package(Threads REQUIRED) 
include(cmake/InstallHwloc.cmake) +# General linker flags +if (FIRESTARTER_THREAD_AFFINITY) + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DFIRESTARTER_THREAD_AFFINITY") +endif() + add_subdirectory(src) add_subdirectory(test) \ No newline at end of file diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 1bfae466..f3a9e74c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,8 +1,4 @@ # General linker flags -if (FIRESTARTER_THREAD_AFFINITY) - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DFIRESTARTER_THREAD_AFFINITY") -endif() - if((NOT CMAKE_SYSTEM_NAME STREQUAL "Darwin") AND FIRESTARTER_LINK_STATIC) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DFIRESTARTER_LINK_STATIC") endif() From 978e6603534105c1305a81ab664112c0a6e615e3 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Sun, 24 Nov 2024 12:38:21 +0100 Subject: [PATCH 05/36] add ref test for functions in firestarter --- .../firestarter/Environment/Environment.hpp | 3 +- .../Environment/X86/X86Environment.hpp | 3 +- .../Environment/X86/X86Environment.cpp | 4 +-- src/firestarter/Firestarter.cpp | 2 +- test/CMakeLists.txt | 3 +- test/X86Functions/CMakeLists.txt | 4 +++ test/X86Functions/Main.cpp | 32 +++++++++++++++++++ test/refs/X86Functions.log | 25 +++++++++++++++ 8 files changed, 70 insertions(+), 6 deletions(-) create mode 100644 test/X86Functions/CMakeLists.txt create mode 100644 test/X86Functions/Main.cpp create mode 100644 test/refs/X86Functions.log diff --git a/include/firestarter/Environment/Environment.hpp b/include/firestarter/Environment/Environment.hpp index 41446bde..eff470d2 100644 --- a/include/firestarter/Environment/Environment.hpp +++ b/include/firestarter/Environment/Environment.hpp @@ -85,7 +85,8 @@ class Environment { virtual void printSelectedCodePathSummary() = 0; /// Print a list of available high-load function and if they are available on the current system. 
- virtual void printFunctionSummary() = 0; + /// \arg ForceYes Force all functions to be shown as avaialable + virtual void printFunctionSummary(bool ForceYes) = 0; /// Get the number of threads FIRESTARTER will run with. [[nodiscard]] auto requestedNumThreads() const -> uint64_t { return RequestedNumThreads; } diff --git a/include/firestarter/Environment/X86/X86Environment.hpp b/include/firestarter/Environment/X86/X86Environment.hpp index f4760f7e..40d1b6e0 100644 --- a/include/firestarter/Environment/X86/X86Environment.hpp +++ b/include/firestarter/Environment/X86/X86Environment.hpp @@ -90,7 +90,8 @@ class X86Environment final : public Environment { /// Print a list of available high-load function and if they are available on the current system. This includes all /// PlatformConfigs in combination with all thread per core counts. - void printFunctionSummary() override; + /// \arg ForceYes Force all functions to be shown as avaialable + void printFunctionSummary(bool ForceYes) override; private: /// The list of availabe platform configs that is printed when supplying the --avail command line argument. 
The IDs diff --git a/src/firestarter/Environment/X86/X86Environment.cpp b/src/firestarter/Environment/X86/X86Environment.cpp index 3ecd89c1..4511f514 100644 --- a/src/firestarter/Environment/X86/X86Environment.cpp +++ b/src/firestarter/Environment/X86/X86Environment.cpp @@ -169,7 +169,7 @@ void X86Environment::setLineCount(unsigned LineCount) { config().settings().setL void X86Environment::printSelectedCodePathSummary() { config().printCodePathSummary(); } -void X86Environment::printFunctionSummary() { +void X86Environment::printFunctionSummary(bool ForceYes) { log::info() << " available load-functions:\n" << " ID | NAME | available on this " "system | payload default setting\n" @@ -182,7 +182,7 @@ void X86Environment::printFunctionSummary() { for (auto const& Config : PlatformConfigs) { for (auto const& ThreadsPerCore : Config->settings().threads()) { - const char* Available = Config->isAvailable(topology()) ? "yes" : "no"; + const char* Available = (Config->isAvailable(topology()) || ForceYes) ? 
"yes" : "no"; const auto& FunctionName = Config->functionName(ThreadsPerCore); const auto& InstructionGroupsString = Config->settings().getInstructionGroupsString(); diff --git a/src/firestarter/Firestarter.cpp b/src/firestarter/Firestarter.cpp index 379e2039..a62fc2f8 100644 --- a/src/firestarter/Firestarter.cpp +++ b/src/firestarter/Firestarter.cpp @@ -59,7 +59,7 @@ Firestarter::Firestarter(Config&& ProvidedConfig) } if (Cfg.PrintFunctionSummary) { - Environment->printFunctionSummary(); + Environment->printFunctionSummary(/*ForceYes=*/false); safeExit(EXIT_SUCCESS); } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 9a40045a..785f6f53 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -11,4 +11,5 @@ function(add_ref_test) ) endfunction() -add_subdirectory(DumpPayloads) \ No newline at end of file +add_subdirectory(DumpPayloads) +add_subdirectory(X86Functions) \ No newline at end of file diff --git a/test/X86Functions/CMakeLists.txt b/test/X86Functions/CMakeLists.txt new file mode 100644 index 00000000..c97e311a --- /dev/null +++ b/test/X86Functions/CMakeLists.txt @@ -0,0 +1,4 @@ +add_executable(X86Functions Main.cpp) +target_link_libraries(X86Functions firestartercore) + +add_ref_test(NAME X86Functions) \ No newline at end of file diff --git a/test/X86Functions/Main.cpp b/test/X86Functions/Main.cpp new file mode 100644 index 00000000..4b35389a --- /dev/null +++ b/test/X86Functions/Main.cpp @@ -0,0 +1,32 @@ +/****************************************************************************** + * FIRESTARTER - A Processor Stress Test Utility + * Copyright (C) 2024 TU Dresden, Center for Information Services and High + * Performance Computing + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Contact: daniel.hackenberg@tu-dresden.de + *****************************************************************************/ + +#include "firestarter/Environment/X86/X86Environment.hpp" + +auto main(int /*argc*/, const char** /*argv*/) -> int { + firestarter::logging::Filter::set_severity(nitro::log::severity_level::info); + + firestarter::environment::x86::X86Environment Env; + + Env.printFunctionSummary(/*ForceYes=*/true); + + return EXIT_SUCCESS; +} \ No newline at end of file diff --git a/test/refs/X86Functions.log b/test/refs/X86Functions.log new file mode 100644 index 00000000..d0640ee2 --- /dev/null +++ b/test/refs/X86Functions.log @@ -0,0 +1,25 @@ + available load-functions: + ID | NAME | available on this system | payload default setting + ------------------------------------------------------------------------------------------------------------------------------------------------------- + 1 | FUNC_KNL_XEONPHI_AVX512_4T | yes | RAM_P:3,L2_S:8,L1_L:40,REG:10 + 2 | FUNC_SKL_COREI_FMA_1T | yes | RAM_L:3,L3_LS_256:5,L2_LS_256:18,L1_2LS_256:78,REG:40 + 3 | FUNC_SKL_COREI_FMA_2T | yes | RAM_L:3,L3_LS_256:5,L2_LS_256:18,L1_2LS_256:78,REG:40 + 4 | FUNC_SKL_XEONEP_AVX512_1T | yes | RAM_S:3,RAM_P:1,L3_S:1,L3_P:1,L2_S:4,L2_L:70,L1_S:0,L1_L:40,REG:140 + 5 | FUNC_SKL_XEONEP_AVX512_2T | yes | RAM_S:3,RAM_P:1,L3_S:1,L3_P:1,L2_S:4,L2_L:70,L1_S:0,L1_L:40,REG:140 + 6 | FUNC_HSW_COREI_FMA_1T | yes | RAM_L:2,L3_LS:3,L2_LS:9,L1_LS:90,REG:40 + 7 | FUNC_HSW_COREI_FMA_2T | yes | RAM_L:2,L3_LS:3,L2_LS:9,L1_LS:90,REG:40 + 8 | FUNC_HSW_XEONEP_FMA_1T | yes | RAM_L:8,L3_LS:1,L2_LS:29,L1_LS:100,REG:100 + 9 | 
FUNC_HSW_XEONEP_FMA_2T | yes | RAM_L:8,L3_LS:1,L2_LS:29,L1_LS:100,REG:100 + 10 | FUNC_SNB_COREI_AVX_1T | yes | RAM_L:2,L3_LS:4,L2_LS:10,L1_LS:90,REG:45 + 11 | FUNC_SNB_COREI_AVX_2T | yes | RAM_L:2,L3_LS:4,L2_LS:10,L1_LS:90,REG:45 + 12 | FUNC_SNB_XEONEP_AVX_1T | yes | RAM_L:3,L3_LS:2,L2_LS:10,L1_LS:90,REG:30 + 13 | FUNC_SNB_XEONEP_AVX_2T | yes | RAM_L:3,L3_LS:2,L2_LS:10,L1_LS:90,REG:30 + 14 | FUNC_NHM_COREI_SSE2_1T | yes | RAM_P:1,L1_LS:70,REG:2 + 15 | FUNC_NHM_COREI_SSE2_2T | yes | RAM_P:1,L1_LS:70,REG:2 + 16 | FUNC_NHM_XEONEP_SSE2_1T | yes | RAM_P:1,L1_LS:60,REG:2 + 17 | FUNC_NHM_XEONEP_SSE2_2T | yes | RAM_P:1,L1_LS:60,REG:2 + 18 | FUNC_BLD_OPTERON_FMA4_1T | yes | RAM_L:1,L3_L:1,L2_LS:5,L1_L:90,REG:45 + 19 | FUNC_ZEN_EPYC_ZENFMA_1T | yes | RAM_L:3,L3_L:14,L2_L:75,L1_LS:81,REG:100 + 20 | FUNC_ZEN_EPYC_ZENFMA_2T | yes | RAM_L:3,L3_L:14,L2_L:75,L1_LS:81,REG:100 + 21 | FUNC_ZEN_2_EPYC_FMA_1T | yes | RAM_L:10,L3_L:25,L2_L:91,L1_2LS_256:72,L1_LS_256:82,REG:75 + 22 | FUNC_ZEN_2_EPYC_FMA_2T | yes | RAM_L:10,L3_L:25,L2_L:91,L1_2LS_256:72,L1_LS_256:82,REG:75 From 9fa50ddb57e5625fa8aef26264615298d8e08bbc Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Mon, 25 Nov 2024 11:16:20 +0100 Subject: [PATCH 06/36] add ci workflow for ctest --- .github/workflows/ctest.yml | 38 +++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 .github/workflows/ctest.yml diff --git a/.github/workflows/ctest.yml b/.github/workflows/ctest.yml new file mode 100644 index 00000000..8db9f31d --- /dev/null +++ b/.github/workflows/ctest.yml @@ -0,0 +1,38 @@ +name: ctest + +on: [push, pull_request] + +env: + PYTHONUNBUFFERED: 1 + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + with: + submodules: 'true' + + - name: Install python3 + run: | + sudo apt update + sudo apt install python3 + + - name: Create build directory + run: | + mkdir build + + - name: Run CMake configure (default) + run: | + cd build + cmake .. 
+ + - name: Build + run: | + cd build + make -j4 + + - name: Ctest + run: | + make test \ No newline at end of file From 5a73928160eeea5678eb6a9bc2e8bdfc93327ec2 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Mon, 25 Nov 2024 11:25:17 +0100 Subject: [PATCH 07/36] cmake: add nlohmann_json dependency to firestarterlinux --- src/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f3a9e74c..3024ee12 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -62,6 +62,7 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Linux") target_link_libraries(firestarterlinux Nitro::log + nlohmann_json::nlohmann_json ) endif() From 0539cb42576a0ec90acbf8759a4a6b92789ffbbf Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Mon, 25 Nov 2024 11:28:58 +0100 Subject: [PATCH 08/36] cmake: add nlohmann_json dependency to firestartercore --- src/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3024ee12..e659a453 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -38,6 +38,7 @@ target_link_libraries(firestartercore hwloc AsmJit::AsmJit Nitro::log + nlohmann_json::nlohmann_json ) From 53d597539ad93dc6cdf916144c79f51871e641d7 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Mon, 25 Nov 2024 11:38:13 +0100 Subject: [PATCH 09/36] move cmake cxx standard definition up in hierarchy --- CMakeLists.txt | 1 + src/CMakeLists.txt | 9 --------- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b48d4e99..f08b844c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,6 +3,7 @@ project(FIRESTARTER) enable_testing() set(CMAKE_EXPORT_COMPILE_COMMANDS ON) +set(CMAKE_CXX_STANDARD 17) include(cmake/GitSubmoduleUpdate.cmake) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e659a453..5db9d285 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -32,8 +32,6 @@ add_library(firestartercore STATIC 
firestarter/Environment/X86/Payload/SSE2Payload.cpp ) -target_compile_features(firestartercore PRIVATE cxx_std_17) - target_link_libraries(firestartercore hwloc AsmJit::AsmJit @@ -59,8 +57,6 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Linux") firestarter/Optimizer/Algorithm/NSGA2.cpp ) - target_compile_features(firestarterlinux PRIVATE cxx_std_17) - target_link_libraries(firestarterlinux Nitro::log nlohmann_json::nlohmann_json @@ -86,7 +82,6 @@ if ("${FIRESTARTER_BUILD_TYPE}" STREQUAL "FIRESTARTER_CUDA") ${FIRESTARTER_FILES} firestarter/Cuda/Cuda.cpp ) - target_compile_features(FIRESTARTER_CUDA PRIVATE cxx_std_17) target_link_libraries(FIRESTARTER_CUDA firestartercore @@ -134,7 +129,6 @@ elseif ("${FIRESTARTER_BUILD_TYPE}" STREQUAL "FIRESTARTER_ONEAPI") ${FIRESTARTER_FILES} firestarter/OneAPI/OneAPI.cpp ) - target_compile_features(FIRESTARTER_ONEAPI PRIVATE cxx_std_17) target_link_libraries(FIRESTARTER_ONEAPI firestartercore @@ -171,13 +165,11 @@ elseif("${FIRESTARTER_BUILD_TYPE}" STREQUAL "FIRESTARTER_HIP") set(CMAKE_CXX_COMPILER ${HIP_HIPCC_EXECUTABLE}) set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE}) - set( CMAKE_CXX_STANDARD 17 ) add_executable(FIRESTARTER_HIP ${FIRESTARTER_FILES} firestarter/Cuda/Cuda.cpp ) - target_compile_features(FIRESTARTER_HIP PRIVATE cxx_std_17) target_link_libraries(FIRESTARTER_HIP firestartercore @@ -206,7 +198,6 @@ elseif(${FIRESTARTER_BUILD_TYPE} STREQUAL "FIRESTARTER") add_executable(FIRESTARTER ${FIRESTARTER_FILES} ) - target_compile_features(FIRESTARTER PRIVATE cxx_std_17) target_link_libraries(FIRESTARTER firestartercore From d388a198778c7965b34a192f2cfc1cb9d0118797 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Mon, 25 Nov 2024 12:00:26 +0100 Subject: [PATCH 10/36] run ctest correctly in ci --- .github/workflows/ctest.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ctest.yml b/.github/workflows/ctest.yml index 8db9f31d..d864a978 100644 --- a/.github/workflows/ctest.yml +++ 
b/.github/workflows/ctest.yml @@ -35,4 +35,4 @@ jobs: - name: Ctest run: | - make test \ No newline at end of file + ctest \ No newline at end of file From 6eca93bde1449bb58dcf3a809e82df76bdec14b4 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Mon, 25 Nov 2024 12:03:22 +0100 Subject: [PATCH 11/36] run ctest correctly in ci --- .github/workflows/ctest.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ctest.yml b/.github/workflows/ctest.yml index d864a978..96655b97 100644 --- a/.github/workflows/ctest.yml +++ b/.github/workflows/ctest.yml @@ -35,4 +35,5 @@ jobs: - name: Ctest run: | + cd build ctest \ No newline at end of file From 278604f8ec25f51e6a4f37fde87a2bec0fa64549 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Mon, 25 Nov 2024 12:25:07 +0100 Subject: [PATCH 12/36] link correctly on darwin --- CMakeLists.txt | 2 ++ cmake/DarwinBuild.cmake | 18 ++++++++++++++++++ src/CMakeLists.txt | 15 +++------------ test/DumpPayloads/CMakeLists.txt | 1 + test/X86Functions/CMakeLists.txt | 1 + 5 files changed, 25 insertions(+), 12 deletions(-) create mode 100644 cmake/DarwinBuild.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index f08b844c..db358cf8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -57,6 +57,8 @@ endif() include(cmake/GitSubmoduleUpdate.cmake) git_submodule_update() +include(cmake/DarwinBuild.cmake) + if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") else() SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -O2 -fdata-sections -ffunction-sections") diff --git a/cmake/DarwinBuild.cmake b/cmake/DarwinBuild.cmake new file mode 100644 index 00000000..9519a6de --- /dev/null +++ b/cmake/DarwinBuild.cmake @@ -0,0 +1,18 @@ +if(CMAKE_SYSTEM_NAME STREQUAL "Darwin") + find_library( COREFOUNDATION_LIBRARY CoreFoundation ) + find_library( IOKIT_LIBRARY IOKit ) +endif() + +# Function to link against the correct libraries on darwin +function(target_link_libraries_darwin) + set(oneValueArgs NAME) + cmake_parse_arguments(TARGET "" 
"${oneValueArgs}" + "" ${ARGN} ) + + if(CMAKE_SYSTEM_NAME STREQUAL "Darwin") + target_link_libraries(${TARGET_NAME} + ${COREFOUNDATION_LIBRARY} + ${IOKIT_LIBRARY} + ) + endif() +endfunction() diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5db9d285..2e37fd21 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -39,7 +39,6 @@ target_link_libraries(firestartercore nlohmann_json::nlohmann_json ) - # Create the linux firestarter library that is used for specific linux only features if(CMAKE_SYSTEM_NAME STREQUAL "Linux") add_library(firestarterlinux STATIC @@ -209,6 +208,8 @@ elseif(${FIRESTARTER_BUILD_TYPE} STREQUAL "FIRESTARTER") ) endif() + target_link_libraries_darwin(NAME FIRESTARTER) + # static linking is not supported on Darwin, see Apple Technical QA1118 if((NOT CMAKE_SYSTEM_NAME STREQUAL "Darwin") AND FIRESTARTER_LINK_STATIC) target_link_libraries(FIRESTARTER @@ -236,14 +237,4 @@ elseif(${FIRESTARTER_BUILD_TYPE} STREQUAL "FIRESTARTER") Threads::Threads ) endif() - - # static linking is not supported on Darwin, see Apple Technical QA1118 - if(CMAKE_SYSTEM_NAME STREQUAL "Darwin") - find_library( COREFOUNDATION_LIBRARY CoreFoundation ) - find_library( IOKIT_LIBRARY IOKit ) - target_link_libraries(FIRESTARTER - ${COREFOUNDATION_LIBRARY} - ${IOKIT_LIBRARY} - ) - endif() -endif() +endif() \ No newline at end of file diff --git a/test/DumpPayloads/CMakeLists.txt b/test/DumpPayloads/CMakeLists.txt index c42fd887..6ebd81f8 100644 --- a/test/DumpPayloads/CMakeLists.txt +++ b/test/DumpPayloads/CMakeLists.txt @@ -1,4 +1,5 @@ add_executable(DumpPayloads Main.cpp) target_link_libraries(DumpPayloads firestartercore) +target_link_libraries_darwin(NAME DumpPayloads) add_ref_test(NAME DumpPayloads) \ No newline at end of file diff --git a/test/X86Functions/CMakeLists.txt b/test/X86Functions/CMakeLists.txt index c97e311a..c2a2649b 100644 --- a/test/X86Functions/CMakeLists.txt +++ b/test/X86Functions/CMakeLists.txt @@ -1,4 +1,5 @@ add_executable(X86Functions 
Main.cpp) target_link_libraries(X86Functions firestartercore) +target_link_libraries_darwin(NAME X86Functions) add_ref_test(NAME X86Functions) \ No newline at end of file From 6e1b75236365b3df57a868d896ff9559b7448187 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Mon, 25 Nov 2024 13:08:24 +0100 Subject: [PATCH 13/36] enable position independant code on linux --- CMakeLists.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index db358cf8..10644860 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,8 +71,11 @@ else() SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gc-sections") endif() -# enable debug features on linux if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + # enable position independant code on linux + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") + + # enable debug features on linux option(FIRESTARTER_DEBUG_FEATURES "Enable debug features" ON) if (FIRESTARTER_DEBUG_FEATURES) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DFIRESTARTER_DEBUG_FEATURES") From dacec9d0c6b744e2e2de0233fbbea8f90b764cb4 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Mon, 25 Nov 2024 18:11:34 +0100 Subject: [PATCH 14/36] refactor cmake --- CMakeLists.txt | 47 ++--------------------- cmake/BuildOptions.cmake | 23 ++++++++++++ cmake/BuildSettings.cmake | 79 +++++++++++++++++++++++++++++++++++++++ src/CMakeLists.txt | 40 -------------------- 4 files changed, 105 insertions(+), 84 deletions(-) create mode 100644 cmake/BuildOptions.cmake create mode 100644 cmake/BuildSettings.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 10644860..b86bec79 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -57,46 +57,10 @@ endif() include(cmake/GitSubmoduleUpdate.cmake) git_submodule_update() -include(cmake/DarwinBuild.cmake) - -if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") -else() -SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -O2 -fdata-sections -ffunction-sections") -endif() - -if(CMAKE_SYSTEM_NAME STREQUAL 
"Darwin") - SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-dead_strip") -elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") -else() - SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gc-sections") -endif() - -if(CMAKE_SYSTEM_NAME STREQUAL "Linux") - # enable position independant code on linux - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") +include(cmake/BuildOptions.cmake) +include(cmake/BuildSettings.cmake) - # enable debug features on linux - option(FIRESTARTER_DEBUG_FEATURES "Enable debug features" ON) - if (FIRESTARTER_DEBUG_FEATURES) - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DFIRESTARTER_DEBUG_FEATURES") - endif() -endif() - -set(FIRESTARTER_BUILD_TYPE "FIRESTARTER" CACHE STRING "FIRESTARTER_BUILD_TYPE can be any of FIRESTARTER, FIRESTARTER_CUDA, FIRESTARTER_ONEAPI, or FIRESTARTER_HIP.") -set_property(CACHE FIRESTARTER_BUILD_TYPE PROPERTY STRINGS FIRESTARTER FIRESTARTER_CUDA FIRESTARTER_ONEAPI) -if (${FIRESTARTER_BUILD_TYPE} STREQUAL "FIRESTARTER") - option(FIRESTARTER_LINK_STATIC "Link FIRESTARTER as a static binary. Note, dlopen is not supported in static binaries. This option is not available on macOS or with CUDA or OneAPI enabled." ON) -endif() -if (${FIRESTARTER_BUILD_TYPE} STREQUAL "FIRESTARTER") - option(FIRESTARTER_BUILD_HWLOC "Build hwloc dependency." ON) -elseif(${FIRESTARTER_BUILD_TYPE} STREQUAL "FIRESTARTER_CUDA") - option(FIRESTARTER_BUILD_HWLOC "Build hwloc dependency." ON) -elseif(${FIRESTARTER_BUILD_TYPE} STREQUAL "FIRESTARTER_ONEAPI") - option(FIRESTARTER_BUILD_HWLOC "Build hwloc dependency." ON) -elseif(${FIRESTARTER_BUILD_TYPE} STREQUAL "FIRESTARTER_HIP") - option(FIRESTARTER_BUILD_HWLOC "Build hwloc dependency." ON) -endif() -option(FIRESTARTER_THREAD_AFFINITY "Enable FIRESTARTER to set affinity to hardware threads." 
ON) +include(cmake/DarwinBuild.cmake) if(NOT DEFINED ASMJIT_STATIC) set(ASMJIT_STATIC TRUE) @@ -117,11 +81,6 @@ find_package(Threads REQUIRED) include(cmake/InstallHwloc.cmake) -# General linker flags -if (FIRESTARTER_THREAD_AFFINITY) - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DFIRESTARTER_THREAD_AFFINITY") -endif() - add_subdirectory(src) add_subdirectory(test) \ No newline at end of file diff --git a/cmake/BuildOptions.cmake b/cmake/BuildOptions.cmake new file mode 100644 index 00000000..eb1b508e --- /dev/null +++ b/cmake/BuildOptions.cmake @@ -0,0 +1,23 @@ +include(CMakeDependentOption) + +# Set the different available FIRESTARTER builds. +set(FIRESTARTER_BUILD_TYPE "FIRESTARTER" CACHE STRING "FIRESTARTER_BUILD_TYPE can be any of FIRESTARTER, FIRESTARTER_CUDA, FIRESTARTER_ONEAPI, or FIRESTARTER_HIP.") +set_property(CACHE FIRESTARTER_BUILD_TYPE PROPERTY STRINGS FIRESTARTER FIRESTARTER_CUDA FIRESTARTER_ONEAPI) + +# Static linking is not supported with GPU devices or MacOS. +set(FIRESTARTER_LINK_STATIC_FLAG ("${FIRESTARTER_BUILD_TYPE}" STREQUAL "FIRESTARTER") AND (NOT CMAKE_SYSTEM_NAME STREQUAL "Darwin")) +cmake_dependent_option(FIRESTARTER_LINK_STATIC "Link FIRESTARTER as a static binary. Note, dlopen is not supported in static binaries. This option is not available on macOS or with CUDA, OneAPI or HIP enabled." ON "FIRESTARTER_LINK_STATIC_FLAG" OFF) + + +# We vendor hwloc per default. +option(FIRESTARTER_BUILD_HWLOC "Build hwloc dependency." ON) + + +# Use of thread affinity is enabled on linux per default. +set(FIRESTARTER_THREAD_AFFINITY_FLAG (CMAKE_SYSTEM_NAME STREQUAL "Linux")) +cmake_dependent_option(FIRESTARTER_THREAD_AFFINITY "Enable FIRESTARTER to set affinity to hardware threads." ON "FIRESTARTER_THREAD_AFFINITY_FLAG" OFF) + + +# Debug feature are enabled on linux per default. 
+set(FIRESTARTER_DEBUG_FEATURES_FLAG (CMAKE_SYSTEM_NAME STREQUAL "Linux")) +cmake_dependent_option(FIRESTARTER_DEBUG_FEATURES "Enable debug features" ON "FIRESTARTER_DEBUG_FEATURES_FLAG" OFF) \ No newline at end of file diff --git a/cmake/BuildSettings.cmake b/cmake/BuildSettings.cmake new file mode 100644 index 00000000..73fd2f8a --- /dev/null +++ b/cmake/BuildSettings.cmake @@ -0,0 +1,79 @@ +# Dependent Linux features +if(FIRESTARTER_LINK_STATIC) + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DFIRESTARTER_LINK_STATIC") +endif() + +if (FIRESTARTER_DEBUG_FEATURES) + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DFIRESTARTER_DEBUG_FEATURES") +endif() + +if (FIRESTARTER_THREAD_AFFINITY) + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DFIRESTARTER_THREAD_AFFINITY") +endif() + + +# Not MSVC +if(NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -O2 -fdata-sections -ffunction-sections") +endif() + + +# Darwin +if(CMAKE_SYSTEM_NAME STREQUAL "Darwin") + SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-dead_strip") +endif() + + +# Not (Darwin or MSVC) +# equivalent to Linux and Windows with mingw +if(NOT (CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")) + SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gc-sections") +endif() + + +# Linux +if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + # enable position independant code on linux + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") +endif() + + +# Find packages, set the compiler and compile flags specific to the selected FIRESTARTER build. 
+if(${FIRESTARTER_BUILD_TYPE} STREQUAL "FIRESTARTER") + # No specific compiler selected +elseif ("${FIRESTARTER_BUILD_TYPE}" STREQUAL "FIRESTARTER_CUDA") + find_package(CUDAToolkit REQUIRED) + include_directories(${CUDAToolkit_INCLUDE_DIRS}) + + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DFIRESTARTER_BUILD_CUDA") +elseif ("${FIRESTARTER_BUILD_TYPE}" STREQUAL "FIRESTARTER_ONEAPI") + find_program(ICX_PRESENT icx) + + if(ICX_PRESENT) + message(STATUS "Executable found: ${ICX_PRESENT}") + else() + message(FATAL_ERROR "OneAPI Intel Compiler icx not found") + endif() + + SET(CMAKE_CXX_COMPILER "icx") + SET(CMAKE_C_COMPILER "icx") + + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -DFIRESTARTER_BUILD_ONEAPI") +elseif("${FIRESTARTER_BUILD_TYPE}" STREQUAL "FIRESTARTER_HIP") + if (NOT DEFINED ROCM_PATH ) + set ( ROCM_PATH "/opt/rocm" CACHE STRING "Default ROCM installation directory." ) + endif () + + # Search for rocm in common locations + list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}/hip ${ROCM_PATH}/lib ${ROCM_PATH}) + find_package(HIP REQUIRED) + find_package(rocblas REQUIRED) + find_package(rocrand REQUIRED) + find_package(hiprand REQUIRED) + find_package(hipblas REQUIRED) + + set(CMAKE_CXX_COMPILER ${HIP_HIPCC_EXECUTABLE}) + set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE}) + + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DFIRESTARTER_BUILD_HIP") +endif() \ No newline at end of file diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2e37fd21..ed46855d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,9 +1,3 @@ -# General linker flags -if((NOT CMAKE_SYSTEM_NAME STREQUAL "Darwin") AND FIRESTARTER_LINK_STATIC) - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DFIRESTARTER_LINK_STATIC") -endif() - - # Create the core firestarter library that is used in all builds and tests add_library(firestartercore STATIC firestarter/Config.cpp @@ -72,11 +66,6 @@ SET(FIRESTARTER_FILES ) if ("${FIRESTARTER_BUILD_TYPE}" STREQUAL "FIRESTARTER_CUDA") - find_package(CUDAToolkit REQUIRED) - 
include_directories(${CUDAToolkit_INCLUDE_DIRS}) - - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DFIRESTARTER_BUILD_CUDA") - add_executable(FIRESTARTER_CUDA ${FIRESTARTER_FILES} firestarter/Cuda/Cuda.cpp @@ -111,19 +100,6 @@ if ("${FIRESTARTER_BUILD_TYPE}" STREQUAL "FIRESTARTER_CUDA") endif() elseif ("${FIRESTARTER_BUILD_TYPE}" STREQUAL "FIRESTARTER_ONEAPI") - - find_program(ICX_PRESENT icx) - - if(ICX_PRESENT) - message(STATUS "Executable found: ${ICX_PRESENT}") - else() - message(FATAL_ERROR "OneAPI Intel Compiler icx not found") - endif() - SET(CMAKE_CXX_COMPILER "icx") - SET(CMAKE_C_COMPILER "icx") - - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -DFIRESTARTER_BUILD_ONEAPI") - add_executable(FIRESTARTER_ONEAPI ${FIRESTARTER_FILES} firestarter/OneAPI/OneAPI.cpp @@ -149,22 +125,6 @@ elseif ("${FIRESTARTER_BUILD_TYPE}" STREQUAL "FIRESTARTER_ONEAPI") ) elseif("${FIRESTARTER_BUILD_TYPE}" STREQUAL "FIRESTARTER_HIP") - if (NOT DEFINED ROCM_PATH ) - set ( ROCM_PATH "/opt/rocm" CACHE STRING "Default ROCM installation directory." 
) - endif () - # Search for rocm in common locations - list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}/hip ${ROCM_PATH}/lib ${ROCM_PATH}) - find_package(HIP REQUIRED) - find_package(rocblas REQUIRED) - find_package(rocrand REQUIRED) - find_package(hiprand REQUIRED) - find_package(hipblas REQUIRED) - - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DFIRESTARTER_BUILD_HIP") - - set(CMAKE_CXX_COMPILER ${HIP_HIPCC_EXECUTABLE}) - set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE}) - add_executable(FIRESTARTER_HIP ${FIRESTARTER_FILES} firestarter/Cuda/Cuda.cpp From fe4d5e9f9eb59c991f9e02806dcd5ad0aca6ddd0 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Mon, 25 Nov 2024 19:26:02 +0100 Subject: [PATCH 15/36] fix icx build --- cmake/BuildSettings.cmake | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/cmake/BuildSettings.cmake b/cmake/BuildSettings.cmake index 73fd2f8a..c9b28c0e 100644 --- a/cmake/BuildSettings.cmake +++ b/cmake/BuildSettings.cmake @@ -47,16 +47,12 @@ elseif ("${FIRESTARTER_BUILD_TYPE}" STREQUAL "FIRESTARTER_CUDA") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DFIRESTARTER_BUILD_CUDA") elseif ("${FIRESTARTER_BUILD_TYPE}" STREQUAL "FIRESTARTER_ONEAPI") - find_program(ICX_PRESENT icx) + find_program(ICX_PATH icx REQUIRED) - if(ICX_PRESENT) - message(STATUS "Executable found: ${ICX_PRESENT}") - else() - message(FATAL_ERROR "OneAPI Intel Compiler icx not found") - endif() + message(STATUS "Path of icx executable is: ${ICX_PATH}") - SET(CMAKE_CXX_COMPILER "icx") - SET(CMAKE_C_COMPILER "icx") + SET(CMAKE_CXX_COMPILER ${ICX_PATH}) + SET(CMAKE_C_COMPILER ${ICX_PATH}) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -DFIRESTARTER_BUILD_ONEAPI") elseif("${FIRESTARTER_BUILD_TYPE}" STREQUAL "FIRESTARTER_HIP") From 44119f8465b040c8fd96d3f131d1f29e8ab5822a Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Mon, 25 Nov 2024 19:27:19 +0100 Subject: [PATCH 16/36] add FIRESTARTER_HIP to the available build types --- cmake/BuildOptions.cmake | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/cmake/BuildOptions.cmake b/cmake/BuildOptions.cmake index eb1b508e..cf250b65 100644 --- a/cmake/BuildOptions.cmake +++ b/cmake/BuildOptions.cmake @@ -2,7 +2,7 @@ include(CMakeDependentOption) # Set the different available FIRESTARTER builds. set(FIRESTARTER_BUILD_TYPE "FIRESTARTER" CACHE STRING "FIRESTARTER_BUILD_TYPE can be any of FIRESTARTER, FIRESTARTER_CUDA, FIRESTARTER_ONEAPI, or FIRESTARTER_HIP.") -set_property(CACHE FIRESTARTER_BUILD_TYPE PROPERTY STRINGS FIRESTARTER FIRESTARTER_CUDA FIRESTARTER_ONEAPI) +set_property(CACHE FIRESTARTER_BUILD_TYPE PROPERTY STRINGS FIRESTARTER FIRESTARTER_CUDA FIRESTARTER_ONEAPI FIRESTARTER_HIP) # Static linking is not supported with GPU devices or MacOS. set(FIRESTARTER_LINK_STATIC_FLAG ("${FIRESTARTER_BUILD_TYPE}" STREQUAL "FIRESTARTER") AND (NOT CMAKE_SYSTEM_NAME STREQUAL "Darwin")) From 930e84ba7bd04d1a9b69d16e3fd6967e2f4967b9 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Tue, 26 Nov 2024 10:47:44 +0100 Subject: [PATCH 17/36] add debug prints to cmake --- src/CMakeLists.txt | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ed46855d..459565a1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -65,7 +65,14 @@ SET(FIRESTARTER_FILES firestarter/LoadWorker.cpp ) +get_cmake_property(_variableNames VARIABLES) +list (SORT _variableNames) +foreach (_variableName ${_variableNames}) + message(STATUS "${_variableName}=${${_variableName}}") +endforeach() + if ("${FIRESTARTER_BUILD_TYPE}" STREQUAL "FIRESTARTER_CUDA") + message(STATUS "Building FIRESTARTER_CUDA") add_executable(FIRESTARTER_CUDA ${FIRESTARTER_FILES} firestarter/Cuda/Cuda.cpp @@ -100,6 +107,7 @@ if ("${FIRESTARTER_BUILD_TYPE}" STREQUAL "FIRESTARTER_CUDA") endif() elseif ("${FIRESTARTER_BUILD_TYPE}" STREQUAL "FIRESTARTER_ONEAPI") + message(STATUS "Building FIRESTARTER_ONEAPI") add_executable(FIRESTARTER_ONEAPI ${FIRESTARTER_FILES} firestarter/OneAPI/OneAPI.cpp @@ 
-125,6 +133,7 @@ elseif ("${FIRESTARTER_BUILD_TYPE}" STREQUAL "FIRESTARTER_ONEAPI") ) elseif("${FIRESTARTER_BUILD_TYPE}" STREQUAL "FIRESTARTER_HIP") + message(STATUS "Building FIRESTARTER_HIP") add_executable(FIRESTARTER_HIP ${FIRESTARTER_FILES} firestarter/Cuda/Cuda.cpp @@ -154,6 +163,7 @@ elseif("${FIRESTARTER_BUILD_TYPE}" STREQUAL "FIRESTARTER_HIP") elseif(${FIRESTARTER_BUILD_TYPE} STREQUAL "FIRESTARTER") + message(STATUS "Building FIRESTARTER") add_executable(FIRESTARTER ${FIRESTARTER_FILES} ) From 5ae3e405d86308e1c52f4d6e32fc716ed6caec0b Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Tue, 26 Nov 2024 11:07:49 +0100 Subject: [PATCH 18/36] use hipcc to compile hip build --- .github/workflows/cmake.yml | 49 ++++--------------------------------- cmake/BuildSettings.cmake | 3 --- 2 files changed, 5 insertions(+), 47 deletions(-) diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index b58b7e03..824fbe3f 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -10,7 +10,6 @@ jobs: fail-fast: true matrix: os: [ubuntu-22.04] - compiler: [g++-9, g++-10, g++-11, g++-12, clang++-11, clang++-12, clang++-13, clang++-14, clang++-15] HIP: ['6.2'] runs-on: ${{ matrix.os }} @@ -24,42 +23,6 @@ jobs: sudo rm -rf /opt/ghc sudo rm -rf /usr/local/.ghcup - - name: Install g++-9 (if needed) - if: matrix.compiler == 'g++-9' - run: | - sudo apt install g++-9 - - name: Install g++-10 (if needed) - if: matrix.compiler == 'g++-10' - run: | - sudo apt install g++-10 - - name: Install g++-11 (if needed) - if: matrix.compiler == 'g++-11' - run: | - sudo apt install g++-11 - - name: Install g++-12 (if needed) - if: matrix.compiler == 'g++-12' - run: | - sudo apt install g++-12 - - name: Install clang++-11 (if needed) - if: matrix.compiler == 'clang++-11' - run: | - sudo apt install clang-11 - - name: Install clang++-12 (if needed) - if: matrix.compiler == 'clang++-12' - run: | - sudo apt install clang-12 - - name: Install clang++-13 (if needed) - 
if: matrix.compiler == 'clang++-13' - run: | - sudo apt install clang-13 - - name: Install clang++-14 (if needed) - if: matrix.compiler == 'clang++-14' - run: | - sudo apt install clang-14 - - name: Install clang++-15 (if needed) - if: matrix.compiler == 'clang++-15' - run: | - sudo apt install clang-15 - name: Setup Node.js uses: actions/setup-node@v4 with: @@ -69,7 +32,6 @@ jobs: with: fetch-depth: '0' - name: Install HIP runtime (if needed) - if: matrix.HIP != '0' && matrix.ONEAPI == 0 && matrix.CUDA == 0 run: | case ${{ matrix.HIP }} in 6.2) @@ -83,9 +45,9 @@ jobs: run: | mkdir build - name: Run CMake configure (HIP) - if: matrix.HIP != '0' && matrix.CUDA == '0' && matrix.ONEAPI == '0' env: - CXX: ${{ matrix.compiler }} + CMAKE_C_COMPILER: '/opt/rocm/bin/hipcc' + CMAKE_CXX_COMPILER: '/opt/rocm/bin/hipcc' run: | export CPATH=${HIP_ROOT}/include:${HIP_ROOT} export LD_LIBRARY_PATH=${HIP_ROOT}/lib64:${HIP_ROOT}/lib64/stubs:${LD_LIBRARY_PATH} @@ -98,22 +60,21 @@ jobs: cd build cmake -DFIRESTARTER_BUILD_TYPE="FIRESTARTER_HIP" -DCMAKE_EXE_LINKER_FLAGS="-L${HIP_ROOT}/lib64/stubs/" .. 
- - name: Build (default, CUDA, HIP) - if: matrix.ONEAPI =='0' + - name: Build (HIP) run: | cd build make -j2 - name: Strip binary (HIP) - if: matrix.CUDA == '0' && matrix.HIP != '0' && matrix.ONEAPI == '0' run: | cd build strip src/FIRESTARTER_HIP - uses: actions/upload-artifact@v4 - if: matrix.compiler == 'g++-9' && matrix.HIP != '0' && matrix.CUDA == '0' && matrix.ONEAPI == '0' && ( github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') || github.event_name == 'pull_request' ) + if: startsWith(github.ref, 'refs/tags/') || github.event_name == 'pull_request' with: name: FIRESTARTER_HIP_${{ matrix.HIP }}-linux retention-days: 1 path: build/src/FIRESTARTER_HIP + build-linux: strategy: # max-parallel: 1 # Sets the limit of jobs to run concurrently diff --git a/cmake/BuildSettings.cmake b/cmake/BuildSettings.cmake index c9b28c0e..f40b32ed 100644 --- a/cmake/BuildSettings.cmake +++ b/cmake/BuildSettings.cmake @@ -68,8 +68,5 @@ elseif("${FIRESTARTER_BUILD_TYPE}" STREQUAL "FIRESTARTER_HIP") find_package(hiprand REQUIRED) find_package(hipblas REQUIRED) - set(CMAKE_CXX_COMPILER ${HIP_HIPCC_EXECUTABLE}) - set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE}) - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DFIRESTARTER_BUILD_HIP") endif() \ No newline at end of file From 0ba17e0006208c09a6aef4884412524786a6ad08 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Tue, 26 Nov 2024 11:19:01 +0100 Subject: [PATCH 19/36] fix assert in cuda wrapper code --- include/firestarter/Cuda/CudaHipCompat.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/firestarter/Cuda/CudaHipCompat.hpp b/include/firestarter/Cuda/CudaHipCompat.hpp index f0543f4d..1651606e 100644 --- a/include/firestarter/Cuda/CudaHipCompat.hpp +++ b/include/firestarter/Cuda/CudaHipCompat.hpp @@ -370,7 +370,7 @@ template void accellSafeCall(T TVal, const char* File, const int Li } } else if constexpr (std::is_same_v) { #ifndef FIRESTARTER_BUILD_CUDA - static_assert(false, "Tried to call 
accellSafeCall with CUresult, but not building for CUDA."); + assert(false && "Tried to call accellSafeCall with CUresult, but not building for CUDA."); #endif if (TVal == CUDA_SUCCESS) { return; From 8df365f91ec22eac422daafb86bf9cc545aada00 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Tue, 26 Nov 2024 11:21:59 +0100 Subject: [PATCH 20/36] rewrite cuda exception in cuda/hip compat header --- include/firestarter/Cuda/CudaHipCompat.hpp | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/include/firestarter/Cuda/CudaHipCompat.hpp b/include/firestarter/Cuda/CudaHipCompat.hpp index 1651606e..12723d0f 100644 --- a/include/firestarter/Cuda/CudaHipCompat.hpp +++ b/include/firestarter/Cuda/CudaHipCompat.hpp @@ -346,13 +346,6 @@ auto getErrorString(CUresult Result) -> const char* { accellSafeCall(cuGetErrorName(Result, &ErrorString), __FILE__, __LINE__); return ErrorString; } -#else -// define types to not run into compile errors with if constexpr - -enum class CUresult {}; -// NOLINTBEGIN(readability-identifier-naming) -constexpr const int CUDA_SUCCESS = 0; -// NOLINTEND(readability-identifier-naming) #endif template void accellSafeCall(T TVal, const char* File, const int Line, std::optional DeviceIndex) { @@ -368,14 +361,15 @@ template void accellSafeCall(T TVal, const char* File, const int Li if (TVal == RandStatusT::RAND_STATUS_SUCCESS) { return; } - } else if constexpr (std::is_same_v) { -#ifndef FIRESTARTER_BUILD_CUDA - assert(false && "Tried to call accellSafeCall with CUresult, but not building for CUDA."); -#endif + } +#ifdef FIRESTARTER_BUILD_CUDA + else if constexpr (std::is_same_v) { if (TVal == CUDA_SUCCESS) { return; } - } else { + } +#endif + else { assert(false && "Tried to call accellSafeCall with an unknown type."); } From 1a4ab9fe8f7e32b9c837f558a5cd4d0d2e11cd8e Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Tue, 26 Nov 2024 11:36:50 +0100 Subject: [PATCH 21/36] ci: refactor oneapi build --- 
.github/workflows/cmake.yml | 130 ++++++++++++++++++++++-------------- 1 file changed, 80 insertions(+), 50 deletions(-) diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 824fbe3f..07257789 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -75,6 +75,81 @@ jobs: retention-days: 1 path: build/src/FIRESTARTER_HIP + build-linux-oneapi: + strategy: + fail-fast: true + matrix: + os: [ubuntu-20.04] + ONEAPI: ['2023.2.0', '2024.0'] + + runs-on: ${{ matrix.os }} + + env: + CMAKE_C_COMPILER: 'icx' + CMAKE_CXX_COMPILER: 'icx' + + steps: + - name: Try to clean up some things + run: | + sudo rm -rf /usr/local/lib/android + sudo rm -rf /usr/share/dotnet + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: '20' + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: '0' + - name: Install OneAPI Base-Toolkit + run: | + wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null + echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list + sudo apt update + sudo apt install intel-basekit-${{ matrix.ONEAPI }} + - name: Create build directory + run: | + mkdir build + - name: Run CMake configure (OneAPI 2023.2.0) + if: matrix.ONEAPI == '2023.2.0' + run: | + . /opt/intel/oneapi/setvars.sh + cd build + cmake -DFIRESTARTER_BUILD_TYPE="FIRESTARTER_ONEAPI" .. + - name: Run CMake configure (OneAPI 2024.0) + if: matrix.ONEAPI == '2024.0' + run: | + . /opt/intel/oneapi/${{ matrix.ONEAPI }}/oneapi-vars.sh + cd build + cmake -DFIRESTARTER_BUILD_TYPE="FIRESTARTER_ONEAPI" .. + - name: Build (OneAPI 2023.2.0) + if: matrix.ONEAPI == '2023.2.0' + run: | + . /opt/intel/oneapi/setvars.sh + cd build + make -j2 + - name: Build (OneAPI 2024.0) + if: matrix.ONEAPI == '2024.0' + run: | + . 
/opt/intel/oneapi/${{ matrix.ONEAPI }}/oneapi-vars.sh + cd build + make -j2 + - name: Strip binary (OneAPI) + run: | + cd build + strip src/FIRESTARTER_ONEAPI + - uses: actions/upload-artifact@v4 + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') || github.event_name == 'pull_request' + with: + name: FIRESTARTER_ONEAPI_${{ matrix.ONEAPI }}-linux + retention-days: 1 + path: build/src/FIRESTARTER_ONEAPI + + - name: UnInstall OneAPI Base-Toolkit (if needed) + run: | + sudo apt remove intel-basekit-${{ matrix.ONEAPI }} + sudo apt autoremove + build-linux: strategy: # max-parallel: 1 # Sets the limit of jobs to run concurrently @@ -83,7 +158,6 @@ jobs: os: [ubuntu-20.04] compiler: [g++-7, g++-8, g++-9, g++-10, clang++-8, clang++-9, clang++-10] CUDA: ['0', '8.0', '11.0', 'NVHPC-22.5'] - ONEAPI: ['0', '2023.2.0', '2024.0'] runs-on: ${{ matrix.os }} @@ -124,7 +198,7 @@ jobs: with: fetch-depth: '0' - name: Install CUDA runtime (if needed) - if: matrix.CUDA != '0' && matrix.ONEAPI == 0 + if: matrix.CUDA != '0' run: | case ${{ matrix.CUDA }} in 8.0) @@ -149,13 +223,6 @@ jobs: rm -rf nvhpc_2022_225_Linux_x86_64_cuda_11.7 esac - - name: Install OneAPI Base-Toolkit (if needed) - if: matrix.ONEAPI != '0' && matrix.CUDA == '0' - run: | - wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null - echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list - sudo apt update - sudo apt install intel-basekit-${{ matrix.ONEAPI }} - name: Create build directory run: | mkdir build @@ -195,35 +262,11 @@ jobs: cd build cmake -DFIRESTARTER_BUILD_TYPE="FIRESTARTER_CUDA" -DCMAKE_EXE_LINKER_FLAGS=-L"$CUDA_ROOT/$NVARCH/22.5/cuda/11.7/lib64/stubs" -LA .. 
- - name: Run CMake configure (OneAPI 2023.2.0) - if: matrix.CUDA == '0' && matrix.ONEAPI =='2023.2.0' - run: | - . /opt/intel/oneapi/setvars.sh - cd build - cmake -DFIRESTARTER_BUILD_TYPE="FIRESTARTER_ONEAPI" .. - - name: Run CMake configure (OneAPI 2024.0) - if: matrix.CUDA == '0' && matrix.ONEAPI =='2024.0' - run: | - . /opt/intel/oneapi/${{ matrix.ONEAPI }}/oneapi-vars.sh - cd build - cmake -DFIRESTARTER_BUILD_TYPE="FIRESTARTER_ONEAPI" .. - name: Build (default, CUDA) if: matrix.ONEAPI =='0' run: | cd build make -j2 - - name: Build (OneAPI 2023.2.0) - if: matrix.CUDA == '0' && matrix.ONEAPI =='2023.2.0' - run: | - . /opt/intel/oneapi/setvars.sh - cd build - make -j2 - - name: Build (OneAPI 2024.0) - if: matrix.CUDA == '0' && matrix.ONEAPI =='2024.0' - run: | - . /opt/intel/oneapi/${{ matrix.ONEAPI }}/oneapi-vars.sh - cd build - make -j2 - name: Strip binary (default) if: matrix.CUDA == '0' && matrix.ONEAPI == '0' run: | @@ -234,11 +277,6 @@ jobs: run: | cd build strip src/FIRESTARTER_CUDA - - name: Strip binary (OneAPI) - if: matrix.ONEAPI != '0' && matrix.CUDA == '0' - run: | - cd build - strip src/FIRESTARTER_ONEAPI - name: Test FIRESTARTER (default) if: matrix.CUDA == '0' && matrix.ONEAPI == '0' run: ./build/src/FIRESTARTER -t 1 @@ -254,13 +292,7 @@ jobs: name: FIRESTARTER_CUDA_${{ matrix.CUDA }}-linux retention-days: 1 path: build/src/FIRESTARTER_CUDA - - uses: actions/upload-artifact@v4 - if: matrix.compiler == 'clang++-10' && matrix.CUDA == '0' && matrix.ONEAPI != '0' && ( github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') || github.event_name == 'pull_request' ) - with: - name: FIRESTARTER_ONEAPI_${{ matrix.ONEAPI }}-linux - retention-days: 1 - path: build/src/FIRESTARTER_ONEAPI - + - name: UnInstall g++-7 (if needed) if: matrix.compiler == 'g++-7' run: | @@ -290,11 +322,7 @@ jobs: if: matrix.CUDA != '0' && matrix.ONEAPI == '0' run: | sudo rm -rf ${CUDA_ROOT} - - name: UnInstall OneAPI Base-Toolkit (if needed) - if: matrix.ONEAPI != '0' 
&& matrix.CUDA == '0' - run: | - sudo apt remove intel-basekit-${{ matrix.ONEAPI }} - sudo apt autoremove + build-windows: strategy: fail-fast: false @@ -388,6 +416,7 @@ jobs: path: | build\src\FIRESTARTER_CUDA.exe build\src\libhwloc-15.dll + build-macos: strategy: fail-fast: false @@ -434,6 +463,7 @@ jobs: retention-days: 1 name: FIRESTARTER-macOS_13 path: build/src/FIRESTARTER + create-download: name: Create download for Website runs-on: ubuntu-latest From ae664c2c7c0c4c71bee71c049752140c4025c144 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Tue, 26 Nov 2024 11:49:19 +0100 Subject: [PATCH 22/36] fix setting icx/icpx compiler --- .github/workflows/cmake.yml | 9 ++------- cmake/BuildSettings.cmake | 3 --- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 07257789..1ba1bff3 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -6,7 +6,6 @@ jobs: build-linux-hip-rocm: strategy: -# max-parallel: 1 # Sets the limit of jobs to run concurrently fail-fast: true matrix: os: [ubuntu-22.04] @@ -84,10 +83,6 @@ jobs: runs-on: ${{ matrix.os }} - env: - CMAKE_C_COMPILER: 'icx' - CMAKE_CXX_COMPILER: 'icx' - steps: - name: Try to clean up some things run: | @@ -115,13 +110,13 @@ jobs: run: | . /opt/intel/oneapi/setvars.sh cd build - cmake -DFIRESTARTER_BUILD_TYPE="FIRESTARTER_ONEAPI" .. + cmake -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DFIRESTARTER_BUILD_TYPE="FIRESTARTER_ONEAPI" .. - name: Run CMake configure (OneAPI 2024.0) if: matrix.ONEAPI == '2024.0' run: | . /opt/intel/oneapi/${{ matrix.ONEAPI }}/oneapi-vars.sh cd build - cmake -DFIRESTARTER_BUILD_TYPE="FIRESTARTER_ONEAPI" .. + cmake -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DFIRESTARTER_BUILD_TYPE="FIRESTARTER_ONEAPI" .. 
- name: Build (OneAPI 2023.2.0) if: matrix.ONEAPI == '2023.2.0' run: | diff --git a/cmake/BuildSettings.cmake b/cmake/BuildSettings.cmake index f40b32ed..d16949ff 100644 --- a/cmake/BuildSettings.cmake +++ b/cmake/BuildSettings.cmake @@ -51,9 +51,6 @@ elseif ("${FIRESTARTER_BUILD_TYPE}" STREQUAL "FIRESTARTER_ONEAPI") message(STATUS "Path of icx executable is: ${ICX_PATH}") - SET(CMAKE_CXX_COMPILER ${ICX_PATH}) - SET(CMAKE_C_COMPILER ${ICX_PATH}) - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -DFIRESTARTER_BUILD_ONEAPI") elseif("${FIRESTARTER_BUILD_TYPE}" STREQUAL "FIRESTARTER_HIP") if (NOT DEFINED ROCM_PATH ) From d46d9fcdac070350443c5203d65685a3ea1c38f7 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Tue, 26 Nov 2024 12:07:52 +0100 Subject: [PATCH 23/36] ci: refactor cuda/nvhpc build --- .github/workflows/cmake.yml | 147 +++++++++++++++++++----------------- 1 file changed, 77 insertions(+), 70 deletions(-) diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 1ba1bff3..78e1f9e6 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -145,45 +145,18 @@ jobs: sudo apt remove intel-basekit-${{ matrix.ONEAPI }} sudo apt autoremove - build-linux: + build-linux-cuda: strategy: -# max-parallel: 1 # Sets the limit of jobs to run concurrently fail-fast: true matrix: os: [ubuntu-20.04] - compiler: [g++-7, g++-8, g++-9, g++-10, clang++-8, clang++-9, clang++-10] - CUDA: ['0', '8.0', '11.0', 'NVHPC-22.5'] - + CUDA: ['8.0', '11.0', 'NVHPC-22.5'] + runs-on: ${{ matrix.os }} - + env: CUDA_ROOT: '/usr/local/cuda' -#kudos to https://github.com/easimon/maximize-build-space/blob/master/action.yml steps: - - name: Try to clean up some things - run: | - sudo rm -rf /usr/local/lib/android - sudo rm -rf /usr/share/dotnet - - name: Install g++-7 (if needed) - if: matrix.compiler == 'g++-7' - run: | - sudo apt install g++-7 - - name: Install g++-8 (if needed) - if: matrix.compiler == 'g++-8' - run: | - sudo apt install g++-8 - - name: 
Install clang++-8 (if needed) - if: matrix.compiler == 'clang++-8' - run: | - sudo apt install clang-8 - - name: Install clang++-9 (if needed) - if: matrix.compiler == 'clang++-9' - run: | - sudo apt install clang-9 - - name: Install clang++-10 (if needed) - if: matrix.compiler == 'clang++-10' - run: | - sudo apt install clang-10 - name: Setup Node.js uses: actions/setup-node@v4 with: @@ -193,7 +166,6 @@ jobs: with: fetch-depth: '0' - name: Install CUDA runtime (if needed) - if: matrix.CUDA != '0' run: | case ${{ matrix.CUDA }} in 8.0) @@ -216,22 +188,13 @@ jobs: rm nvhpc_2022_225_Linux_x86_64_cuda_11.7.tar.gz sudo NVHPC_SILENT="true" NVHPC_INSTALL_DIR="$CUDA_ROOT" NVHPC_INSTALL_TYPE="single" ./nvhpc_2022_225_Linux_x86_64_cuda_11.7/install rm -rf nvhpc_2022_225_Linux_x86_64_cuda_11.7 - + ;; esac - name: Create build directory run: | mkdir build - - name: Run CMake configure (default) - if: matrix.CUDA == '0' && matrix.ONEAPI == '0' - env: - CXX: ${{ matrix.compiler }} - run: | - cd build - cmake .. - name: Run CMake configure (CUDA) - if: matrix.CUDA != '0' && matrix.CUDA != 'NVHPC-22.5' && matrix.ONEAPI == '0' - env: - CXX: ${{ matrix.compiler }} + if: matrix.CUDA != 'NVHPC-22.5' run: | export CPATH=${CUDA_ROOT}/include:${CPATH} export LD_LIBRARY_PATH=${CUDA_ROOT}/lib64:${CUDA_ROOT}/lib64/stubs:${LD_LIBRARY_PATH} @@ -241,53 +204,101 @@ jobs: export CUDA_HOME=${CUDA_ROOT} export CUDA_PATH=${CUDA_ROOT} export CUDAToolkit_ROOT=${CUDA_ROOT} - + cd build - cmake -DFIRESTARTER_BUILD_TYPE="FIRESTARTER_CUDA" -DCMAKE_EXE_LINKER_FLAGS="-L${CUDA_ROOT}/lib64/stubs/" .. + cmake -DCMAKE_C_COMPILER=nvcc -DCMAKE_CXX_COMPILER=nvcc -DFIRESTARTER_BUILD_TYPE="FIRESTARTER_CUDA" -DCMAKE_EXE_LINKER_FLAGS="-L${CUDA_ROOT}/lib64/stubs/" .. 
- name: Run CMake configure (CUDA with NVHPC) - if: matrix.CUDA == 'NVHPC-22.5' && matrix.ONEAPI == '0' - env: - CXX: ${{ matrix.compiler }} + if: matrix.CUDA == 'NVHPC-22.5' run: | NVARCH=`uname -s`_`uname -m`; export NVARCH PATH=$CUDA_ROOT/$NVARCH/22.5/compilers/bin:$PATH; export PATH LD_LIBRARY_PATH=$CUDA_ROOT/$NVARCH/22.5/compilers/lib:$LD_LIBRARY_PATH; export LD_LIBRARY_PATH LD_LIBRARY_PATH=$CUDA_ROOT/$NVARCH/22.5/cuda/11.7/lib64:$LD_LIBRARY_PATH; export LD_LIBRARY_PATH LD_LIBRARY_PATH=$CUDA_ROOT/$NVARCH/22.5/cuda/11.7/lib64/stubs:$LD_LIBRARY_PATH; export LD_LIBRARY_PATH + + cd build + cmake -DCMAKE_C_COMPILER=nvc -DCMAKE_CXX_COMPILER=nvc++ -DFIRESTARTER_BUILD_TYPE="FIRESTARTER_CUDA" -DCMAKE_EXE_LINKER_FLAGS=-L"$CUDA_ROOT/$NVARCH/22.5/cuda/11.7/lib64/stubs" -LA .. + - name: Build (CUDA) + run: | + cd build + make -j2 + - name: Strip binary (CUDA) + run: | + cd build + strip src/FIRESTARTER_CUDA + - uses: actions/upload-artifact@v4 + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') || github.event_name == 'pull_request' + with: + name: FIRESTARTER_CUDA_${{ matrix.CUDA }}-linux + retention-days: 1 + path: build/src/FIRESTARTER_CUDA + - name: UnInstall CUDA runtime (if needed) + run: | + sudo rm -rf ${CUDA_ROOT} + + build-linux: + strategy: + fail-fast: true + matrix: + os: [ubuntu-20.04] + compiler: [g++-7, g++-8, g++-9, g++-10, clang++-8, clang++-9, clang++-10] + runs-on: ${{ matrix.os }} + + steps: + - name: Install g++-7 (if needed) + if: matrix.compiler == 'g++-7' + run: | + sudo apt install g++-7 + - name: Install g++-8 (if needed) + if: matrix.compiler == 'g++-8' + run: | + sudo apt install g++-8 + - name: Install clang++-8 (if needed) + if: matrix.compiler == 'clang++-8' + run: | + sudo apt install clang-8 + - name: Install clang++-9 (if needed) + if: matrix.compiler == 'clang++-9' + run: | + sudo apt install clang-9 + - name: Install clang++-10 (if needed) + if: matrix.compiler == 'clang++-10' + run: | + sudo apt install clang-10 + 
- name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: '20' + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: '0' + - name: Create build directory + run: | + mkdir build + - name: Run CMake configure (default) + env: + CXX: ${{ matrix.compiler }} + run: | cd build - cmake -DFIRESTARTER_BUILD_TYPE="FIRESTARTER_CUDA" -DCMAKE_EXE_LINKER_FLAGS=-L"$CUDA_ROOT/$NVARCH/22.5/cuda/11.7/lib64/stubs" -LA .. - - name: Build (default, CUDA) - if: matrix.ONEAPI =='0' + cmake .. + - name: Build (default) run: | cd build make -j2 - name: Strip binary (default) - if: matrix.CUDA == '0' && matrix.ONEAPI == '0' run: | cd build strip src/FIRESTARTER - - name: Strip binary (CUDA) - if: matrix.CUDA != '0' && matrix.ONEAPI == '0' - run: | - cd build - strip src/FIRESTARTER_CUDA - name: Test FIRESTARTER (default) - if: matrix.CUDA == '0' && matrix.ONEAPI == '0' run: ./build/src/FIRESTARTER -t 1 - uses: actions/upload-artifact@v4 - if: matrix.compiler == 'clang++-10' && matrix.CUDA == '0' && matrix.ONEAPI == '0' && ( github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') || github.event_name == 'pull_request' ) + if: matrix.compiler == 'clang++-10' && ( github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') || github.event_name == 'pull_request' ) with: name: FIRESTARTER-linux retention-days: 1 path: build/src/FIRESTARTER - - uses: actions/upload-artifact@v4 - if: matrix.compiler == 'clang++-10' && matrix.CUDA != '0' && matrix.ONEAPI == '0' && ( github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') || github.event_name == 'pull_request' ) - with: - name: FIRESTARTER_CUDA_${{ matrix.CUDA }}-linux - retention-days: 1 - path: build/src/FIRESTARTER_CUDA - - name: UnInstall g++-7 (if needed) if: matrix.compiler == 'g++-7' run: | @@ -313,10 +324,6 @@ jobs: run: | sudo apt remove clang-10 sudo apt autoremove - - name: UnInstall CUDA runtime (if needed) - if: matrix.CUDA != '0' && matrix.ONEAPI == '0' - run: 
| - sudo rm -rf ${CUDA_ROOT} build-windows: strategy: From fbf1ca781d83be8515b429566c4c3d2a0403b1b5 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Tue, 26 Nov 2024 12:11:05 +0100 Subject: [PATCH 24/36] oneapi: link to threads --- src/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 459565a1..02203db9 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -130,6 +130,7 @@ elseif ("${FIRESTARTER_BUILD_TYPE}" STREQUAL "FIRESTARTER_ONEAPI") mkl_core sycl stdc++ + Threads::Threads ) elseif("${FIRESTARTER_BUILD_TYPE}" STREQUAL "FIRESTARTER_HIP") From da4a1f0213e81c77dfa2862f520b7e0f00bb3e84 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Tue, 26 Nov 2024 12:17:34 +0100 Subject: [PATCH 25/36] make cmake find nvcc --- .github/workflows/cmake.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 78e1f9e6..dfe7bf00 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -200,7 +200,7 @@ jobs: export LD_LIBRARY_PATH=${CUDA_ROOT}/lib64:${CUDA_ROOT}/lib64/stubs:${LD_LIBRARY_PATH} export LIBRARY_PATH=${CUDA_ROOT}/lib64:${CUDA_ROOT}/lib64/stubs:${LIBRARY_PATH} export CUDA_LIB_PATH=${CUDA_ROOT}/lib64:${CUDA_ROOT}/lib64/stubs - export PATH=${CUDA_ROOT}:${PATH} + export PATH=${CUDA_ROOT}/bin:${PATH} export CUDA_HOME=${CUDA_ROOT} export CUDA_PATH=${CUDA_ROOT} export CUDAToolkit_ROOT=${CUDA_ROOT} @@ -279,6 +279,7 @@ jobs: mkdir build - name: Run CMake configure (default) env: + # TODO: set cmake c compiler correctly CXX: ${{ matrix.compiler }} run: | cd build From bfcf5c335cb18a1f53c67b8d223aff539379535d Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Tue, 26 Nov 2024 13:17:30 +0100 Subject: [PATCH 26/36] fix cuda 8 and 11 builds --- .github/workflows/cmake.yml | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 
dfe7bf00..b76e2b47 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -165,7 +165,10 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: '0' - - name: Install CUDA runtime (if needed) + - name: Install clang++-10 + run: | + sudo apt install clang-10 + - name: Install CUDA runtime run: | case ${{ matrix.CUDA }} in 8.0) @@ -206,7 +209,7 @@ jobs: export CUDAToolkit_ROOT=${CUDA_ROOT} cd build - cmake -DCMAKE_C_COMPILER=nvcc -DCMAKE_CXX_COMPILER=nvcc -DFIRESTARTER_BUILD_TYPE="FIRESTARTER_CUDA" -DCMAKE_EXE_LINKER_FLAGS="-L${CUDA_ROOT}/lib64/stubs/" .. + cmake -DFIRESTARTER_BUILD_TYPE="FIRESTARTER_CUDA" -DCMAKE_EXE_LINKER_FLAGS="-L${CUDA_ROOT}/lib64/stubs/" .. - name: Run CMake configure (CUDA with NVHPC) if: matrix.CUDA == 'NVHPC-22.5' run: | @@ -235,6 +238,11 @@ jobs: - name: UnInstall CUDA runtime (if needed) run: | sudo rm -rf ${CUDA_ROOT} + - name: UnInstall clang++-10 + run: | + sudo apt remove clang-10 + sudo apt autoremove + build-linux: strategy: From 2d32c38768d8fba28aad99472ac61ec718cc5239 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Tue, 26 Nov 2024 14:05:00 +0100 Subject: [PATCH 27/36] add comment to ci build yaml. fix cuda builds --- .github/workflows/cmake.yml | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index b76e2b47..32052ce4 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -2,6 +2,18 @@ name: Build on: [push, pull_request] +# We define multiple jobs that test compatibility of the FIRESTARTER code aginst different compilers. 
+# On linux: +# FIRESTARTER_HIP version 6.2 against the hipcc compiler +# FIRESTARTER_ONEAPI versions 2023.2.0 and 2024.0 against icx and icpx +# FIRESTARTER_CUDA with cuda versions 8.0, 11.0 and NVHPC-22.5 against clang++-10 +# FIRESTARTER aginst g++-7, g++-8, g++-9, g++-10, clang++-8, clang++-9, clang++-10 +# On Windows: +# FIRESTARTER aginst windows-2019 MSVC and mingw +# FIRESTARTER_CUDA against windows-2019 MSVC +# On MacOS: +# FIRESTARTER aginst XCode on MacOS 12 and 13 + jobs: build-linux-hip-rocm: @@ -74,6 +86,7 @@ jobs: retention-days: 1 path: build/src/FIRESTARTER_HIP + build-linux-oneapi: strategy: fail-fast: true @@ -145,6 +158,7 @@ jobs: sudo apt remove intel-basekit-${{ matrix.ONEAPI }} sudo apt autoremove + build-linux-cuda: strategy: fail-fast: true @@ -197,6 +211,9 @@ jobs: run: | mkdir build - name: Run CMake configure (CUDA) + env: + C: 'clang-10' + CXX: 'clang++-10' if: matrix.CUDA != 'NVHPC-22.5' run: | export CPATH=${CUDA_ROOT}/include:${CPATH} @@ -211,6 +228,9 @@ jobs: cd build cmake -DFIRESTARTER_BUILD_TYPE="FIRESTARTER_CUDA" -DCMAKE_EXE_LINKER_FLAGS="-L${CUDA_ROOT}/lib64/stubs/" .. - name: Run CMake configure (CUDA with NVHPC) + env: + C: 'clang-10' + CXX: 'clang++-10' if: matrix.CUDA == 'NVHPC-22.5' run: | NVARCH=`uname -s`_`uname -m`; export NVARCH @@ -220,7 +240,7 @@ jobs: LD_LIBRARY_PATH=$CUDA_ROOT/$NVARCH/22.5/cuda/11.7/lib64/stubs:$LD_LIBRARY_PATH; export LD_LIBRARY_PATH cd build - cmake -DCMAKE_C_COMPILER=nvc -DCMAKE_CXX_COMPILER=nvc++ -DFIRESTARTER_BUILD_TYPE="FIRESTARTER_CUDA" -DCMAKE_EXE_LINKER_FLAGS=-L"$CUDA_ROOT/$NVARCH/22.5/cuda/11.7/lib64/stubs" -LA .. + cmake -DFIRESTARTER_BUILD_TYPE="FIRESTARTER_CUDA" -DCMAKE_EXE_LINKER_FLAGS=-L"$CUDA_ROOT/$NVARCH/22.5/cuda/11.7/lib64/stubs" -LA .. 
- name: Build (CUDA) run: | cd build @@ -334,6 +354,7 @@ jobs: sudo apt remove clang-10 sudo apt autoremove + build-windows: strategy: fail-fast: false From d04125df6244f937054dc66d1cb2c90af2b45a54 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Tue, 26 Nov 2024 17:58:37 +0100 Subject: [PATCH 28/36] cmake: do not use dependent options as they do not work for our intended usecase --- cmake/BuildOptions.cmake | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/cmake/BuildOptions.cmake b/cmake/BuildOptions.cmake index cf250b65..660373ea 100644 --- a/cmake/BuildOptions.cmake +++ b/cmake/BuildOptions.cmake @@ -5,8 +5,9 @@ set(FIRESTARTER_BUILD_TYPE "FIRESTARTER" CACHE STRING "FIRESTARTER_BUILD_TYPE ca set_property(CACHE FIRESTARTER_BUILD_TYPE PROPERTY STRINGS FIRESTARTER FIRESTARTER_CUDA FIRESTARTER_ONEAPI FIRESTARTER_HIP) # Static linking is not supported with GPU devices or MacOS. -set(FIRESTARTER_LINK_STATIC_FLAG ("${FIRESTARTER_BUILD_TYPE}" STREQUAL "FIRESTARTER") AND (NOT CMAKE_SYSTEM_NAME STREQUAL "Darwin")) -cmake_dependent_option(FIRESTARTER_LINK_STATIC "Link FIRESTARTER as a static binary. Note, dlopen is not supported in static binaries. This option is not available on macOS or with CUDA, OneAPI or HIP enabled." ON "FIRESTARTER_LINK_STATIC_FLAG" OFF) +if(${FIRESTARTER_BUILD_TYPE} STREQUAL "FIRESTARTER" AND NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") + set(FIRESTARTER_LINK_STATIC "Link FIRESTARTER as a static binary. Note, dlopen is not supported in static binaries. This option is not available on macOS or with CUDA, OneAPI or HIP enabled." ON) +endif() # We vendor hwloc per default. @@ -14,10 +15,12 @@ option(FIRESTARTER_BUILD_HWLOC "Build hwloc dependency." ON) # Use of thread affinity is enabled on linux per default. -set(FIRESTARTER_THREAD_AFFINITY_FLAG (CMAKE_SYSTEM_NAME STREQUAL "Linux")) -cmake_dependent_option(FIRESTARTER_THREAD_AFFINITY "Enable FIRESTARTER to set affinity to hardware threads." 
ON "FIRESTARTER_THREAD_AFFINITY_FLAG" OFF) +if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") + set(FIRESTARTER_THREAD_AFFINITY "Enable FIRESTARTER to set affinity to hardware threads." ON) +endif() # Debug feature are enabled on linux per default. -set(FIRESTARTER_DEBUG_FEATURES_FLAG (CMAKE_SYSTEM_NAME STREQUAL "Linux")) -cmake_dependent_option(FIRESTARTER_DEBUG_FEATURES "Enable debug features" ON "FIRESTARTER_DEBUG_FEATURES_FLAG" OFF) \ No newline at end of file +if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") + set(FIRESTARTER_DEBUG_FEATURES "Enable debug features" ON) +endif() \ No newline at end of file From 2e34ebb83eac867130e65dd12ed4c7fcb8a20c53 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Tue, 26 Nov 2024 18:49:07 +0100 Subject: [PATCH 29/36] rocm ci update upload artifact condition to the one that is used in all other conditions --- .github/workflows/cmake.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 32052ce4..4635678f 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -80,7 +80,7 @@ jobs: cd build strip src/FIRESTARTER_HIP - uses: actions/upload-artifact@v4 - if: startsWith(github.ref, 'refs/tags/') || github.event_name == 'pull_request' + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') || github.event_name == 'pull_request' with: name: FIRESTARTER_HIP_${{ matrix.HIP }}-linux retention-days: 1 From 116194cc7570894ba5d38ecf3a6f66e763f96729 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Tue, 26 Nov 2024 18:52:26 +0100 Subject: [PATCH 30/36] ci: fix rocm build configure --- .github/workflows/cmake.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 4635678f..e2afd62a 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -57,8 +57,8 @@ jobs: mkdir build - name: Run CMake configure (HIP) env: - CMAKE_C_COMPILER: 
'/opt/rocm/bin/hipcc' - CMAKE_CXX_COMPILER: '/opt/rocm/bin/hipcc' + C: '/opt/rocm/bin/hipcc' + CXX: '/opt/rocm/bin/hipcc' run: | export CPATH=${HIP_ROOT}/include:${HIP_ROOT} export LD_LIBRARY_PATH=${HIP_ROOT}/lib64:${HIP_ROOT}/lib64/stubs:${LD_LIBRARY_PATH} From 03d609940c2b154c621ef8d9218c332e60994550 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Tue, 26 Nov 2024 18:58:57 +0100 Subject: [PATCH 31/36] ci: set CC variable in build correctly --- .github/workflows/cmake.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index e2afd62a..0bf22ff2 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -57,7 +57,7 @@ jobs: mkdir build - name: Run CMake configure (HIP) env: - C: '/opt/rocm/bin/hipcc' + CC: '/opt/rocm/bin/hipcc' CXX: '/opt/rocm/bin/hipcc' run: | export CPATH=${HIP_ROOT}/include:${HIP_ROOT} @@ -212,7 +212,7 @@ jobs: mkdir build - name: Run CMake configure (CUDA) env: - C: 'clang-10' + CC: 'clang-10' CXX: 'clang++-10' if: matrix.CUDA != 'NVHPC-22.5' run: | @@ -229,7 +229,7 @@ jobs: cmake -DFIRESTARTER_BUILD_TYPE="FIRESTARTER_CUDA" -DCMAKE_EXE_LINKER_FLAGS="-L${CUDA_ROOT}/lib64/stubs/" .. 
- name: Run CMake configure (CUDA with NVHPC) env: - C: 'clang-10' + CC: 'clang-10' CXX: 'clang++-10' if: matrix.CUDA == 'NVHPC-22.5' run: | @@ -307,7 +307,7 @@ jobs: mkdir build - name: Run CMake configure (default) env: - # TODO: set cmake c compiler correctly + # We do not set the CC flag as it is not used in the FIRESTARTER build CXX: ${{ matrix.compiler }} run: | cd build From 623323737e30794ff2d340b050876f0a75310475 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Tue, 26 Nov 2024 19:19:21 +0100 Subject: [PATCH 32/36] ci: fix dependencies in build --- .github/workflows/cmake.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 0bf22ff2..1bdd08df 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -500,7 +500,7 @@ jobs: name: Create download for Website runs-on: ubuntu-latest if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') || github.event_name == 'pull_request' - needs: [build-linux-hip-rocm, build-linux, build-macos, build-windows] + needs: [ build-linux-hip-rocm, build-linux-oneapi, build-linux-cuda, build-linux, build-macos, build-windows ] steps: - uses: actions/checkout@v4 with: From 00ebd7f3003013c8d3a80c7bca7997f2a48ebe9a Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Tue, 26 Nov 2024 19:45:04 +0100 Subject: [PATCH 33/36] ci: fix type in comment --- .github/workflows/cmake.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 1bdd08df..85b2e7b2 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -7,12 +7,12 @@ on: [push, pull_request] # FIRESTARTER_HIP version 6.2 against the hipcc compiler # FIRESTARTER_ONEAPI versions 2023.2.0 and 2024.0 against icx and icpx # FIRESTARTER_CUDA with cuda versions 8.0, 11.0 and NVHPC-22.5 against clang++-10 -# FIRESTARTER aginst g++-7, g++-8, g++-9, g++-10, clang++-8, 
clang++-9, clang++-10 +# FIRESTARTER against g++-7, g++-8, g++-9, g++-10, clang++-8, clang++-9, clang++-10 # On Windows: # FIRESTARTER aginst windows-2019 MSVC and mingw # FIRESTARTER_CUDA against windows-2019 MSVC # On MacOS: -# FIRESTARTER aginst XCode on MacOS 12 and 13 +# FIRESTARTER against XCode on MacOS 12 and 13 jobs: From 57776ea1f6c2ec1a09b1b5f69eb5ae037e48a51b Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Tue, 26 Nov 2024 19:45:56 +0100 Subject: [PATCH 34/36] ci: fix type in comment --- .github/workflows/cmake.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 85b2e7b2..19c9466a 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -2,14 +2,14 @@ name: Build on: [push, pull_request] -# We define multiple jobs that test compatibility of the FIRESTARTER code aginst different compilers. +# We define multiple jobs that test compatibility of the FIRESTARTER code against different compilers. # On linux: # FIRESTARTER_HIP version 6.2 against the hipcc compiler # FIRESTARTER_ONEAPI versions 2023.2.0 and 2024.0 against icx and icpx # FIRESTARTER_CUDA with cuda versions 8.0, 11.0 and NVHPC-22.5 against clang++-10 # FIRESTARTER against g++-7, g++-8, g++-9, g++-10, clang++-8, clang++-9, clang++-10 # On Windows: -# FIRESTARTER aginst windows-2019 MSVC and mingw +# FIRESTARTER against windows-2019 MSVC and mingw # FIRESTARTER_CUDA against windows-2019 MSVC # On MacOS: # FIRESTARTER against XCode on MacOS 12 and 13 From 049b627948dce5069e3031639eb2759479c6a326 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Wed, 27 Nov 2024 11:40:21 +0100 Subject: [PATCH 35/36] Revert "add debug prints to cmake" This reverts commit 930e84ba7bd04d1a9b69d16e3fd6967e2f4967b9. 
--- src/CMakeLists.txt | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 02203db9..96de8119 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -65,14 +65,7 @@ SET(FIRESTARTER_FILES firestarter/LoadWorker.cpp ) -get_cmake_property(_variableNames VARIABLES) -list (SORT _variableNames) -foreach (_variableName ${_variableNames}) - message(STATUS "${_variableName}=${${_variableName}}") -endforeach() - if ("${FIRESTARTER_BUILD_TYPE}" STREQUAL "FIRESTARTER_CUDA") - message(STATUS "Building FIRESTARTER_CUDA") add_executable(FIRESTARTER_CUDA ${FIRESTARTER_FILES} firestarter/Cuda/Cuda.cpp @@ -107,7 +100,6 @@ if ("${FIRESTARTER_BUILD_TYPE}" STREQUAL "FIRESTARTER_CUDA") endif() elseif ("${FIRESTARTER_BUILD_TYPE}" STREQUAL "FIRESTARTER_ONEAPI") - message(STATUS "Building FIRESTARTER_ONEAPI") add_executable(FIRESTARTER_ONEAPI ${FIRESTARTER_FILES} firestarter/OneAPI/OneAPI.cpp @@ -134,7 +126,6 @@ elseif ("${FIRESTARTER_BUILD_TYPE}" STREQUAL "FIRESTARTER_ONEAPI") ) elseif("${FIRESTARTER_BUILD_TYPE}" STREQUAL "FIRESTARTER_HIP") - message(STATUS "Building FIRESTARTER_HIP") add_executable(FIRESTARTER_HIP ${FIRESTARTER_FILES} firestarter/Cuda/Cuda.cpp @@ -164,7 +155,6 @@ elseif("${FIRESTARTER_BUILD_TYPE}" STREQUAL "FIRESTARTER_HIP") elseif(${FIRESTARTER_BUILD_TYPE} STREQUAL "FIRESTARTER") - message(STATUS "Building FIRESTARTER") add_executable(FIRESTARTER ${FIRESTARTER_FILES} ) From 10e1572a93f34db6359259d6b88e4cdbfef95b28 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Thu, 28 Nov 2024 11:38:48 +0100 Subject: [PATCH 36/36] Update readme section for HIP and ONEAPI compilers --- README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index efda1299..392e4040 100644 --- a/README.md +++ b/README.md @@ -152,7 +152,12 @@ CMake option | Description `FIRESTARTER_BUILD_HWLOC` | Build hwloc dependency. 
Default `ON` `FIRESTARTER_THREAD_AFFINITY` | Enable FIRESTARTER to set affinity to hardware threads. Default `ON` -When building `FIRESTARTER_ONEAPI` make sure that the Intel Math Kernel Library (MKL) and the complier `icx` can be found. These will be used to build `FIRESTARTER`, while dependencies will be build with `$CC` and `$CXX` respectively. +When building `FIRESTARTER_ONEAPI` make sure that the Intel Math Kernel +Library (MKL) and the compilers `icx` and `icpx` can be found. Please provide +them through the `CC` and `CXX` environment variables. + +When building `FIRESTARTER_HIP` make sure that the compiler `hipcc` can be +found. Please provide it through the `CC` and `CXX` environment variables. ## Metrics