From fc7c3b6455f57909998222a3158625d04448db96 Mon Sep 17 00:00:00 2001 From: David Beckingsale Date: Fri, 18 May 2018 16:16:32 -0700 Subject: [PATCH 01/58] Add RAJA plugin --- CMakeLists.txt | 1 + cmake/thirdparty/SetupChaiThirdparty.cmake | 11 + src/ArrayManager.hpp | 2 + src/CMakeLists.txt | 15 + src/RajaExecutionSpacePlugin.cpp | 89 +++++ src/RajaExecutionSpacePlugin.hpp | 70 ++++ src/pluginLinker.hpp | 56 +++ src/tests/CMakeLists.txt | 4 + src/tests/integration/CMakeLists.txt | 31 ++ src/tests/integration/chai-nested.cpp | 420 +++++++++++++++++++++ src/tests/integration/raja-chai-tests.cpp | 128 +++++++ 11 files changed, 827 insertions(+) create mode 100644 src/RajaExecutionSpacePlugin.cpp create mode 100644 src/RajaExecutionSpacePlugin.hpp create mode 100644 src/pluginLinker.hpp create mode 100644 src/tests/integration/CMakeLists.txt create mode 100644 src/tests/integration/chai-nested.cpp create mode 100644 src/tests/integration/raja-chai-tests.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 9b97c911..f0b13b51 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -52,6 +52,7 @@ option(ENABLE_IMPLICIT_CONVERSIONS "Enable implicit conversions to-from raw poin option(DISABLE_RM "Make ManagedArray a thin wrapper" Off) mark_as_advanced(DISABLE_RM) option(ENABLE_UM "Use CUDA unified (managed) memory" Off) +option(ENABLE_RAJA_PLUGIN "Build plugin to set RAJA execution spaces" On) if (ENABLE_UM AND NOT ENABLE_CUDA) message(FATAL_ERROR "Option ENABLE_UM requires ENABLE_CUDA") diff --git a/cmake/thirdparty/SetupChaiThirdparty.cmake b/cmake/thirdparty/SetupChaiThirdparty.cmake index 20f12b7e..030d1249 100644 --- a/cmake/thirdparty/SetupChaiThirdparty.cmake +++ b/cmake/thirdparty/SetupChaiThirdparty.cmake @@ -47,3 +47,14 @@ blt_register_library( NAME umpire INCLUDES ${UMPIRE_INCLUDE_DIRS} LIBRARIES umpire) + +if (ENABLE_RAJA_PLUGIN) + find_package(RAJA REQUIRED) + + blt_register_library( + NAME raja + INCLUDES ${RAJA_INCLUDE_DIR} + LIBRARIES RAJA) + + message(STATUS "RAJA: ${RAJA_INCLUDE_DIR}") +endif () diff --git a/src/ArrayManager.hpp b/src/ArrayManager.hpp index 3ddfc369..4088b17d 100644 --- a/src/ArrayManager.hpp +++ b/src/ArrayManager.hpp @@ -47,6 +47,8 @@ #include "chai/PointerRecord.hpp" #include "chai/Types.hpp" +#include "chai/pluginLinker.hpp" + #include #include "umpire/Allocator.hpp" diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f4837f45..6c1a60bc 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -54,6 +54,7 @@ set (chai_headers ArrayManager.hpp ArrayManager.inl ChaiMacros.hpp + pluginLinker.hpp ExecutionSpaces.hpp ManagedArray.hpp ManagedArray.inl @@ -78,6 +79,20 @@ if (ENABLE_CUDA) cuda_runtime) endif () +if (ENABLE_RAJA_PLUGIN) + set (chai_headers + ${chai_headers} + RajaExecutionSpacePlugin.hpp) + + set (chai_sources + ${chai_sources} + RajaExecutionSpacePlugin.cpp) + + set (chai_depends + ${chai_depends} + raja) +endif () + blt_add_library( NAME chai SOURCES ${chai_sources} diff --git a/src/RajaExecutionSpacePlugin.cpp b/src/RajaExecutionSpacePlugin.cpp new file mode 100644 index 00000000..e46279da --- /dev/null +++ b/src/RajaExecutionSpacePlugin.cpp @@ -0,0 +1,89 @@ +// --------------------------------------------------------------------- +// Copyright (c) 2016, Lawrence Livermore National Security, LLC. All +// rights reserved. +// +// Produced at the Lawrence Livermore National Laboratory. +// +// This file is part of CHAI. 
+// +// LLNL-CODE-705877 +// +// For details, see https:://github.com/LLNL/CHAI +// Please also see the NOTICE and LICENSE files. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the +// distribution. +// +// - Neither the name of the LLNS/LLNL nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS +// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY +// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// --------------------------------------------------------------------- +#include "chai/config.hpp" + +#include "chai/RajaExecutionSpacePlugin.hpp" + +#include "chai/ArrayManager.hpp" + +namespace chai { + +RajaExecutionSpacePlugin::RajaExecutionSpacePlugin() : + m_arraymanager(chai::ArrayManager::getInstance()) +{ +} + +void +RajaExecutionSpacePlugin::preLaunch(RAJA::util::PluginContext p) +{ + switch (p.platform) { + case RAJA::Platform::host: + m_arraymanager->setExecutionSpace(chai::CPU); break; +#if defined(CHAI_ENABLE_CUDA) + case RAJA::Platform::cuda: + m_arraymanager->setExecutionSpace(chai::GPU); break; +#endif + default: + m_arraymanager->setExecutionSpace(chai::NONE); + } + +} + +void +RajaExecutionSpacePlugin::postLaunch(RAJA::util::PluginContext) +{ + m_arraymanager->setExecutionSpace(chai::NONE); +} + +} + +// Register plugin with RAJA +RAJA::util::PluginRegistry::Add P( + "RajaExecutionSpacePlugin", + "Plugin to set CHAI execution space based on RAJA execution platform"); + +namespace chai { + + void linkRajaPlugin() {} + +} diff --git a/src/RajaExecutionSpacePlugin.hpp b/src/RajaExecutionSpacePlugin.hpp new file mode 100644 index 00000000..dfbba300 --- /dev/null +++ b/src/RajaExecutionSpacePlugin.hpp @@ -0,0 +1,70 @@ +// --------------------------------------------------------------------- +// Copyright (c) 2016, Lawrence Livermore National Security, LLC. All +// rights reserved. +// +// Produced at the Lawrence Livermore National Laboratory. +// +// This file is part of CHAI. +// +// LLNL-CODE-705877 +// +// For details, see https:://github.com/LLNL/CHAI +// Please also see the NOTICE and LICENSE files. 
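The RajaExecutionSpacePlugin above sets CHAI's execution space in preLaunch and clears it in postLaunch, so a kernel that captures a chai::ManagedArray gets its data moved without any manual ArrayManager calls. A minimal sketch of the intended usage, assuming CHAI and RAJA are built with ENABLE_RAJA_PLUGIN=On (the array size and policy are only illustrative):

    #include "chai/ManagedArray.hpp"
    #include "RAJA/RAJA.hpp"

    int main()
    {
      chai::ManagedArray<float> data(10);

      // preLaunch() runs before the loop body: RAJA reports Platform::host
      // for a sequential policy, so the plugin selects chai::CPU and the
      // array is made available there.
      RAJA::forall<RAJA::seq_exec>(RAJA::RangeSegment(0, 10), [=](int i) {
        data[i] = static_cast<float>(i);
      });
      // postLaunch() resets the space to chai::NONE.

      data.free();
      return 0;
    }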
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the +// distribution. +// +// - Neither the name of the LLNS/LLNL nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS +// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY +// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// --------------------------------------------------------------------- +#ifndef CHAI_RajaExecutionSpacePlugin_HPP +#define CHAI_RajaExecutionSpacePlugin_HPP + +#include "RAJA/util/PluginStrategy.hpp" + +namespace chai { + +class ArrayManager; + +class RajaExecutionSpacePlugin : + public RAJA::util::PluginStrategy +{ + public: + RajaExecutionSpacePlugin(); + + void preLaunch(RAJA::util::PluginContext p); + + void postLaunch(RAJA::util::PluginContext p); + + private: + chai::ArrayManager* m_arraymanager; +}; + +void linkRajaPlugin(); + +} + +#endif // CHAI_RajaExecutionSpacePlugin_HPP diff --git a/src/pluginLinker.hpp b/src/pluginLinker.hpp new file mode 100644 index 00000000..9e8484a2 --- /dev/null +++ b/src/pluginLinker.hpp @@ -0,0 +1,56 @@ +// --------------------------------------------------------------------- +// Copyright (c) 2016, Lawrence Livermore National Security, LLC. All +// rights reserved. +// +// Produced at the Lawrence Livermore National Laboratory. +// +// This file is part of CHAI. +// +// LLNL-CODE-705877 +// +// For details, see https:://github.com/LLNL/CHAI +// Please also see the NOTICE and LICENSE files. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the +// distribution. +// +// - Neither the name of the LLNS/LLNL nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS +// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY +// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// --------------------------------------------------------------------- +#ifndef CHAI_pluginLinker_HPP +#define CHAI_pluginLinker_HPP + +#include "chai/RajaExecutionSpacePlugin.hpp" + +namespace { + struct pluginLinker { + pluginLinker() { + (void) chai::linkRajaPlugin(); + } + } pluginLinker; +} + +#endif // CHAI_pluginLinker_HPP diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 7f472780..5a575abb 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -62,3 +62,7 @@ target_include_directories( blt_add_test( NAME managed_array_test COMMAND managed_array_tests) + +if (ENABLE_RAJA_PLUGIN) + add_subdirectory(integration) +endif () diff --git a/src/tests/integration/CMakeLists.txt b/src/tests/integration/CMakeLists.txt new file mode 100644 index 00000000..dc34cbfb --- /dev/null +++ b/src/tests/integration/CMakeLists.txt @@ -0,0 +1,31 @@ +############################################################################### +# +# Copyright (c) 2016-18, Lawrence Livermore National Security, LLC. +# +# Produced at the Lawrence Livermore National Laboratory +# +# LLNL-CODE-689114 +# +# All rights reserved. +# +# This file is part of RAJA. +# +# For details about use and distribution, please read RAJA/LICENSE. +# +############################################################################### + +set (raja_test_depends + chai umpire raja openmp gtest) + +blt_add_executable( + NAME raja-chai-tests + SOURCES raja-chai-tests.cpp + DEPENDS_ON ${raja_test_depends}) + +blt_add_test( + NAME raja-chai-tests + COMMAND raja-chai-tests) + +target_include_directories( + raja-chai-tests + PUBLIC ${PROJECT_BINARY_DIR}/include) diff --git a/src/tests/integration/chai-nested.cpp b/src/tests/integration/chai-nested.cpp new file mode 100644 index 00000000..ff50af96 --- /dev/null +++ b/src/tests/integration/chai-nested.cpp @@ -0,0 +1,420 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2016-18, Lawrence Livermore National Security, LLC. +// +// Produced at the Lawrence Livermore National Laboratory +// +// LLNL-CODE-689114 +// +// All rights reserved. +// +// This file is part of RAJA. +// +// For details about use and distribution, please read RAJA/LICENSE. +// +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +/// +/// Source file containing tests for CHAI in RAJA nested loops. 
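The pluginLinker.hpp header above (now pulled in by chai/ArrayManager.hpp) uses a force-link idiom: every translation unit that includes it creates a static object whose constructor references chai::linkRajaPlugin(), so the plugin's object file, and with it the static PluginRegistry::Add registrar, is not dropped by the linker when CHAI is built as a static library. A self-contained sketch of the same idiom; Registry, Add, and the names below are hypothetical stand-ins, not RAJA or CHAI APIs:

    #include <iostream>
    #include <string>
    #include <vector>

    // Hypothetical registry standing in for RAJA's plugin registry.
    struct Registry {
      static std::vector<std::string>& plugins() {
        static std::vector<std::string> p;
        return p;
      }
    };

    // "Plugin" side: a static object performs the registration at startup...
    struct Add {
      explicit Add(const std::string& name) { Registry::plugins().push_back(name); }
    };
    static Add registrar("ExamplePlugin");

    // ...and an empty, externally visible function gives other translation
    // units something to reference.
    void linkExamplePlugin() {}

    // "pluginLinker.hpp" side: a static object in an anonymous namespace
    // forces a reference to linkExamplePlugin() from every includer.
    namespace {
    struct exampleLinker {
      exampleLinker() { (void)linkExamplePlugin(); }
    } exampleLinker;
    }

    int main()
    {
      for (const auto& name : Registry::plugins()) {
        std::cout << name << "\n";  // prints "ExamplePlugin"
      }
      return 0;
    }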
+/// +/// +#include +#include +#include + +#include +#include +#include + +#include "RAJA/RAJA.hpp" +#include "RAJA_gtest.hpp" + +using namespace RAJA; +using namespace std; + +#include "chai/ArrayManager.hpp" +#include "chai/ManagedArray.hpp" + +/* + * Simple tests using forallN and View + */ +CUDA_TEST(Chai, NestedSimpleOld) { + typedef RAJA::NestedPolicy< RAJA::ExecList< RAJA::seq_exec, RAJA::seq_exec> > POLICY; + typedef RAJA::NestedPolicy< RAJA::ExecList< RAJA::seq_exec, RAJA::cuda_thread_y_exec > > POLICY_GPU; + + const int X = 16; + const int Y = 16; + + chai::ManagedArray v1(X*Y); + chai::ManagedArray v2(X*Y); + + RAJA::forallN(RangeSegment(0,Y), RangeSegment(0,X), [=] (int i, int j) { + int index = j*X + i; + v1[index] = index; + }); + + RAJA::forallN(RangeSegment(0,Y), RangeSegment(0,X), [=] __device__ (int i, int j) { + int index = j*X + i; + v2[index] = v1[index]*2.0f; + }); + cudaDeviceSynchronize(); + + RAJA::forallN(RangeSegment(0,Y), RangeSegment(0,X), [=] (int i, int j) { + int index = j*X + i; + ASSERT_FLOAT_EQ(v1[index], index*1.0f); + ASSERT_FLOAT_EQ(v2[index], index*2.0f); + }); +} + + +/* + * Simple tests using nested::forall and View + */ +CUDA_TEST(Chai, NestedSimple) { + typedef RAJA::KernelPolicy< + RAJA::statement::For<0, RAJA::seq_exec, + RAJA::statement::For<1, RAJA::seq_exec> > > POLICY; + typedef RAJA::KernelPolicy< + RAJA::statement::For<0, RAJA::seq_exec, + RAJA::statement::CudaKernel< + RAJA::statement::For<1, RAJA::cuda_threadblock_exec<32> > > > >POLICY_GPU; + + const int X = 16; + const int Y = 16; + + chai::ManagedArray v1(X*Y); + chai::ManagedArray v2(X*Y); + + RAJA::kernel( + + RAJA::make_tuple(RAJA::RangeSegment(0,Y), RAJA::RangeSegment(0,X) ), + + [=] (int i, int j) { + int index = j*X + i; + v1[index] = index; + }); + + RAJA::kernel( + + RAJA::make_tuple(RangeSegment(0,Y), RangeSegment(0,X) ), + + [=] __host__ __device__ (int i, int j) { + int index = j*X + i; + v2[index] = v1[index]*2.0f; + }); + + cudaDeviceSynchronize(); + + RAJA::kernel( + + RAJA::make_tuple(RAJA::RangeSegment(0,Y), RAJA::RangeSegment(0,X) ), + + [=] (int i, int j) { + int index = j*X + i; + ASSERT_FLOAT_EQ(v1[index], index*1.0f); + ASSERT_FLOAT_EQ(v2[index], index*2.0f); + }); +} + +CUDA_TEST(Chai, NestedView) { + typedef RAJA::NestedPolicy< RAJA::ExecList< RAJA::seq_exec, RAJA::seq_exec> > POLICY; + typedef RAJA::NestedPolicy< RAJA::ExecList< RAJA::seq_exec, RAJA::cuda_thread_y_exec > > POLICY_GPU; + + const int X = 16; + const int Y = 16; + + chai::ManagedArray v1_array(X*Y); + chai::ManagedArray v2_array(X*Y); + + typedef RAJA::ManagedArrayView > view; + + view v1(v1_array, X, Y); + view v2(v2_array, X, Y); + + RAJA::forallN(RangeSegment(0,Y), RangeSegment(0,X), [=] (int i, int j) { + v1(i,j) = (i+(j*X)) * 1.0f; + }); + + RAJA::forallN(RangeSegment(0,Y), RangeSegment(0,X), [=] __device__ (int i, int j) { + v2(i,j) = v1(i,j)*2.0f; + }); + + RAJA::forallN(RangeSegment(0,Y), RangeSegment(0,X), [=] (int i, int j) { + ASSERT_FLOAT_EQ(v2(i,j), v1(i,j)*2.0f); + }); +} + +CUDA_TEST(Chai, NestedView2) { + typedef RAJA::NestedPolicy< RAJA::ExecList< RAJA::seq_exec, RAJA::seq_exec> > POLICY; + +#if defined (RAJA_ENABLE_OPENMP) + typedef RAJA::NestedPolicy< RAJA::ExecList< RAJA::omp_for_nowait_exec, RAJA::cuda_thread_x_exec >, RAJA::OMP_Parallel<> > POLICY_GPU; +#else + typedef RAJA::NestedPolicy< RAJA::ExecList< RAJA::seq_exec, RAJA::cuda_thread_x_exec > > POLICY_GPU; +#endif + + const int X = 16; + const int Y = 16; + + chai::ManagedArray v1_array(X*Y); + chai::ManagedArray 
v2_array(X*Y); + + typedef RAJA::ManagedArrayView > view; + + view v1(v1_array, X, Y); + view v2(v2_array, X, Y); + + RAJA::forallN(RangeSegment(0,Y), RangeSegment(0,X), [=] (int i, int j) { + v1(i,j) = (i+(j*X)) * 1.0f; + }); + + RAJA::forallN(RangeSegment(0,Y), RangeSegment(0,X), [=] __device__ (int i, int j) { + v2(i,j) = v1(i,j)*2.0f; + }); + + RAJA::forallN(RangeSegment(0,Y), RangeSegment(0,X), [=] (int i, int j) { + ASSERT_FLOAT_EQ(v2(i,j), v1(i,j)*2.0f); + }); +} + +/////////////////////////////////////////////////////////////////////////// +// +// Example LTimes kernel test routines +// +// Demonstrates a 4-nested loop, the use of complex nested policies and +// the use of strongly-typed indices +// +// This routine computes phi(m, g, z) = SUM_d { ell(m, d)*psi(d,g,z) } +// +/////////////////////////////////////////////////////////////////////////// +typedef struct { + double val; + int idx; +} minmaxloc_t; + +// block_size is needed by the reduction variables to setup shared memory +// Care should be used here to cover the maximum block dimensions used by this +// test +const size_t block_size = 256; + +RAJA_INDEX_VALUE(IMoment, "IMoment"); +RAJA_INDEX_VALUE(IDirection, "IDirection"); +RAJA_INDEX_VALUE(IGroup, "IGroup"); +RAJA_INDEX_VALUE(IZone, "IZone"); + +template +void runLTimesTest(std::string const &policy, + Index_type num_moments, + Index_type num_directions, + Index_type num_groups, + Index_type num_zones) +{ + // cout << "\n TestLTimes " << num_moments << " moments, " << num_directions + // << " directions, " << num_groups << " groups, and " << num_zones + // << " zones" + // << " with policy " << policy << endl; + + // allocate data + // phi is initialized to all zeros, the others are randomized + chai::ManagedArray ell_data(num_moments * num_directions); + chai::ManagedArray psi_data(num_directions * num_groups * num_zones); + //chai::ManagedArray phi_data(num_moments * num_groups * num_zones, 0.0); + chai::ManagedArray phi_data(num_moments * num_groups * num_zones); + + // setup CUDA Reduction variables to be exercised + ReduceSum, double> pdsum(0.0); + ReduceMin, double> pdmin(DBL_MAX); + ReduceMax, double> pdmax(-DBL_MAX); + ReduceMinLoc, double> pdminloc(DBL_MAX, -1); + ReduceMaxLoc, double> pdmaxloc(-DBL_MAX, -1); + + + // data setup using RAJA to ensure that chai is activated + RAJA::forall(0, (num_moments*num_directions), [=] (int i) { + ell_data[i] = drand48(); + }); + + RAJA::forall(0, (num_directions*num_groups*num_zones), [=] (int i) { + psi_data[i] = drand48(); + }); + + RAJA::forall(0, (num_moments*num_groups*num_zones), [=] (int i) { + phi_data[i] = 0.0; + }); + + typename POL::ELL_VIEW ell(ell_data, RAJA::make_permuted_layout({num_moments, num_directions}, RAJA::as_array::get())); + typename POL::PSI_VIEW psi(psi_data, RAJA::make_permuted_layout({num_directions, num_groups, num_zones}, RAJA::as_array::get())); + typename POL::PHI_VIEW phi(phi_data, RAJA::make_permuted_layout({num_moments, num_groups, num_zones}, RAJA::as_array::get())); + + using EXEC = typename POL::EXEC; + + // do calculation using RAJA + forallN( + RangeSegment(0, num_moments), + RangeSegment(0, num_directions), + RangeSegment(0, num_groups), + RangeSegment(0, num_zones), + [=] __device__(IMoment m, IDirection d, IGroup g, IZone z) { + double val = ell(m, d) * psi(d, g, z); + phi(m, g, z) += val; + pdsum += val; + pdmin.min(val); + pdmax.max(val); + + int index = *d + (*m * num_directions) + + (*g * num_directions * num_moments) + + (*z * num_directions * num_moments * num_groups); + 
+ pdminloc.minloc(val, index); + pdmaxloc.maxloc(val, index); + }); + + cudaDeviceSynchronize(); + + // Make sure data is copied to host for checking results. + chai::ArrayManager* rm = chai::ArrayManager::getInstance(); + rm->setExecutionSpace(chai::CPU); + // setup local Reduction variables as a crosscheck + double the_lsum = 0.0; + double the_lmin = DBL_MAX; + double the_lmax = -DBL_MAX; + + double* lsum = &the_lsum; + double* lmin = &the_lmin; + double* lmax = &the_lmax; + + forall(RangeSegment(0, num_zones), [=] (int z) { + for (IGroup g(0); g < num_groups; ++g) { + for (IMoment m(0); m < num_moments; ++m) { + double total = 0.0; + for (IDirection d(0); d < num_directions; ++d) { + double val = ell(m, d) * psi(d, g, IZone(z)); + total += val; + *lmin = RAJA_MIN(*lmin, val); + *lmax = RAJA_MAX(*lmax, val); + int index = *d + (*m * num_directions) + + (*g * num_directions * num_moments) + + (z * num_directions * num_moments * num_groups); + } + *lsum += total; + + // check answer with some reasonable tolerance + ASSERT_FLOAT_EQ(total, phi(m, g, IZone(z))); + } + } + }); + + rm->setExecutionSpace(chai::NONE); + + ASSERT_FLOAT_EQ(*lsum, pdsum.get()); + ASSERT_FLOAT_EQ(*lmin, pdmin.get()); + ASSERT_FLOAT_EQ(*lmax, pdmax.get()); +} + +// Use thread-block mappings +struct PolLTimesA_GPU { + // Loops: Moments, Directions, Groups, Zones + typedef NestedPolicy, + cuda_threadblock_y_exec<32>>> + EXEC; + + // psi[direction, group, zone] + typedef RAJA::TypedManagedArrayView, IDirection, IGroup, IZone> + PSI_VIEW; + + // phi[moment, group, zone] + typedef RAJA::TypedManagedArrayView, IMoment, IGroup, IZone> + PHI_VIEW; + + // ell[moment, direction] + typedef RAJA::TypedManagedArrayView, IMoment, IDirection> + ELL_VIEW; + + typedef RAJA::PERM_IJK PSI_PERM; + typedef RAJA::PERM_IJK PHI_PERM; + typedef RAJA::PERM_IJ ELL_PERM; +}; + +// Use thread and block mappings +struct PolLTimesB_GPU { + // Loops: Moments, Directions, Groups, Zones + typedef NestedPolicy, + Permute> + EXEC; + + // psi[direction, group, zone] + typedef RAJA::TypedManagedArrayView, IDirection, IGroup, IZone> + PSI_VIEW; + + // phi[moment, group, zone] + typedef RAJA::TypedManagedArrayView, IMoment, IGroup, IZone> + PHI_VIEW; + + // ell[moment, direction] + typedef RAJA::TypedManagedArrayView, IMoment, IDirection> + ELL_VIEW; + + typedef RAJA::PERM_IJK PSI_PERM; + typedef RAJA::PERM_IJK PHI_PERM; + typedef RAJA::PERM_IJ ELL_PERM; +}; + +// Combine OMP Parallel, omp nowait, and cuda thread-block launch +struct PolLTimesC_GPU { + // Loops: Moments, Directions, Groups, Zones +#if defined(RAJA_ENABLE_OPENMP) + typedef NestedPolicy>, + OMP_Parallel<>> + EXEC; +#else + typedef NestedPolicy> > + EXEC; +#endif + + // psi[direction, group, zone] + typedef RAJA::TypedManagedArrayView, IDirection, IGroup, IZone> + PSI_VIEW; + + // phi[moment, group, zone] + typedef RAJA::TypedManagedArrayView, IMoment, IGroup, IZone> + PHI_VIEW; + + // ell[moment, direction] + typedef RAJA::TypedManagedArrayView, IMoment, IDirection> + ELL_VIEW; + + typedef RAJA::PERM_IJK PSI_PERM; + typedef RAJA::PERM_IJK PHI_PERM; + typedef RAJA::PERM_IJ ELL_PERM; +}; + +void runLTimesTests(Index_type num_moments, + Index_type num_directions, + Index_type num_groups, + Index_type num_zones) +{ + runLTimesTest( + "PolLTimesA_GPU", num_moments, num_directions, num_groups, num_zones); + runLTimesTest( + "PolLTimesB_GPU", num_moments, num_directions, num_groups, num_zones); + runLTimesTest( + "PolLTimesC_GPU", num_moments, num_directions, num_groups, num_zones); +} + +TEST(Chai, 
Nested) { +// runLTimesTests(2, 0, 7, 3); + runLTimesTests(2, 3, 7, 3); + runLTimesTests(2, 3, 32, 4); + runLTimesTests(25, 96, 8, 32); + runLTimesTests(100, 15, 7, 13); +} diff --git a/src/tests/integration/raja-chai-tests.cpp b/src/tests/integration/raja-chai-tests.cpp new file mode 100644 index 00000000..aa3322ce --- /dev/null +++ b/src/tests/integration/raja-chai-tests.cpp @@ -0,0 +1,128 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2016-18, Lawrence Livermore National Security, LLC. +// +// Produced at the Lawrence Livermore National Laboratory +// +// LLNL-CODE-689114 +// +// All rights reserved. +// +// This file is part of RAJA. +// +// For details about use and distribution, please read RAJA/LICENSE. +// +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +/// +/// Source file containing tests for CHAI with basic RAJA constructs +/// + +#include "gtest/gtest.h" + +#include "chai/ManagedArray.hpp" +#include "chai/RajaExecutionSpacePlugin.hpp" + +#define CUDA_TEST(X, Y) \ +static void cuda_test_ ## X ## Y();\ +TEST(X,Y) { cuda_test_ ## X ## Y();}\ +static void cuda_test_ ## X ## Y() + +#include "RAJA/RAJA.hpp" + +// // Register plugin with RAJA +// static RAJA::util::PluginRegistry::Add P( +// "RajaExecutionSpacePlugin", + // "Plugin to set CHAI execution space based on RAJA execution platform"); + + +#include + +CUDA_TEST(ChaiTest, Simple) { + chai::ManagedArray v1(10); + chai::ManagedArray v2(10); + + RAJA::forall(RAJA::RangeSegment(0, 10), [=] (int i) { + v1[i] = static_cast(i * 1.0f); + }); + + std::cout << "end of loop 1" << std::endl; + + +#if defined(CHAI_ENABLE_CUDA) + RAJA::forall >(0, 10, [=] __device__ (int i) { + v2[i] = v1[i]*2.0f; + }); +#else + RAJA::forall(0, 10, [=] (int i) { + v2[i] = v1[i]*2.0f; + }); +#endif + + std::cout << "end of loop 2" << std::endl; + + RAJA::forall(0, 10, [=] (int i) { + ASSERT_FLOAT_EQ(v2[i], i*2.0f); + }); + + +#if defined(CHAI_ENABLE_CUDA) + RAJA::forall >(0, 10, [=] __device__ (int i) { + v2[i] *= 2.0f; + }); +#else + RAJA::forall(0, 10, [=] (int i) { + v2[i] *= 2.0f; + }); +#endif + + float * raw_v2 = v2; + for (int i = 0; i < 10; i++ ) { + ASSERT_FLOAT_EQ(raw_v2[i], i*2.0f*2.0f);; + } +} + +#if 0 +CUDA_TEST(ChaiTest, Views) { + chai::ManagedArray v1_array(10); + chai::ManagedArray v2_array(10); + + typedef RAJA::ManagedArrayView > view; + + view v1(v1_array, 10); + view v2(v2_array, 10); + + RAJA::forall(0, 10, [=] (int i) { + v1(i) = static_cast(i * 1.0f); + }); + +#if defined(CHAI_ENABLE_CUDA) + RAJA::forall >(0, 10, [=] __device__ (int i) { + v2(i) = v1(i)*2.0f; + }); +#else + RAJA::forall(0, 10, [=](int i) { + v2(i) = v1(i)*2.0f; + }); +#endif + + RAJA::forall(0, 10, [=] (int i) { + ASSERT_FLOAT_EQ(v2(i), i*2.0f); + }); + + +#if defined(CHAI_ENABLE_CUDA) + RAJA::forall >(0, 10, [=] __device__ (int i) { + v2(i) *= 2.0f; + }); +#else + RAJA::forall(0, 10, [=](int i) { + v2(i) *= 2.0f; + }); +#endif + + float * raw_v2 = v2.data; + for (int i = 0; i < 10; i++ ) { + ASSERT_FLOAT_EQ(raw_v2[i], i*1.0f*2.0f*2.0f);; + } +} +#endif From 7a9b3352e643a4b1a77419445cdccf53d70c19d0 Mon Sep 17 00:00:00 2001 From: David Beckingsale Date: Mon, 21 May 2018 14:08:16 -0700 Subject: [PATCH 02/58] Add ManagedArrayView to CHAI --- src/CMakeLists.txt | 2 ++ src/ManagedArrayView.hpp | 70 ++++++++++++++++++++++++++++++++++++++++ src/config.hpp.in | 1 + 3 files changed, 73 insertions(+) create mode 100644 src/ManagedArrayView.hpp diff --git a/src/CMakeLists.txt 
b/src/CMakeLists.txt index 6c1a60bc..af0593ac 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -45,6 +45,7 @@ set(CHAI_ENABLE_CUDA ${ENABLE_CUDA}) set(CHAI_ENABLE_IMPLICIT_CONVERSIONS ${ENABLE_IMPLICIT_CONVERSIONS}) set(CHAI_DISABLE_RM ${DISABLE_RM}) set(CHAI_ENABLE_UM ${ENABLE_UM}) +set(CHAI_ENABLE_RAJA_PLUGIN ${ENABLE_RAJA_PLUGIN}) configure_file( ${PROJECT_SOURCE_DIR}/src/config.hpp.in @@ -82,6 +83,7 @@ endif () if (ENABLE_RAJA_PLUGIN) set (chai_headers ${chai_headers} + ManagedArrayView.hpp RajaExecutionSpacePlugin.hpp) set (chai_sources diff --git a/src/ManagedArrayView.hpp b/src/ManagedArrayView.hpp new file mode 100644 index 00000000..1d7543b7 --- /dev/null +++ b/src/ManagedArrayView.hpp @@ -0,0 +1,70 @@ +// --------------------------------------------------------------------- +// Copyright (c) 2018, Lawrence Livermore National Security, LLC. All +// rights reserved. +// +// Produced at the Lawrence Livermore National Laboratory. +// +// This file is part of CHAI. +// +// LLNL-CODE-705877 +// +// For details, see https:://github.com/LLNL/CHAI +// Please also see the NOTICE and LICENSE files. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the +// distribution. +// +// - Neither the name of the LLNS/LLNL nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS +// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY +// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. 
+// --------------------------------------------------------------------- +#ifndef CHAI_ManagedArrayView_HPP +#define CHAI_ManagedArrayView_HPP + +#if defined(CHAI_ENABLE_RAJA_PLUGIN) + +#include "chai/config.hpp" +#include "chai/ManagedArray.hpp" + +#include "RAJA/util/View.hpp" + +namespace chai { + +template +using ManagedArrayView = + RAJA::View>; + + +template +using TypedManagedArrayView = RAJA::TypedViewBase, + LayoutType, + IndexTypes...>; + +} // end of namespace chai + +#endif // defined(CHAI_ENABLE_RAJA_PLUGIN) + +#endif // CHAI_ManagedArrayView_HPP diff --git a/src/config.hpp.in b/src/config.hpp.in index 75610721..22c3c63f 100644 --- a/src/config.hpp.in +++ b/src/config.hpp.in @@ -47,5 +47,6 @@ #cmakedefine CHAI_ENABLE_IMPLICIT_CONVERSIONS #cmakedefine CHAI_DISABLE_RM #cmakedefine CHAI_ENABLE_UM +#cmakedefine CHAI_ENABLE_RAJA_PLUGIN #endif // CHAI_config_HPP From 2bb71f9e1236e7a81438c571082774b5adc50bf0 Mon Sep 17 00:00:00 2001 From: David Beckingsale Date: Tue, 29 May 2018 10:15:52 -0700 Subject: [PATCH 03/58] Add ManagedArrayView --- src/ManagedArrayView.hpp | 4 ++-- src/tests/integration/chai-nested.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ManagedArrayView.hpp b/src/ManagedArrayView.hpp index 1d7543b7..afce47dd 100644 --- a/src/ManagedArrayView.hpp +++ b/src/ManagedArrayView.hpp @@ -43,11 +43,11 @@ #ifndef CHAI_ManagedArrayView_HPP #define CHAI_ManagedArrayView_HPP +#include "chai/config.hpp" + #if defined(CHAI_ENABLE_RAJA_PLUGIN) -#include "chai/config.hpp" #include "chai/ManagedArray.hpp" - #include "RAJA/util/View.hpp" namespace chai { diff --git a/src/tests/integration/chai-nested.cpp b/src/tests/integration/chai-nested.cpp index ff50af96..97790808 100644 --- a/src/tests/integration/chai-nested.cpp +++ b/src/tests/integration/chai-nested.cpp @@ -125,7 +125,7 @@ CUDA_TEST(Chai, NestedView) { chai::ManagedArray v1_array(X*Y); chai::ManagedArray v2_array(X*Y); - typedef RAJA::ManagedArrayView > view; + typedef chai::ManagedArrayView > view; view v1(v1_array, X, Y); view v2(v2_array, X, Y); From 754d6cecd726988c95149c686969c7d83532e63d Mon Sep 17 00:00:00 2001 From: David Beckingsale Date: Wed, 14 Aug 2019 12:18:56 -0700 Subject: [PATCH 04/58] Add check to guard examples --- CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a7baa16a..fefd839f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -111,7 +111,9 @@ if (ENABLE_BENCHMARKS) add_subdirectory(benchmarks) endif() -add_subdirectory(examples) +if (ENABLE_EXAMPLES) + add_subdirectory(examples) +endif () if (ENABLE_DOCUMENTATION) add_subdirectory(docs) From 36ec2131157b4491ff9e7315cb72ead056f8a451 Mon Sep 17 00:00:00 2001 From: David Beckingsale Date: Thu, 12 Sep 2019 11:13:47 -0700 Subject: [PATCH 05/58] More merging --- cmake/thirdparty/SetupChaiThirdparty.cmake | 1 + examples/CMakeLists.txt | 6 ++-- src/{ => chai}/ManagedArrayView.hpp | 0 src/{ => chai}/RajaExecutionSpacePlugin.cpp | 0 src/{ => chai}/RajaExecutionSpacePlugin.hpp | 0 src/{ => chai}/pluginLinker.hpp | 0 src/tests/integration/CMakeLists.txt | 31 ------------------- tests/integration/CMakeLists.txt | 16 ++++++++++ .../integration/chai-nested.cpp | 0 .../integration/raja-chai-tests.cpp | 0 10 files changed, 20 insertions(+), 34 deletions(-) rename src/{ => chai}/ManagedArrayView.hpp (100%) rename src/{ => chai}/RajaExecutionSpacePlugin.cpp (100%) rename src/{ => chai}/RajaExecutionSpacePlugin.hpp (100%) rename src/{ => chai}/pluginLinker.hpp (100%) 
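PATCH 02 defines chai::ManagedArrayView as a RAJA::View whose underlying pointer type is a chai::ManagedArray, so multi-dimensional indexing and CHAI's automatic data motion compose. A minimal usage sketch, assuming the alias takes a value type and a RAJA layout type as in the NestedView test elsewhere in this series (sizes and policy are illustrative):

    #include "RAJA/RAJA.hpp"
    #include "chai/ManagedArray.hpp"
    #include "chai/ManagedArrayView.hpp"

    int main()
    {
      const int X = 4;
      const int Y = 3;

      chai::ManagedArray<float> storage(X * Y);

      // A 2-D view over the managed storage; RAJA::Layout<2> maps (i, j)
      // onto the linear index, while the ManagedArray handles data motion.
      chai::ManagedArrayView<float, RAJA::Layout<2>> v(storage, X, Y);

      RAJA::forall<RAJA::seq_exec>(RAJA::RangeSegment(0, X), [=](int i) {
        for (int j = 0; j < Y; ++j) {
          v(i, j) = static_cast<float>(i * Y + j);
        }
      });

      storage.free();
      return 0;
    }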
delete mode 100644 src/tests/integration/CMakeLists.txt rename {src/tests => tests}/integration/chai-nested.cpp (100%) rename {src/tests => tests}/integration/raja-chai-tests.cpp (100%) diff --git a/cmake/thirdparty/SetupChaiThirdparty.cmake b/cmake/thirdparty/SetupChaiThirdparty.cmake index 08ab19af..afc7ebf0 100644 --- a/cmake/thirdparty/SetupChaiThirdparty.cmake +++ b/cmake/thirdparty/SetupChaiThirdparty.cmake @@ -59,6 +59,7 @@ blt_register_library( LIBRARIES umpire) if (ENABLE_RAJA_PLUGIN) + find_package(camp REQUIRED) find_package(RAJA REQUIRED) blt_register_library( diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 47d8da4b..1c6e1b1b 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -71,6 +71,6 @@ if (ENABLE_CUDA OR ENABLE_HIP) DEPENDS_ON ${chai_umpire_example_depends}) endif () -if (ENABLE_RAJA_PLUGIN) - add_subdirectory(integration) -endif () +# if (ENABLE_RAJA_PLUGIN) +# add_subdirectory(integration) +# endif () diff --git a/src/ManagedArrayView.hpp b/src/chai/ManagedArrayView.hpp similarity index 100% rename from src/ManagedArrayView.hpp rename to src/chai/ManagedArrayView.hpp diff --git a/src/RajaExecutionSpacePlugin.cpp b/src/chai/RajaExecutionSpacePlugin.cpp similarity index 100% rename from src/RajaExecutionSpacePlugin.cpp rename to src/chai/RajaExecutionSpacePlugin.cpp diff --git a/src/RajaExecutionSpacePlugin.hpp b/src/chai/RajaExecutionSpacePlugin.hpp similarity index 100% rename from src/RajaExecutionSpacePlugin.hpp rename to src/chai/RajaExecutionSpacePlugin.hpp diff --git a/src/pluginLinker.hpp b/src/chai/pluginLinker.hpp similarity index 100% rename from src/pluginLinker.hpp rename to src/chai/pluginLinker.hpp diff --git a/src/tests/integration/CMakeLists.txt b/src/tests/integration/CMakeLists.txt deleted file mode 100644 index dc34cbfb..00000000 --- a/src/tests/integration/CMakeLists.txt +++ /dev/null @@ -1,31 +0,0 @@ -############################################################################### -# -# Copyright (c) 2016-18, Lawrence Livermore National Security, LLC. -# -# Produced at the Lawrence Livermore National Laboratory -# -# LLNL-CODE-689114 -# -# All rights reserved. -# -# This file is part of RAJA. -# -# For details about use and distribution, please read RAJA/LICENSE. 
-# -############################################################################### - -set (raja_test_depends - chai umpire raja openmp gtest) - -blt_add_executable( - NAME raja-chai-tests - SOURCES raja-chai-tests.cpp - DEPENDS_ON ${raja_test_depends}) - -blt_add_test( - NAME raja-chai-tests - COMMAND raja-chai-tests) - -target_include_directories( - raja-chai-tests - PUBLIC ${PROJECT_BINARY_DIR}/include) diff --git a/tests/integration/CMakeLists.txt b/tests/integration/CMakeLists.txt index 22cbdd04..021b5147 100644 --- a/tests/integration/CMakeLists.txt +++ b/tests/integration/CMakeLists.txt @@ -24,3 +24,19 @@ target_include_directories( blt_add_test( NAME managed_array_test COMMAND managed_array_tests) + +set (raja_test_depends + chai umpire raja openmp gtest) + +blt_add_executable( + NAME raja-chai-tests + SOURCES raja-chai-tests.cpp + DEPENDS_ON ${raja_test_depends}) + +blt_add_test( + NAME raja-chai-tests + COMMAND raja-chai-tests) + +target_include_directories( + raja-chai-tests + PUBLIC ${PROJECT_BINARY_DIR}/include) diff --git a/src/tests/integration/chai-nested.cpp b/tests/integration/chai-nested.cpp similarity index 100% rename from src/tests/integration/chai-nested.cpp rename to tests/integration/chai-nested.cpp diff --git a/src/tests/integration/raja-chai-tests.cpp b/tests/integration/raja-chai-tests.cpp similarity index 100% rename from src/tests/integration/raja-chai-tests.cpp rename to tests/integration/raja-chai-tests.cpp From c0f3a819d3951a7461b07833e4ce7e410686b50b Mon Sep 17 00:00:00 2001 From: David Beckingsale Date: Thu, 12 Sep 2019 12:52:09 -0700 Subject: [PATCH 06/58] Fixing up tests --- tests/integration/CMakeLists.txt | 12 ++++++++++-- tests/integration/raja-chai-tests.cpp | 10 +++++----- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/tests/integration/CMakeLists.txt b/tests/integration/CMakeLists.txt index 021b5147..64c2dfe9 100644 --- a/tests/integration/CMakeLists.txt +++ b/tests/integration/CMakeLists.txt @@ -6,12 +6,19 @@ if (ENABLE_CUDA) ${managed_array_test_depends} cuda) endif () + if (ENABLE_HIP) set (managed_array_test_depends ${managed_array_test_depends} hip) endif () +if (ENABLE_OPENMP) + set (managed_array_test_depends + ${managed_array_test_depends} + openmp) +endif () + blt_add_executable( NAME managed_array_tests SOURCES managed_array_tests.cpp @@ -25,8 +32,9 @@ blt_add_test( NAME managed_array_test COMMAND managed_array_tests) -set (raja_test_depends - chai umpire raja openmp gtest) +set(raja_test_depends + ${managed_array_test_depends} + raja) blt_add_executable( NAME raja-chai-tests diff --git a/tests/integration/raja-chai-tests.cpp b/tests/integration/raja-chai-tests.cpp index aa3322ce..713dd1ec 100644 --- a/tests/integration/raja-chai-tests.cpp +++ b/tests/integration/raja-chai-tests.cpp @@ -49,28 +49,28 @@ CUDA_TEST(ChaiTest, Simple) { #if defined(CHAI_ENABLE_CUDA) - RAJA::forall >(0, 10, [=] __device__ (int i) { + RAJA::forall >(RAJA::RangeSegment(0, 10), [=] __device__ (int i) { v2[i] = v1[i]*2.0f; }); #else - RAJA::forall(0, 10, [=] (int i) { + RAJA::forall(RAJA::RangeSegment(0, 10), [=] (int i) { v2[i] = v1[i]*2.0f; }); #endif std::cout << "end of loop 2" << std::endl; - RAJA::forall(0, 10, [=] (int i) { + RAJA::forall(RAJA::RangeSegment(0, 10), [=] (int i) { ASSERT_FLOAT_EQ(v2[i], i*2.0f); }); #if defined(CHAI_ENABLE_CUDA) - RAJA::forall >(0, 10, [=] __device__ (int i) { + RAJA::forall >(RAJA::RangeSegment(0, 10), [=] __device__ (int i) { v2[i] *= 2.0f; }); #else - RAJA::forall(0, 10, [=] (int i) { + 
RAJA::forall(RAJA::RangeSegment(0, 10), [=] (int i) { v2[i] *= 2.0f; }); #endif From 2fe6a3876b2f16c1bf235f43c6fde0e26596105b Mon Sep 17 00:00:00 2001 From: David Beckingsale Date: Thu, 12 Sep 2019 13:27:56 -0700 Subject: [PATCH 07/58] Finish test fixup --- src/chai/ManagedArrayView.hpp | 3 +- src/chai/RajaExecutionSpacePlugin.cpp | 1 - tests/integration/CMakeLists.txt | 41 ++- tests/integration/chai-nested.cpp | 420 ------------------------- tests/integration/raja-chai-nested.cpp | 255 +++++++++++++++ tests/integration/raja-chai-tests.cpp | 117 +++---- 6 files changed, 333 insertions(+), 504 deletions(-) delete mode 100644 tests/integration/chai-nested.cpp create mode 100644 tests/integration/raja-chai-nested.cpp diff --git a/src/chai/ManagedArrayView.hpp b/src/chai/ManagedArrayView.hpp index afce47dd..2d53b522 100644 --- a/src/chai/ManagedArrayView.hpp +++ b/src/chai/ManagedArrayView.hpp @@ -48,11 +48,12 @@ #if defined(CHAI_ENABLE_RAJA_PLUGIN) #include "chai/ManagedArray.hpp" + #include "RAJA/util/View.hpp" namespace chai { -template + template using ManagedArrayView = RAJA::View>; diff --git a/src/chai/RajaExecutionSpacePlugin.cpp b/src/chai/RajaExecutionSpacePlugin.cpp index e46279da..71fe7716 100644 --- a/src/chai/RajaExecutionSpacePlugin.cpp +++ b/src/chai/RajaExecutionSpacePlugin.cpp @@ -66,7 +66,6 @@ RajaExecutionSpacePlugin::preLaunch(RAJA::util::PluginContext p) default: m_arraymanager->setExecutionSpace(chai::NONE); } - } void diff --git a/tests/integration/CMakeLists.txt b/tests/integration/CMakeLists.txt index 64c2dfe9..03e6fd61 100644 --- a/tests/integration/CMakeLists.txt +++ b/tests/integration/CMakeLists.txt @@ -32,19 +32,34 @@ blt_add_test( NAME managed_array_test COMMAND managed_array_tests) -set(raja_test_depends - ${managed_array_test_depends} - raja) +if (ENABLE_RAJA_PLUGIN) + set(raja_test_depends + ${managed_array_test_depends} + raja) -blt_add_executable( - NAME raja-chai-tests - SOURCES raja-chai-tests.cpp - DEPENDS_ON ${raja_test_depends}) + blt_add_executable( + NAME raja-chai-tests + SOURCES raja-chai-tests.cpp + DEPENDS_ON ${raja_test_depends}) -blt_add_test( - NAME raja-chai-tests - COMMAND raja-chai-tests) + blt_add_test( + NAME raja-chai-tests + COMMAND raja-chai-tests) -target_include_directories( - raja-chai-tests - PUBLIC ${PROJECT_BINARY_DIR}/include) + target_include_directories( + raja-chai-tests + PUBLIC ${PROJECT_BINARY_DIR}/include) + + blt_add_executable( + NAME raja-chai-nested-tests + SOURCES raja-chai-nested.cpp + DEPENDS_ON ${raja_test_depends}) + + blt_add_test( + NAME raja-chai-nested-tests + COMMAND raja-chai-nested-tests) + + target_include_directories( + raja-chai-nested-tests + PUBLIC ${PROJECT_BINARY_DIR}/include) +endif () diff --git a/tests/integration/chai-nested.cpp b/tests/integration/chai-nested.cpp deleted file mode 100644 index 97790808..00000000 --- a/tests/integration/chai-nested.cpp +++ /dev/null @@ -1,420 +0,0 @@ -//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2016-18, Lawrence Livermore National Security, LLC. -// -// Produced at the Lawrence Livermore National Laboratory -// -// LLNL-CODE-689114 -// -// All rights reserved. -// -// This file is part of RAJA. -// -// For details about use and distribution, please read RAJA/LICENSE. -// -//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - -/// -/// Source file containing tests for CHAI in RAJA nested loops. 
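The "Fixing up tests" patch (PATCH 06) is largely mechanical: each call site moves from the older begin/end overload of RAJA::forall to the RangeSegment form. A sketch of the change at one call site, assuming a sequential policy (the buffer here is illustrative):

    #include <vector>
    #include "RAJA/RAJA.hpp"

    int main()
    {
      std::vector<float> data(10);
      float* ptr = data.data();

      // Previously written as RAJA::forall<RAJA::seq_exec>(0, 10, body);
      // the patch switches every such call to the RangeSegment overload.
      RAJA::forall<RAJA::seq_exec>(RAJA::RangeSegment(0, 10), [=](int i) {
        ptr[i] = static_cast<float>(i);
      });

      return 0;
    }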
-/// -/// -#include -#include -#include - -#include -#include -#include - -#include "RAJA/RAJA.hpp" -#include "RAJA_gtest.hpp" - -using namespace RAJA; -using namespace std; - -#include "chai/ArrayManager.hpp" -#include "chai/ManagedArray.hpp" - -/* - * Simple tests using forallN and View - */ -CUDA_TEST(Chai, NestedSimpleOld) { - typedef RAJA::NestedPolicy< RAJA::ExecList< RAJA::seq_exec, RAJA::seq_exec> > POLICY; - typedef RAJA::NestedPolicy< RAJA::ExecList< RAJA::seq_exec, RAJA::cuda_thread_y_exec > > POLICY_GPU; - - const int X = 16; - const int Y = 16; - - chai::ManagedArray v1(X*Y); - chai::ManagedArray v2(X*Y); - - RAJA::forallN(RangeSegment(0,Y), RangeSegment(0,X), [=] (int i, int j) { - int index = j*X + i; - v1[index] = index; - }); - - RAJA::forallN(RangeSegment(0,Y), RangeSegment(0,X), [=] __device__ (int i, int j) { - int index = j*X + i; - v2[index] = v1[index]*2.0f; - }); - cudaDeviceSynchronize(); - - RAJA::forallN(RangeSegment(0,Y), RangeSegment(0,X), [=] (int i, int j) { - int index = j*X + i; - ASSERT_FLOAT_EQ(v1[index], index*1.0f); - ASSERT_FLOAT_EQ(v2[index], index*2.0f); - }); -} - - -/* - * Simple tests using nested::forall and View - */ -CUDA_TEST(Chai, NestedSimple) { - typedef RAJA::KernelPolicy< - RAJA::statement::For<0, RAJA::seq_exec, - RAJA::statement::For<1, RAJA::seq_exec> > > POLICY; - typedef RAJA::KernelPolicy< - RAJA::statement::For<0, RAJA::seq_exec, - RAJA::statement::CudaKernel< - RAJA::statement::For<1, RAJA::cuda_threadblock_exec<32> > > > >POLICY_GPU; - - const int X = 16; - const int Y = 16; - - chai::ManagedArray v1(X*Y); - chai::ManagedArray v2(X*Y); - - RAJA::kernel( - - RAJA::make_tuple(RAJA::RangeSegment(0,Y), RAJA::RangeSegment(0,X) ), - - [=] (int i, int j) { - int index = j*X + i; - v1[index] = index; - }); - - RAJA::kernel( - - RAJA::make_tuple(RangeSegment(0,Y), RangeSegment(0,X) ), - - [=] __host__ __device__ (int i, int j) { - int index = j*X + i; - v2[index] = v1[index]*2.0f; - }); - - cudaDeviceSynchronize(); - - RAJA::kernel( - - RAJA::make_tuple(RAJA::RangeSegment(0,Y), RAJA::RangeSegment(0,X) ), - - [=] (int i, int j) { - int index = j*X + i; - ASSERT_FLOAT_EQ(v1[index], index*1.0f); - ASSERT_FLOAT_EQ(v2[index], index*2.0f); - }); -} - -CUDA_TEST(Chai, NestedView) { - typedef RAJA::NestedPolicy< RAJA::ExecList< RAJA::seq_exec, RAJA::seq_exec> > POLICY; - typedef RAJA::NestedPolicy< RAJA::ExecList< RAJA::seq_exec, RAJA::cuda_thread_y_exec > > POLICY_GPU; - - const int X = 16; - const int Y = 16; - - chai::ManagedArray v1_array(X*Y); - chai::ManagedArray v2_array(X*Y); - - typedef chai::ManagedArrayView > view; - - view v1(v1_array, X, Y); - view v2(v2_array, X, Y); - - RAJA::forallN(RangeSegment(0,Y), RangeSegment(0,X), [=] (int i, int j) { - v1(i,j) = (i+(j*X)) * 1.0f; - }); - - RAJA::forallN(RangeSegment(0,Y), RangeSegment(0,X), [=] __device__ (int i, int j) { - v2(i,j) = v1(i,j)*2.0f; - }); - - RAJA::forallN(RangeSegment(0,Y), RangeSegment(0,X), [=] (int i, int j) { - ASSERT_FLOAT_EQ(v2(i,j), v1(i,j)*2.0f); - }); -} - -CUDA_TEST(Chai, NestedView2) { - typedef RAJA::NestedPolicy< RAJA::ExecList< RAJA::seq_exec, RAJA::seq_exec> > POLICY; - -#if defined (RAJA_ENABLE_OPENMP) - typedef RAJA::NestedPolicy< RAJA::ExecList< RAJA::omp_for_nowait_exec, RAJA::cuda_thread_x_exec >, RAJA::OMP_Parallel<> > POLICY_GPU; -#else - typedef RAJA::NestedPolicy< RAJA::ExecList< RAJA::seq_exec, RAJA::cuda_thread_x_exec > > POLICY_GPU; -#endif - - const int X = 16; - const int Y = 16; - - chai::ManagedArray v1_array(X*Y); - chai::ManagedArray 
v2_array(X*Y); - - typedef RAJA::ManagedArrayView > view; - - view v1(v1_array, X, Y); - view v2(v2_array, X, Y); - - RAJA::forallN(RangeSegment(0,Y), RangeSegment(0,X), [=] (int i, int j) { - v1(i,j) = (i+(j*X)) * 1.0f; - }); - - RAJA::forallN(RangeSegment(0,Y), RangeSegment(0,X), [=] __device__ (int i, int j) { - v2(i,j) = v1(i,j)*2.0f; - }); - - RAJA::forallN(RangeSegment(0,Y), RangeSegment(0,X), [=] (int i, int j) { - ASSERT_FLOAT_EQ(v2(i,j), v1(i,j)*2.0f); - }); -} - -/////////////////////////////////////////////////////////////////////////// -// -// Example LTimes kernel test routines -// -// Demonstrates a 4-nested loop, the use of complex nested policies and -// the use of strongly-typed indices -// -// This routine computes phi(m, g, z) = SUM_d { ell(m, d)*psi(d,g,z) } -// -/////////////////////////////////////////////////////////////////////////// -typedef struct { - double val; - int idx; -} minmaxloc_t; - -// block_size is needed by the reduction variables to setup shared memory -// Care should be used here to cover the maximum block dimensions used by this -// test -const size_t block_size = 256; - -RAJA_INDEX_VALUE(IMoment, "IMoment"); -RAJA_INDEX_VALUE(IDirection, "IDirection"); -RAJA_INDEX_VALUE(IGroup, "IGroup"); -RAJA_INDEX_VALUE(IZone, "IZone"); - -template -void runLTimesTest(std::string const &policy, - Index_type num_moments, - Index_type num_directions, - Index_type num_groups, - Index_type num_zones) -{ - // cout << "\n TestLTimes " << num_moments << " moments, " << num_directions - // << " directions, " << num_groups << " groups, and " << num_zones - // << " zones" - // << " with policy " << policy << endl; - - // allocate data - // phi is initialized to all zeros, the others are randomized - chai::ManagedArray ell_data(num_moments * num_directions); - chai::ManagedArray psi_data(num_directions * num_groups * num_zones); - //chai::ManagedArray phi_data(num_moments * num_groups * num_zones, 0.0); - chai::ManagedArray phi_data(num_moments * num_groups * num_zones); - - // setup CUDA Reduction variables to be exercised - ReduceSum, double> pdsum(0.0); - ReduceMin, double> pdmin(DBL_MAX); - ReduceMax, double> pdmax(-DBL_MAX); - ReduceMinLoc, double> pdminloc(DBL_MAX, -1); - ReduceMaxLoc, double> pdmaxloc(-DBL_MAX, -1); - - - // data setup using RAJA to ensure that chai is activated - RAJA::forall(0, (num_moments*num_directions), [=] (int i) { - ell_data[i] = drand48(); - }); - - RAJA::forall(0, (num_directions*num_groups*num_zones), [=] (int i) { - psi_data[i] = drand48(); - }); - - RAJA::forall(0, (num_moments*num_groups*num_zones), [=] (int i) { - phi_data[i] = 0.0; - }); - - typename POL::ELL_VIEW ell(ell_data, RAJA::make_permuted_layout({num_moments, num_directions}, RAJA::as_array::get())); - typename POL::PSI_VIEW psi(psi_data, RAJA::make_permuted_layout({num_directions, num_groups, num_zones}, RAJA::as_array::get())); - typename POL::PHI_VIEW phi(phi_data, RAJA::make_permuted_layout({num_moments, num_groups, num_zones}, RAJA::as_array::get())); - - using EXEC = typename POL::EXEC; - - // do calculation using RAJA - forallN( - RangeSegment(0, num_moments), - RangeSegment(0, num_directions), - RangeSegment(0, num_groups), - RangeSegment(0, num_zones), - [=] __device__(IMoment m, IDirection d, IGroup g, IZone z) { - double val = ell(m, d) * psi(d, g, z); - phi(m, g, z) += val; - pdsum += val; - pdmin.min(val); - pdmax.max(val); - - int index = *d + (*m * num_directions) - + (*g * num_directions * num_moments) - + (*z * num_directions * num_moments * num_groups); - 
- pdminloc.minloc(val, index); - pdmaxloc.maxloc(val, index); - }); - - cudaDeviceSynchronize(); - - // Make sure data is copied to host for checking results. - chai::ArrayManager* rm = chai::ArrayManager::getInstance(); - rm->setExecutionSpace(chai::CPU); - // setup local Reduction variables as a crosscheck - double the_lsum = 0.0; - double the_lmin = DBL_MAX; - double the_lmax = -DBL_MAX; - - double* lsum = &the_lsum; - double* lmin = &the_lmin; - double* lmax = &the_lmax; - - forall(RangeSegment(0, num_zones), [=] (int z) { - for (IGroup g(0); g < num_groups; ++g) { - for (IMoment m(0); m < num_moments; ++m) { - double total = 0.0; - for (IDirection d(0); d < num_directions; ++d) { - double val = ell(m, d) * psi(d, g, IZone(z)); - total += val; - *lmin = RAJA_MIN(*lmin, val); - *lmax = RAJA_MAX(*lmax, val); - int index = *d + (*m * num_directions) - + (*g * num_directions * num_moments) - + (z * num_directions * num_moments * num_groups); - } - *lsum += total; - - // check answer with some reasonable tolerance - ASSERT_FLOAT_EQ(total, phi(m, g, IZone(z))); - } - } - }); - - rm->setExecutionSpace(chai::NONE); - - ASSERT_FLOAT_EQ(*lsum, pdsum.get()); - ASSERT_FLOAT_EQ(*lmin, pdmin.get()); - ASSERT_FLOAT_EQ(*lmax, pdmax.get()); -} - -// Use thread-block mappings -struct PolLTimesA_GPU { - // Loops: Moments, Directions, Groups, Zones - typedef NestedPolicy, - cuda_threadblock_y_exec<32>>> - EXEC; - - // psi[direction, group, zone] - typedef RAJA::TypedManagedArrayView, IDirection, IGroup, IZone> - PSI_VIEW; - - // phi[moment, group, zone] - typedef RAJA::TypedManagedArrayView, IMoment, IGroup, IZone> - PHI_VIEW; - - // ell[moment, direction] - typedef RAJA::TypedManagedArrayView, IMoment, IDirection> - ELL_VIEW; - - typedef RAJA::PERM_IJK PSI_PERM; - typedef RAJA::PERM_IJK PHI_PERM; - typedef RAJA::PERM_IJ ELL_PERM; -}; - -// Use thread and block mappings -struct PolLTimesB_GPU { - // Loops: Moments, Directions, Groups, Zones - typedef NestedPolicy, - Permute> - EXEC; - - // psi[direction, group, zone] - typedef RAJA::TypedManagedArrayView, IDirection, IGroup, IZone> - PSI_VIEW; - - // phi[moment, group, zone] - typedef RAJA::TypedManagedArrayView, IMoment, IGroup, IZone> - PHI_VIEW; - - // ell[moment, direction] - typedef RAJA::TypedManagedArrayView, IMoment, IDirection> - ELL_VIEW; - - typedef RAJA::PERM_IJK PSI_PERM; - typedef RAJA::PERM_IJK PHI_PERM; - typedef RAJA::PERM_IJ ELL_PERM; -}; - -// Combine OMP Parallel, omp nowait, and cuda thread-block launch -struct PolLTimesC_GPU { - // Loops: Moments, Directions, Groups, Zones -#if defined(RAJA_ENABLE_OPENMP) - typedef NestedPolicy>, - OMP_Parallel<>> - EXEC; -#else - typedef NestedPolicy> > - EXEC; -#endif - - // psi[direction, group, zone] - typedef RAJA::TypedManagedArrayView, IDirection, IGroup, IZone> - PSI_VIEW; - - // phi[moment, group, zone] - typedef RAJA::TypedManagedArrayView, IMoment, IGroup, IZone> - PHI_VIEW; - - // ell[moment, direction] - typedef RAJA::TypedManagedArrayView, IMoment, IDirection> - ELL_VIEW; - - typedef RAJA::PERM_IJK PSI_PERM; - typedef RAJA::PERM_IJK PHI_PERM; - typedef RAJA::PERM_IJ ELL_PERM; -}; - -void runLTimesTests(Index_type num_moments, - Index_type num_directions, - Index_type num_groups, - Index_type num_zones) -{ - runLTimesTest( - "PolLTimesA_GPU", num_moments, num_directions, num_groups, num_zones); - runLTimesTest( - "PolLTimesB_GPU", num_moments, num_directions, num_groups, num_zones); - runLTimesTest( - "PolLTimesC_GPU", num_moments, num_directions, num_groups, num_zones); -} - -TEST(Chai, 
Nested) { -// runLTimesTests(2, 0, 7, 3); - runLTimesTests(2, 3, 7, 3); - runLTimesTests(2, 3, 32, 4); - runLTimesTests(25, 96, 8, 32); - runLTimesTests(100, 15, 7, 13); -} diff --git a/tests/integration/raja-chai-nested.cpp b/tests/integration/raja-chai-nested.cpp new file mode 100644 index 00000000..5caee295 --- /dev/null +++ b/tests/integration/raja-chai-nested.cpp @@ -0,0 +1,255 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2016-19, Lawrence Livermore National Security, LLC +// and RAJA project contributors. See the RAJA/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +/// +/// Source file containing tests for CHAI in RAJA nested loops. +/// +/// +#include +#include +#include + +#include +#include +#include + +#include "RAJA/RAJA.hpp" + +using namespace RAJA; +using namespace std; + +#include "chai/ArrayManager.hpp" +#include "chai/ManagedArrayView.hpp" +#include "chai/ManagedArray.hpp" + +#include "gtest/gtest.h" + +#define CUDA_TEST(X, Y) \ + static void cuda_test_##X##_##Y(); \ + TEST(X, Y) { cuda_test_##X##_##Y(); } \ + static void cuda_test_##X##_##Y() + +/* + * Simple tests using nested::forall and View + */ +CUDA_TEST(Chai, NestedSimple) +{ + using POLICY = + RAJA::KernelPolicy< + RAJA::statement::For<0, RAJA::seq_exec, + RAJA::statement::For<1, RAJA::seq_exec + > + > + >; + + using POLICY_GPU = RAJA::KernelPolicy< + RAJA::statement::CudaKernel< + RAJA::statement::For<1, RAJA::cuda_block_x_loop, + RAJA::statement::For<0, RAJA::cuda_thread_x_loop, + RAJA::statement::Lambda<0> + > + > + > + >; + + const int X = 16; + const int Y = 16; + + chai::ManagedArray v1(X * Y); + chai::ManagedArray v2(X * Y); + + RAJA::kernel( + + RAJA::make_tuple(RAJA::RangeSegment(0, Y), RAJA::RangeSegment(0, X)), + + [=](int i, int j) { + int index = j * X + i; + v1[index] = index; + }); + + RAJA::kernel( + RAJA::make_tuple(RangeSegment(0, Y), RangeSegment(0, X)), + + [=] __host__ __device__(int i, int j) { + int index = j * X + i; + v2[index] = v1[index] * 2.0f; + }); + + cudaDeviceSynchronize(); + + RAJA::kernel( + + RAJA::make_tuple(RAJA::RangeSegment(0, Y), RAJA::RangeSegment(0, X)), + + [=](int i, int j) { + int index = j * X + i; + ASSERT_FLOAT_EQ(v1[index], index * 1.0f); + ASSERT_FLOAT_EQ(v2[index], index * 2.0f); + }); +} + +CUDA_TEST(Chai, NestedView) +{ + using POLICY = + RAJA::KernelPolicy< + RAJA::statement::For<0, RAJA::seq_exec, + RAJA::statement::For<1, RAJA::seq_exec + > + > + >; + + using POLICY_GPU = + RAJA::KernelPolicy< + RAJA::statement::CudaKernel< + RAJA::statement::For<1, RAJA::cuda_block_x_loop, + RAJA::statement::For<0, RAJA::cuda_thread_x_loop + > + > + > + >; + + const int X = 16; + const int Y = 16; + + chai::ManagedArray v1_array(X * Y); + chai::ManagedArray v2_array(X * Y); + + using view = chai::ManagedArrayView>; + + view v1(v1_array, X, Y); + view v2(v2_array, X, Y); + + RAJA::kernel(RAJA::make_tuple(RangeSegment(0, Y), RangeSegment(0, X)), + [=](int i, int j) { v1(i, j) = (i + (j * X)) * 1.0f; }); + + RAJA::kernel(RAJA::make_tuple(RangeSegment(0, Y), RangeSegment(0, X)), + [=] __device__(int i, int j) { + v2(i, j) = v1(i, j) * 2.0f; + }); + + RAJA::kernel(RAJA::make_tuple(RangeSegment(0, Y), RangeSegment(0, X)), + [=](int i, int j) { + ASSERT_FLOAT_EQ(v2(i, j), v1(i, j) * 2.0f); + }); +} + +/////////////////////////////////////////////////////////////////////////// +// +// Example LTimes kernel test 
routines +// +// Demonstrates a 4-nested loop, the use of complex nested policies and +// the use of strongly-typed indices +// +// This routine computes phi(m, g, z) = SUM_d { ell(m, d)*psi(d,g,z) } +// +/////////////////////////////////////////////////////////////////////////// + +RAJA_INDEX_VALUE_T(IM, int, "IM"); +RAJA_INDEX_VALUE_T(ID, int, "ID"); +RAJA_INDEX_VALUE_T(IG, int, "IG"); +RAJA_INDEX_VALUE_T(IZ, int, "IZ"); + +void runLTimesTests(Index_type num_moments, + Index_type num_directions, + Index_type num_groups, + Index_type num_zones) +{ + // allocate data + // phi is initialized to all zeros, the others are randomized + chai::ManagedArray L_data(num_moments * num_directions); + chai::ManagedArray psi_data(num_directions * num_groups * num_zones); + chai::ManagedArray phi_data(num_moments * num_groups * num_zones); + + RAJA::forall( + RAJA::RangeSegment(0, (num_moments * num_directions)), + [=](int i) { + L_data[i] = i+2; + }); + + RAJA::forall( + RAJA::RangeSegment(0, (num_directions * num_groups * num_zones)), + [=](int i) { psi_data[i] = 2*i+1; }); + + RAJA::forall( + RAJA::RangeSegment(0, (num_moments * num_groups * num_zones)), + [=](int i) { phi_data[i] = 0.0; }); + + using LView = chai::TypedManagedArrayView, IM, ID>; + + // psi(d, g, z) : 2 -> z is stride-1 dimension + using PsiView = chai::TypedManagedArrayView, ID, IG, IZ>; + + // phi(m, g, z) : 2 -> z is stride-1 dimension + using PhiView = chai::TypedManagedArrayView, IM, IG, IZ>; + + std::array L_perm {{0, 1}}; + LView L(L_data, + RAJA::make_permuted_layout({{num_moments, num_directions}}, L_perm)); + + std::array psi_perm {{0, 1, 2}}; + PsiView psi(psi_data, + RAJA::make_permuted_layout({{num_directions, num_groups, num_zones}}, psi_perm)); + + std::array phi_perm {{0, 1, 2}}; + PhiView phi(phi_data, + RAJA::make_permuted_layout({{num_moments, num_groups, num_zones}}, phi_perm)); + + using EXECPOL = + RAJA::KernelPolicy< + statement::CudaKernelAsync< + statement::For<0, cuda_block_x_loop, // m + statement::For<2, cuda_block_y_loop, // g + statement::For<3, cuda_thread_x_loop, // z + statement::For<1, seq_exec, // d + statement::Lambda<0> + > + > + > + > + > + >; + + auto segments = RAJA::make_tuple(RAJA::TypedRangeSegment(0, num_moments), + RAJA::TypedRangeSegment(0, num_directions), + RAJA::TypedRangeSegment(0, num_groups), + RAJA::TypedRangeSegment(0, num_zones)); + + cudaErrchk( cudaDeviceSynchronize() ); + + RAJA::kernel( segments, + [=] RAJA_DEVICE (IM m, ID d, IG g, IZ z) { + phi(m, g, z) += L(m, d) * psi(d, g, z); + } + ); + + cudaErrchk( cudaDeviceSynchronize() ); + + RAJA::forall( + RAJA::TypedRangeSegment(0, num_moments), [=] (IM m) { + for (IG g(0); g < num_groups; ++g) { + for (IZ z(0); z < num_zones; ++z) { + double total = 0.0; + for (ID d(0); d < num_directions; ++d) { + double val = L(m, d) * psi(d, g, z); + total += val; + } + ASSERT_FLOAT_EQ(total, phi(m, g, z)); + } + } + }); + + //rm->setExecutionSpace(chai::NONE); +} + +TEST(Chai, LTimes) +{ + // runLTimesTests(2, 0, 7, 3); + runLTimesTests(2, 3, 7, 3); + runLTimesTests(2, 3, 32, 4); + runLTimesTests(25, 96, 8, 32); + runLTimesTests(100, 15, 7, 13); +} diff --git a/tests/integration/raja-chai-tests.cpp b/tests/integration/raja-chai-tests.cpp index 713dd1ec..c1e9416c 100644 --- a/tests/integration/raja-chai-tests.cpp +++ b/tests/integration/raja-chai-tests.cpp @@ -1,128 +1,107 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2016-18, Lawrence Livermore National Security, LLC. 
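The LTimes kernel above leans on the strongly-typed indices declared with RAJA_INDEX_VALUE_T: IM, ID, IG, and IZ are distinct types, so the typed views catch arguments passed in the wrong order at compile time instead of silently mis-indexing. A minimal sketch of the idea, reusing the L view from the test (the values are arbitrary):

    IM m(0);
    ID d(1);
    double ok = L(m, d);     // compiles: L is indexed as (IM, ID)
    // double bad = L(d, m); // should be rejected at compile time, not at run time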
-// -// Produced at the Lawrence Livermore National Laboratory -// -// LLNL-CODE-689114 -// -// All rights reserved. -// -// This file is part of RAJA. -// -// For details about use and distribution, please read RAJA/LICENSE. +// Copyright (c) 2016-19, Lawrence Livermore National Security, LLC +// and RAJA project contributors. See the RAJA/COPYRIGHT file for details. // +// SPDX-License-Identifier: (BSD-3-Clause) //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// /// /// Source file containing tests for CHAI with basic RAJA constructs /// - -#include "gtest/gtest.h" +#include "RAJA/RAJA.hpp" #include "chai/ManagedArray.hpp" -#include "chai/RajaExecutionSpacePlugin.hpp" +#include "chai/ManagedArrayView.hpp" -#define CUDA_TEST(X, Y) \ -static void cuda_test_ ## X ## Y();\ -TEST(X,Y) { cuda_test_ ## X ## Y();}\ -static void cuda_test_ ## X ## Y() - -#include "RAJA/RAJA.hpp" +#include -// // Register plugin with RAJA -// static RAJA::util::PluginRegistry::Add P( -// "RajaExecutionSpacePlugin", - // "Plugin to set CHAI execution space based on RAJA execution platform"); +#include "gtest/gtest.h" +#define CUDA_TEST(X, Y) \ + static void cuda_test_##X##_##Y(); \ + TEST(X, Y) { cuda_test_##X##_##Y(); } \ + static void cuda_test_##X##_##Y() -#include - -CUDA_TEST(ChaiTest, Simple) { +CUDA_TEST(ChaiTest, Simple) +{ chai::ManagedArray v1(10); chai::ManagedArray v2(10); - RAJA::forall(RAJA::RangeSegment(0, 10), [=] (int i) { - v1[i] = static_cast(i * 1.0f); + RAJA::forall(RAJA::RangeSegment(0, 10), [=](int i) { + v1[i] = static_cast(i * 1.0f); }); std::cout << "end of loop 1" << std::endl; -#if defined(CHAI_ENABLE_CUDA) - RAJA::forall >(RAJA::RangeSegment(0, 10), [=] __device__ (int i) { - v2[i] = v1[i]*2.0f; +#if defined(RAJA_ENABLE_CUDA) + RAJA::forall >(RAJA::RangeSegment(0, 10), [=] __device__(int i) { + v2[i] = v1[i] * 2.0f; }); #else - RAJA::forall(RAJA::RangeSegment(0, 10), [=] (int i) { - v2[i] = v1[i]*2.0f; - }); + RAJA::forall(RAJA::RangeSegment(0, 10), [=](int i) { v2[i] = v1[i] * 2.0f; }); #endif std::cout << "end of loop 2" << std::endl; - RAJA::forall(RAJA::RangeSegment(0, 10), [=] (int i) { - ASSERT_FLOAT_EQ(v2[i], i*2.0f); + RAJA::forall(RAJA::RangeSegment(0, 10), [=](int i) { + ASSERT_FLOAT_EQ(v2[i], i * 2.0f); }); -#if defined(CHAI_ENABLE_CUDA) - RAJA::forall >(RAJA::RangeSegment(0, 10), [=] __device__ (int i) { - v2[i] *= 2.0f; +#if defined(RAJA_ENABLE_CUDA) + RAJA::forall >(RAJA::RangeSegment(0, 10), [=] __device__(int i) { + v2[i] *= 2.0f; }); #else - RAJA::forall(RAJA::RangeSegment(0, 10), [=] (int i) { - v2[i] *= 2.0f; - }); + RAJA::forall(RAJA::RangeSegment(0, 10), [=](int i) { v2[i] *= 2.0f; }); #endif - float * raw_v2 = v2; - for (int i = 0; i < 10; i++ ) { - ASSERT_FLOAT_EQ(raw_v2[i], i*2.0f*2.0f);; + float* raw_v2 = v2; + for (int i = 0; i < 10; i++) { + ASSERT_FLOAT_EQ(raw_v2[i], i * 2.0f * 2.0f); + ; } } -#if 0 -CUDA_TEST(ChaiTest, Views) { +CUDA_TEST(ChaiTest, Views) +{ chai::ManagedArray v1_array(10); chai::ManagedArray v2_array(10); - typedef RAJA::ManagedArrayView > view; + using view = chai::ManagedArrayView >; view v1(v1_array, 10); view v2(v2_array, 10); - RAJA::forall(0, 10, [=] (int i) { - v1(i) = static_cast(i * 1.0f); + RAJA::forall(RAJA::RangeSegment(0, 10), [=](int i) { + v1(i) = static_cast(i * 1.0f); }); -#if defined(CHAI_ENABLE_CUDA) - RAJA::forall >(0, 10, [=] __device__ (int i) { - v2(i) = v1(i)*2.0f; +#if defined(RAJA_ENABLE_CUDA) + RAJA::forall >(RAJA::RangeSegment(0, 10), [=] __device__(int i) { + v2(i) = v1(i) * 2.0f; 
}); #else - RAJA::forall(0, 10, [=](int i) { - v2(i) = v1(i)*2.0f; - }); + RAJA::forall(RAJA::RangeSegment(0, 10), [=](int i) { v2(i) = v1(i) * 2.0f; }); #endif - RAJA::forall(0, 10, [=] (int i) { - ASSERT_FLOAT_EQ(v2(i), i*2.0f); + RAJA::forall(RAJA::RangeSegment(0, 10), [=](int i) { + ASSERT_FLOAT_EQ(v2(i), i * 2.0f); }); -#if defined(CHAI_ENABLE_CUDA) - RAJA::forall >(0, 10, [=] __device__ (int i) { - v2(i) *= 2.0f; +#if defined(RAJA_ENABLE_CUDA) + RAJA::forall >(RAJA::RangeSegment(0, 10), [=] __device__(int i) { + v2(i) *= 2.0f; }); #else - RAJA::forall(0, 10, [=](int i) { - v2(i) *= 2.0f; - }); + RAJA::forall(RAJA::RangeSegment(0, 10), [=](int i) { v2(i) *= 2.0f; }); #endif - float * raw_v2 = v2.data; - for (int i = 0; i < 10; i++ ) { - ASSERT_FLOAT_EQ(raw_v2[i], i*1.0f*2.0f*2.0f);; + float* raw_v2 = v2.data; + for (int i = 0; i < 10; i++) { + ASSERT_FLOAT_EQ(raw_v2[i], i * 1.0f * 2.0f * 2.0f); + ; } } -#endif From ba44bcdb3c8ca2e0fa7ef38f609834a9041dfc65 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Thu, 12 Sep 2019 17:54:49 -0700 Subject: [PATCH 08/58] Add ability to control callbacks --- src/chai/ArrayManager.cpp | 21 ++++++++++++--------- src/chai/ArrayManager.hpp | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 9 deletions(-) diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index ec16cde8..54a32176 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -61,7 +61,8 @@ ArrayManager* ArrayManager::getInstance() ArrayManager::ArrayManager() : m_pointer_map{}, m_allocators{}, - m_resource_manager{umpire::ResourceManager::getInstance()} + m_resource_manager{umpire::ResourceManager::getInstance()}, + m_callbacks_active{true} { m_pointer_map.clear(); m_current_execution_space = NONE; @@ -196,7 +197,7 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space) if (!record->m_touched[record->m_last_space]) { return; } else { - record->m_user_callback(ACTION_MOVE, space, record->m_size); + callback(record, ACTION_MOVE, space, record->m_size); std::lock_guard lock(m_mutex); m_resource_manager.copy(dst_pointer, src_pointer); } @@ -211,7 +212,7 @@ void ArrayManager::allocate( auto size = pointer_record->m_size; auto alloc = m_resource_manager.getAllocator(pointer_record->m_allocators[space]); - pointer_record->m_user_callback(ACTION_ALLOC, space, size); + callback(pointer_record, ACTION_ALLOC, space, size); pointer_record->m_pointers[space] = alloc.allocate(size); registerPointer(pointer_record, space); @@ -229,9 +230,10 @@ void ArrayManager::free(PointerRecord* pointer_record) void* space_ptr = pointer_record->m_pointers[space]; #if defined(CHAI_ENABLE_UM) if (space_ptr == pointer_record->m_pointers[UM]) { - pointer_record->m_user_callback(ACTION_FREE, - ExecutionSpace(UM), - pointer_record->m_size); + callback(pointer_record, + ACTION_FREE, + ExecutionSpace(UM), + pointer_record->m_size); { std::lock_guard lock(m_mutex); m_pointer_map.erase(space_ptr); @@ -247,9 +249,10 @@ void ArrayManager::free(PointerRecord* pointer_record) } } else { #endif - pointer_record->m_user_callback(ACTION_FREE, - ExecutionSpace(space), - pointer_record->m_size); + callback(pointer_record, + ACTION_FREE, + ExecutionSpace(space), + pointer_record->m_size); { std::lock_guard lock(m_mutex); m_pointer_map.erase(space_ptr); diff --git a/src/chai/ArrayManager.hpp b/src/chai/ArrayManager.hpp index 7afc21ab..82221d24 100644 --- a/src/chai/ArrayManager.hpp +++ b/src/chai/ArrayManager.hpp @@ -258,6 +258,16 @@ class ArrayManager int 
getAllocatorId(ExecutionSpace space) const; + /*! + * \brief Turn callbacks on. + */ + void enableCallbacks() { m_callbacks_active = true; } + + /*! + * \brief Turn callbacks off. + */ + void disableCallbacks() { m_callbacks_active = false; } + protected: /*! * \brief Construct a new ArrayManager. @@ -293,6 +303,23 @@ class ArrayManager */ void move(PointerRecord* record, ExecutionSpace space); + /*! + * \brief Execute a user callback if callbacks are active + * + * \param record The pointer record containing the callback + * \param action The event that occurred + * \param space The space in which the event occurred + * \param size The number of bytes in the array associated with this pointer record + */ + inline void callback(PointerRecord* record, + Action action, + ExecutionSpace space, + size_t size) const { + if (m_callbacks_active && record) { + record->m_user_callback(action, space, size); + } + } + /*! * Current execution space. */ @@ -317,6 +344,11 @@ class ArrayManager umpire::ResourceManager& m_resource_manager; mutable std::mutex m_mutex; + + /*! + * \brief Controls whether or not callbacks are called. + */ + bool m_callbacks_active; }; } // end of namespace chai From 2708a6003caee54fa922d29234fe3d6906f1a4da Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Fri, 13 Sep 2019 09:38:57 -0700 Subject: [PATCH 09/58] Add tests for controlling callbacks --- tests/unit/array_manager_unit_tests.cpp | 59 ++++++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) diff --git a/tests/unit/array_manager_unit_tests.cpp b/tests/unit/array_manager_unit_tests.cpp index 0aff35d9..eba731af 100644 --- a/tests/unit/array_manager_unit_tests.cpp +++ b/tests/unit/array_manager_unit_tests.cpp @@ -53,6 +53,7 @@ TEST(ArrayManager, Constructor) } #ifndef CHAI_DISABLE_RM + TEST(ArrayManager, getPointerMap) { chai::ArrayManager* rm = chai::ArrayManager::getInstance(); @@ -108,4 +109,60 @@ TEST(ArrayManager, getPointerMap) ASSERT_EQ(rm->getTotalSize(), (sizeOfArray1 * sizeof(int)) + (sizeOfArray2 * sizeof(double))); } -#endif + +TEST(ArrayManager, controlCallbacks) +{ + // First check that callbacks are turned on by default + chai::ArrayManager* arrayManager = chai::ArrayManager::getInstance(); + + // Variable for testing if callbacks are on or off + bool callbacksAreOn = false; + + // Allocate one array and set a callback + size_t sizeOfArray = 5; + chai::ManagedArray array1(sizeOfArray, chai::CPU); + array1.setUserCallback([&] (chai::Action, chai::ExecutionSpace, std::size_t) { + callbacksAreOn = true; + }); + + // Make sure the callback is called with ACTION_FREE + array1.free(); + ASSERT_TRUE(callbacksAreOn); + + // Now turn off callbacks + arrayManager->disableCallbacks(); + + // Reset the variable for testing if callbacks are on or off + callbacksAreOn = false; + + // Allocate another array and set a callback + chai::ManagedArray array2(sizeOfArray, chai::CPU); + array2.setUserCallback([&] (chai::Action, chai::ExecutionSpace, std::size_t) { + callbacksAreOn = true; + }); + + // Make sure the callback is called with ACTION_FREE + array2.free(); + ASSERT_FALSE(callbacksAreOn); + + // Now make sure the order doesn't matter for when the callback is set compared + // to when callbacks are enabled + + // Reset the variable for testing if callbacks are on or off + callbacksAreOn = false; + + // Allocate a third array and set a callback + chai::ManagedArray array3(sizeOfArray, chai::CPU); + array3.setUserCallback([&] (chai::Action, chai::ExecutionSpace, std::size_t) { + callbacksAreOn = true; + 
}); + + // Turn on callbacks + arrayManager->enableCallbacks(); + + // Make sure the callback is called with ACTION_FREE + array3.free(); + ASSERT_TRUE(callbacksAreOn); +} + +#endif // !CHAI_DISABLE_RM From 969e4bcad7a8dd42451539814287996d9ef19c04 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Fri, 13 Sep 2019 09:43:08 -0700 Subject: [PATCH 10/58] Add documentation for callback test --- tests/unit/array_manager_unit_tests.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/unit/array_manager_unit_tests.cpp b/tests/unit/array_manager_unit_tests.cpp index eba731af..66edf586 100644 --- a/tests/unit/array_manager_unit_tests.cpp +++ b/tests/unit/array_manager_unit_tests.cpp @@ -110,6 +110,9 @@ TEST(ArrayManager, getPointerMap) (sizeOfArray1 * sizeof(int)) + (sizeOfArray2 * sizeof(double))); } +/*! + * \brief Tests to see if callbacks can be turned on or off + */ TEST(ArrayManager, controlCallbacks) { // First check that callbacks are turned on by default From ef55f1057cee012fbe3fb1e3258f6fb55ecbc5ff Mon Sep 17 00:00:00 2001 From: David Beckingsale Date: Mon, 16 Sep 2019 15:10:02 -0700 Subject: [PATCH 11/58] Add docs links --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 959dc3bf..b45a98c6 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,8 @@ [![Azure Build Status](https://dev.azure.com/davidbeckingsale/CHAI/_apis/build/status/LLNL.CHAI?branchName=develop)](https://dev.azure.com/davidbeckingsale/CHAI/_build/latest?definitionId=2&branchName=develop) [![Build Status](https://travis-ci.org/LLNL/CHAI.svg?branch=develop)](https://travis-ci.org/LLNL/CHAI) +[![Documentation Status](https://readthedocs.org/projects/chai/badge/?version=develop)](https://chai.readthedocs.io/en/develop/?badge=develop) + CHAI is a library that handles automatic data migration to different memory spaces behind an array-style interface. It was designed to work with @@ -22,7 +24,7 @@ of CUDA was detected. Once CMake has completed, CHAI can be built with Make: For more advanced configuration you can use standard CMake variables. -More information is available in the CHAI documentation. +More information is available in the [CHAI documentation](https://chai.readthedocs.io/en/develop/). 
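As a quick taste of the array-style interface, here is a minimal sketch (not a complete program, and it assumes a CUDA-enabled build used together with RAJA): a `chai::ManagedArray` can be used from both host and device loops, and the data follows the execution space.

```cpp
#include "chai/ManagedArray.hpp"
#include "RAJA/RAJA.hpp"

chai::ManagedArray<float> v(10);

RAJA::forall<RAJA::seq_exec>(RAJA::RangeSegment(0, 10), [=] (int i) {
  v[i] = static_cast<float>(i);   // runs on the host
});

RAJA::forall<RAJA::cuda_exec<16>>(RAJA::RangeSegment(0, 10), [=] __device__ (int i) {
  v[i] *= 2.0f;                   // data is migrated to the GPU automatically
});
```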
## Authors From d08ad65580645defc9275b860cf1825b461b47c5 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Tue, 24 Sep 2019 10:02:32 -0700 Subject: [PATCH 12/58] Fix offsets in thin ManagedArray --- src/chai/ManagedArray_thin.inl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/chai/ManagedArray_thin.inl b/src/chai/ManagedArray_thin.inl index 9568af28..88c34dca 100644 --- a/src/chai/ManagedArray_thin.inl +++ b/src/chai/ManagedArray_thin.inl @@ -57,6 +57,7 @@ CHAI_HOST_DEVICE ManagedArray::ManagedArray(): m_active_pointer(nullptr), m_resource_manager(nullptr), m_elems(0), + m_offset(0), m_is_slice(false) { } @@ -68,6 +69,7 @@ CHAI_HOST_DEVICE ManagedArray::ManagedArray( m_active_pointer(nullptr), m_resource_manager(nullptr), m_elems(elems), + m_offset(0), m_is_slice(false) { this->allocate(elems, space); @@ -79,6 +81,7 @@ CHAI_HOST_DEVICE ManagedArray::ManagedArray(std::nullptr_t) : m_active_pointer(nullptr), m_resource_manager(nullptr), m_elems(0), + m_offset(0), m_is_slice(false) { } @@ -90,6 +93,7 @@ CHAI_HOST_DEVICE ManagedArray::ManagedArray(ManagedArray const& other): m_active_pointer(other.m_active_pointer), m_resource_manager(other.m_resource_manager), m_elems(other.m_elems), + m_offset(other.m_offset), m_is_slice(other.m_is_slice) { } @@ -100,6 +104,7 @@ CHAI_HOST_DEVICE ManagedArray::ManagedArray(T* data, ArrayManager* array_mana m_active_pointer(data), m_resource_manager(array_manager), m_elems(elems), + m_offset(0), m_is_slice(false) { } From 9dcc087ee1ecde1527c01ee00e37584180bb3cc5 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Tue, 24 Sep 2019 10:08:51 -0700 Subject: [PATCH 13/58] Make sure m_active_base_pointer is set in thin version --- src/chai/ManagedArray_thin.inl | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/chai/ManagedArray_thin.inl b/src/chai/ManagedArray_thin.inl index 88c34dca..70529011 100644 --- a/src/chai/ManagedArray_thin.inl +++ b/src/chai/ManagedArray_thin.inl @@ -53,8 +53,9 @@ namespace chai { template CHAI_INLINE -CHAI_HOST_DEVICE ManagedArray::ManagedArray(): +CHAI_HOST_DEVICE ManagedArray::ManagedArray() : m_active_pointer(nullptr), + m_active_base_pointer(nullptr), m_resource_manager(nullptr), m_elems(0), m_offset(0), @@ -65,8 +66,9 @@ CHAI_HOST_DEVICE ManagedArray::ManagedArray(): template CHAI_INLINE CHAI_HOST_DEVICE ManagedArray::ManagedArray( - size_t elems, ExecutionSpace space): + size_t elems, ExecutionSpace space) : m_active_pointer(nullptr), + m_active_base_pointer(nullptr), m_resource_manager(nullptr), m_elems(elems), m_offset(0), @@ -79,6 +81,7 @@ template CHAI_INLINE CHAI_HOST_DEVICE ManagedArray::ManagedArray(std::nullptr_t) : m_active_pointer(nullptr), + m_active_base_pointer(nullptr), m_resource_manager(nullptr), m_elems(0), m_offset(0), @@ -89,8 +92,9 @@ CHAI_HOST_DEVICE ManagedArray::ManagedArray(std::nullptr_t) : template CHAI_INLINE -CHAI_HOST_DEVICE ManagedArray::ManagedArray(ManagedArray const& other): +CHAI_HOST_DEVICE ManagedArray::ManagedArray(ManagedArray const& other) : m_active_pointer(other.m_active_pointer), + m_active_base_pointer(other.m_active_base_pointer), m_resource_manager(other.m_resource_manager), m_elems(other.m_elems), m_offset(other.m_offset), @@ -102,6 +106,7 @@ template CHAI_INLINE CHAI_HOST_DEVICE ManagedArray::ManagedArray(T* data, ArrayManager* array_manager, size_t elems, PointerRecord* pointer_record) : m_active_pointer(data), + m_active_base_pointer(data), m_resource_manager(array_manager), m_elems(elems), m_offset(0), From 
bd06267152ffecce6bef250075d446ec3353f0bf Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Tue, 24 Sep 2019 10:12:51 -0700 Subject: [PATCH 14/58] Make sure pointer record gets set in thin version --- src/chai/ManagedArray_thin.inl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/chai/ManagedArray_thin.inl b/src/chai/ManagedArray_thin.inl index 70529011..a63b6ed7 100644 --- a/src/chai/ManagedArray_thin.inl +++ b/src/chai/ManagedArray_thin.inl @@ -57,6 +57,7 @@ CHAI_HOST_DEVICE ManagedArray::ManagedArray() : m_active_pointer(nullptr), m_active_base_pointer(nullptr), m_resource_manager(nullptr), + m_pointer_record(nullptr), m_elems(0), m_offset(0), m_is_slice(false) @@ -70,6 +71,7 @@ CHAI_HOST_DEVICE ManagedArray::ManagedArray( m_active_pointer(nullptr), m_active_base_pointer(nullptr), m_resource_manager(nullptr), + m_pointer_record(nullptr), m_elems(elems), m_offset(0), m_is_slice(false) @@ -83,6 +85,7 @@ CHAI_HOST_DEVICE ManagedArray::ManagedArray(std::nullptr_t) : m_active_pointer(nullptr), m_active_base_pointer(nullptr), m_resource_manager(nullptr), + m_pointer_record(nullptr), m_elems(0), m_offset(0), m_is_slice(false) @@ -96,6 +99,7 @@ CHAI_HOST_DEVICE ManagedArray::ManagedArray(ManagedArray const& other) : m_active_pointer(other.m_active_pointer), m_active_base_pointer(other.m_active_base_pointer), m_resource_manager(other.m_resource_manager), + m_pointer_record(other.m_pointer_record), m_elems(other.m_elems), m_offset(other.m_offset), m_is_slice(other.m_is_slice) @@ -108,6 +112,7 @@ CHAI_HOST_DEVICE ManagedArray::ManagedArray(T* data, ArrayManager* array_mana m_active_pointer(data), m_active_base_pointer(data), m_resource_manager(array_manager), + m_pointer_record(pointer_record), m_elems(elems), m_offset(0), m_is_slice(false) From e909a3f52860cfede85149bd70a9bc3c95691d1b Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Tue, 24 Sep 2019 10:34:39 -0700 Subject: [PATCH 15/58] Fix compiler warnings --- src/chai/ManagedArray.hpp | 10 +++++----- src/chai/ManagedArray_thin.inl | 13 +++++++------ 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/chai/ManagedArray.hpp b/src/chai/ManagedArray.hpp index 3bcabdad..c4593549 100644 --- a/src/chai/ManagedArray.hpp +++ b/src/chai/ManagedArray.hpp @@ -344,24 +344,24 @@ class ManagedArray : public CHAICopyable /*! * Currently active data pointer. */ - mutable T* m_active_pointer; - mutable T* m_active_base_pointer; + mutable T* m_active_pointer = nullptr; + mutable T* m_active_base_pointer = nullptr; /*! * Pointer to ArrayManager instance. */ - ArrayManager* m_resource_manager; + ArrayManager* m_resource_manager = nullptr; /*! * Number of elements in the ManagedArray. */ - size_t m_elems; + size_t m_elems = 0; size_t m_offset = 0; /*! * Pointer to PointerRecord data. 
*/ - PointerRecord* m_pointer_record; + PointerRecord* m_pointer_record = nullptr; bool m_is_slice = false; diff --git a/src/chai/ManagedArray_thin.inl b/src/chai/ManagedArray_thin.inl index a63b6ed7..25edc36f 100644 --- a/src/chai/ManagedArray_thin.inl +++ b/src/chai/ManagedArray_thin.inl @@ -57,9 +57,9 @@ CHAI_HOST_DEVICE ManagedArray::ManagedArray() : m_active_pointer(nullptr), m_active_base_pointer(nullptr), m_resource_manager(nullptr), - m_pointer_record(nullptr), m_elems(0), m_offset(0), + m_pointer_record(nullptr), m_is_slice(false) { } @@ -71,9 +71,9 @@ CHAI_HOST_DEVICE ManagedArray::ManagedArray( m_active_pointer(nullptr), m_active_base_pointer(nullptr), m_resource_manager(nullptr), - m_pointer_record(nullptr), m_elems(elems), m_offset(0), + m_pointer_record(nullptr), m_is_slice(false) { this->allocate(elems, space); @@ -85,9 +85,9 @@ CHAI_HOST_DEVICE ManagedArray::ManagedArray(std::nullptr_t) : m_active_pointer(nullptr), m_active_base_pointer(nullptr), m_resource_manager(nullptr), - m_pointer_record(nullptr), m_elems(0), m_offset(0), + m_pointer_record(nullptr), m_is_slice(false) { } @@ -99,9 +99,9 @@ CHAI_HOST_DEVICE ManagedArray::ManagedArray(ManagedArray const& other) : m_active_pointer(other.m_active_pointer), m_active_base_pointer(other.m_active_base_pointer), m_resource_manager(other.m_resource_manager), - m_pointer_record(other.m_pointer_record), m_elems(other.m_elems), m_offset(other.m_offset), + m_pointer_record(other.m_pointer_record), m_is_slice(other.m_is_slice) { } @@ -112,9 +112,9 @@ CHAI_HOST_DEVICE ManagedArray::ManagedArray(T* data, ArrayManager* array_mana m_active_pointer(data), m_active_base_pointer(data), m_resource_manager(array_manager), - m_pointer_record(pointer_record), m_elems(elems), m_offset(0), + m_pointer_record(pointer_record), m_is_slice(false) { } @@ -135,8 +135,9 @@ CHAI_HOST ManagedArray ManagedArray::slice(size_t offset, size_t elems) { template CHAI_INLINE -CHAI_HOST void ManagedArray::allocate(size_t elems, ExecutionSpace space, UserCallback const &cback) { +CHAI_HOST void ManagedArray::allocate(size_t elems, ExecutionSpace space, UserCallback const &) { if(!m_is_slice) { + (void) space; // Quiet compiler warning when CHAI_LOG does nothing CHAI_LOG("ManagedArray", "Allocating array of size " << elems << " in space " << space); m_elems = elems; From 9683d9d63657ab9c66df80fb3a6341ecb8035db4 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Tue, 24 Sep 2019 12:19:46 -0700 Subject: [PATCH 16/58] Fix linking error --- src/chai/ManagedArray_thin.inl | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/chai/ManagedArray_thin.inl b/src/chai/ManagedArray_thin.inl index 25edc36f..6ac92bcb 100644 --- a/src/chai/ManagedArray_thin.inl +++ b/src/chai/ManagedArray_thin.inl @@ -51,6 +51,35 @@ namespace chai { +template +CHAI_INLINE +CHAI_HOST_DEVICE ManagedArray::ManagedArray( + std::initializer_list spaces, + std::initializer_list allocators) : + ManagedArray() +{ + if (m_pointer_record) { + int i = 0; + + for (auto& space : spaces) { + m_pointer_record->m_allocators[space] = allocators.begin()[i++].getId(); + } + } +} + +template +CHAI_INLINE +CHAI_HOST_DEVICE ManagedArray::ManagedArray( + size_t elems, + std::initializer_list spaces, + std::initializer_list allocators, + ExecutionSpace space) : + ManagedArray(spaces, allocators) +{ + m_elems = elems; + this->allocate(elems, space); +} + template CHAI_INLINE CHAI_HOST_DEVICE ManagedArray::ManagedArray() : From 03498099e157566ca4dc70b738e2f16cfb8e0cc0 Mon Sep 17 00:00:00 2001 
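The constructors added above let a caller pair each execution space with a specific umpire allocator. A hedged sketch of how they might be called ("HOST" and "DEVICE" are umpire's built-in resource allocators; the final argument chooses where the initial allocation is made):

    auto& rm = umpire::ResourceManager::getInstance();
    chai::ManagedArray<double> a(
        100,                                                   // number of elements
        {chai::CPU, chai::GPU},                                // execution spaces
        {rm.getAllocator("HOST"), rm.getAllocator("DEVICE")},  // matching allocators
        chai::CPU);                                            // space of the first allocation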
From: Alan Dayton Date: Tue, 24 Sep 2019 12:42:04 -0700 Subject: [PATCH 17/58] Add log messages for attempting to do certain operations on slices --- src/chai/.ManagedArray_thin.inl.swp | Bin 0 -> 28672 bytes src/chai/ManagedArray_thin.inl | 53 +++++++++++++++++++--------- 2 files changed, 37 insertions(+), 16 deletions(-) create mode 100644 src/chai/.ManagedArray_thin.inl.swp diff --git a/src/chai/.ManagedArray_thin.inl.swp b/src/chai/.ManagedArray_thin.inl.swp new file mode 100644 index 0000000000000000000000000000000000000000..e811814f55fef2bdd6220e283b0cf045c460b020 GIT binary patch literal 28672 zcmeI450D$xeaDvqA%Oy!G!0ENX`TggC4saKU?3zZkT5V!LX)OIN=s7yHYA0nWau>0(8*9T1ZXJ`m^S2JXPQ5R^z(bW zds^wF`(q5ubXGGT>$JP?-}n9Y?fd=SThDFWo|;ytkN&>F=dFfu;s+0$_t<574oiLF zFr!l1=QQeP$eCq7_C=+7%dDQZY1yj`W31h>$9CjT8S^q8D^<-?D`Vw_k~P+{%kCf3 zrFv<%S&7?rX_!TM;DiA6VDiA6VDiA6VDiA6VDiA6VDiA6V zDsT`Alq!ZHzWWQ}@U#B!`~UYGZx~fL7YguB_-**#;|$}c@H9LDx59h9Ngcj_E)4_m;2};}rUx6FpO1J{%;T>>190PBG zH^WZ|f;($|bnNJek(i37ic{Hqp*WG;o=PNDa1+Ls!gwqeJP>I)l~~M6U38)<=4Oz}q*Ixs z;h42%wd9yvoPABRUaFa@Fm4!^7)r65qqSkYRIN73oR64Qv(_3>t!AlgMmHN5b49Ia zwTi8(RW>8@rE1HJ>SJDoi?&&A*p)!NB|&3;zGXU*jWk1k>P+3Ny(6~SYP9XLS*+=u zCBb$FM42nK`Y98*VySG3Ps_T4J8))*H63J*lNPsNAG{yf4wN`_|hq z&LHBqt+h+vx#yv5=h*0+x!bBoqPwh*lky6`ftan?cVqOcQ z>SDvagn>y}WVlh1So12Hu&(W;lL?TV?kl-re3(Qelrt7aBUO>1;voU`+)V$NH2vl7v@N@n6))5+rObd((Y zq(ec$Mi-jRAx^rz#`P9}_I#t-IvXd%5 zo0w7)@#*-sl4>@ebpK4{RX#ge*b&bqM^tiqGNV$HDn7A2l}}E1 z7fbSVD(>bzqOv(v$mR+vlFB5~vlFSzHkC_lGj&nI*8gr~4HVoOFykeVPxJWVBMl8Kc3kvum! zJ(G^-cIpb!ihS~%SuWzF*CY9Wcq5X}R5m?94Jku6IzxnQvur~JL#eAs6C;!?(D9+CtqHd7Zgyk5g4Z$^UmIq zZK}PNv(RWejGkuMnzzcTX0UlOj!g491Z7EJ$Z6?+RvP7Y&8#~mnLDXcy)s624P9i7siMtO zwk4I^V^ye}v|kEyWvMqD=8Gd7uv$tU!%{jWS9mx#>MhAl*KEGQlWVV4->s-Xxmscj zwUt%(GuohXX0T>0IX7q3OZGm+ldGm`ZL+paS1MCyS(i&9Kn1vt#M2qX?`(co*NR`iR6aUH@^3b z(@*!07sOs9bH#VFWNW*Tgt{!bYDV7`sN+eJF736=I`jH8_lR9%x<5X~O@8ha;VpK7^l-eDN zhwcVL{Qqn5q5lEjTKxY>&tH55U;hsHB3uJk!2xK41;Ze5fXCrcxCicnJK(S2({MHH zf)s3nN!SFx3&+7D!~|}KTj4L^A~0bmtOJP|907;JOT-GEgh%0Ou;DB?5`Ij~;D_)7 zcpjdEZ^QlY88{z?Ap-XkQ@9uIfm`4PkXVBQaaa#0z)^4{904y%+yO+RheW6F6DklY z5GoKV5GoKV5GwG-D&U5R6jQ=wB00t-asjTNct_o#CO0^5n@-!Vt6_1YB>Lus(*0mu z@9F`^#Yfz11OrEUsi1*ZArs*WU!M41KH7DU_EvJ2bzYx(7S=D-&#&sP^W6JGr79OH z%d^lGE6v5Ll2Dscpho^8S=WsstBSICwid}ccC2uJ`^ljEOO%pe`GmQL*ie37y}V#I z>bkVeI%2-g51AK>iP?#GF-|jwtYmssbXhE-WAam6g#5pPwnqpVul9`q9F z$tS8c}xE?MA2d3d{5Pdhof3R2J5qJ>VAbSWdfJs;nL-0=cPx?gm z5;9B??TmudG0A%1yc!~W2vX9_qxE>Zj_7a#d0~7EH`vblSUxF{d z<**)!y3P6DCq9 z)*E&ASs!Jl(fuftq5L&$@8YCj$xs}q)zUGoX_YTn6ri`^fd}@=gWRBp-b^ZSb`(>S z8Oox$ss1CZ1XP_!bmhYIi0Uq8>pauhZIP~p(4i4E#8LziUY14XZ6eBg@5#C$y<}~& z0flFxzaHM(RamDtJ(TIPOqji{3~nU}Uc^=2JWMW+#iYRCG*wRnZL`Ljf;SbEXQXsl zaON7!J*w+@qEwX3-ACS}+&o0 zoNn=<>bN)y7qzA=UeI~AsrMi(*MEaeU{Ni0L-{$e>ZIfTrAh^RYsJ)%+Onn7$KJjO z^hb-qec#F1-E-ArDPAsOB<9{CrTgdg49Yv~&88?PU_IXTT*Azn^)J`aDPSm`5*-up z3Sq^B`A=g$Qi!TyU7pT3U_wgiw#dAx+lV@imA1-hC%@ z%xFd9?ZjVnB5$A(gIV(j%kwu`t~srrKwGaY^6HI^D_L&b-6S@^ZgqnU$vGlV_^6xd zW(iQ8)3!{9fC(Gvsw~g5-Jvd->|J1AgKrzE(lA?WI3QA0-%U1sj9>Cz)n}Hq*#x~k zS1MoN#%a`gd6umzv0T)>XU}a(Ad)d)3gg?af8%LIykyJ84D^h~n@H-Zii}9^3`hSQ z?L3h&YsYSz?vUl5)a7u-Gbw3+f2w-`=bBziRwpz1$X;7rCRup1qvb}hRGpbJUb0(= z$~2T)jPjp+tms?pFuPF6eijC|xkkHQX}R++Yd+!@NXc%+*%GOJc(;do=CE!d!LUQ` z1wFrsKj@vt-r(n!<=(IUj$SE9o81bXA_miuewn??Uc0@m%w9BX?OT}j!@%Fr+#OR3 z&JX>-T3~|WRlqx=^!oov_}hOA;{W^a_dkVC|8=+%Zh`&qL1;h?D>5N?1^ z!pA}ONL&PU*aTznF1VXGz+LbKxE#)b8Td86{wweldraQ5hO}1cH%dUG*CUl1{^^zkCl2|=iJ{79NhQul|3)F-#fmX z{0{nuhsY`G9n=;l1p{USen)ouMExBsHOOz%g{&}HUtH|f(|_&FXsLZq$bSVf^q_iI 
z0(yVHK^~M^+h@kT0dxB8(P7{9^j$Fb9k8|J==qC!AaGyKTSMyL-iTp0sOS4}fv~~q z-oy1bKpc#Vs$Z@MgbVt#BNnYn6clf9Fw`*8Xd{ zuDt%QUgdgTknW>W;0js)Z{st^@vFuEAMqTzXYlDCfV<$6a3x#;Wsr6Mv*0u^;AMRL zU%=1dS&(-E{s!)VFTw3_8_4?rd*K5302~9~!S}xpz6|@}kKrRwgA%+8o~3S&fW!tq z54XVQK;8qm6m;GDIilgQP=Qc^P=Qc^P=Qc^P=Qc^P=Qc^RVm::ManagedArray(ManagedArray const& other) : template CHAI_INLINE -CHAI_HOST_DEVICE ManagedArray::ManagedArray(T* data, ArrayManager* array_manager, size_t elems, PointerRecord* pointer_record) : +CHAI_HOST_DEVICE ManagedArray::ManagedArray(T* data, + ArrayManager* array_manager, + size_t elems, + PointerRecord* pointer_record) : m_active_pointer(data), m_active_base_pointer(data), m_resource_manager(array_manager), @@ -164,58 +167,76 @@ CHAI_HOST ManagedArray ManagedArray::slice(size_t offset, size_t elems) { template CHAI_INLINE -CHAI_HOST void ManagedArray::allocate(size_t elems, ExecutionSpace space, UserCallback const &) { - if(!m_is_slice) { +CHAI_HOST void ManagedArray::allocate(size_t elems, + ExecutionSpace space, + UserCallback const &) { + if (!m_is_slice) { (void) space; // Quiet compiler warning when CHAI_LOG does nothing - CHAI_LOG("ManagedArray", "Allocating array of size " << elems << " in space " << space); + CHAI_LOG("ManagedArray", "Allocating array of size " << elems + << " in space " + << space); m_elems = elems; #if defined(CHAI_ENABLE_UM) - cudaMallocManaged(&m_active_pointer, sizeof(T)*elems); + cudaMallocManaged(&m_active_pointer, sizeof(T) * elems); #else - m_active_pointer = static_cast(malloc(sizeof(T)*elems)); + m_active_pointer = static_cast(malloc(sizeof(T) * elems)); #endif CHAI_LOG("ManagedArray", "m_active_ptr allocated at address: " << m_active_pointer); } + else { + CHAI_LOG("ManagedArray", "Attempted to allocate slice!"); + } } template CHAI_INLINE CHAI_HOST void ManagedArray::reallocate(size_t new_elems) { - if(!m_is_slice) { - CHAI_LOG("ManagedArray", "Reallocating array of size " << m_elems << " with new size" << elems); + if (!m_is_slice) { + CHAI_LOG("ManagedArray", "Reallocating array of size " << m_elems + << " with new size" + << elems); T* new_ptr; - #if defined(CHAI_ENABLE_UM) - cudaMallocManaged(&new_ptr, sizeof(T)*new_elems); - - cudaMemcpy(new_ptr, m_active_pointer, sizeof(T)*m_elems, cudaMemcpyDefault); + #if defined(CHAI_ENABLE_UM) + cudaMallocManaged(&new_ptr, sizeof(T) * new_elems); + cudaMemcpy(new_ptr, m_active_pointer, sizeof(T) * m_elems, cudaMemcpyDefault); cudaFree(m_active_pointer); #else - new_ptr = static_cast(realloc(m_active_pointer, sizeof(T)*new_elems)); + new_ptr = static_cast(realloc(m_active_pointer, sizeof(T) * new_elems)); #endif m_elems = new_elems; m_active_pointer = new_ptr; + m_active_base_pointer = m_active_pointer; CHAI_LOG("ManagedArray", "m_active_ptr reallocated at address: " << m_active_pointer); } + else { + CHAI_LOG("ManagedArray", "Attempted to realloc slice!"); + } } template CHAI_INLINE CHAI_HOST void ManagedArray::free() { - if(!m_is_slice) { + if (!m_is_slice) { #if defined(CHAI_ENABLE_UM) - cudaFree(m_active_pointer); + cudaFree(m_active_base_pointer); #else - ::free(m_active_pointer); + ::free(m_active_base_pointer); #endif + + m_active_base_pointer = nullptr; + m_active_pointer = nullptr; + } + else { + CHAI_LOG("ManagedArray", "tried to free slice!"); } } From 9892e5afac9e0bea71e0e85b6b312c7468b56848 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Tue, 24 Sep 2019 14:12:42 -0700 Subject: [PATCH 18/58] Use default initializers --- src/chai/.ManagedArray_thin.inl.swp | Bin 28672 -> 0 bytes 
src/chai/ManagedArray_thin.inl | 35 +++++----------------------- 2 files changed, 6 insertions(+), 29 deletions(-) delete mode 100644 src/chai/.ManagedArray_thin.inl.swp diff --git a/src/chai/.ManagedArray_thin.inl.swp b/src/chai/.ManagedArray_thin.inl.swp deleted file mode 100644 index e811814f55fef2bdd6220e283b0cf045c460b020..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 28672 zcmeI450D$xeaDvqA%Oy!G!0ENX`TggC4saKU?3zZkT5V!LX)OIN=s7yHYA0nWau>0(8*9T1ZXJ`m^S2JXPQ5R^z(bW zds^wF`(q5ubXGGT>$JP?-}n9Y?fd=SThDFWo|;ytkN&>F=dFfu;s+0$_t<574oiLF zFr!l1=QQeP$eCq7_C=+7%dDQZY1yj`W31h>$9CjT8S^q8D^<-?D`Vw_k~P+{%kCf3 zrFv<%S&7?rX_!TM;DiA6VDiA6VDiA6VDiA6VDiA6VDiA6V zDsT`Alq!ZHzWWQ}@U#B!`~UYGZx~fL7YguB_-**#;|$}c@H9LDx59h9Ngcj_E)4_m;2};}rUx6FpO1J{%;T>>190PBG zH^WZ|f;($|bnNJek(i37ic{Hqp*WG;o=PNDa1+Ls!gwqeJP>I)l~~M6U38)<=4Oz}q*Ixs z;h42%wd9yvoPABRUaFa@Fm4!^7)r65qqSkYRIN73oR64Qv(_3>t!AlgMmHN5b49Ia zwTi8(RW>8@rE1HJ>SJDoi?&&A*p)!NB|&3;zGXU*jWk1k>P+3Ny(6~SYP9XLS*+=u zCBb$FM42nK`Y98*VySG3Ps_T4J8))*H63J*lNPsNAG{yf4wN`_|hq z&LHBqt+h+vx#yv5=h*0+x!bBoqPwh*lky6`ftan?cVqOcQ z>SDvagn>y}WVlh1So12Hu&(W;lL?TV?kl-re3(Qelrt7aBUO>1;voU`+)V$NH2vl7v@N@n6))5+rObd((Y zq(ec$Mi-jRAx^rz#`P9}_I#t-IvXd%5 zo0w7)@#*-sl4>@ebpK4{RX#ge*b&bqM^tiqGNV$HDn7A2l}}E1 z7fbSVD(>bzqOv(v$mR+vlFB5~vlFSzHkC_lGj&nI*8gr~4HVoOFykeVPxJWVBMl8Kc3kvum! zJ(G^-cIpb!ihS~%SuWzF*CY9Wcq5X}R5m?94Jku6IzxnQvur~JL#eAs6C;!?(D9+CtqHd7Zgyk5g4Z$^UmIq zZK}PNv(RWejGkuMnzzcTX0UlOj!g491Z7EJ$Z6?+RvP7Y&8#~mnLDXcy)s624P9i7siMtO zwk4I^V^ye}v|kEyWvMqD=8Gd7uv$tU!%{jWS9mx#>MhAl*KEGQlWVV4->s-Xxmscj zwUt%(GuohXX0T>0IX7q3OZGm+ldGm`ZL+paS1MCyS(i&9Kn1vt#M2qX?`(co*NR`iR6aUH@^3b z(@*!07sOs9bH#VFWNW*Tgt{!bYDV7`sN+eJF736=I`jH8_lR9%x<5X~O@8ha;VpK7^l-eDN zhwcVL{Qqn5q5lEjTKxY>&tH55U;hsHB3uJk!2xK41;Ze5fXCrcxCicnJK(S2({MHH zf)s3nN!SFx3&+7D!~|}KTj4L^A~0bmtOJP|907;JOT-GEgh%0Ou;DB?5`Ij~;D_)7 zcpjdEZ^QlY88{z?Ap-XkQ@9uIfm`4PkXVBQaaa#0z)^4{904y%+yO+RheW6F6DklY z5GoKV5GoKV5GwG-D&U5R6jQ=wB00t-asjTNct_o#CO0^5n@-!Vt6_1YB>Lus(*0mu z@9F`^#Yfz11OrEUsi1*ZArs*WU!M41KH7DU_EvJ2bzYx(7S=D-&#&sP^W6JGr79OH z%d^lGE6v5Ll2Dscpho^8S=WsstBSICwid}ccC2uJ`^ljEOO%pe`GmQL*ie37y}V#I z>bkVeI%2-g51AK>iP?#GF-|jwtYmssbXhE-WAam6g#5pPwnqpVul9`q9F z$tS8c}xE?MA2d3d{5Pdhof3R2J5qJ>VAbSWdfJs;nL-0=cPx?gm z5;9B??TmudG0A%1yc!~W2vX9_qxE>Zj_7a#d0~7EH`vblSUxF{d z<**)!y3P6DCq9 z)*E&ASs!Jl(fuftq5L&$@8YCj$xs}q)zUGoX_YTn6ri`^fd}@=gWRBp-b^ZSb`(>S z8Oox$ss1CZ1XP_!bmhYIi0Uq8>pauhZIP~p(4i4E#8LziUY14XZ6eBg@5#C$y<}~& z0flFxzaHM(RamDtJ(TIPOqji{3~nU}Uc^=2JWMW+#iYRCG*wRnZL`Ljf;SbEXQXsl zaON7!J*w+@qEwX3-ACS}+&o0 zoNn=<>bN)y7qzA=UeI~AsrMi(*MEaeU{Ni0L-{$e>ZIfTrAh^RYsJ)%+Onn7$KJjO z^hb-qec#F1-E-ArDPAsOB<9{CrTgdg49Yv~&88?PU_IXTT*Azn^)J`aDPSm`5*-up z3Sq^B`A=g$Qi!TyU7pT3U_wgiw#dAx+lV@imA1-hC%@ z%xFd9?ZjVnB5$A(gIV(j%kwu`t~srrKwGaY^6HI^D_L&b-6S@^ZgqnU$vGlV_^6xd zW(iQ8)3!{9fC(Gvsw~g5-Jvd->|J1AgKrzE(lA?WI3QA0-%U1sj9>Cz)n}Hq*#x~k zS1MoN#%a`gd6umzv0T)>XU}a(Ad)d)3gg?af8%LIykyJ84D^h~n@H-Zii}9^3`hSQ z?L3h&YsYSz?vUl5)a7u-Gbw3+f2w-`=bBziRwpz1$X;7rCRup1qvb}hRGpbJUb0(= z$~2T)jPjp+tms?pFuPF6eijC|xkkHQX}R++Yd+!@NXc%+*%GOJc(;do=CE!d!LUQ` z1wFrsKj@vt-r(n!<=(IUj$SE9o81bXA_miuewn??Uc0@m%w9BX?OT}j!@%Fr+#OR3 z&JX>-T3~|WRlqx=^!oov_}hOA;{W^a_dkVC|8=+%Zh`&qL1;h?D>5N?1^ z!pA}ONL&PU*aTznF1VXGz+LbKxE#)b8Td86{wweldraQ5hO}1cH%dUG*CUl1{^^zkCl2|=iJ{79NhQul|3)F-#fmX z{0{nuhsY`G9n=;l1p{USen)ouMExBsHOOz%g{&}HUtH|f(|_&FXsLZq$bSVf^q_iI z0(yVHK^~M^+h@kT0dxB8(P7{9^j$Fb9k8|J==qC!AaGyKTSMyL-iTp0sOS4}fv~~q z-oy1bKpc#Vs$Z@MgbVt#BNnYn6clf9Fw`*8Xd{ zuDt%QUgdgTknW>W;0js)Z{st^@vFuEAMqTzXYlDCfV<$6a3x#;Wsr6Mv*0u^;AMRL 
zU%=1dS&(-E{s!)VFTw3_8_4?rd*K5302~9~!S}xpz6|@}kKrRwgA%+8o~3S&fW!tq z54XVQK;8qm6m;GDIilgQP=Qc^P=Qc^P=Qc^P=Qc^P=Qc^RVm::ManagedArray( template CHAI_INLINE -CHAI_HOST_DEVICE ManagedArray::ManagedArray() : - m_active_pointer(nullptr), - m_active_base_pointer(nullptr), - m_resource_manager(nullptr), - m_elems(0), - m_offset(0), - m_pointer_record(nullptr), - m_is_slice(false) +CHAI_HOST_DEVICE ManagedArray::ManagedArray() { } template CHAI_INLINE -CHAI_HOST_DEVICE ManagedArray::ManagedArray( - size_t elems, ExecutionSpace space) : - m_active_pointer(nullptr), - m_active_base_pointer(nullptr), - m_resource_manager(nullptr), - m_elems(elems), - m_offset(0), - m_pointer_record(nullptr), - m_is_slice(false) +CHAI_HOST_DEVICE ManagedArray::ManagedArray(size_t elems, ExecutionSpace space) : + m_elems(elems) { this->allocate(elems, space); } template CHAI_INLINE -CHAI_HOST_DEVICE ManagedArray::ManagedArray(std::nullptr_t) : - m_active_pointer(nullptr), - m_active_base_pointer(nullptr), - m_resource_manager(nullptr), - m_elems(0), - m_offset(0), - m_pointer_record(nullptr), - m_is_slice(false) +CHAI_HOST_DEVICE ManagedArray::ManagedArray(std::nullptr_t) { } @@ -145,9 +124,7 @@ CHAI_HOST_DEVICE ManagedArray::ManagedArray(T* data, m_active_base_pointer(data), m_resource_manager(array_manager), m_elems(elems), - m_offset(0), - m_pointer_record(pointer_record), - m_is_slice(false) + m_pointer_record(pointer_record) { } @@ -155,7 +132,7 @@ template CHAI_INLINE CHAI_HOST ManagedArray ManagedArray::slice(size_t offset, size_t elems) { ManagedArray slice; - if(offset + elems > size()) { + if (offset + elems > size()) { CHAI_LOG("ManagedArray", "Invalid slice. No active pointer or index out of bounds"); } else { slice.m_active_pointer = m_active_pointer + offset; From 7a70daf7d8164d6d7d3171d7b91728cf3f043780 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Tue, 24 Sep 2019 14:23:10 -0700 Subject: [PATCH 19/58] Use implicitly generated constructors --- src/chai/ManagedArray_thin.inl | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/src/chai/ManagedArray_thin.inl b/src/chai/ManagedArray_thin.inl index bdf47069..528751ee 100644 --- a/src/chai/ManagedArray_thin.inl +++ b/src/chai/ManagedArray_thin.inl @@ -82,9 +82,7 @@ CHAI_HOST_DEVICE ManagedArray::ManagedArray( template CHAI_INLINE -CHAI_HOST_DEVICE ManagedArray::ManagedArray() -{ -} +CHAI_HOST_DEVICE ManagedArray::ManagedArray() = default; template CHAI_INLINE @@ -100,19 +98,9 @@ CHAI_HOST_DEVICE ManagedArray::ManagedArray(std::nullptr_t) { } - template CHAI_INLINE -CHAI_HOST_DEVICE ManagedArray::ManagedArray(ManagedArray const& other) : - m_active_pointer(other.m_active_pointer), - m_active_base_pointer(other.m_active_base_pointer), - m_resource_manager(other.m_resource_manager), - m_elems(other.m_elems), - m_offset(other.m_offset), - m_pointer_record(other.m_pointer_record), - m_is_slice(other.m_is_slice) -{ -} +CHAI_HOST_DEVICE ManagedArray::ManagedArray(ManagedArray const& other) = default; template CHAI_INLINE From 30a844c4150d89eb0b2e5fa20cf3fe1ce8019b17 Mon Sep 17 00:00:00 2001 From: "Adam J. 
Kunen" Date: Fri, 27 Sep 2019 14:08:56 -0700 Subject: [PATCH 20/58] Modified to allow bringing chai and umpire into a higher level project with add_subdirectory --- cmake/thirdparty/SetupChaiThirdparty.cmake | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/cmake/thirdparty/SetupChaiThirdparty.cmake b/cmake/thirdparty/SetupChaiThirdparty.cmake index 7c50a86e..ee8ffbb4 100644 --- a/cmake/thirdparty/SetupChaiThirdparty.cmake +++ b/cmake/thirdparty/SetupChaiThirdparty.cmake @@ -42,13 +42,16 @@ ####################################################################### set(ENABLE_FORTRAN Off CACHE BOOL "Enable Fortran in Umpire") -if (DEFINED umpire_DIR) - find_package(umpire REQUIRED) - blt_register_library( - NAME umpire - INCLUDES ${UMPIRE_INCLUDE_DIRS} - LIBRARIES umpire) -else () - add_subdirectory(${PROJECT_SOURCE_DIR}/src/tpl/umpire) +if (NOT TARGET umpire) + if (DEFINED umpire_DIR) + find_package(umpire REQUIRED) + + blt_register_library( + NAME umpire + INCLUDES ${UMPIRE_INCLUDE_DIRS} + LIBRARIES umpire) + else () + add_subdirectory(${PROJECT_SOURCE_DIR}/src/tpl/umpire) + endif() endif() From eb8b8da8145793b3a0ca1ab8a7cf2e486d4f3be8 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Mon, 30 Sep 2019 12:20:38 -0700 Subject: [PATCH 21/58] Added managed_ptr --- src/chai/CMakeLists.txt | 1 + src/chai/managed_ptr.hpp | 1375 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 1376 insertions(+) create mode 100644 src/chai/managed_ptr.hpp diff --git a/src/chai/CMakeLists.txt b/src/chai/CMakeLists.txt index 7821fa3f..350cdf30 100644 --- a/src/chai/CMakeLists.txt +++ b/src/chai/CMakeLists.txt @@ -59,6 +59,7 @@ set (chai_headers ExecutionSpaces.hpp ManagedArray.hpp ManagedArray.inl + managed_ptr.hpp PointerRecord.hpp Types.hpp) diff --git a/src/chai/managed_ptr.hpp b/src/chai/managed_ptr.hpp new file mode 100644 index 00000000..36c25d95 --- /dev/null +++ b/src/chai/managed_ptr.hpp @@ -0,0 +1,1375 @@ +// --------------------------------------------------------------------- +// Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All +// rights reserved. +// +// Produced at the Lawrence Livermore National Laboratory. +// +// This file is part of CHAI. +// +// LLNL-CODE-705877 +// +// For details, see https:://github.com/LLNL/CHAI +// Please also see the NOTICE and LICENSE files. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the +// distribution. +// +// - Neither the name of the LLNS/LLNL nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS +// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY +// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// --------------------------------------------------------------------- + +#ifndef MANAGED_PTR_H_ +#define MANAGED_PTR_H_ + +#include "chai/config.hpp" + +#ifndef CHAI_DISABLE_RM +#include "chai/ArrayManager.hpp" +#endif + +#include "chai/ChaiMacros.hpp" +#include "chai/ExecutionSpaces.hpp" +#include "chai/ManagedArray.hpp" +#include "chai/Types.hpp" + +// Standard libary headers +#include +#include + +#ifdef __CUDACC__ + +inline void gpuErrorCheck(cudaError_t code, const char *file, int line, bool abort=true) +{ + if (code != cudaSuccess) { + fprintf(stderr, "[CHAI] GPU Error: %s %s %d\n", cudaGetErrorString(code), file, line); + if (abort) { + exit(code); + } + } +} + +#if DEBUG +#define GPU_ERROR_CHECK(code) { gpuErrorCheck((code), __FILE__, __LINE__); } +#else +#define GPU_ERROR_CHECK(code) code +#endif + +inline void debug_cudaDeviceSynchronize() { +#if DEBUG + GPU_ERROR_CHECK(cudaDeviceSynchronize()); +#endif +} + +#endif // __CUDACC__ + +namespace chai { + namespace detail { +#ifdef __CUDACC__ + template + __global__ void destroy_on_device(T* gpuPointer); +#endif + } + + struct managed_ptr_record { + managed_ptr_record() : + m_num_references(1), + m_callback() + { + } + + managed_ptr_record(std::function callback) : + m_num_references(1), + m_callback(callback) + { + } + + size_t use_count() { + return m_num_references; + } + + void addReference() { + m_num_references++; + } + + void removeReference() { + m_num_references--; + } + + ExecutionSpace getLastSpace() { + return m_last_space; + } + + void set_callback(std::function callback) { + m_callback = callback; + } + + size_t m_num_references = 1; /// The reference counter + ExecutionSpace m_last_space = NONE; /// The last space executed in + std::function m_callback; /// Callback to handle events + }; + + /// + /// @class managed_ptr + /// @author Alan Dayton + /// + /// This wrapper stores both host and device pointers so that polymorphism can be + /// used in both contexts with a single API. It is modeled after std::shared_ptr, + /// so it does reference counting and automatically cleans up when the last + /// reference is destroyed. If we ever do multi-threading on the CPU, locking will + /// need to be added to the reference counter. + /// The make_managed and make_managed_from_factory functions call new on both the + /// host and device so that polymorphism is valid in both contexts. Simply copying + /// an object to the device will not copy the vtable, so new must be called on + /// the device. + /// + /// Usage Requirements: + /// Methods that can be called on both the host and device must be declared + /// with the __host__ __device__ specifiers. This includes constructors + /// and destructors. Furthermore, destructors of base and child classes + /// must all be declared virtual. + /// This wrapper does NOT automatically sync the device object if the host object + /// is updated and vice versa. 
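  ///    For instance, after "ptr->setValue(1);" on the host, the device-side
  ///    instance still holds the old value. A sketch of updating both copies
  ///    (assuming chai::managed_ptr<MyClass> ptr, with MyClass::setValue
  ///    declared __host__ __device__):
  ///
  ///       ptr->setValue(1);                                // host instance
  ///       RAJA::forall<RAJA::cuda_exec<128>>(RAJA::RangeSegment(0, 1),
  ///          [=] __device__ (int) { ptr->setValue(1); });  // device instance
  ///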
If you wish to keep both instances in sync, + /// you must explicitly modify the object in both the host context and the + /// device context. + /// Raw array members of T need to be initialized correctly with a host or + /// device pointer. If a ManagedArray is passed to the make_managed or + /// make_managed_from_factory methods in place of a raw array, it will be + /// cast to the appropriate host or device pointer when passed to T's + /// constructor on the host and on the device. If it is desired that these + /// host and device pointers be kept in sync, define a callback that maintains + /// a copy of the ManagedArray and upon the ACTION_MOVE event calls the copy + /// constructor of that ManagedArray. + /// If a raw array is passed to make_managed, accessing that member will be + /// valid only in the correct context. To prevent the accidental use of that + /// member in the wrong context, any methods that access it should be __host__ + /// only or __device__ only. Special care should be taken when passing raw + /// arrays as arguments to member functions. + /// The same restrictions for raw array members also apply to raw pointer members. + /// A managed_ptr can be passed to the make_managed or make_managed_from_factory + /// methods in place of a raw pointer, and the host constructor of T will + /// be given the extracted host pointer, and likewise the device constructor + /// of T will be given the extracted device pointer. It is recommended that + /// a callback is defined that maintains a copy of the managed_ptr so that + /// the raw pointers are not accidentally destroyed prematurely (since + /// managed_ptr does reference counting). It is also recommended that the + /// callback calls the copy constructor of the managed_ptr on the ACTION_MOVE + /// event so that the ACTION_MOVE event is triggered also for the inner + /// managed_ptr. + /// Again, if a raw pointer is passed to make_managed, accessing that member will + /// only be valid in the correct context. Take care when passing raw pointers + /// as arguments to member functions. + /// Be aware that only the debug version of CHAI will check for GPU errors. So + /// if you are seeing strange behavior and/or your code crashes in the + /// constructor/destructor of T, then build CHAI as debug to see what is + /// going on. For example, the constructor of T might run out of per-thread + /// stack space on the GPU. If that happens, you can increase the device + /// limit of per-thread stack space. Alternatively, you could add a call + /// to cudaDeviceSynchronize after calling make_managed and check the return + /// code of cudaDeviceSynchronize. + /// + template + class managed_ptr { + public: + using element_type = T; + + /// + /// @author Alan Dayton + /// + /// Default constructor. + /// Initializes the reference count to 0. + /// + CHAI_HOST_DEVICE constexpr managed_ptr() noexcept {} + + /// + /// @author Alan Dayton + /// + /// Construct from nullptr. + /// Initializes the reference count to 0. + /// + CHAI_HOST_DEVICE constexpr managed_ptr(std::nullptr_t) noexcept {} + + /// + /// @author Alan Dayton + /// + /// Constructs a managed_ptr from the given pointers. U* must be convertible + /// to T*. 
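  ///
  /// A sketch of direct construction, given hypothetical Base and Derived types
  /// and pointers "host_ptr" and "device_ptr" of type Derived* that were already
  /// allocated and constructed in their respective spaces (the make_managed
  /// helpers described above are the usual way to create a managed_ptr):
  ///
  ///    chai::managed_ptr<Base> p({chai::CPU, chai::GPU}, {host_ptr, device_ptr});
  ///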
+ /// + /// @pre spaces.size() == pointers.size() + /// + /// @param[in] spaces A list of execution spaces + /// @param[in] pointers A list of pointers to take ownership of + /// + template + managed_ptr(std::initializer_list spaces, + std::initializer_list pointers) : + m_cpu_pointer(nullptr), + m_gpu_pointer(nullptr), + m_pointer_record(new managed_ptr_record()) + { + static_assert(std::is_convertible::value, + "U* must be convertible to T*."); + + // TODO: In c++14 convert to a static_assert + if (spaces.size() != pointers.size()) { + printf("[CHAI] WARNING: The number of spaces is different than the number of pointers given!\n"); + } + + int i = 0; + + for (const auto& space : spaces) { + switch (space) { + case CPU: + m_cpu_pointer = pointers.begin()[i++]; + break; +#ifdef __CUDACC__ + case GPU: + m_gpu_pointer = pointers.begin()[i++]; + break; +#endif + default: + ++i; + printf("[CHAI] WARNING: Execution space not supported by chai::managed_ptr!\n"); + break; + } + } + } + + /// + /// @author Alan Dayton + /// + /// Constructs a managed_ptr from the given pointers and callback function. + /// U* must be convertible to T*. + /// + /// @pre spaces.size() == pointers.size() + /// + /// @param[in] spaces A list of execution spaces + /// @param[in] pointers A list of pointers to take ownership of + /// @param[in] callback The user defined callback to call on trigger events + /// + template + CHAI_HOST managed_ptr(std::initializer_list spaces, + std::initializer_list pointers, + std::function callback) : + m_cpu_pointer(nullptr), + m_gpu_pointer(nullptr), + m_pointer_record(new managed_ptr_record(callback)) + { + static_assert(std::is_convertible::value, + "U* must be convertible to T*."); + + // TODO: In c++14 convert to a static_assert + if (spaces.size() != pointers.size()) { + printf("[CHAI] WARNING: The number of spaces is different than the number of pointers given.\n"); + } + + int i = 0; + + for (const auto& space : spaces) { + switch (space) { + case CPU: + m_cpu_pointer = pointers.begin()[i++]; + break; +#ifdef __CUDACC__ + case GPU: + m_gpu_pointer = pointers.begin()[i++]; + break; +#endif + default: + ++i; + printf("[CHAI] WARNING: Execution space not supported by chai::managed_ptr!\n"); + break; + } + } + } + + /// + /// @author Alan Dayton + /// + /// Copy constructor. + /// Constructs a copy of the given managed_ptr, increases the reference count, + /// and if the execution space is different, calls the user defined callback + /// with ACTION_MOVE for each of the execution spaces. + /// + /// @param[in] other The managed_ptr to copy + /// + CHAI_HOST_DEVICE managed_ptr(const managed_ptr& other) noexcept : + m_cpu_pointer(other.m_cpu_pointer), + m_gpu_pointer(other.m_gpu_pointer), + m_pointer_record(other.m_pointer_record) + { +#ifndef __CUDA_ARCH__ + addReference(); + move(); +#endif + } + + /// + /// @author Alan Dayton + /// + /// Converting constructor. + /// Constructs a copy of the given managed_ptr, increases the reference count, + /// and if the execution space is different, calls the user defined callback + /// with ACTION_MOVE for each of the execution spaces. U* must be convertible + /// to T*. 
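  ///
  /// For example (a sketch with hypothetical types, where Derived publicly
  /// inherits from Base and has a __host__ __device__ default constructor):
  ///
  ///    chai::managed_ptr<Derived> derived = chai::make_managed<Derived>();
  ///    chai::managed_ptr<Base> base = derived;   // uses this converting constructor
  ///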
+ /// + /// @param[in] other The managed_ptr to copy + /// + template + CHAI_HOST_DEVICE managed_ptr(const managed_ptr& other) noexcept : + m_cpu_pointer(other.m_cpu_pointer), + m_gpu_pointer(other.m_gpu_pointer), + m_pointer_record(other.m_pointer_record) + { + static_assert(std::is_convertible::value, + "U* must be convertible to T*."); + +#ifndef __CUDA_ARCH__ + addReference(); + move(); +#endif + } + + /// + /// @author Alan Dayton + /// + /// Aliasing constructor. + /// Has the same ownership information as other, but holds different pointers. + /// + /// @pre spaces.size() == pointers.size() + /// + /// @param[in] other The managed_ptr to copy ownership information from + /// @param[in] spaces A list of execution spaces + /// @param[in] pointers A list of pointers to maintain a reference to + /// + template + CHAI_HOST managed_ptr(const managed_ptr& other, + std::initializer_list spaces, + std::initializer_list pointers) noexcept : + m_pointer_record(other.m_pointer_record) + { + // TODO: In c++14 convert to a static_assert + if (spaces.size() != pointers.size()) { + printf("[CHAI] WARNING: The number of spaces is different than the number of pointers given.\n"); + } + + int i = 0; + + for (const auto& space : spaces) { + switch (space) { + case CPU: + m_cpu_pointer = pointers.begin()[i++]; + break; +#ifdef __CUDACC__ + case GPU: + m_gpu_pointer = pointers.begin()[i++]; + break; +#endif + default: + ++i; + printf("[CHAI] WARNING: Execution space not supported by chai::managed_ptr!\n"); + break; + } + } + + addReference(); + move(); + } + + /// + /// @author Alan Dayton + /// + /// Destructor. Decreases the reference count and if this is the last reference, + /// clean up. + /// + CHAI_HOST_DEVICE ~managed_ptr() { +#ifdef __CUDACC__ + // This trick came from Max Katz at Nvidia. + // Taking the address of this kernel ensures that it gets instantiated + // by the compiler and can be used within __CUDA_ARCH__. Without this, + // calling destroy_on_device within the confines of __CUDA_ARCH__ will + // always fail with error code 0x8 (invalid device function). + // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#restrictions + // From the CUDA Programming Guide Restrictions: + // "If a __global__ function template is instantiated and launched from + // the host, then the function template must be instantiated with the + // same template arguments irrespective of whether __CUDA_ARCH__ is + // defined and regardless of the value of __CUDA_ARCH__." + (void) &detail::destroy_on_device; +#endif + +#ifndef __CUDA_ARCH__ + removeReference(); +#endif + } + + /// + /// @author Alan Dayton + /// + /// Copy assignment operator. + /// Copies the given managed_ptr and increases the reference count. + /// + /// @param[in] other The managed_ptr to copy + /// + CHAI_HOST_DEVICE managed_ptr& operator=(const managed_ptr& other) noexcept { + if (this != &other) { +#ifndef __CUDA_ARCH__ + removeReference(); +#endif + + m_cpu_pointer = other.m_cpu_pointer; + m_gpu_pointer = other.m_gpu_pointer; + m_pointer_record = other.m_pointer_record; + +#ifndef __CUDA_ARCH__ + addReference(); + move(); +#endif + } + + return *this; + } + + /// + /// @author Alan Dayton + /// + /// Conversion copy assignment operator. + /// Copies the given managed_ptr and increases the reference count. + /// U* must be convertible to T*. 
+ /// + /// @param[in] other The managed_ptr to copy + /// + template + CHAI_HOST_DEVICE managed_ptr& operator=(const managed_ptr& other) noexcept { + static_assert(std::is_convertible::value, + "U* must be convertible to T*."); + +#ifndef __CUDA_ARCH__ + removeReference(); +#endif + + m_cpu_pointer = other.m_cpu_pointer; + m_gpu_pointer = other.m_gpu_pointer; + m_pointer_record = other.m_pointer_record; + +#ifndef __CUDA_ARCH__ + addReference(); + move(); +#endif + + return *this; + } + + /// + /// @author Alan Dayton + /// + /// Returns the CPU or GPU pointer depending on the calling context. + /// + CHAI_HOST_DEVICE inline T* get() const { +#ifndef __CUDA_ARCH__ + move(); + return m_cpu_pointer; +#else + return m_gpu_pointer; +#endif + } + + /// + /// @author Alan Dayton + /// + /// Returns the pointer corresponding to the given execution space. + /// + /// @param[in] space The execution space + /// @param[in] move Whether or not to trigger the move event (default is true) + /// + CHAI_HOST inline T* get(const ExecutionSpace space, const bool move=true) const { + if (move) { + this->move(); + } + + switch (space) { + case CPU: + return m_cpu_pointer; +#ifdef __CUDACC__ + case GPU: + return m_gpu_pointer; +#endif + default: + return nullptr; + } + } + + /// + /// @author Alan Dayton + /// + /// Returns the CPU or GPU pointer depending on the calling context. + /// + CHAI_HOST_DEVICE inline T* operator->() const { +#ifndef __CUDA_ARCH__ + return m_cpu_pointer; +#else + return m_gpu_pointer; +#endif + } + + /// + /// @author Alan Dayton + /// + /// Returns the CPU or GPU reference depending on the calling context. + /// + CHAI_HOST_DEVICE inline T& operator*() const { +#ifndef __CUDA_ARCH__ + return *m_cpu_pointer; +#else + return *m_gpu_pointer; +#endif + } + + /// + /// @author Alan Dayton + /// + /// Returns the number of managed_ptrs owning these pointers. + /// + CHAI_HOST std::size_t use_count() const { + if (m_pointer_record) { + return m_pointer_record->use_count(); + } + else { + return 0; + } + } + + /// + /// @author Alan Dayton + /// + /// Returns true if the contained pointer is not nullptr, false otherwise. + /// + CHAI_HOST_DEVICE inline explicit operator bool() const noexcept { + return get() != nullptr; + } + + /// + /// @author Alan Dayton + /// + /// Sets the callback, which can be used to handle specific actions. + /// ACTION_MOVE can be used to call the copy constructor for ManagedArrays. + /// ACTION_FREE can be used to provide a custom deleter operation. Use + /// ExecutionSpace::NONE if freeing anything other than the actual object + /// pointers. + /// + /// @param[in] callback The callback to call when certain actions occur + /// + CHAI_HOST void set_callback(std::function callback) { + if (m_pointer_record) { + m_pointer_record->set_callback(callback); + } + else { + printf("[CHAI] WARNING: No callback is allowed for managed_ptr that does not contain a valid pointer (i.e. the default or nullptr constructor was used)!\n"); + } + } + + private: + T* m_cpu_pointer = nullptr; /// The CPU pointer + T* m_gpu_pointer = nullptr; /// The GPU pointer + managed_ptr_record* m_pointer_record = nullptr; /// The pointer record + + /// Needed for the converting constructor + template + friend class managed_ptr; + + /// Needed to use the make_managed API + template + friend CHAI_HOST managed_ptr make_managed(Args... args); + + /// + /// @author Alan Dayton + /// + /// If the execution space has changed, calls the user provided callback + /// with the ACTION_MOVE event. 
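  /// The callback receives the Action, the ExecutionSpace involved, and a void*
  /// to the pointer for that space, and returns a bool (only consulted for
  /// ACTION_FREE). A sketch of a callback set through set_callback that keeps a
  /// captured ManagedArray moving with the object (inner_array is a hypothetical
  /// chai::ManagedArray<double> that was passed to the object's constructor):
  ///
  ///    ptr.set_callback([inner_array] (chai::Action action, chai::ExecutionSpace, void*) {
  ///       if (action == chai::ACTION_MOVE) {
  ///          chai::ManagedArray<double> copy(inner_array);  // copy constructor triggers the move
  ///       }
  ///       return false;  // let managed_ptr perform its default cleanup on ACTION_FREE
  ///    });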
+ ///
+ CHAI_HOST void move() const {
+#ifndef CHAI_DISABLE_RM
+ if (m_pointer_record) {
+ ExecutionSpace newSpace = ArrayManager::getInstance()->getExecutionSpace();
+
+ if (newSpace != NONE && newSpace != m_pointer_record->getLastSpace()) {
+ m_pointer_record->m_last_space = newSpace;
+
+ if (m_pointer_record->m_callback) {
+ for (int space = NONE; space < NUM_EXECUTION_SPACES; ++space) {
+ ExecutionSpace execSpace = static_cast(space);
+
+ T* pointer = get(execSpace, false);
+
+ using T_non_const = typename std::remove_const::type;
+
+ // We can use const_cast because managed_ptr can only
+ // be constructed with non const pointers.
+ T_non_const* temp = const_cast(pointer);
+
+ void* voidPointer = static_cast(temp);
+
+ m_pointer_record->m_callback(ACTION_MOVE, execSpace, voidPointer);
+ }
+ }
+ }
+ }
+#endif
+ }
+
+ ///
+ /// @author Alan Dayton
+ ///
+ /// Increments the reference count.
+ ///
+ CHAI_HOST void addReference() {
+ if (m_pointer_record) {
+ m_pointer_record->addReference();
+ }
+ }
+
+ ///
+ /// @author Alan Dayton
+ ///
+ /// Decrements the reference counter. If the resulting number of references
+ /// is 0, clean up the object.
+ ///
+ CHAI_HOST void removeReference() {
+ if (m_pointer_record) {
+ m_pointer_record->removeReference();
+
+ if (m_pointer_record->use_count() == 0) {
+ if (m_pointer_record->m_callback) {
+ // Destroy device pointer first to take advantage of asynchrony
+ for (int space = NUM_EXECUTION_SPACES-1; space >= NONE; --space) {
+ ExecutionSpace execSpace = static_cast(space);
+ T* pointer = get(execSpace, false);
+
+ using T_non_const = typename std::remove_const::type;
+
+ // We can use const_cast because managed_ptr can only
+ // be constructed with non const pointers.
+ T_non_const* temp = const_cast(pointer);
+ void* voidPointer = static_cast(temp);
+
+ if (!m_pointer_record->m_callback(ACTION_FREE,
+ execSpace,
+ voidPointer)) {
+ switch (execSpace) {
+ case CPU:
+ delete pointer;
+ break;
+#ifdef __CUDACC__
+ case GPU:
+ {
+ if (pointer) {
+ detail::destroy_on_device<<<1, 1>>>(temp);
+ debug_cudaDeviceSynchronize();
+ }
+
+ break;
+ }
+#endif
+ default:
+ break;
+ }
+ }
+ }
+ }
+ else {
+ // Destroy device pointer first to take advantage of asynchrony
+ for (int space = NUM_EXECUTION_SPACES-1; space >= NONE; --space) {
+ ExecutionSpace execSpace = static_cast(space);
+ T* pointer = get(execSpace, false);
+
+ switch (execSpace) {
+ case CPU:
+ delete pointer;
+ break;
+#ifdef __CUDACC__
+ case GPU:
+ {
+ if (pointer) {
+ detail::destroy_on_device<<<1, 1>>>(pointer);
+ debug_cudaDeviceSynchronize();
+ }
+
+ break;
+ }
+#endif
+ default:
+ break;
+ }
+ }
+ }
+
+ delete m_pointer_record;
+ }
+ }
+ }
+
+ };
+
+ namespace detail {
+ ///
+ /// @author Alan Dayton
+ ///
+ /// This implementation of getRawPointers handles every non-CHAI type.
+ ///
+ /// @param[in] arg The non-CHAI type, which will simply be returned
+ ///
+ /// @return arg
+ ///
+ template
+ CHAI_HOST_DEVICE T getRawPointers(T arg) {
+ return arg;
+ }
+
+ ///
+ /// @author Alan Dayton
+ ///
+ /// This implementation of getRawPointers handles the CHAI ManagedArray type.
+ ///
+ /// @param[in] arg The ManagedArray from which to extract a raw pointer
+ ///
+ /// @return arg cast to a raw pointer
+ ///
+ template
+ CHAI_HOST_DEVICE T* getRawPointers(ManagedArray arg) {
+ return (T*) arg;
+ }
+
+ ///
+ /// @author Alan Dayton
+ ///
+ /// This implementation of getRawPointers handles the CHAI managed_ptr type.
+ /// The managed_ptr type is not implicitly convertible to a raw pointer, so + /// when using the make_managed API, it is necessary to pull the raw pointers + /// out of the managed_ptr. + /// + /// @param[in] arg The managed_ptr from which to extract a raw pointer + /// + /// @return a raw pointer acquired from arg + /// + template + CHAI_HOST_DEVICE T* getRawPointers(managed_ptr arg) { + return arg.get(); + } + + /// + /// @author Alan Dayton + /// + /// Creates a new object on the host and returns a pointer to it. + /// This implementation of new_on_host is called when no arguments need to be + /// converted to raw pointers. + /// + /// @param[in] args The arguments to T's constructor + /// + /// @return a pointer to the new object on the host + /// + template ::value, int>::type = 0> + CHAI_HOST T* new_on_host(Args&&... args) { + return new T(args...); + } + + /// + /// @author Alan Dayton + /// + /// Creates a new object on the host and returns a pointer to it. + /// This implementation of new_on_host is called when arguments do need to be + /// converted to raw pointers. + /// + /// @param[in] args The arguments to T's constructor + /// + /// @return a pointer to the new object on the host + /// + template ::value, int>::type = 0> + CHAI_HOST T* new_on_host(Args&&... args) { + return new T(getRawPointers(args)...); + } + + /// + /// @author Alan Dayton + /// + /// Creates a new T on the host. + /// Sets the execution space to the CPU so that ManagedArrays and managed_ptrs + /// are moved to the host as necessary. + /// + /// @param[in] args The arguments to T's constructor + /// + /// @return The host pointer to the new T + /// + template + CHAI_HOST T* make_on_host(Args&&... args) { +#ifndef CHAI_DISABLE_RM + // Get the ArrayManager and save the current execution space + chai::ArrayManager* arrayManager = chai::ArrayManager::getInstance(); + ExecutionSpace currentSpace = arrayManager->getExecutionSpace(); + + // Set the execution space so that ManagedArrays and managed_ptrs + // are handled properly + arrayManager->setExecutionSpace(CPU); +#endif + + // Create on the host + T* cpuPointer = detail::new_on_host(args...); + +#ifndef CHAI_DISABLE_RM + // Set the execution space back to the previous value + arrayManager->setExecutionSpace(currentSpace); +#endif + + // Return the CPU pointer + return cpuPointer; + } + + /// + /// @author Alan Dayton + /// + /// Calls a factory method to create a new object on the host. + /// Sets the execution space to the CPU so that ManagedArrays and managed_ptrs + /// are moved to the host as necessary. + /// + /// @param[in] f The factory method + /// @param[in] args The arguments to the factory method + /// + /// @return The host pointer to the new object + /// + template + CHAI_HOST T* make_on_host_from_factory(F f, Args&&... 
args) {
+#ifndef CHAI_DISABLE_RM
+ // Get the ArrayManager and save the current execution space
+ chai::ArrayManager* arrayManager = chai::ArrayManager::getInstance();
+ ExecutionSpace currentSpace = arrayManager->getExecutionSpace();
+
+ // Set the execution space so that ManagedArrays and managed_ptrs
+ // are handled properly
+ arrayManager->setExecutionSpace(CPU);
+#endif
+
+ // Create the object on the host
+ T* cpuPointer = f(args...);
+
+#ifndef CHAI_DISABLE_RM
+ // Set the execution space back to the previous value
+ arrayManager->setExecutionSpace(currentSpace);
+#endif
+
+ // Return the CPU pointer
+ return cpuPointer;
+ }
+
+#ifdef __CUDACC__
+ ///
+ /// @author Alan Dayton
+ ///
+ /// Creates a new object on the device and stores a pointer to it in gpuPointer.
+ /// This implementation of new_on_device is called when no arguments need to be
+ /// converted to raw pointers.
+ ///
+ /// @param[out] gpuPointer Used to return the device pointer to the new object
+ /// @param[in] args The arguments to T's constructor
+ ///
+ template ::value, int>::type = 0>
+ CHAI_DEVICE void new_on_device(T** gpuPointer, Args&&... args) {
+ *gpuPointer = new T(args...);
+ }
+
+ ///
+ /// @author Alan Dayton
+ ///
+ /// Creates a new object on the device and stores a pointer to it in gpuPointer.
+ /// This implementation of new_on_device is called when arguments do need to be
+ /// converted to raw pointers.
+ ///
+ /// @param[out] gpuPointer Used to return the device pointer to the new object
+ /// @param[in] args The arguments to T's constructor
+ ///
+ template ::value, int>::type = 0>
+ CHAI_DEVICE void new_on_device(T** gpuPointer, Args&&... args) {
+ *gpuPointer = new T(getRawPointers(args)...);
+ }
+
+ ///
+ /// @author Alan Dayton
+ ///
+ /// Creates a new T on the device.
+ ///
+ /// @param[out] gpuPointer Used to return the device pointer to the new T
+ /// @param[in] args The arguments to T's constructor
+ ///
+ /// @note Cannot capture argument packs in an extended device lambda,
+ /// so explicit kernel is needed.
+ ///
+ template
+ __global__ void make_on_device(T** gpuPointer, Args... args)
+ {
+ new_on_device(gpuPointer, args...);
+ }
+
+ ///
+ /// @author Alan Dayton
+ ///
+ /// Creates a new object on the device by calling the given factory method.
+ ///
+ /// @param[out] gpuPointer Used to return the device pointer to the new object
+ /// @param[in] f The factory method (must be a __device__ or __host__ __device__
+ /// method)
+ /// @param[in] args The arguments to the factory method
+ ///
+ /// @note Cannot capture argument packs in an extended device lambda,
+ /// so explicit kernel is needed.
+ ///
+ template
+ __global__ void make_on_device_from_factory(T** gpuPointer, F f, Args... args)
+ {
+ *gpuPointer = f(args...);
+ }
+
+ ///
+ /// @author Alan Dayton
+ ///
+ /// Destroys the device pointer.
+ ///
+ /// @param[in] gpuPointer The device pointer to call delete on
+ ///
+ template
+ __global__ void destroy_on_device(T* gpuPointer)
+ {
+ if (gpuPointer) {
+ delete gpuPointer;
+ }
+ }
+
+ ///
+ /// @author Alan Dayton
+ ///
+ /// Creates a new T on the device.
+ ///
+ /// @param[in] args The arguments to T's constructor
+ ///
+ /// @return The device pointer to the new T
+ ///
+ template
+ CHAI_HOST T* make_on_device(Args...
args) { +#ifndef CHAI_DISABLE_RM + // Get the ArrayManager and save the current execution space + chai::ArrayManager* arrayManager = chai::ArrayManager::getInstance(); + ExecutionSpace currentSpace = arrayManager->getExecutionSpace(); + + // Set the execution space so that ManagedArrays and managed_ptrs + // are handled properly + arrayManager->setExecutionSpace(GPU); +#endif + + // Allocate space on the GPU to hold the pointer to the new object + T** gpuBuffer; + GPU_ERROR_CHECK(cudaMalloc(&gpuBuffer, sizeof(T*))); + + // Create the object on the device + make_on_device<<<1, 1>>>(gpuBuffer, args...); + debug_cudaDeviceSynchronize(); + + // Allocate space on the CPU for the pointer and copy the pointer to the CPU + T** cpuBuffer = (T**) malloc(sizeof(T*)); + GPU_ERROR_CHECK(cudaMemcpy(cpuBuffer, gpuBuffer, sizeof(T*), + cudaMemcpyDeviceToHost)); + + // Get the GPU pointer + T* gpuPointer = cpuBuffer[0]; + + // Free the host and device buffers + free(cpuBuffer); + GPU_ERROR_CHECK(cudaFree(gpuBuffer)); + +#ifndef CHAI_DISABLE_RM + // Set the execution space back to the previous value + arrayManager->setExecutionSpace(currentSpace); +#endif + + // Return the GPU pointer + return gpuPointer; + } + + /// + /// @author Alan Dayton + /// + /// Calls a factory method to create a new object on the device. + /// + /// @param[in] f The factory method + /// @param[in] args The arguments to the factory method + /// + /// @return The device pointer to the new object + /// + template + CHAI_HOST T* make_on_device_from_factory(F f, Args&&... args) { +#ifndef CHAI_DISABLE_RM + // Get the ArrayManager and save the current execution space + chai::ArrayManager* arrayManager = chai::ArrayManager::getInstance(); + ExecutionSpace currentSpace = arrayManager->getExecutionSpace(); + + // Set the execution space so that chai::ManagedArrays and + // chai::managed_ptrs are handled properly + arrayManager->setExecutionSpace(GPU); +#endif + + // Allocate space on the GPU to hold the pointer to the new object + T** gpuBuffer; + GPU_ERROR_CHECK(cudaMalloc(&gpuBuffer, sizeof(T*))); + + // Create the object on the device + make_on_device_from_factory<<<1, 1>>>(gpuBuffer, f, args...); + debug_cudaDeviceSynchronize(); + + // Allocate space on the CPU for the pointer and copy the pointer to the CPU + T** cpuBuffer = (T**) malloc(sizeof(T*)); + GPU_ERROR_CHECK(cudaMemcpy(cpuBuffer, gpuBuffer, sizeof(T*), + cudaMemcpyDeviceToHost)); + + // Get the GPU pointer + T* gpuPointer = cpuBuffer[0]; + + // Free the host and device buffers + free(cpuBuffer); + GPU_ERROR_CHECK(cudaFree(gpuBuffer)); + +#ifndef CHAI_DISABLE_RM + // Set the execution space back to the previous value + arrayManager->setExecutionSpace(currentSpace); +#endif + + // Return the GPU pointer + return gpuPointer; + } + +#endif + + // Adapted from "The C++ Programming Language," Fourth Edition, + // by Bjarne Stroustrup, pp. 814-816 + // Used to determine if a functor is callable with the given arguments + struct substitution_failure {}; + + template + struct substitution_succeeded : std::true_type {}; + + template<> + struct substitution_succeeded : std::false_type {}; + + template + struct is_invocable_impl { + private: + template + static auto check(X const& x, Ts&&... 
ts) -> decltype(x(ts...)); + static substitution_failure check(...); + public: + using type = decltype(check(std::declval(), std::declval()...)); + }; + + template + struct is_invocable : substitution_succeeded::type> {}; + } // namespace detail + + /// + /// @author Alan Dayton + /// + /// Makes a managed_ptr. + /// Factory function to create managed_ptrs. + /// + /// @param[in] args The arguments to T's constructor + /// + template + CHAI_HOST managed_ptr make_managed(Args... args) { +#ifdef __CUDACC__ + // Construct on the GPU first to take advantage of asynchrony + T* gpuPointer = detail::make_on_device(args...); +#endif + + // Construct on the CPU + T* cpuPointer = detail::make_on_host(args...); + + // Construct and return the managed_ptr +#ifdef __CUDACC__ + return managed_ptr({CPU, GPU}, {cpuPointer, gpuPointer}); +#else + return managed_ptr({CPU}, {cpuPointer}); +#endif + } + + /// + /// @author Alan Dayton + /// + /// Makes a managed_ptr. + /// Factory function to create managed_ptrs. + /// + /// @param[in] f The factory function that will create the object + /// @param[in] args The arguments to the factory function + /// + template + CHAI_HOST managed_ptr make_managed_from_factory(F&& f, Args&&... args) { + static_assert(detail::is_invocable::value, + "F is not invocable with the given arguments."); + + static_assert(std::is_pointer::type>::value, + "F does not return a pointer."); + + using R = typename std::remove_pointer::type>::type; + + static_assert(std::is_convertible::value, + "F does not return a pointer that is convertible to T*."); + +#ifdef __CUDACC__ + // Construct on the GPU first to take advantage of asynchrony + T* gpuPointer = detail::make_on_device_from_factory(f, args...); +#endif + + // Construct on the CPU + T* cpuPointer = detail::make_on_host_from_factory(f, args...); + + // Construct and return the managed_ptr +#ifdef __CUDACC__ + return managed_ptr({CPU, GPU}, {cpuPointer, gpuPointer}); +#else + return managed_ptr({CPU}, {cpuPointer}); +#endif + } + + /// + /// @author Alan Dayton + /// + /// Makes a new managed_ptr that shares ownership with the given managed_ptr, but + /// the underlying pointer is converted using static_cast. + /// + /// @param[in] other The managed_ptr to share ownership with and whose pointer to + /// convert using static_cast + /// + template + CHAI_HOST managed_ptr static_pointer_cast(const managed_ptr& other) noexcept { + T* cpuPointer = static_cast(other.get()); + +#ifdef __CUDACC__ + T* gpuPointer = static_cast(other.get(GPU, false)); + + return managed_ptr(other, {CPU, GPU}, {cpuPointer, gpuPointer}); +#else + return managed_ptr(other, {CPU}, {cpuPointer}); +#endif + } + + /// + /// @author Alan Dayton + /// + /// Makes a new managed_ptr that shares ownership with the given managed_ptr, but + /// the underlying pointer is converted using dynamic_cast. 
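A short usage sketch of the two factory entry points defined above; Base and Derived are hypothetical polymorphic types with CHAI_HOST_DEVICE constructors (in the spirit of the TestBase/TestDerived classes used by the tests later in this series), and the factory lambda must be annotated CHAI_HOST_DEVICE so it can run in both spaces:

chai::managed_ptr<Derived> buildWithConstructor(int value) {
  // Constructs Derived(value) on the host, and also on the device when compiled
  // with nvcc, and returns a managed_ptr that tracks both copies.
  return chai::make_managed<Derived>(value);
}

chai::managed_ptr<Base> buildWithFactory(int value) {
  // The factory must be invocable with the given arguments and must return a
  // pointer convertible to Base*, or the static_asserts above fire at compile time.
  auto factory = [] CHAI_HOST_DEVICE (int v) { return new Derived(v); };
  return chai::make_managed_from_factory<Base>(factory, value);
}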
+ /// + /// @param[in] other The managed_ptr to share ownership with and whose pointer to + /// convert using dynamic_cast + /// + template + CHAI_HOST managed_ptr dynamic_pointer_cast(const managed_ptr& other) noexcept { + T* cpuPointer = dynamic_cast(other.get()); + +#ifdef __CUDACC__ + T* gpuPointer = nullptr; + + if (cpuPointer) { + gpuPointer = static_cast(other.get(GPU, false)); + } + + return managed_ptr(other, {CPU, GPU}, {cpuPointer, gpuPointer}); +#else + return managed_ptr(other, {CPU}, {cpuPointer}); +#endif + } + + /// + /// @author Alan Dayton + /// + /// Makes a new managed_ptr that shares ownership with the given managed_ptr, but + /// the underlying pointer is converted using const_cast. + /// + /// @param[in] other The managed_ptr to share ownership with and whose pointer to + /// convert using const_cast + /// + template + CHAI_HOST managed_ptr const_pointer_cast(const managed_ptr& other) noexcept { + T* cpuPointer = const_cast(other.get()); + +#ifdef __CUDACC__ + T* gpuPointer = const_cast(other.get(GPU, false)); + + return managed_ptr(other, {CPU, GPU}, {cpuPointer, gpuPointer}); +#else + return managed_ptr(other, {CPU}, {cpuPointer}); +#endif + } + + /// + /// @author Alan Dayton + /// + /// Makes a new managed_ptr that shares ownership with the given managed_ptr, but + /// the underlying pointer is converted using reinterpret_cast. + /// + /// @param[in] other The managed_ptr to share ownership with and whose pointer to + /// convert using reinterpret_cast + /// + template + CHAI_HOST managed_ptr reinterpret_pointer_cast(const managed_ptr& other) noexcept { + T* cpuPointer = reinterpret_cast(other.get()); + +#ifdef __CUDACC__ + T* gpuPointer = reinterpret_cast(other.get(GPU, false)); + + return managed_ptr(other, {CPU, GPU}, {cpuPointer, gpuPointer}); +#else + return managed_ptr(other, {CPU}, {cpuPointer}); +#endif + } + + /// Comparison operators + + /// + /// @author Alan Dayton + /// + /// Equals comparison. + /// + /// @param[in] lhs The first managed_ptr to compare + /// @param[in] rhs The second managed_ptr to compare + /// + template + CHAI_HOST_DEVICE CHAI_INLINE + bool operator==(const managed_ptr& lhs, const managed_ptr& rhs) noexcept { + return lhs.get() == rhs.get(); + } + + /// + /// @author Alan Dayton + /// + /// Not equals comparison. + /// + /// @param[in] lhs The first managed_ptr to compare + /// @param[in] rhs The second managed_ptr to compare + /// + template + CHAI_HOST_DEVICE CHAI_INLINE + bool operator!=(const managed_ptr& lhs, const managed_ptr& rhs) noexcept { + return lhs.get() != rhs.get(); + } + + /// Comparison operators with nullptr + + /// + /// @author Alan Dayton + /// + /// Equals comparison with nullptr. + /// + /// @param[in] lhs The managed_ptr to compare to nullptr + /// + template + CHAI_HOST_DEVICE CHAI_INLINE + bool operator==(const managed_ptr& lhs, std::nullptr_t) noexcept { + return lhs.get() == nullptr; + } + + /// + /// @author Alan Dayton + /// + /// Equals comparison with nullptr. + /// + /// @param[in] rhs The managed_ptr to compare to nullptr + /// + template + CHAI_HOST_DEVICE CHAI_INLINE + bool operator==(std::nullptr_t, const managed_ptr& rhs) noexcept { + return nullptr == rhs.get(); + } + + /// + /// @author Alan Dayton + /// + /// Not equals comparison with nullptr. 
+ /// + /// @param[in] lhs The managed_ptr to compare to nullptr + /// + template + CHAI_HOST_DEVICE CHAI_INLINE + bool operator!=(const managed_ptr& lhs, std::nullptr_t) noexcept { + return lhs.get() != nullptr; + } + + /// + /// @author Alan Dayton + /// + /// Not equals comparison with nullptr. + /// + /// @param[in] rhs The managed_ptr to compare to nullptr + /// + template + CHAI_HOST_DEVICE CHAI_INLINE + bool operator!=(std::nullptr_t, const managed_ptr& rhs) noexcept { + return nullptr != rhs.get(); + } + + /// + /// @author Alan Dayton + /// + /// Not equals comparison. + /// + /// @param[in] lhs The first managed_ptr to swap + /// @param[in] rhs The second managed_ptr to swap + /// + template + void swap(managed_ptr& lhs, managed_ptr& rhs) noexcept { + std::swap(lhs.m_cpu_pointer, rhs.m_cpu_pointer); + std::swap(lhs.m_gpu_pointer, rhs.m_gpu_pointer); + std::swap(lhs.m_pointer_record, rhs.m_pointer_record); + } +} // namespace chai + +#endif // MANAGED_PTR + From 89288c5f2271801686c73782ea52a15906b47996 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Mon, 30 Sep 2019 13:41:42 -0700 Subject: [PATCH 22/58] Add managed_ptr unit tests --- tests/unit/CMakeLists.txt | 44 +- tests/unit/managed_ptr_unit_tests.cpp | 1086 +++++++++++++++++++++++++ 2 files changed, 1107 insertions(+), 23 deletions(-) create mode 100644 tests/unit/managed_ptr_unit_tests.cpp diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index fea76c55..835aded5 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -41,33 +41,17 @@ # POSSIBILITY OF SUCH DAMAGE. ####################################################################### -set (managed_array_test_depends - chai umpire gtest) +# Unit test dependencies +set (chai_unit_test_depends + chai umpire gtest) -set (array_manager_test_depends - chai umpire gtest) - -if (ENABLE_CUDA) - set (managed_array_test_depends - ${managed_array_test_depends} - cuda) - set (array_manager_test_depends - ${array_manager_test_depends} - cuda) -endif () -if (ENABLE_HIP) - set (managed_array_test_depends - ${managed_array_test_depends} - hip) - set (array_manager_test_depends - ${array_manager_test_depends} - hip) -endif () +blt_list_append(TO chai_unit_test_depends ELEMENTS cuda IF ${ENABLE_CUDA}) +# ManagedArray tests blt_add_executable( NAME managed_array_unit_tests SOURCES managed_array_unit_tests.cpp - DEPENDS_ON ${managed_array_test_depends}) + DEPENDS_ON ${chai_unit_test_depends}) target_include_directories( managed_array_unit_tests @@ -81,7 +65,7 @@ blt_add_test( blt_add_executable( NAME array_manager_unit_tests SOURCES array_manager_unit_tests.cpp - DEPENDS_ON ${array_manager_test_depends}) + DEPENDS_ON ${chai_unit_test_depends}) target_include_directories( array_manager_unit_tests @@ -91,3 +75,17 @@ blt_add_test( NAME array_manager_unit_test COMMAND array_manager_unit_tests) +# managed_ptr tests +blt_add_executable( + NAME managed_ptr_unit_tests + SOURCES managed_ptr_unit_tests.cpp + DEPENDS_ON ${chai_unit_test_depends}) + +target_include_directories( + managed_ptr_unit_tests + PUBLIC ${PROJECT_BINARY_DIR}/include) + +blt_add_test( + NAME managed_ptr_unit_test + COMMAND managed_ptr_unit_tests) + diff --git a/tests/unit/managed_ptr_unit_tests.cpp b/tests/unit/managed_ptr_unit_tests.cpp new file mode 100644 index 00000000..c6abf86e --- /dev/null +++ b/tests/unit/managed_ptr_unit_tests.cpp @@ -0,0 +1,1086 @@ +// --------------------------------------------------------------------- +// Copyright (c) 2016-2018, Lawrence Livermore National Security, 
LLC. All +// rights reserved. +// +// Produced at the Lawrence Livermore National Laboratory. +// +// This file is part of CHAI. +// +// LLNL-CODE-705877 +// +// For details, see https:://github.com/LLNL/CHAI +// Please also see the NOTICE and LICENSE files. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the +// distribution. +// +// - Neither the name of the LLNS/LLNL nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS +// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY +// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. 
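One note on the test file that follows: the CUDA_TEST macro defined just below hoists each test body into a free function instead of writing device lambdas directly inside TEST. The usual motivation, offered here as background rather than something stated in this patch, is that nvcc does not allow extended __device__ lambdas to be defined inside private member functions, and gtest's TEST macro generates the body as exactly such a member (TestBody). The expansion looks approximately like this:

CUDA_TEST(managed_ptr, cuda_default_constructor)
{
  // body containing [=] __device__ lambdas ...
}

// expands to roughly:

static void cuda_test_managed_ptrcuda_default_constructor();

TEST(managed_ptr, cuda_default_constructor)
{
  cuda_test_managed_ptrcuda_default_constructor();
}

static void cuda_test_managed_ptrcuda_default_constructor()
{
  // body containing [=] __device__ lambdas ...
}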
+// ---------------------------------------------------------------------
+#include "gtest/gtest.h"
+
+#define CUDA_TEST(X, Y) \
+ static void cuda_test_##X##Y(); \
+ TEST(X, Y) { cuda_test_##X##Y(); } \
+ static void cuda_test_##X##Y()
+
+#include "chai/config.hpp"
+#include "chai/ManagedArray.hpp"
+#include "chai/managed_ptr.hpp"
+
+#include "../src/util/forall.hpp"
+
+// Standard library headers
+#include <cstdlib>   // provides rand() used by the tests below
+
+class Simple {
+ public:
+ CHAI_HOST_DEVICE Simple() : m_value(-1) {}
+ CHAI_HOST_DEVICE Simple(int value) : m_value(value) {}
+ CHAI_HOST_DEVICE ~Simple() {}
+
+ CHAI_HOST_DEVICE Simple(Simple const & other) : m_value(other.m_value) {}
+
+ CHAI_HOST_DEVICE Simple& operator=(Simple const & other) {
+ m_value = other.m_value;
+ return *this;
+ }
+
+ CHAI_HOST_DEVICE Simple(Simple&& other) : m_value(other.m_value) {
+ other.m_value = -1;
+ }
+
+ CHAI_HOST_DEVICE Simple& operator=(Simple&& other) {
+ m_value = other.m_value;
+ other.m_value = -1;
+ return *this;
+ }
+
+ CHAI_HOST_DEVICE int getValue() { return m_value; }
+
+ private:
+ int m_value;
+};
+
+class TestBase {
+ public:
+ CHAI_HOST_DEVICE TestBase() {}
+ CHAI_HOST_DEVICE virtual ~TestBase() {}
+
+ CHAI_HOST_DEVICE static TestBase* Factory(const int value);
+
+ CHAI_HOST_DEVICE virtual int getValue() const = 0;
+};
+
+class TestDerived : public TestBase {
+ public:
+ CHAI_HOST_DEVICE TestDerived() : TestBase(), m_value(0) {}
+ CHAI_HOST_DEVICE TestDerived(const int value) : TestBase(), m_value(value) {}
+
+ CHAI_HOST_DEVICE virtual ~TestDerived() {}
+
+ CHAI_HOST_DEVICE virtual int getValue() const { return m_value; }
+
+ private:
+ int m_value;
+};
+
+CHAI_HOST_DEVICE TestBase* TestBase::Factory(const int value) {
+ return new TestDerived(value);
+}
+
+CHAI_HOST_DEVICE TestBase* Factory(const int value) {
+ return new TestDerived(value);
+}
+
+CHAI_HOST_DEVICE TestBase* OverloadedFactory() {
+ return new TestDerived(-1);
+}
+
+CHAI_HOST_DEVICE TestBase* OverloadedFactory(const int value) {
+ return new TestDerived(value);
+}
+
+
+TEST(managed_ptr, default_constructor)
+{
+ chai::managed_ptr derived;
+ chai::managed_ptr otherDerived;
+
+ EXPECT_EQ(derived.get(), nullptr);
+ EXPECT_EQ(derived.use_count(), 0);
+ EXPECT_FALSE(derived);
+ EXPECT_TRUE(derived == nullptr);
+ EXPECT_TRUE(nullptr == derived);
+ EXPECT_FALSE(derived != nullptr);
+ EXPECT_FALSE(nullptr != derived);
+ EXPECT_TRUE(derived == otherDerived);
+ EXPECT_TRUE(otherDerived == derived);
+ EXPECT_FALSE(derived != otherDerived);
+ EXPECT_FALSE(otherDerived != derived);
+}
+
+TEST(managed_ptr, nullptr_constructor)
+{
+ chai::managed_ptr derived = nullptr;
+ chai::managed_ptr otherDerived = nullptr;
+
+ EXPECT_EQ(derived.get(), nullptr);
+ EXPECT_EQ(derived.use_count(), 0);
+ EXPECT_FALSE(derived);
+ EXPECT_TRUE(derived == nullptr);
+ EXPECT_TRUE(nullptr == derived);
+ EXPECT_FALSE(derived != nullptr);
+ EXPECT_FALSE(nullptr != derived);
+ EXPECT_TRUE(derived == otherDerived);
+ EXPECT_TRUE(otherDerived == derived);
+ EXPECT_FALSE(derived != otherDerived);
+ EXPECT_FALSE(otherDerived != derived);
+}
+
+TEST(managed_ptr, cpu_pointer_constructor)
+{
+ TestDerived* cpuPointer = new TestDerived(3);
+ chai::managed_ptr derived({chai::CPU}, {cpuPointer});
+
+ EXPECT_EQ(derived->getValue(), 3);
+
+ EXPECT_NE(derived.get(), nullptr);
+ EXPECT_EQ(derived.use_count(), 1);
+ EXPECT_TRUE(derived);
+ EXPECT_FALSE(derived == nullptr);
+ EXPECT_FALSE(nullptr == derived);
+ EXPECT_TRUE(derived != nullptr);
+ EXPECT_TRUE(nullptr != derived);
+}
+
+TEST(managed_ptr, make_managed)
+{ + const int expectedValue = rand(); + auto derived = chai::make_managed(expectedValue); + + EXPECT_EQ((*derived).getValue(), expectedValue); + + EXPECT_NE(derived.get(), nullptr); + EXPECT_EQ(derived.use_count(), 1); + EXPECT_TRUE(derived); + EXPECT_FALSE(derived == nullptr); + EXPECT_FALSE(nullptr == derived); + EXPECT_TRUE(derived != nullptr); + EXPECT_TRUE(nullptr != derived); +} + +TEST(managed_ptr, copy_constructor) +{ + const int expectedValue = rand(); + auto derived = chai::make_managed(expectedValue); + chai::managed_ptr otherDerived(derived); + + EXPECT_EQ(derived->getValue(), expectedValue); + EXPECT_EQ(otherDerived->getValue(), expectedValue); + + EXPECT_NE(derived.get(), nullptr); + EXPECT_EQ(derived.use_count(), 2); + EXPECT_TRUE(derived); + EXPECT_FALSE(derived == nullptr); + EXPECT_FALSE(nullptr == derived); + EXPECT_TRUE(derived != nullptr); + EXPECT_TRUE(nullptr != derived); + EXPECT_TRUE(derived == otherDerived); + EXPECT_FALSE(derived != otherDerived); + + EXPECT_NE(otherDerived.get(), nullptr); + EXPECT_EQ(otherDerived.use_count(), 2); + EXPECT_TRUE(otherDerived); + EXPECT_FALSE(otherDerived == nullptr); + EXPECT_FALSE(nullptr == otherDerived); + EXPECT_TRUE(otherDerived != nullptr); + EXPECT_TRUE(nullptr != otherDerived); + EXPECT_TRUE(otherDerived == derived); + EXPECT_FALSE(otherDerived != derived); +} + +TEST(managed_ptr, converting_constructor) +{ + const int expectedValue = rand(); + auto derived = chai::make_managed(expectedValue); + chai::managed_ptr base = derived; + + EXPECT_EQ(derived->getValue(), expectedValue); + EXPECT_EQ(base->getValue(), expectedValue); + + EXPECT_NE(derived.get(), nullptr); + EXPECT_EQ(derived.use_count(), 2); + EXPECT_TRUE(derived); + EXPECT_FALSE(derived == nullptr); + EXPECT_FALSE(nullptr == derived); + EXPECT_TRUE(derived != nullptr); + EXPECT_TRUE(nullptr != derived); + EXPECT_TRUE(derived == base); + EXPECT_FALSE(derived != base); + + EXPECT_NE(base.get(), nullptr); + EXPECT_EQ(base.use_count(), 2); + EXPECT_TRUE(base); + EXPECT_FALSE(base == nullptr); + EXPECT_FALSE(nullptr == base); + EXPECT_TRUE(base != nullptr); + EXPECT_TRUE(nullptr != base); + EXPECT_TRUE(base == derived); + EXPECT_FALSE(base != derived); +} + +TEST(managed_ptr, copy_assignment_operator) +{ + const int expectedValue = rand(); + auto derived = chai::make_managed(expectedValue); + chai::managed_ptr otherDerived; + otherDerived = derived; + + EXPECT_EQ(derived->getValue(), expectedValue); + EXPECT_EQ(otherDerived->getValue(), expectedValue); + + EXPECT_NE(derived.get(), nullptr); + EXPECT_EQ(derived.use_count(), 2); + EXPECT_TRUE(derived); + EXPECT_FALSE(derived == nullptr); + EXPECT_FALSE(nullptr == derived); + EXPECT_TRUE(derived != nullptr); + EXPECT_TRUE(nullptr != derived); + EXPECT_TRUE(derived == otherDerived); + EXPECT_FALSE(derived != otherDerived); + + EXPECT_NE(otherDerived.get(), nullptr); + EXPECT_EQ(otherDerived.use_count(), 2); + EXPECT_TRUE(otherDerived); + EXPECT_FALSE(otherDerived == nullptr); + EXPECT_FALSE(nullptr == otherDerived); + EXPECT_TRUE(otherDerived != nullptr); + EXPECT_TRUE(nullptr != otherDerived); + EXPECT_TRUE(otherDerived == derived); + EXPECT_FALSE(otherDerived != derived); +} + +TEST(managed_ptr, copy_constructor_from_default_constructed) +{ + chai::managed_ptr derived; + chai::managed_ptr otherDerived(derived); + + EXPECT_EQ(derived.get(), nullptr); + EXPECT_EQ(derived.use_count(), 0); + EXPECT_EQ(bool(derived), false); + EXPECT_EQ(derived, nullptr); + EXPECT_EQ(nullptr, derived); + + EXPECT_EQ(otherDerived.get(), 
nullptr); + EXPECT_EQ(otherDerived.use_count(), 0); + EXPECT_EQ(bool(otherDerived), false); + EXPECT_EQ(otherDerived, nullptr); + EXPECT_EQ(nullptr, otherDerived); +} + +TEST(managed_ptr, copy_assignment_operator_from_default_constructed) +{ + chai::managed_ptr derived; + chai::managed_ptr otherDerived; + otherDerived = derived; + + EXPECT_EQ(derived.get(), nullptr); + EXPECT_EQ(derived.use_count(), 0); + EXPECT_EQ(bool(derived), false); + EXPECT_EQ(derived, nullptr); + EXPECT_EQ(nullptr, derived); + + EXPECT_EQ(otherDerived.get(), nullptr); + EXPECT_EQ(otherDerived.use_count(), 0); + EXPECT_EQ(bool(otherDerived), false); + EXPECT_EQ(otherDerived, nullptr); + EXPECT_EQ(nullptr, otherDerived); +} + +TEST(managed_ptr, conversion_copy_constructor_from_default_constructed) +{ + chai::managed_ptr derived; + chai::managed_ptr otherDerived(derived); + + EXPECT_EQ(derived.get(), nullptr); + EXPECT_EQ(derived.use_count(), 0); + EXPECT_EQ(bool(derived), false); + EXPECT_EQ(derived, nullptr); + EXPECT_EQ(nullptr, derived); + + EXPECT_EQ(otherDerived.get(), nullptr); + EXPECT_EQ(otherDerived.use_count(), 0); + EXPECT_EQ(bool(otherDerived), false); + EXPECT_EQ(otherDerived, nullptr); + EXPECT_EQ(nullptr, otherDerived); +} + +TEST(managed_ptr, conversion_copy_assignment_operator_from_default_constructed) +{ + chai::managed_ptr derived; + chai::managed_ptr otherDerived; + otherDerived = derived; + + EXPECT_EQ(derived.get(), nullptr); + EXPECT_EQ(derived.use_count(), 0); + EXPECT_EQ(bool(derived), false); + EXPECT_EQ(derived, nullptr); + EXPECT_EQ(nullptr, derived); + + EXPECT_EQ(otherDerived.get(), nullptr); + EXPECT_EQ(otherDerived.use_count(), 0); + EXPECT_EQ(bool(otherDerived), false); + EXPECT_EQ(otherDerived, nullptr); + EXPECT_EQ(nullptr, otherDerived); +} + +TEST(managed_ptr, copy_assignment_operator_from_host_ptr_constructed) +{ + const int expectedValue1 = rand(); + const int expectedValue2 = rand(); + + chai::managed_ptr derived = chai::make_managed(expectedValue1); + chai::managed_ptr otherDerived = chai::make_managed(expectedValue2); + chai::managed_ptr thirdDerived(otherDerived); + + thirdDerived = derived; + + EXPECT_NE(derived.get(), nullptr); + EXPECT_EQ(derived.use_count(), 2); + EXPECT_EQ(bool(derived), true); + EXPECT_NE(derived, nullptr); + EXPECT_NE(nullptr, derived); + + EXPECT_NE(otherDerived.get(), nullptr); + EXPECT_EQ(otherDerived.use_count(), 1); + EXPECT_EQ(bool(otherDerived), true); + EXPECT_NE(otherDerived, nullptr); + EXPECT_NE(nullptr, otherDerived); + + EXPECT_NE(thirdDerived.get(), nullptr); + EXPECT_EQ(thirdDerived.use_count(), 2); + EXPECT_EQ(bool(thirdDerived), true); + EXPECT_NE(thirdDerived, nullptr); + EXPECT_NE(nullptr, thirdDerived); +} + +TEST(managed_ptr, conversion_copy_assignment_operator_from_host_ptr_constructed) +{ + const int expectedValue1 = rand(); + const int expectedValue2 = rand(); + + chai::managed_ptr derived = chai::make_managed(expectedValue1); + chai::managed_ptr otherDerived = chai::make_managed(expectedValue2); + chai::managed_ptr thirdDerived(otherDerived); + + thirdDerived = derived; + + EXPECT_NE(derived.get(), nullptr); + EXPECT_EQ(derived.use_count(), 2); + EXPECT_EQ(bool(derived), true); + EXPECT_NE(derived, nullptr); + EXPECT_NE(nullptr, derived); + + EXPECT_NE(otherDerived.get(), nullptr); + EXPECT_EQ(otherDerived.use_count(), 1); + EXPECT_EQ(bool(otherDerived), true); + EXPECT_NE(otherDerived, nullptr); + EXPECT_NE(nullptr, otherDerived); + + EXPECT_NE(thirdDerived.get(), nullptr); + EXPECT_EQ(thirdDerived.use_count(), 2); + 
EXPECT_EQ(bool(thirdDerived), true); + EXPECT_NE(thirdDerived, nullptr); + EXPECT_NE(nullptr, thirdDerived); +} + +TEST(managed_ptr, static_pointer_cast) +{ + TestDerived* cpuPointer = new TestDerived(3); + chai::managed_ptr derived({chai::CPU}, {cpuPointer}); + + auto base = chai::static_pointer_cast(derived); + + EXPECT_EQ(base->getValue(), 3); + + EXPECT_NE(base.get(), nullptr); + EXPECT_EQ(base.use_count(), 2); + EXPECT_TRUE(base); + EXPECT_FALSE(base == nullptr); + EXPECT_FALSE(nullptr == base); + EXPECT_TRUE(base != nullptr); + EXPECT_TRUE(nullptr != base); +} + +TEST(managed_ptr, dynamic_pointer_cast) +{ + TestDerived* cpuPointer = new TestDerived(3); + chai::managed_ptr base({chai::CPU}, {cpuPointer}); + + auto derived = chai::dynamic_pointer_cast(base); + + EXPECT_EQ(derived->getValue(), 3); + + EXPECT_NE(derived.get(), nullptr); + EXPECT_EQ(derived.use_count(), 2); + EXPECT_TRUE(derived); + EXPECT_FALSE(derived == nullptr); + EXPECT_FALSE(nullptr == derived); + EXPECT_TRUE(derived != nullptr); + EXPECT_TRUE(nullptr != derived); +} + +TEST(managed_ptr, const_pointer_cast) +{ + TestDerived* cpuPointer = new TestDerived(3); + chai::managed_ptr base({chai::CPU}, {cpuPointer}); + + auto nonConstBase = chai::const_pointer_cast(base); + + EXPECT_EQ(nonConstBase->getValue(), 3); + + EXPECT_NE(nonConstBase.get(), nullptr); + EXPECT_EQ(nonConstBase.use_count(), 2); + EXPECT_TRUE(nonConstBase); + EXPECT_FALSE(nonConstBase == nullptr); + EXPECT_FALSE(nullptr == nonConstBase); + EXPECT_TRUE(nonConstBase != nullptr); + EXPECT_TRUE(nullptr != nonConstBase); +} + +TEST(managed_ptr, reinterpret_pointer_cast) +{ + TestDerived* cpuPointer = new TestDerived(3); + chai::managed_ptr base({chai::CPU}, {cpuPointer}); + + auto derived = chai::reinterpret_pointer_cast(base); + + EXPECT_EQ(derived->getValue(), 3); + + EXPECT_NE(derived.get(), nullptr); + EXPECT_EQ(derived.use_count(), 2); + EXPECT_TRUE(derived); + EXPECT_FALSE(derived == nullptr); + EXPECT_FALSE(nullptr == derived); + EXPECT_TRUE(derived != nullptr); + EXPECT_TRUE(nullptr != derived); +} + +#ifdef __CUDACC__ + +CUDA_TEST(managed_ptr, cuda_default_constructor) +{ + chai::managed_ptr derived; + chai::managed_ptr otherDerived; + + chai::ManagedArray array(1, chai::GPU); + chai::ManagedArray array2(9, chai::GPU); + + forall(cuda(), 0, 1, [=] __device__ (int i) { + array[i] = derived.get(); + array2[0] = (bool) derived; + array2[1] = derived == nullptr; + array2[2] = nullptr == derived; + array2[3] = derived != nullptr; + array2[4] = nullptr != derived; + array2[5] = derived == otherDerived; + array2[6] = otherDerived == derived; + array2[7] = derived != otherDerived; + array2[8] = otherDerived != derived; + }); + + array.move(chai::CPU); + array2.move(chai::CPU); + + EXPECT_EQ(array[0], nullptr); + EXPECT_FALSE(array2[0]); + EXPECT_TRUE(array2[1]); + EXPECT_TRUE(array2[2]); + EXPECT_FALSE(array2[3]); + EXPECT_FALSE(array2[4]); + EXPECT_TRUE(array2[5]); + EXPECT_TRUE(array2[6]); + EXPECT_FALSE(array2[7]); + EXPECT_FALSE(array2[8]); +} + +CUDA_TEST(managed_ptr, cuda_nullptr_constructor) +{ + chai::managed_ptr derived = nullptr; + chai::managed_ptr otherDerived = nullptr; + + chai::ManagedArray array(1, chai::GPU); + chai::ManagedArray array2(9, chai::GPU); + + forall(cuda(), 0, 1, [=] __device__ (int i) { + array[i] = derived.get(); + array2[0] = (bool) derived; + array2[1] = derived == nullptr; + array2[2] = nullptr == derived; + array2[3] = derived != nullptr; + array2[4] = nullptr != derived; + array2[5] = derived == otherDerived; + array2[6] 
= otherDerived == derived; + array2[7] = derived != otherDerived; + array2[8] = otherDerived != derived; + }); + + array.move(chai::CPU); + array2.move(chai::CPU); + + EXPECT_EQ(array[0], nullptr); + EXPECT_FALSE(array2[0]); + EXPECT_TRUE(array2[1]); + EXPECT_TRUE(array2[2]); + EXPECT_FALSE(array2[3]); + EXPECT_FALSE(array2[4]); + EXPECT_TRUE(array2[5]); + EXPECT_TRUE(array2[6]); + EXPECT_FALSE(array2[7]); + EXPECT_FALSE(array2[8]); +} + +CUDA_TEST(managed_ptr, cuda_gpu_pointer_constructor) +{ + TestDerived* gpuPointer = chai::detail::make_on_device(3); + chai::managed_ptr derived({chai::GPU}, {gpuPointer}); + + EXPECT_EQ(derived.get(), nullptr); + EXPECT_EQ(derived.use_count(), 1); + EXPECT_FALSE(derived); + EXPECT_TRUE(derived == nullptr); + EXPECT_TRUE(nullptr == derived); + EXPECT_FALSE(derived != nullptr); + EXPECT_FALSE(nullptr != derived); + + chai::ManagedArray array1(1, chai::GPU); + chai::ManagedArray array2(1, chai::GPU); + chai::ManagedArray array3(5, chai::GPU); + + forall(cuda(), 0, 1, [=] __device__ (int i) { + array1[i] = derived->getValue(); + array2[i] = derived.get(); + array3[0] = (bool) derived; + array3[1] = derived == nullptr; + array3[2] = nullptr == derived; + array3[3] = derived != nullptr; + array3[4] = nullptr != derived; + }); + + array1.move(chai::CPU); + array2.move(chai::CPU); + array3.move(chai::CPU); + + EXPECT_EQ(array1[0], 3); + EXPECT_NE(array2[0], nullptr); + EXPECT_TRUE(array3[0]); + EXPECT_FALSE(array3[1]); + EXPECT_FALSE(array3[2]); + EXPECT_TRUE(array3[3]); + EXPECT_TRUE(array3[4]); +} + +CUDA_TEST(managed_ptr, cuda_new_and_delete_on_device) +{ + // Initialize host side memory to hold a pointer + Simple** cpuPointerHolder = (Simple**) malloc(sizeof(Simple*)); + cpuPointerHolder[0] = nullptr; + + // Initialize device side memory to hold a pointer + Simple** gpuPointerHolder = nullptr; + cudaMalloc(&gpuPointerHolder, sizeof(Simple*)); + + // Create on the device + chai::detail::make_on_device<<<1, 1>>>(gpuPointerHolder); + + // Copy to the host side memory + cudaMemcpy(cpuPointerHolder, gpuPointerHolder, sizeof(Simple*), cudaMemcpyDeviceToHost); + + // Free device side memory + cudaFree(gpuPointerHolder); + + // Save the pointer + ASSERT_NE(cpuPointerHolder[0], nullptr); + Simple* gpuPointer = cpuPointerHolder[0]; + + // Free host side memory + free(cpuPointerHolder); + + // Initialize more host side memory + Simple** cpuPointerHolder2 = (Simple**) malloc(sizeof(Simple*)); + cpuPointerHolder2[0] = gpuPointer; + + // Initialize more device side memory + Simple** gpuPointerHolder2 = nullptr; + cudaMalloc(&gpuPointerHolder2, sizeof(Simple*)); + + // Copy pointer back to the device + cudaMemcpy(gpuPointerHolder2, cpuPointerHolder2, sizeof(Simple*), + cudaMemcpyHostToDevice); + + chai::detail::destroy_on_device<<<1, 1>>>(gpuPointerHolder2); + + // Free host memory + free(cpuPointerHolder2); + + // Free device memory + cudaFree(gpuPointerHolder2); +} + +CUDA_TEST(managed_ptr, cuda_new_and_delete_on_device_2) +{ + // Initialize host side memory to hold a pointer + Simple** cpuPointerHolder = (Simple**) malloc(sizeof(Simple*)); + cpuPointerHolder[0] = nullptr; + + // Initialize device side memory to hold a pointer + Simple** gpuPointerHolder = nullptr; + cudaMalloc(&gpuPointerHolder, sizeof(Simple*)); + + // Create on the device + chai::detail::make_on_device<<<1, 1>>>(gpuPointerHolder); + + // Copy to the host side memory + cudaMemcpy(cpuPointerHolder, gpuPointerHolder, sizeof(Simple*), cudaMemcpyDeviceToHost); + + // Free device side memory + 
cudaFree(gpuPointerHolder); + + // Save the pointer + ASSERT_NE(cpuPointerHolder[0], nullptr); + Simple* gpuPointer = cpuPointerHolder[0]; + + // Free host side memory + free(cpuPointerHolder); + + chai::managed_ptr test({chai::GPU}, {gpuPointer}); +} + +CUDA_TEST(managed_ptr, simple_cuda_cpu_and_gpu_pointer_constructor) +{ + Simple* gpuPointer = chai::detail::make_on_device(3); + Simple* cpuPointer = new Simple(4); + + chai::managed_ptr simple({chai::GPU, chai::CPU}, {gpuPointer, cpuPointer}); + + EXPECT_EQ(simple->getValue(), 4); + + chai::ManagedArray array1(1, chai::GPU); + + forall(cuda(), 0, 1, [=] __device__ (int i) { + array1[i] = simple->getValue(); + }); + + array1.move(chai::CPU); + + cudaDeviceSynchronize(); + + EXPECT_EQ(array1[0], 3); +} + +CUDA_TEST(managed_ptr, cuda_cpu_and_gpu_pointer_constructor) +{ + TestDerived* gpuPointer = chai::detail::make_on_device(3); + TestDerived* cpuPointer = new TestDerived(4); + + chai::managed_ptr derived({chai::GPU, chai::CPU}, {gpuPointer, cpuPointer}); + + EXPECT_EQ(derived->getValue(), 4); + EXPECT_NE(derived.get(), nullptr); + EXPECT_EQ(derived.use_count(), 1); + EXPECT_TRUE(derived); + EXPECT_FALSE(derived == nullptr); + EXPECT_FALSE(nullptr == derived); + EXPECT_TRUE(derived != nullptr); + EXPECT_TRUE(nullptr != derived); + + chai::ManagedArray array1(1, chai::GPU); + chai::ManagedArray array2(1, chai::GPU); + chai::ManagedArray array3(5, chai::GPU); + + forall(cuda(), 0, 1, [=] __device__ (int i) { + array1[i] = derived->getValue(); + array2[i] = derived.get(); + array3[0] = (bool) derived; + array3[1] = derived == nullptr; + array3[2] = nullptr == derived; + array3[3] = derived != nullptr; + array3[4] = nullptr != derived; + }); + + array1.move(chai::CPU); + array2.move(chai::CPU); + array3.move(chai::CPU); + + EXPECT_EQ(array1[0], 3); + EXPECT_NE(array2[0], nullptr); + EXPECT_TRUE(array3[0]); + EXPECT_FALSE(array3[1]); + EXPECT_FALSE(array3[2]); + EXPECT_TRUE(array3[3]); + EXPECT_TRUE(array3[4]); +} + +CUDA_TEST(managed_ptr, cuda_make_managed) +{ + const int expectedValue = rand(); + auto derived = chai::make_managed(expectedValue); + + chai::ManagedArray array(1, chai::GPU); + chai::ManagedArray array2(1, chai::GPU); + chai::ManagedArray array3(7, chai::GPU); + + forall(cuda(), 0, 1, [=] __device__ (int i) { + array[i] = derived->getValue(); + array2[i] = derived.get(); + array3[0] = (bool) derived; + array3[1] = derived == nullptr; + array3[2] = nullptr == derived; + array3[3] = derived != nullptr; + array3[4] = nullptr != derived; + }); + + array.move(chai::CPU); + array2.move(chai::CPU); + array3.move(chai::CPU); + + EXPECT_EQ(array[0], expectedValue); + + EXPECT_NE(array2[0], nullptr); + EXPECT_EQ(derived.use_count(), 1); + EXPECT_TRUE(array3[0]); + EXPECT_FALSE(array3[1]); + EXPECT_FALSE(array3[2]); + EXPECT_TRUE(array3[3]); + EXPECT_TRUE(array3[4]); +} + +CUDA_TEST(managed_ptr, make_managed_from_factory_function) +{ + const int expectedValue = rand(); + + auto factory = [] CHAI_HOST_DEVICE (const int value) { + return Factory(value); + }; + + auto derived = chai::make_managed_from_factory(factory, expectedValue); + + EXPECT_EQ((*derived).getValue(), expectedValue); + + EXPECT_NE(derived.get(), nullptr); + EXPECT_EQ(derived.use_count(), 1); + EXPECT_TRUE(derived); + EXPECT_FALSE(derived == nullptr); + EXPECT_FALSE(nullptr == derived); + EXPECT_TRUE(derived != nullptr); + EXPECT_TRUE(nullptr != derived); +} + +CUDA_TEST(managed_ptr, make_managed_from_factory_lambda) +{ + const int expectedValue = rand(); + + auto factory = [] 
CHAI_HOST_DEVICE (const int value) { + return new TestDerived(value); + }; + + auto derived = chai::make_managed_from_factory(factory, expectedValue); + + EXPECT_EQ((*derived).getValue(), expectedValue); + + EXPECT_NE(derived.get(), nullptr); + EXPECT_EQ(derived.use_count(), 1); + EXPECT_TRUE(derived); + EXPECT_FALSE(derived == nullptr); + EXPECT_FALSE(nullptr == derived); + EXPECT_TRUE(derived != nullptr); + EXPECT_TRUE(nullptr != derived); +} + +CUDA_TEST(managed_ptr, make_managed_from_overloaded_factory_function) +{ + const int expectedValue = rand(); + + auto factory = [] CHAI_HOST_DEVICE (const int value) { + return OverloadedFactory(value); + }; + + auto derived = chai::make_managed_from_factory(factory, expectedValue); + + EXPECT_EQ((*derived).getValue(), expectedValue); + + EXPECT_NE(derived.get(), nullptr); + EXPECT_EQ(derived.use_count(), 1); + EXPECT_TRUE(derived); + EXPECT_FALSE(derived == nullptr); + EXPECT_FALSE(nullptr == derived); + EXPECT_TRUE(derived != nullptr); + EXPECT_TRUE(nullptr != derived); +} + +CUDA_TEST(managed_ptr, make_managed_from_factory_static_member_function) +{ + const int expectedValue = rand(); + + auto factory = [] CHAI_HOST_DEVICE (const int value) { + return TestBase::Factory(value); + }; + + auto derived = chai::make_managed_from_factory(factory, expectedValue); + + EXPECT_EQ((*derived).getValue(), expectedValue); + + EXPECT_NE(derived.get(), nullptr); + EXPECT_EQ(derived.use_count(), 1); + EXPECT_TRUE(derived); + EXPECT_FALSE(derived == nullptr); + EXPECT_FALSE(nullptr == derived); + EXPECT_TRUE(derived != nullptr); + EXPECT_TRUE(nullptr != derived); +} + +CUDA_TEST(managed_ptr, cuda_copy_constructor) +{ + const int expectedValue = rand(); + auto derived = chai::make_managed(expectedValue); + chai::managed_ptr otherDerived(derived); + + chai::ManagedArray array(2, chai::GPU); + chai::ManagedArray array2(2, chai::GPU); + chai::ManagedArray array3(14, chai::GPU); + + forall(cuda(), 0, 1, [=] __device__ (int i) { + array[i] = derived->getValue(); + array2[0] = derived.get(); + array3[0] = (bool) derived; + array3[1] = derived == nullptr; + array3[2] = nullptr == derived; + array3[3] = derived != nullptr; + array3[4] = nullptr != derived; + array3[5] = derived == otherDerived; + array3[6] = derived != otherDerived; + + array[1] = otherDerived->getValue(); + array2[1] = otherDerived.get(); + array3[7] = (bool) derived; + array3[8] = derived == nullptr; + array3[9] = nullptr == derived; + array3[10] = derived != nullptr; + array3[11] = nullptr != derived; + array3[12] = derived == otherDerived; + array3[13] = derived != otherDerived; + }); + + array.move(chai::CPU); + array2.move(chai::CPU); + array3.move(chai::CPU); + + EXPECT_EQ(array[0], expectedValue); + EXPECT_EQ(array[1], expectedValue); + + EXPECT_NE(array2[0], nullptr); + EXPECT_TRUE(array3[0]); + EXPECT_FALSE(array3[1]); + EXPECT_FALSE(array3[2]); + EXPECT_TRUE(array3[3]); + EXPECT_TRUE(array3[4]); + EXPECT_TRUE(array3[5]); + EXPECT_FALSE(array3[6]); + + EXPECT_NE(array2[1], nullptr); + EXPECT_TRUE(array3[7]); + EXPECT_FALSE(array3[8]); + EXPECT_FALSE(array3[9]); + EXPECT_TRUE(array3[10]); + EXPECT_TRUE(array3[11]); + EXPECT_TRUE(array3[12]); + EXPECT_FALSE(array3[13]); +} + +CUDA_TEST(managed_ptr, cuda_converting_constructor) +{ + const int expectedValue = rand(); + auto derived = chai::make_managed(expectedValue); + chai::managed_ptr base(derived); + + chai::ManagedArray array(2, chai::GPU); + chai::ManagedArray array2(2, chai::GPU); + chai::ManagedArray array3(14, chai::GPU); + + 
forall(cuda(), 0, 1, [=] __device__ (int i) { + array[i] = derived->getValue(); + array2[0] = derived.get(); + array3[0] = (bool) derived; + array3[1] = derived == nullptr; + array3[2] = nullptr == derived; + array3[3] = derived != nullptr; + array3[4] = nullptr != derived; + array3[5] = derived == base; + array3[6] = derived != base; + + array[1] = base->getValue(); + array2[1] = base.get(); + array3[7] = (bool) base; + array3[8] = base == nullptr; + array3[9] = nullptr == base; + array3[10] = base != nullptr; + array3[11] = nullptr != base; + array3[12] = base == derived; + array3[13] = base != derived; + }); + + array.move(chai::CPU); + array2.move(chai::CPU); + array3.move(chai::CPU); + + EXPECT_EQ(array[0], expectedValue); + EXPECT_EQ(array[1], expectedValue); + + EXPECT_NE(array2[0], nullptr); + EXPECT_TRUE(array3[0]); + EXPECT_FALSE(array3[1]); + EXPECT_FALSE(array3[2]); + EXPECT_TRUE(array3[3]); + EXPECT_TRUE(array3[4]); + EXPECT_TRUE(array3[5]); + EXPECT_FALSE(array3[6]); + + EXPECT_NE(array2[1], nullptr); + EXPECT_TRUE(array3[7]); + EXPECT_FALSE(array3[8]); + EXPECT_FALSE(array3[9]); + EXPECT_TRUE(array3[10]); + EXPECT_TRUE(array3[11]); + EXPECT_TRUE(array3[12]); + EXPECT_FALSE(array3[13]); +} + +CUDA_TEST(managed_ptr, cuda_copy_assignment_operator) +{ + const int expectedValue = rand(); + auto derived = chai::make_managed(expectedValue); + chai::managed_ptr otherDerived; + otherDerived = derived; + + chai::ManagedArray array(2, chai::GPU); + chai::ManagedArray array2(2, chai::GPU); + chai::ManagedArray array3(14, chai::GPU); + + forall(cuda(), 0, 1, [=] __device__ (int i) { + array[i] = derived->getValue(); + array2[0] = derived.get(); + array3[0] = (bool) derived; + array3[1] = derived == nullptr; + array3[2] = nullptr == derived; + array3[3] = derived != nullptr; + array3[4] = nullptr != derived; + array3[5] = derived == otherDerived; + array3[6] = derived != otherDerived; + + array[1] = otherDerived->getValue(); + array2[1] = otherDerived.get(); + array3[7] = (bool) derived; + array3[8] = derived == nullptr; + array3[9] = nullptr == derived; + array3[10] = derived != nullptr; + array3[11] = nullptr != derived; + array3[12] = derived == otherDerived; + array3[13] = derived != otherDerived; + }); + + array.move(chai::CPU); + array2.move(chai::CPU); + array3.move(chai::CPU); + + EXPECT_EQ(array[0], expectedValue); + EXPECT_EQ(array[1], expectedValue); + + EXPECT_NE(array2[0], nullptr); + EXPECT_TRUE(array3[0]); + EXPECT_FALSE(array3[1]); + EXPECT_FALSE(array3[2]); + EXPECT_TRUE(array3[3]); + EXPECT_TRUE(array3[4]); + EXPECT_TRUE(array3[5]); + EXPECT_FALSE(array3[6]); + + EXPECT_NE(array2[1], nullptr); + EXPECT_TRUE(array3[7]); + EXPECT_FALSE(array3[8]); + EXPECT_FALSE(array3[9]); + EXPECT_TRUE(array3[10]); + EXPECT_TRUE(array3[11]); + EXPECT_TRUE(array3[12]); + EXPECT_FALSE(array3[13]); +} + +#endif + +// Enable the following tests to ensure that proper compiler errors are given +// for bad arguments since otherwise it is difficult to make sure the template +// metaprogramming is correct. + +#if 0 + +// Should give something like the following: +// error: static assertion failed: F is not invocable with the given arguments. 
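As background for the two disabled tests below (the first swaps the factory and its argument, the second passes an extra argument), this standalone sketch, illustration only rather than part of the test file, shows the kind of answer detail::is_invocable gives and why the quoted static_assert message appears:

auto exampleFactory = [] (const int value) { return new TestDerived(value); };

// Invocable with a single int: make_managed_from_factory accepts this.
static_assert(chai::detail::is_invocable<decltype(exampleFactory), int>::value,
              "factory is invocable with (int)");

// Not invocable with two ints (and a plain int is not invocable at all), so the
// static_assert inside make_managed_from_factory fires for both disabled tests.
static_assert(!chai::detail::is_invocable<decltype(exampleFactory), int, int>::value,
              "factory is not invocable with (int, int)");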
+ +TEST(managed_ptr, bad_function_to_make_managed_from_factory_function) +{ + const int expectedValue = rand(); + + auto factory = [] CHAI_HOST (const int value) { + return new TestDerived(value); + }; + + auto derived = chai::make_managed_from_factory(expectedValue, factory); + + EXPECT_EQ((*derived).getValue(), expectedValue); +} + +#endif + +#if 0 + +// Should give something like the following: +// error: static assertion failed: F is not invocable with the given arguments. + +TEST(managed_ptr, bad_arguments_to_make_managed_from_factory_function) +{ + const int expectedValue = rand(); + + auto factory = [] CHAI_HOST (const int value) { + return new TestDerived(value); + }; + + auto derived = chai::make_managed_from_factory(factory, expectedValue, 3); + + EXPECT_EQ((*derived).getValue(), expectedValue); +} + +#endif + From 97da33ed7261506537e6ae6507d22791840914d8 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Mon, 30 Sep 2019 17:30:08 -0700 Subject: [PATCH 23/58] Add managed_ptr integration tests --- tests/integration/CMakeLists.txt | 37 +- tests/integration/managed_ptr_tests.cpp | 717 ++++++++++++++++++++++++ 2 files changed, 740 insertions(+), 14 deletions(-) create mode 100644 tests/integration/managed_ptr_tests.cpp diff --git a/tests/integration/CMakeLists.txt b/tests/integration/CMakeLists.txt index 22cbdd04..509079a5 100644 --- a/tests/integration/CMakeLists.txt +++ b/tests/integration/CMakeLists.txt @@ -1,21 +1,15 @@ -set (managed_array_test_depends - chai umpire gtest) - -if (ENABLE_CUDA) - set (managed_array_test_depends - ${managed_array_test_depends} - cuda) -endif () -if (ENABLE_HIP) - set (managed_array_test_depends - ${managed_array_test_depends} - hip) -endif () +# Integration test dependencies +set (chai_integration_test_depends + chai umpire gtest) +blt_list_append(TO chai_integration_test_depends ELEMENTS cuda IF ${ENABLE_CUDA}) +blt_list_append(TO chai_integration_test_depends ELEMENTS hip IF ${ENABLE_HIP}) + +# ManagedArray tests blt_add_executable( NAME managed_array_tests SOURCES managed_array_tests.cpp - DEPENDS_ON ${managed_array_test_depends}) + DEPENDS_ON ${chai_integration_test_depends}) target_include_directories( managed_array_tests @@ -24,3 +18,18 @@ target_include_directories( blt_add_test( NAME managed_array_test COMMAND managed_array_tests) + +# managed_ptr tests +blt_add_executable( + NAME managed_ptr_tests + SOURCES managed_ptr_tests.cpp + DEPENDS_ON ${chai_integration_test_depends}) + +target_include_directories( + managed_ptr_tests + PUBLIC ${PROJECT_BINARY_DIR}/include) + +blt_add_test( + NAME managed_ptr_test + COMMAND managed_ptr_tests) + diff --git a/tests/integration/managed_ptr_tests.cpp b/tests/integration/managed_ptr_tests.cpp new file mode 100644 index 00000000..f5d22680 --- /dev/null +++ b/tests/integration/managed_ptr_tests.cpp @@ -0,0 +1,717 @@ +// --------------------------------------------------------------------- +// Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All +// rights reserved. +// +// Produced at the Lawrence Livermore National Laboratory. +// +// This file is part of CHAI. +// +// LLNL-CODE-705877 +// +// For details, see https:://github.com/LLNL/CHAI +// Please also see the NOTICE and LICENSE files. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the +// distribution. +// +// - Neither the name of the LLNS/LLNL nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS +// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY +// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// --------------------------------------------------------------------- +#include "gtest/gtest.h" + +#define CUDA_TEST(X, Y) \ + static void cuda_test_##X##Y(); \ + TEST(X, Y) { cuda_test_##X##Y(); } \ + static void cuda_test_##X##Y() + +#include "chai/config.hpp" +#include "chai/ManagedArray.hpp" +#include "chai/managed_ptr.hpp" + +#include "../src/util/forall.hpp" + +// Standard library headers +#include + +class Base1 { + public: + CHAI_HOST_DEVICE Base1() {} + CHAI_HOST_DEVICE virtual ~Base1() {} + + CHAI_HOST_DEVICE virtual bool isBase1() { return true; } +}; + +class Base2 { + public: + CHAI_HOST_DEVICE Base2() {} + CHAI_HOST_DEVICE virtual ~Base2() {} + + CHAI_HOST_DEVICE virtual bool isBase2() { return true; } +}; + +class ClassWithMultipleInheritance : public Base1, public Base2 { + public: + CHAI_HOST_DEVICE ClassWithMultipleInheritance() : Base1(), Base2() {} + CHAI_HOST_DEVICE virtual ~ClassWithMultipleInheritance() {} +}; + +class RawArrayClass { + public: + CHAI_HOST_DEVICE RawArrayClass() : m_values(nullptr) {} + CHAI_HOST_DEVICE RawArrayClass(int* values) : m_values(values) {} + + CHAI_HOST_DEVICE ~RawArrayClass() {} + + CHAI_HOST_DEVICE int getValue(const int i) const { return m_values[i]; } + + private: + int* m_values; +}; + +class RawPointerClass { + public: + CHAI_HOST_DEVICE RawPointerClass() : m_innerClass(nullptr) {} + CHAI_HOST_DEVICE RawPointerClass(RawArrayClass* innerClass) : m_innerClass(innerClass) {} + + CHAI_HOST_DEVICE ~RawPointerClass() {} + + CHAI_HOST_DEVICE int getValue(const int i) const { return m_innerClass->getValue(i); } + + private: + RawArrayClass* m_innerClass; +}; + +class TestBase { + public: + CHAI_HOST_DEVICE TestBase() {} + CHAI_HOST_DEVICE virtual ~TestBase() {} + + CHAI_HOST_DEVICE virtual int getValue(const int i) const = 0; +}; + +class TestDerived : public TestBase { + public: + CHAI_HOST_DEVICE TestDerived() : TestBase(), m_values(nullptr) {} + CHAI_HOST_DEVICE TestDerived(chai::ManagedArray values) : TestBase(), m_values(values) {} + CHAI_HOST_DEVICE virtual ~TestDerived() {} + + CHAI_HOST_DEVICE virtual int getValue(const int i) const { return m_values[i]; } + + private: + chai::ManagedArray m_values; +}; + +class TestInnerBase { + public: + CHAI_HOST_DEVICE TestInnerBase() {} + CHAI_HOST_DEVICE virtual ~TestInnerBase() {} 
+ + CHAI_HOST_DEVICE virtual int getValue() = 0; +}; + +class TestInner : public TestInnerBase { + public: + CHAI_HOST_DEVICE TestInner() : TestInnerBase(), m_value(0) {} + CHAI_HOST_DEVICE TestInner(int value) : TestInnerBase(), m_value(value) {} + CHAI_HOST_DEVICE virtual ~TestInner() {} + + CHAI_HOST_DEVICE virtual int getValue() { return m_value; } + + private: + int m_value; +}; + +class TestContainer { + public: + CHAI_HOST_DEVICE TestContainer() : m_innerType(nullptr) {} + CHAI_HOST_DEVICE TestContainer(chai::managed_ptr innerType) : m_innerType(innerType) {} + + CHAI_HOST_DEVICE ~TestContainer() {} + + CHAI_HOST_DEVICE int getValue() const { + return m_innerType->getValue(); + } + + private: + chai::managed_ptr m_innerType; +}; + +class MultipleRawArrayClass { + public: + CHAI_HOST_DEVICE MultipleRawArrayClass() : m_values1(nullptr), m_values2(nullptr) {} + CHAI_HOST_DEVICE MultipleRawArrayClass(int* values1, int* values2) : + m_values1(values1), + m_values2(values2) + {} + + CHAI_HOST_DEVICE ~MultipleRawArrayClass() {} + + CHAI_HOST_DEVICE int getValue(const int i, const int j) const { + if (i == 0) { + return m_values1[j]; + } + else if (i == 1) { + return m_values2[j]; + } + else { + return -1; + } + } + + private: + int* m_values1; + int* m_values2; +}; + +TEST(managed_ptr, class_with_raw_array) +{ + const int expectedValue = rand(); + + chai::ManagedArray array(1, chai::CPU); + + forall(sequential(), 0, 1, [=] (int i) { + array[i] = expectedValue; + }); + + auto rawArrayClass = chai::make_managed(array); + + ASSERT_EQ(rawArrayClass->getValue(0), expectedValue); + + array.free(); +} + +TEST(managed_ptr, class_with_multiple_raw_arrays) +{ + const int expectedValue1 = rand(); + const int expectedValue2 = rand(); + + chai::ManagedArray array1(1, chai::CPU); + chai::ManagedArray array2(1, chai::CPU); + + forall(sequential(), 0, 1, [=] (int i) { + array1[i] = expectedValue1; + array2[i] = expectedValue2; + }); + + auto multipleRawArrayClass = chai::make_managed(array1, array2); + + ASSERT_EQ(multipleRawArrayClass->getValue(0, 0), expectedValue1); + ASSERT_EQ(multipleRawArrayClass->getValue(1, 0), expectedValue2); +} + +TEST(managed_ptr, class_with_managed_array) +{ + const int expectedValue = rand(); + + chai::ManagedArray array(1, chai::CPU); + + forall(sequential(), 0, 1, [=] (int i) { + array[i] = expectedValue; + }); + + auto derived = chai::make_managed(array); + + ASSERT_EQ(derived->getValue(0), expectedValue); +} + +TEST(managed_ptr, class_with_raw_ptr) +{ + const int expectedValue = rand(); + + chai::ManagedArray array(1, chai::CPU); + + forall(sequential(), 0, 1, [=] (int i) { + array[i] = expectedValue; + }); + + auto rawArrayClass = chai::make_managed(array); + auto rawPointerClass = chai::make_managed(rawArrayClass); + + // This prevents the pointers contained by rawArrayClass from being deleted + // out from under us. Otherwise, rawArrayClass is the last remaining reference + // and if it is destroyed before rawPointerClass is, then we are in trouble. 
+ rawPointerClass.set_callback([=] (chai::Action, chai::ExecutionSpace, void*) { + (void) rawArrayClass; return false; + }); + rawArrayClass = nullptr; + + ASSERT_EQ((*rawPointerClass).getValue(0), expectedValue); +} + +TEST(managed_ptr, class_with_managed_ptr) +{ + const int expectedValue = rand(); + + auto derived = chai::make_managed(expectedValue); + TestContainer container(derived); + + ASSERT_EQ(container.getValue(), expectedValue); +} + +TEST(managed_ptr, nested_managed_ptr) +{ + const int expectedValue = rand(); + + auto derived = chai::make_managed(expectedValue); + auto container = chai::make_managed(derived); + + ASSERT_EQ(container->getValue(), expectedValue); +} + +#ifdef __CUDACC__ + +template +__global__ void deviceNew(T** arr) { + *arr = new T[5]; +} + +template +__global__ void deviceDelete(T** arr) { + delete[] *arr; +} + +__global__ void passObjectToKernel(chai::ManagedArray arr) { + arr[0] = -1; +} + +CUDA_TEST(managed_ptr, make_on_device) +{ + int** hostArray = (int**) malloc(sizeof(int*)); + hostArray[0] = nullptr; + + int** deviceArray = nullptr; + cudaMalloc(&deviceArray, sizeof(int*)); + + int** deviceArray2 = nullptr; + cudaMalloc(&deviceArray2, sizeof(int*)); + + deviceNew<<<1, 1>>>(deviceArray); + + cudaMemcpy(hostArray, deviceArray, sizeof(int*), cudaMemcpyDeviceToHost); + cudaDeviceSynchronize(); + cudaMemcpy(deviceArray2, hostArray, sizeof(int*), cudaMemcpyHostToDevice); + ASSERT_NE(hostArray[0], nullptr); + + deviceDelete<<<1, 1>>>(deviceArray2); + cudaDeviceSynchronize(); + free(hostArray); + cudaFree(deviceArray); + cudaFree(deviceArray2); +} + +CUDA_TEST(managed_ptr, cuda_new_and_delete_on_device) +{ + // Initialize host side memory to hold a pointer + RawArrayClass** cpuPointerHolder = (RawArrayClass**) malloc(sizeof(RawArrayClass*)); + cpuPointerHolder[0] = nullptr; + + // Initialize device side memory to hold a pointer + RawArrayClass** gpuPointerHolder = nullptr; + cudaMalloc(&gpuPointerHolder, sizeof(RawArrayClass*)); + + // Create on the device + chai::detail::make_on_device<<<1, 1>>>(gpuPointerHolder); + + // Copy to the host side memory + cudaMemcpy(cpuPointerHolder, gpuPointerHolder, sizeof(RawArrayClass*), cudaMemcpyDeviceToHost); + + // Free device side memory + cudaFree(gpuPointerHolder); + + // Save the pointer + ASSERT_NE(cpuPointerHolder[0], nullptr); + RawArrayClass* gpuPointer = cpuPointerHolder[0]; + + // Free host side memory + free(cpuPointerHolder); + + chai::detail::destroy_on_device<<<1, 1>>>(gpuPointer); +} + +CUDA_TEST(managed_ptr, cuda_build_managed_ptr) +{ + // Initialize host side memory to hold a pointer + RawArrayClass** cpuPointerHolder = (RawArrayClass**) malloc(sizeof(RawArrayClass*)); + cpuPointerHolder[0] = nullptr; + + // Initialize device side memory to hold a pointer + RawArrayClass** gpuPointerHolder = nullptr; + cudaMalloc(&gpuPointerHolder, sizeof(RawArrayClass*)); + + // Create on the device + chai::detail::make_on_device<<<1, 1>>>(gpuPointerHolder); + + // Copy to the host side memory + cudaMemcpy(cpuPointerHolder, gpuPointerHolder, sizeof(RawArrayClass*), cudaMemcpyDeviceToHost); + + // Free device side memory + cudaFree(gpuPointerHolder); + + // Save the pointer + ASSERT_NE(cpuPointerHolder[0], nullptr); + RawArrayClass* gpuPointer = cpuPointerHolder[0]; + + // Free host side memory + free(cpuPointerHolder); + + chai::managed_ptr managedPtr({chai::GPU}, {gpuPointer}); +} + + +CUDA_TEST(managed_ptr, pass_object_to_kernel) +{ + const int expectedValue = rand(); + + chai::ManagedArray array(1, chai::CPU); + + 
forall(sequential(), 0, 1, [=] (int i) { + array[i] = expectedValue; + }); + + chai::ArrayManager* manager = chai::ArrayManager::getInstance(); + manager->setExecutionSpace(chai::GPU); + passObjectToKernel<<<1, 1>>>(array); + cudaDeviceSynchronize(); + array.move(chai::CPU); + cudaDeviceSynchronize(); + ASSERT_EQ(array[0], -1); +} + +CUDA_TEST(managed_ptr, cuda_class_with_raw_array) +{ + const int expectedValue = rand(); + + chai::ManagedArray array(1, chai::CPU); + + forall(sequential(), 0, 1, [=] (int i) { + array[i] = expectedValue; + }); + + auto rawArrayClass = chai::make_managed(array); + chai::ManagedArray results(1, chai::GPU); + + forall(cuda(), 0, 1, [=] __device__ (int i) { + results[i] = rawArrayClass->getValue(i); + }); + + results.move(chai::CPU); + ASSERT_EQ(results[0], expectedValue); +} + +CUDA_TEST(managed_ptr, cuda_class_with_raw_array_and_callback) +{ + const int expectedValue = rand(); + + chai::ManagedArray array(1, chai::CPU); + + forall(sequential(), 0, 1, [=] (int i) { + array[i] = expectedValue; + }); + + auto cpuPointer = new RawArrayClass(array); + auto gpuPointer = chai::detail::make_on_device(array); + + auto callback = [=] (chai::Action action, chai::ExecutionSpace space, void*) mutable -> bool { + switch (action) { + case chai::ACTION_FREE: + switch (space) { + case chai::NONE: + array.free(); + return true; + default: + return false; + } + default: + return false; + } + }; + + auto managedPointer = chai::managed_ptr({chai::CPU, chai::GPU}, + {cpuPointer, gpuPointer}, + callback); + + chai::ManagedArray results(1, chai::GPU); + + forall(cuda(), 0, 1, [=] __device__ (int i) { + results[i] = managedPointer->getValue(i); + }); + + results.move(chai::CPU); + ASSERT_EQ(results[0], expectedValue); +} + +CUDA_TEST(managed_ptr, cuda_class_with_managed_array) +{ + const int expectedValue = rand(); + + chai::ManagedArray array(1, chai::CPU); + + forall(sequential(), 0, 1, [=] (int i) { + array[0] = expectedValue; + }); + + chai::managed_ptr derived = chai::make_managed(array); + + chai::ManagedArray results(1, chai::GPU); + + forall(cuda(), 0, 1, [=] __device__ (int i) { + results[i] = derived->getValue(i); + }); + + results.move(chai::CPU); + + ASSERT_EQ(results[0], expectedValue); +} + +CUDA_TEST(managed_ptr, cuda_class_with_raw_ptr) +{ + const int expectedValue = rand(); + + chai::ManagedArray array(1, chai::CPU); + + forall(sequential(), 0, 1, [=] (int i) { + array[0] = expectedValue; + }); + + auto rawArrayClass = chai::make_managed(array); + auto rawPointerClass = chai::make_managed(rawArrayClass); + + chai::ManagedArray results(1, chai::GPU); + + forall(cuda(), 0, 1, [=] __device__ (int i) { + results[i] = (*rawPointerClass).getValue(i); + }); + + results.move(chai::CPU); + ASSERT_EQ(results[0], expectedValue); +} + +CUDA_TEST(managed_ptr, cuda_class_with_managed_ptr) +{ + const int expectedValue = rand(); + + auto derived = chai::make_managed(expectedValue); + TestContainer container(derived); + + chai::ManagedArray results(1, chai::GPU); + + forall(cuda(), 0, 1, [=] __device__ (int i) { + results[i] = container.getValue(); + }); + + results.move(chai::CPU); + ASSERT_EQ(results[0], expectedValue); +} + +CUDA_TEST(managed_ptr, cuda_nested_managed_ptr) +{ + const int expectedValue = rand(); + + auto derived = chai::make_managed(expectedValue); + auto container = chai::make_managed(derived); + + chai::ManagedArray results(1, chai::GPU); + + forall(cuda(), 0, 1, [=] __device__ (int i) { + results[i] = container->getValue(); + }); + + results.move(chai::CPU); + 
ASSERT_EQ(results[0], expectedValue); +} + +CUDA_TEST(managed_ptr, cuda_multiple_inheritance) +{ + auto derived = chai::make_managed(); + + chai::managed_ptr base1 = derived; + chai::managed_ptr base2 = derived; + + chai::ManagedArray results(2, chai::GPU); + + forall(cuda(), 0, 1, [=] __device__ (int i) { + results[i] = base1->isBase1(); + results[1] = base2->isBase2(); + }); + + results.move(chai::CPU); + cudaDeviceSynchronize(); + + ASSERT_EQ(results[0], true); + ASSERT_EQ(results[1], true); +} + +CUDA_TEST(managed_ptr, static_pointer_cast) +{ + const int expectedValue = rand(); + + chai::ManagedArray array(1, chai::CPU); + + forall(sequential(), 0, 1, [=] (int i) { + array[0] = expectedValue; + }); + + auto derived = chai::make_managed(array); + auto base = chai::static_pointer_cast(derived); + auto derivedFromBase = chai::static_pointer_cast(base); + + chai::ManagedArray results(3, chai::GPU); + + forall(cuda(), 0, 1, [=] __device__ (int i) { + results[i] = derived->getValue(i); + results[1] = base->getValue(i); + results[2] = derivedFromBase->getValue(i); + }); + + results.move(chai::CPU); + + ASSERT_EQ(results[0], expectedValue); + ASSERT_EQ(results[1], expectedValue); + ASSERT_EQ(results[2], expectedValue); +} + +CUDA_TEST(managed_ptr, dynamic_pointer_cast) +{ + const int expectedValue = rand(); + + chai::ManagedArray array(1, chai::CPU); + + forall(sequential(), 0, 1, [=] (int i) { + array[0] = expectedValue; + }); + + auto derived = chai::make_managed(array); + auto base = chai::dynamic_pointer_cast(derived); + auto derivedFromBase = chai::dynamic_pointer_cast(base); + + chai::ManagedArray results(3, chai::GPU); + + forall(cuda(), 0, 1, [=] __device__ (int i) { + results[i] = derived->getValue(i); + results[1] = base->getValue(i); + results[2] = derivedFromBase->getValue(i); + }); + + results.move(chai::CPU); + + ASSERT_EQ(results[0], expectedValue); + ASSERT_EQ(results[1], expectedValue); + ASSERT_EQ(results[2], expectedValue); +} + +CUDA_TEST(managed_ptr, const_pointer_cast) +{ + const int expectedValue = rand(); + + chai::ManagedArray array(1, chai::CPU); + + forall(sequential(), 0, 1, [=] (int i) { + array[0] = expectedValue; + }); + + auto derived = chai::make_managed(array); + auto constDerived = chai::const_pointer_cast(derived); + auto derivedFromConst = chai::const_pointer_cast(constDerived); + + chai::ManagedArray results(3, chai::GPU); + + forall(cuda(), 0, 1, [=] __device__ (int i) { + results[i] = derived->getValue(i); + results[1] = constDerived->getValue(i); + results[2] = derivedFromConst->getValue(i); + }); + + results.move(chai::CPU); + + ASSERT_EQ(results[0], expectedValue); + ASSERT_EQ(results[1], expectedValue); + ASSERT_EQ(results[2], expectedValue); +} + +CUDA_TEST(managed_ptr, reinterpret_pointer_cast) +{ + const int expectedValue = rand(); + + chai::ManagedArray array(1, chai::CPU); + + forall(sequential(), 0, 1, [=] (int i) { + array[0] = expectedValue; + }); + + auto derived = chai::make_managed(array); + auto base = chai::reinterpret_pointer_cast(derived); + auto derivedFromBase = chai::reinterpret_pointer_cast(base); + + chai::ManagedArray results(3, chai::GPU); + + forall(cuda(), 0, 1, [=] __device__ (int i) { + results[i] = derived->getValue(i); + results[1] = base->getValue(i); + results[2] = derivedFromBase->getValue(i); + }); + + results.move(chai::CPU); + + ASSERT_EQ(results[0], expectedValue); + ASSERT_EQ(results[1], expectedValue); + ASSERT_EQ(results[2], expectedValue); +} + +#endif + +#if 0 // TODO: Enable if/when ManagedArrays of 
managed_ptrs can be handled correctly. + +class RawArrayOfPointersClass { + public: + CHAI_HOST_DEVICE RawArrayOfPointersClass() = delete; + CHAI_HOST_DEVICE RawArrayOfPointersClass(RawArrayClass** arrayOfPointers) : + m_arrayOfPointers(arrayOfPointers) + {} + + CHAI_HOST_DEVICE int getValue(const int i, const int j) const { + return m_arrayOfPointers[i]->getValue(j); + } + + private: + RawArrayClass** m_arrayOfPointers = nullptr; +}; + +TEST(managed_ptr, class_with_raw_array_of_pointers) +{ + const int expectedValue = rand(); + + chai::ManagedArray array(1, chai::CPU); + array[0] = expectedValue; + + auto rawArrayClass = chai::make_managed(array); + chai::managed_ptr arrayOfPointers[1] = {rawArrayClass}; + + auto rawArrayOfPointersClass = chai::make_managed(arrayOfPointers); + ASSERT_EQ(rawArrayOfPointersClass->getValue(0, 0), expectedValue); +} + +#endif + From 9ec6d1a4efa69b98101d2b574944a3be43ff17fd Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Mon, 30 Sep 2019 17:33:15 -0700 Subject: [PATCH 24/58] Restore hip support to unit tests --- tests/unit/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index 835aded5..b4103bc8 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -46,6 +46,7 @@ set (chai_unit_test_depends chai umpire gtest) blt_list_append(TO chai_unit_test_depends ELEMENTS cuda IF ${ENABLE_CUDA}) +blt_list_append(TO chai_unit_test_depends ELEMENTS hip IF ${ENABLE_HIP}) # ManagedArray tests blt_add_executable( From 052973b106ecaf90a77bc81f683dbb69a92ff1b0 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Tue, 1 Oct 2019 14:02:58 -0700 Subject: [PATCH 25/58] Remove reference counting from managed_ptr --- src/chai/managed_ptr.hpp | 271 +++++++++----------------- tests/unit/managed_ptr_unit_tests.cpp | 67 +++---- 2 files changed, 124 insertions(+), 214 deletions(-) diff --git a/src/chai/managed_ptr.hpp b/src/chai/managed_ptr.hpp index 36c25d95..c9c0de66 100644 --- a/src/chai/managed_ptr.hpp +++ b/src/chai/managed_ptr.hpp @@ -95,29 +95,15 @@ namespace chai { struct managed_ptr_record { managed_ptr_record() : - m_num_references(1), m_callback() { } managed_ptr_record(std::function callback) : - m_num_references(1), m_callback(callback) { } - size_t use_count() { - return m_num_references; - } - - void addReference() { - m_num_references++; - } - - void removeReference() { - m_num_references--; - } - ExecutionSpace getLastSpace() { return m_last_space; } @@ -126,7 +112,6 @@ namespace chai { m_callback = callback; } - size_t m_num_references = 1; /// The reference counter ExecutionSpace m_last_space = NONE; /// The last space executed in std::function m_callback; /// Callback to handle events }; @@ -136,10 +121,7 @@ namespace chai { /// @author Alan Dayton /// /// This wrapper stores both host and device pointers so that polymorphism can be - /// used in both contexts with a single API. It is modeled after std::shared_ptr, - /// so it does reference counting and automatically cleans up when the last - /// reference is destroyed. If we ever do multi-threading on the CPU, locking will - /// need to be added to the reference counter. + /// used in both contexts with a single API. /// The make_managed and make_managed_from_factory functions call new on both the /// host and device so that polymorphism is valid in both contexts. 
Simply copying /// an object to the device will not copy the vtable, so new must be called on @@ -173,11 +155,10 @@ namespace chai { /// be given the extracted host pointer, and likewise the device constructor /// of T will be given the extracted device pointer. It is recommended that /// a callback is defined that maintains a copy of the managed_ptr so that - /// the raw pointers are not accidentally destroyed prematurely (since - /// managed_ptr does reference counting). It is also recommended that the - /// callback calls the copy constructor of the managed_ptr on the ACTION_MOVE - /// event so that the ACTION_MOVE event is triggered also for the inner - /// managed_ptr. + /// the raw pointers are not accidentally destroyed prematurely. It is also + /// recommended that the callback calls the copy constructor of the managed_ptr + /// on the ACTION_MOVE event so that the ACTION_MOVE event is triggered also for + /// the inner managed_ptr. /// Again, if a raw pointer is passed to make_managed, accessing that member will /// only be valid in the correct context. Take care when passing raw pointers /// as arguments to member functions. @@ -199,7 +180,6 @@ namespace chai { /// @author Alan Dayton /// /// Default constructor. - /// Initializes the reference count to 0. /// CHAI_HOST_DEVICE constexpr managed_ptr() noexcept {} @@ -207,7 +187,6 @@ namespace chai { /// @author Alan Dayton /// /// Construct from nullptr. - /// Initializes the reference count to 0. /// CHAI_HOST_DEVICE constexpr managed_ptr(std::nullptr_t) noexcept {} @@ -309,9 +288,9 @@ namespace chai { /// @author Alan Dayton /// /// Copy constructor. - /// Constructs a copy of the given managed_ptr, increases the reference count, - /// and if the execution space is different, calls the user defined callback - /// with ACTION_MOVE for each of the execution spaces. + /// Constructs a copy of the given managed_ptr and if the execution space is + /// different, calls the user defined callback with ACTION_MOVE for each + /// of the execution spaces. /// /// @param[in] other The managed_ptr to copy /// @@ -321,7 +300,6 @@ namespace chai { m_pointer_record(other.m_pointer_record) { #ifndef __CUDA_ARCH__ - addReference(); move(); #endif } @@ -330,10 +308,9 @@ namespace chai { /// @author Alan Dayton /// /// Converting constructor. - /// Constructs a copy of the given managed_ptr, increases the reference count, - /// and if the execution space is different, calls the user defined callback - /// with ACTION_MOVE for each of the execution spaces. U* must be convertible - /// to T*. + /// Constructs a copy of the given managed_ptr and if the execution space is + /// different, calls the user defined callback with ACTION_MOVE for each + /// of the execution spaces. U* must be convertible to T*. /// /// @param[in] other The managed_ptr to copy /// @@ -347,7 +324,6 @@ namespace chai { "U* must be convertible to T*."); #ifndef __CUDA_ARCH__ - addReference(); move(); #endif } @@ -394,57 +370,30 @@ namespace chai { } } - addReference(); move(); } /// /// @author Alan Dayton /// - /// Destructor. Decreases the reference count and if this is the last reference, - /// clean up. + /// Destructor /// - CHAI_HOST_DEVICE ~managed_ptr() { -#ifdef __CUDACC__ - // This trick came from Max Katz at Nvidia. - // Taking the address of this kernel ensures that it gets instantiated - // by the compiler and can be used within __CUDA_ARCH__. 
Without this, - // calling destroy_on_device within the confines of __CUDA_ARCH__ will - // always fail with error code 0x8 (invalid device function). - // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#restrictions - // From the CUDA Programming Guide Restrictions: - // "If a __global__ function template is instantiated and launched from - // the host, then the function template must be instantiated with the - // same template arguments irrespective of whether __CUDA_ARCH__ is - // defined and regardless of the value of __CUDA_ARCH__." - (void) &detail::destroy_on_device; -#endif - -#ifndef __CUDA_ARCH__ - removeReference(); -#endif - } + CHAI_HOST_DEVICE ~managed_ptr() {} /// /// @author Alan Dayton /// - /// Copy assignment operator. - /// Copies the given managed_ptr and increases the reference count. + /// Copy assignment operator. Does a shallow copy. /// /// @param[in] other The managed_ptr to copy /// CHAI_HOST_DEVICE managed_ptr& operator=(const managed_ptr& other) noexcept { if (this != &other) { -#ifndef __CUDA_ARCH__ - removeReference(); -#endif - m_cpu_pointer = other.m_cpu_pointer; m_gpu_pointer = other.m_gpu_pointer; m_pointer_record = other.m_pointer_record; #ifndef __CUDA_ARCH__ - addReference(); move(); #endif } @@ -456,8 +405,8 @@ namespace chai { /// @author Alan Dayton /// /// Conversion copy assignment operator. - /// Copies the given managed_ptr and increases the reference count. - /// U* must be convertible to T*. + /// Copies the given managed_ptr. Does a shallow copy. U* must be convertible + /// to T*. /// /// @param[in] other The managed_ptr to copy /// @@ -466,16 +415,11 @@ namespace chai { static_assert(std::is_convertible::value, "U* must be convertible to T*."); -#ifndef __CUDA_ARCH__ - removeReference(); -#endif - m_cpu_pointer = other.m_cpu_pointer; m_gpu_pointer = other.m_gpu_pointer; m_pointer_record = other.m_pointer_record; #ifndef __CUDA_ARCH__ - addReference(); move(); #endif @@ -547,20 +491,6 @@ namespace chai { #endif } - /// - /// @author Alan Dayton - /// - /// Returns the number of managed_ptrs owning these pointers. - /// - CHAI_HOST std::size_t use_count() const { - if (m_pointer_record) { - return m_pointer_record->use_count(); - } - else { - return 0; - } - } - /// /// @author Alan Dayton /// @@ -590,6 +520,82 @@ namespace chai { } } + /// + /// @author Alan Dayton + /// + /// If a user callback is provided, calls the callback with the ACTION_FREE + /// event. Otherwise calls delete on the CPU and GPU pointers. + /// + CHAI_HOST void free() { + if (m_pointer_record) { + if (m_pointer_record->m_callback) { + // Destroy device pointer first to take advantage of asynchrony + for (int space = NUM_EXECUTION_SPACES-1; space >= NONE; --space) { + ExecutionSpace execSpace = static_cast(space); + T* pointer = get(execSpace, false); + + using T_non_const = typename std::remove_const::type; + + // We can use const_cast because can managed_ptr can only + // be constructed with non const pointers. 
+ T_non_const* temp = const_cast(pointer); + void* voidPointer = static_cast(temp); + + if (!m_pointer_record->m_callback(ACTION_FREE, + execSpace, + voidPointer)) { + switch (execSpace) { + case CPU: + delete pointer; + break; +#ifdef __CUDACC__ + case GPU: + { + if (pointer) { + detail::destroy_on_device<<<1, 1>>>(temp); + debug_cudaDeviceSynchronize(); + } + + break; + } +#endif + default: + break; + } + } + } + } + else { + // Destroy device pointer first to take advantage of asynchrony + for (int space = NUM_EXECUTION_SPACES-1; space >= NONE; --space) { + ExecutionSpace execSpace = static_cast(space); + T* pointer = get(execSpace, false); + + switch (execSpace) { + case CPU: + delete pointer; + break; +#ifdef __CUDACC__ + case GPU: + { + if (pointer) { + detail::destroy_on_device<<<1, 1>>>(pointer); + debug_cudaDeviceSynchronize(); + } + + break; + } +#endif + default: + break; + } + } + } + + delete m_pointer_record; + } + } + private: T* m_cpu_pointer = nullptr; /// The CPU pointer T* m_gpu_pointer = nullptr; /// The GPU pointer @@ -639,99 +645,6 @@ namespace chai { } #endif } - - /// - /// @author Alan Dayton - /// - /// Increments the reference count and calls the copy constructor to - /// trigger data movement. - /// - CHAI_HOST void addReference() { - if (m_pointer_record) { - m_pointer_record->addReference(); - } - } - - /// - /// @author Alan Dayton - /// - /// Decrements the reference counter. If the resulting number of references - /// is 0, clean up the object. - /// - CHAI_HOST void removeReference() { - if (m_pointer_record) { - m_pointer_record->removeReference(); - - if (m_pointer_record->use_count() == 0) { - if (m_pointer_record->m_callback) { - // Destroy device pointer first to take advantage of asynchrony - for (int space = NUM_EXECUTION_SPACES-1; space >= NONE; --space) { - ExecutionSpace execSpace = static_cast(space); - T* pointer = get(execSpace, false); - - using T_non_const = typename std::remove_const::type; - - // We can use const_cast because can managed_ptr can only - // be constructed with non const pointers. 
- T_non_const* temp = const_cast(pointer); - void* voidPointer = static_cast(temp); - - if (!m_pointer_record->m_callback(ACTION_FREE, - execSpace, - voidPointer)) { - switch (execSpace) { - case CPU: - delete pointer; - break; -#ifdef __CUDACC__ - case GPU: - { - if (pointer) { - detail::destroy_on_device<<<1, 1>>>(temp); - debug_cudaDeviceSynchronize(); - } - - break; - } -#endif - default: - break; - } - } - } - } - else { - // Destroy device pointer first to take advantage of asynchrony - for (int space = NUM_EXECUTION_SPACES-1; space >= NONE; --space) { - ExecutionSpace execSpace = static_cast(space); - T* pointer = get(execSpace, false); - - switch (execSpace) { - case CPU: - delete pointer; - break; -#ifdef __CUDACC__ - case GPU: - { - if (pointer) { - detail::destroy_on_device<<<1, 1>>>(pointer); - debug_cudaDeviceSynchronize(); - } - - break; - } -#endif - default: - break; - } - } - } - - delete m_pointer_record; - } - } - } - }; namespace detail { diff --git a/tests/unit/managed_ptr_unit_tests.cpp b/tests/unit/managed_ptr_unit_tests.cpp index c6abf86e..4bef0cef 100644 --- a/tests/unit/managed_ptr_unit_tests.cpp +++ b/tests/unit/managed_ptr_unit_tests.cpp @@ -131,7 +131,6 @@ TEST(managed_ptr, default_constructor) chai::managed_ptr otherDerived; EXPECT_EQ(derived.get(), nullptr); - EXPECT_EQ(derived.use_count(), 0); EXPECT_FALSE(derived); EXPECT_TRUE(derived == nullptr); EXPECT_TRUE(nullptr == derived); @@ -141,6 +140,10 @@ TEST(managed_ptr, default_constructor) EXPECT_TRUE(otherDerived == derived); EXPECT_FALSE(derived != otherDerived); EXPECT_FALSE(otherDerived != derived); + + // Make sure free is a no-op + derived.free(); + otherDerived.free(); } TEST(managed_ptr, nullptr_constructor) @@ -149,7 +152,6 @@ TEST(managed_ptr, nullptr_constructor) chai::managed_ptr otherDerived = nullptr; EXPECT_EQ(derived.get(), nullptr); - EXPECT_EQ(derived.use_count(), 0); EXPECT_FALSE(derived); EXPECT_TRUE(derived == nullptr); EXPECT_TRUE(nullptr == derived); @@ -159,6 +161,10 @@ TEST(managed_ptr, nullptr_constructor) EXPECT_TRUE(otherDerived == derived); EXPECT_FALSE(derived != otherDerived); EXPECT_FALSE(otherDerived != derived); + + // Make sure free is a no-op + derived.free(); + otherDerived.free(); } TEST(managed_ptr, cpu_pointer_constructor) @@ -169,12 +175,13 @@ TEST(managed_ptr, cpu_pointer_constructor) EXPECT_EQ(derived->getValue(), 3); EXPECT_NE(derived.get(), nullptr); - EXPECT_EQ(derived.use_count(), 1); EXPECT_TRUE(derived); EXPECT_FALSE(derived == nullptr); EXPECT_FALSE(nullptr == derived); EXPECT_TRUE(derived != nullptr); EXPECT_TRUE(nullptr != derived); + + derived.free(); } TEST(managed_ptr, make_managed) @@ -185,12 +192,13 @@ TEST(managed_ptr, make_managed) EXPECT_EQ((*derived).getValue(), expectedValue); EXPECT_NE(derived.get(), nullptr); - EXPECT_EQ(derived.use_count(), 1); EXPECT_TRUE(derived); EXPECT_FALSE(derived == nullptr); EXPECT_FALSE(nullptr == derived); EXPECT_TRUE(derived != nullptr); EXPECT_TRUE(nullptr != derived); + + derived.free(); } TEST(managed_ptr, copy_constructor) @@ -203,7 +211,6 @@ TEST(managed_ptr, copy_constructor) EXPECT_EQ(otherDerived->getValue(), expectedValue); EXPECT_NE(derived.get(), nullptr); - EXPECT_EQ(derived.use_count(), 2); EXPECT_TRUE(derived); EXPECT_FALSE(derived == nullptr); EXPECT_FALSE(nullptr == derived); @@ -213,7 +220,6 @@ TEST(managed_ptr, copy_constructor) EXPECT_FALSE(derived != otherDerived); EXPECT_NE(otherDerived.get(), nullptr); - EXPECT_EQ(otherDerived.use_count(), 2); EXPECT_TRUE(otherDerived); EXPECT_FALSE(otherDerived 
== nullptr); EXPECT_FALSE(nullptr == otherDerived); @@ -221,6 +227,8 @@ TEST(managed_ptr, copy_constructor) EXPECT_TRUE(nullptr != otherDerived); EXPECT_TRUE(otherDerived == derived); EXPECT_FALSE(otherDerived != derived); + + derived.free(); } TEST(managed_ptr, converting_constructor) @@ -233,7 +241,6 @@ TEST(managed_ptr, converting_constructor) EXPECT_EQ(base->getValue(), expectedValue); EXPECT_NE(derived.get(), nullptr); - EXPECT_EQ(derived.use_count(), 2); EXPECT_TRUE(derived); EXPECT_FALSE(derived == nullptr); EXPECT_FALSE(nullptr == derived); @@ -243,7 +250,6 @@ TEST(managed_ptr, converting_constructor) EXPECT_FALSE(derived != base); EXPECT_NE(base.get(), nullptr); - EXPECT_EQ(base.use_count(), 2); EXPECT_TRUE(base); EXPECT_FALSE(base == nullptr); EXPECT_FALSE(nullptr == base); @@ -251,6 +257,8 @@ TEST(managed_ptr, converting_constructor) EXPECT_TRUE(nullptr != base); EXPECT_TRUE(base == derived); EXPECT_FALSE(base != derived); + + base.free(); } TEST(managed_ptr, copy_assignment_operator) @@ -264,7 +272,6 @@ TEST(managed_ptr, copy_assignment_operator) EXPECT_EQ(otherDerived->getValue(), expectedValue); EXPECT_NE(derived.get(), nullptr); - EXPECT_EQ(derived.use_count(), 2); EXPECT_TRUE(derived); EXPECT_FALSE(derived == nullptr); EXPECT_FALSE(nullptr == derived); @@ -274,7 +281,6 @@ TEST(managed_ptr, copy_assignment_operator) EXPECT_FALSE(derived != otherDerived); EXPECT_NE(otherDerived.get(), nullptr); - EXPECT_EQ(otherDerived.use_count(), 2); EXPECT_TRUE(otherDerived); EXPECT_FALSE(otherDerived == nullptr); EXPECT_FALSE(nullptr == otherDerived); @@ -282,6 +288,8 @@ TEST(managed_ptr, copy_assignment_operator) EXPECT_TRUE(nullptr != otherDerived); EXPECT_TRUE(otherDerived == derived); EXPECT_FALSE(otherDerived != derived); + + derived.free(); } TEST(managed_ptr, copy_constructor_from_default_constructed) @@ -290,13 +298,11 @@ TEST(managed_ptr, copy_constructor_from_default_constructed) chai::managed_ptr otherDerived(derived); EXPECT_EQ(derived.get(), nullptr); - EXPECT_EQ(derived.use_count(), 0); EXPECT_EQ(bool(derived), false); EXPECT_EQ(derived, nullptr); EXPECT_EQ(nullptr, derived); EXPECT_EQ(otherDerived.get(), nullptr); - EXPECT_EQ(otherDerived.use_count(), 0); EXPECT_EQ(bool(otherDerived), false); EXPECT_EQ(otherDerived, nullptr); EXPECT_EQ(nullptr, otherDerived); @@ -309,13 +315,11 @@ TEST(managed_ptr, copy_assignment_operator_from_default_constructed) otherDerived = derived; EXPECT_EQ(derived.get(), nullptr); - EXPECT_EQ(derived.use_count(), 0); EXPECT_EQ(bool(derived), false); EXPECT_EQ(derived, nullptr); EXPECT_EQ(nullptr, derived); EXPECT_EQ(otherDerived.get(), nullptr); - EXPECT_EQ(otherDerived.use_count(), 0); EXPECT_EQ(bool(otherDerived), false); EXPECT_EQ(otherDerived, nullptr); EXPECT_EQ(nullptr, otherDerived); @@ -327,13 +331,11 @@ TEST(managed_ptr, conversion_copy_constructor_from_default_constructed) chai::managed_ptr otherDerived(derived); EXPECT_EQ(derived.get(), nullptr); - EXPECT_EQ(derived.use_count(), 0); EXPECT_EQ(bool(derived), false); EXPECT_EQ(derived, nullptr); EXPECT_EQ(nullptr, derived); EXPECT_EQ(otherDerived.get(), nullptr); - EXPECT_EQ(otherDerived.use_count(), 0); EXPECT_EQ(bool(otherDerived), false); EXPECT_EQ(otherDerived, nullptr); EXPECT_EQ(nullptr, otherDerived); @@ -346,13 +348,11 @@ TEST(managed_ptr, conversion_copy_assignment_operator_from_default_constructed) otherDerived = derived; EXPECT_EQ(derived.get(), nullptr); - EXPECT_EQ(derived.use_count(), 0); EXPECT_EQ(bool(derived), false); EXPECT_EQ(derived, nullptr); EXPECT_EQ(nullptr, 
derived); EXPECT_EQ(otherDerived.get(), nullptr); - EXPECT_EQ(otherDerived.use_count(), 0); EXPECT_EQ(bool(otherDerived), false); EXPECT_EQ(otherDerived, nullptr); EXPECT_EQ(nullptr, otherDerived); @@ -370,22 +370,22 @@ TEST(managed_ptr, copy_assignment_operator_from_host_ptr_constructed) thirdDerived = derived; EXPECT_NE(derived.get(), nullptr); - EXPECT_EQ(derived.use_count(), 2); EXPECT_EQ(bool(derived), true); EXPECT_NE(derived, nullptr); EXPECT_NE(nullptr, derived); EXPECT_NE(otherDerived.get(), nullptr); - EXPECT_EQ(otherDerived.use_count(), 1); EXPECT_EQ(bool(otherDerived), true); EXPECT_NE(otherDerived, nullptr); EXPECT_NE(nullptr, otherDerived); EXPECT_NE(thirdDerived.get(), nullptr); - EXPECT_EQ(thirdDerived.use_count(), 2); EXPECT_EQ(bool(thirdDerived), true); EXPECT_NE(thirdDerived, nullptr); EXPECT_NE(nullptr, thirdDerived); + + otherDerived.free(); + thirdDerived.free(); } TEST(managed_ptr, conversion_copy_assignment_operator_from_host_ptr_constructed) @@ -400,22 +400,22 @@ TEST(managed_ptr, conversion_copy_assignment_operator_from_host_ptr_constructed) thirdDerived = derived; EXPECT_NE(derived.get(), nullptr); - EXPECT_EQ(derived.use_count(), 2); EXPECT_EQ(bool(derived), true); EXPECT_NE(derived, nullptr); EXPECT_NE(nullptr, derived); EXPECT_NE(otherDerived.get(), nullptr); - EXPECT_EQ(otherDerived.use_count(), 1); EXPECT_EQ(bool(otherDerived), true); EXPECT_NE(otherDerived, nullptr); EXPECT_NE(nullptr, otherDerived); EXPECT_NE(thirdDerived.get(), nullptr); - EXPECT_EQ(thirdDerived.use_count(), 2); EXPECT_EQ(bool(thirdDerived), true); EXPECT_NE(thirdDerived, nullptr); EXPECT_NE(nullptr, thirdDerived); + + otherDerived.free(); + thirdDerived.free(); } TEST(managed_ptr, static_pointer_cast) @@ -428,12 +428,13 @@ TEST(managed_ptr, static_pointer_cast) EXPECT_EQ(base->getValue(), 3); EXPECT_NE(base.get(), nullptr); - EXPECT_EQ(base.use_count(), 2); EXPECT_TRUE(base); EXPECT_FALSE(base == nullptr); EXPECT_FALSE(nullptr == base); EXPECT_TRUE(base != nullptr); EXPECT_TRUE(nullptr != base); + + derived.free(); } TEST(managed_ptr, dynamic_pointer_cast) @@ -446,12 +447,13 @@ TEST(managed_ptr, dynamic_pointer_cast) EXPECT_EQ(derived->getValue(), 3); EXPECT_NE(derived.get(), nullptr); - EXPECT_EQ(derived.use_count(), 2); EXPECT_TRUE(derived); EXPECT_FALSE(derived == nullptr); EXPECT_FALSE(nullptr == derived); EXPECT_TRUE(derived != nullptr); EXPECT_TRUE(nullptr != derived); + + derived.free(); } TEST(managed_ptr, const_pointer_cast) @@ -464,12 +466,13 @@ TEST(managed_ptr, const_pointer_cast) EXPECT_EQ(nonConstBase->getValue(), 3); EXPECT_NE(nonConstBase.get(), nullptr); - EXPECT_EQ(nonConstBase.use_count(), 2); EXPECT_TRUE(nonConstBase); EXPECT_FALSE(nonConstBase == nullptr); EXPECT_FALSE(nullptr == nonConstBase); EXPECT_TRUE(nonConstBase != nullptr); EXPECT_TRUE(nullptr != nonConstBase); + + base.free(); } TEST(managed_ptr, reinterpret_pointer_cast) @@ -482,12 +485,13 @@ TEST(managed_ptr, reinterpret_pointer_cast) EXPECT_EQ(derived->getValue(), 3); EXPECT_NE(derived.get(), nullptr); - EXPECT_EQ(derived.use_count(), 2); EXPECT_TRUE(derived); EXPECT_FALSE(derived == nullptr); EXPECT_FALSE(nullptr == derived); EXPECT_TRUE(derived != nullptr); EXPECT_TRUE(nullptr != derived); + + derived.free(); } #ifdef __CUDACC__ @@ -570,7 +574,6 @@ CUDA_TEST(managed_ptr, cuda_gpu_pointer_constructor) chai::managed_ptr derived({chai::GPU}, {gpuPointer}); EXPECT_EQ(derived.get(), nullptr); - EXPECT_EQ(derived.use_count(), 1); EXPECT_FALSE(derived); EXPECT_TRUE(derived == nullptr); EXPECT_TRUE(nullptr == 
derived); @@ -711,7 +714,6 @@ CUDA_TEST(managed_ptr, cuda_cpu_and_gpu_pointer_constructor) EXPECT_EQ(derived->getValue(), 4); EXPECT_NE(derived.get(), nullptr); - EXPECT_EQ(derived.use_count(), 1); EXPECT_TRUE(derived); EXPECT_FALSE(derived == nullptr); EXPECT_FALSE(nullptr == derived); @@ -771,7 +773,6 @@ CUDA_TEST(managed_ptr, cuda_make_managed) EXPECT_EQ(array[0], expectedValue); EXPECT_NE(array2[0], nullptr); - EXPECT_EQ(derived.use_count(), 1); EXPECT_TRUE(array3[0]); EXPECT_FALSE(array3[1]); EXPECT_FALSE(array3[2]); @@ -792,7 +793,6 @@ CUDA_TEST(managed_ptr, make_managed_from_factory_function) EXPECT_EQ((*derived).getValue(), expectedValue); EXPECT_NE(derived.get(), nullptr); - EXPECT_EQ(derived.use_count(), 1); EXPECT_TRUE(derived); EXPECT_FALSE(derived == nullptr); EXPECT_FALSE(nullptr == derived); @@ -813,7 +813,6 @@ CUDA_TEST(managed_ptr, make_managed_from_factory_lambda) EXPECT_EQ((*derived).getValue(), expectedValue); EXPECT_NE(derived.get(), nullptr); - EXPECT_EQ(derived.use_count(), 1); EXPECT_TRUE(derived); EXPECT_FALSE(derived == nullptr); EXPECT_FALSE(nullptr == derived); @@ -834,7 +833,6 @@ CUDA_TEST(managed_ptr, make_managed_from_overloaded_factory_function) EXPECT_EQ((*derived).getValue(), expectedValue); EXPECT_NE(derived.get(), nullptr); - EXPECT_EQ(derived.use_count(), 1); EXPECT_TRUE(derived); EXPECT_FALSE(derived == nullptr); EXPECT_FALSE(nullptr == derived); @@ -855,7 +853,6 @@ CUDA_TEST(managed_ptr, make_managed_from_factory_static_member_function) EXPECT_EQ((*derived).getValue(), expectedValue); EXPECT_NE(derived.get(), nullptr); - EXPECT_EQ(derived.use_count(), 1); EXPECT_TRUE(derived); EXPECT_FALSE(derived == nullptr); EXPECT_FALSE(nullptr == derived); From f08d93759992acf65ee2db48c28a90d49d160435 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Tue, 1 Oct 2019 14:13:21 -0700 Subject: [PATCH 26/58] Fixed leaks in managed_ptr integration tests --- tests/integration/managed_ptr_tests.cpp | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/tests/integration/managed_ptr_tests.cpp b/tests/integration/managed_ptr_tests.cpp index f5d22680..33435a1f 100644 --- a/tests/integration/managed_ptr_tests.cpp +++ b/tests/integration/managed_ptr_tests.cpp @@ -201,6 +201,7 @@ TEST(managed_ptr, class_with_raw_array) ASSERT_EQ(rawArrayClass->getValue(0), expectedValue); array.free(); + rawArrayClass.free(); } TEST(managed_ptr, class_with_multiple_raw_arrays) @@ -220,6 +221,10 @@ TEST(managed_ptr, class_with_multiple_raw_arrays) ASSERT_EQ(multipleRawArrayClass->getValue(0, 0), expectedValue1); ASSERT_EQ(multipleRawArrayClass->getValue(1, 0), expectedValue2); + + array1.free(); + array2.free(); + multipleRawArrayClass.free(); } TEST(managed_ptr, class_with_managed_array) @@ -235,6 +240,9 @@ TEST(managed_ptr, class_with_managed_array) auto derived = chai::make_managed(array); ASSERT_EQ(derived->getValue(0), expectedValue); + + array.free(); + derived.free(); } TEST(managed_ptr, class_with_raw_ptr) @@ -250,15 +258,11 @@ TEST(managed_ptr, class_with_raw_ptr) auto rawArrayClass = chai::make_managed(array); auto rawPointerClass = chai::make_managed(rawArrayClass); - // This prevents the pointers contained by rawArrayClass from being deleted - // out from under us. Otherwise, rawArrayClass is the last remaining reference - // and if it is destroyed before rawPointerClass is, then we are in trouble. 
- rawPointerClass.set_callback([=] (chai::Action, chai::ExecutionSpace, void*) { - (void) rawArrayClass; return false; - }); - rawArrayClass = nullptr; - ASSERT_EQ((*rawPointerClass).getValue(0), expectedValue); + + array.free(); + rawArrayClass.free(); + rawPointerClass.free(); } TEST(managed_ptr, class_with_managed_ptr) @@ -269,6 +273,8 @@ TEST(managed_ptr, class_with_managed_ptr) TestContainer container(derived); ASSERT_EQ(container.getValue(), expectedValue); + + derived.free(); } TEST(managed_ptr, nested_managed_ptr) @@ -279,6 +285,9 @@ TEST(managed_ptr, nested_managed_ptr) auto container = chai::make_managed(derived); ASSERT_EQ(container->getValue(), expectedValue); + + derived.free(); + container.free(); } #ifdef __CUDACC__ From 05b0027f582309147e41dcd84ed0ba16dd8add13 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Tue, 1 Oct 2019 14:25:37 -0700 Subject: [PATCH 27/58] Fix cuda compile errors --- tests/integration/managed_ptr_tests.cpp | 60 ++++++++++++------------- tests/unit/managed_ptr_unit_tests.cpp | 56 +++++++++++------------ 2 files changed, 58 insertions(+), 58 deletions(-) diff --git a/tests/integration/managed_ptr_tests.cpp b/tests/integration/managed_ptr_tests.cpp index 33435a1f..94982121 100644 --- a/tests/integration/managed_ptr_tests.cpp +++ b/tests/integration/managed_ptr_tests.cpp @@ -42,10 +42,10 @@ // --------------------------------------------------------------------- #include "gtest/gtest.h" -#define CUDA_TEST(X, Y) \ - static void cuda_test_##X##Y(); \ - TEST(X, Y) { cuda_test_##X##Y(); } \ - static void cuda_test_##X##Y() +#define GPU_TEST(X, Y) \ + static void gpu_test_##X##Y(); \ + TEST(X, Y) { gpu_test_##X##Y(); } \ + static void gpu_test_##X##Y() #include "chai/config.hpp" #include "chai/ManagedArray.hpp" @@ -306,7 +306,7 @@ __global__ void passObjectToKernel(chai::ManagedArray arr) { arr[0] = -1; } -CUDA_TEST(managed_ptr, make_on_device) +GPU_TEST(managed_ptr, make_on_device) { int** hostArray = (int**) malloc(sizeof(int*)); hostArray[0] = nullptr; @@ -331,7 +331,7 @@ CUDA_TEST(managed_ptr, make_on_device) cudaFree(deviceArray2); } -CUDA_TEST(managed_ptr, cuda_new_and_delete_on_device) +GPU_TEST(managed_ptr, gpu_new_and_delete_on_device) { // Initialize host side memory to hold a pointer RawArrayClass** cpuPointerHolder = (RawArrayClass**) malloc(sizeof(RawArrayClass*)); @@ -360,7 +360,7 @@ CUDA_TEST(managed_ptr, cuda_new_and_delete_on_device) chai::detail::destroy_on_device<<<1, 1>>>(gpuPointer); } -CUDA_TEST(managed_ptr, cuda_build_managed_ptr) +GPU_TEST(managed_ptr, gpu_build_managed_ptr) { // Initialize host side memory to hold a pointer RawArrayClass** cpuPointerHolder = (RawArrayClass**) malloc(sizeof(RawArrayClass*)); @@ -390,7 +390,7 @@ CUDA_TEST(managed_ptr, cuda_build_managed_ptr) } -CUDA_TEST(managed_ptr, pass_object_to_kernel) +GPU_TEST(managed_ptr, pass_object_to_kernel) { const int expectedValue = rand(); @@ -409,7 +409,7 @@ CUDA_TEST(managed_ptr, pass_object_to_kernel) ASSERT_EQ(array[0], -1); } -CUDA_TEST(managed_ptr, cuda_class_with_raw_array) +GPU_TEST(managed_ptr, gpu_class_with_raw_array) { const int expectedValue = rand(); @@ -422,7 +422,7 @@ CUDA_TEST(managed_ptr, cuda_class_with_raw_array) auto rawArrayClass = chai::make_managed(array); chai::ManagedArray results(1, chai::GPU); - forall(cuda(), 0, 1, [=] __device__ (int i) { + forall(gpu(), 0, 1, [=] __device__ (int i) { results[i] = rawArrayClass->getValue(i); }); @@ -430,7 +430,7 @@ CUDA_TEST(managed_ptr, cuda_class_with_raw_array) ASSERT_EQ(results[0], expectedValue); } 
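The tests above drive their loop bodies on the host or the device through the forall helper included from util/forall.hpp, which is not part of this patch series. The sketch below is a minimal illustration, under CUDA, of what such a helper could look like: the sequential and gpu tag types are the ones the tests construct, but the forall_kernel name, the launch configuration, and the body are illustrative assumptions rather than CHAI's actual implementation. Setting the execution space around the launch mirrors what pass_object_to_kernel earlier in this file does by hand before its raw kernel launch.

#include "chai/ArrayManager.hpp"
#include "chai/ExecutionSpaces.hpp"

// Minimal sketch of a forall helper (assumed names; not the real util/forall.hpp).
struct sequential {};
struct gpu {};

template <typename Body>
void forall(sequential, int begin, int end, Body body)
{
  // Tell CHAI that the body runs on the host.
  chai::ArrayManager::getInstance()->setExecutionSpace(chai::CPU);

  for (int i = begin; i < end; ++i) {
    body(i);
  }

  chai::ArrayManager::getInstance()->setExecutionSpace(chai::NONE);
}

#ifdef __CUDACC__
// One thread per index; invokes the device lambda with the global index.
template <typename Body>
__global__ void forall_kernel(int begin, int length, Body body)
{
  int i = blockIdx.x * blockDim.x + threadIdx.x;

  if (i < length) {
    body(begin + i);
  }
}

template <typename Body>
void forall(gpu, int begin, int end, Body body)
{
  // With the execution space set to GPU, the copy of the lambda (and of any
  // captured ManagedArray) made at kernel launch time is what lets CHAI
  // migrate data to the device.
  chai::ArrayManager::getInstance()->setExecutionSpace(chai::GPU);

  int length = end - begin;
  forall_kernel<<<(length + 255) / 256, 256>>>(begin, length, body);
  cudaDeviceSynchronize();

  chai::ArrayManager::getInstance()->setExecutionSpace(chai::NONE);
}
#endif

Keeping the launch and synchronization inside the helper is why the GPU_TEST bodies stay free of raw CUDA calls except where a test deliberately exercises them.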
-CUDA_TEST(managed_ptr, cuda_class_with_raw_array_and_callback) +GPU_TEST(managed_ptr, gpu_class_with_raw_array_and_callback) { const int expectedValue = rand(); @@ -464,7 +464,7 @@ CUDA_TEST(managed_ptr, cuda_class_with_raw_array_and_callback) chai::ManagedArray results(1, chai::GPU); - forall(cuda(), 0, 1, [=] __device__ (int i) { + forall(gpu(), 0, 1, [=] __device__ (int i) { results[i] = managedPointer->getValue(i); }); @@ -472,7 +472,7 @@ CUDA_TEST(managed_ptr, cuda_class_with_raw_array_and_callback) ASSERT_EQ(results[0], expectedValue); } -CUDA_TEST(managed_ptr, cuda_class_with_managed_array) +GPU_TEST(managed_ptr, gpu_class_with_managed_array) { const int expectedValue = rand(); @@ -486,7 +486,7 @@ CUDA_TEST(managed_ptr, cuda_class_with_managed_array) chai::ManagedArray results(1, chai::GPU); - forall(cuda(), 0, 1, [=] __device__ (int i) { + forall(gpu(), 0, 1, [=] __device__ (int i) { results[i] = derived->getValue(i); }); @@ -495,7 +495,7 @@ CUDA_TEST(managed_ptr, cuda_class_with_managed_array) ASSERT_EQ(results[0], expectedValue); } -CUDA_TEST(managed_ptr, cuda_class_with_raw_ptr) +GPU_TEST(managed_ptr, gpu_class_with_raw_ptr) { const int expectedValue = rand(); @@ -510,7 +510,7 @@ CUDA_TEST(managed_ptr, cuda_class_with_raw_ptr) chai::ManagedArray results(1, chai::GPU); - forall(cuda(), 0, 1, [=] __device__ (int i) { + forall(gpu(), 0, 1, [=] __device__ (int i) { results[i] = (*rawPointerClass).getValue(i); }); @@ -518,7 +518,7 @@ CUDA_TEST(managed_ptr, cuda_class_with_raw_ptr) ASSERT_EQ(results[0], expectedValue); } -CUDA_TEST(managed_ptr, cuda_class_with_managed_ptr) +GPU_TEST(managed_ptr, gpu_class_with_managed_ptr) { const int expectedValue = rand(); @@ -527,7 +527,7 @@ CUDA_TEST(managed_ptr, cuda_class_with_managed_ptr) chai::ManagedArray results(1, chai::GPU); - forall(cuda(), 0, 1, [=] __device__ (int i) { + forall(gpu(), 0, 1, [=] __device__ (int i) { results[i] = container.getValue(); }); @@ -535,7 +535,7 @@ CUDA_TEST(managed_ptr, cuda_class_with_managed_ptr) ASSERT_EQ(results[0], expectedValue); } -CUDA_TEST(managed_ptr, cuda_nested_managed_ptr) +GPU_TEST(managed_ptr, gpu_nested_managed_ptr) { const int expectedValue = rand(); @@ -544,7 +544,7 @@ CUDA_TEST(managed_ptr, cuda_nested_managed_ptr) chai::ManagedArray results(1, chai::GPU); - forall(cuda(), 0, 1, [=] __device__ (int i) { + forall(gpu(), 0, 1, [=] __device__ (int i) { results[i] = container->getValue(); }); @@ -552,7 +552,7 @@ CUDA_TEST(managed_ptr, cuda_nested_managed_ptr) ASSERT_EQ(results[0], expectedValue); } -CUDA_TEST(managed_ptr, cuda_multiple_inheritance) +GPU_TEST(managed_ptr, gpu_multiple_inheritance) { auto derived = chai::make_managed(); @@ -561,7 +561,7 @@ CUDA_TEST(managed_ptr, cuda_multiple_inheritance) chai::ManagedArray results(2, chai::GPU); - forall(cuda(), 0, 1, [=] __device__ (int i) { + forall(gpu(), 0, 1, [=] __device__ (int i) { results[i] = base1->isBase1(); results[1] = base2->isBase2(); }); @@ -573,7 +573,7 @@ CUDA_TEST(managed_ptr, cuda_multiple_inheritance) ASSERT_EQ(results[1], true); } -CUDA_TEST(managed_ptr, static_pointer_cast) +GPU_TEST(managed_ptr, static_pointer_cast) { const int expectedValue = rand(); @@ -589,7 +589,7 @@ CUDA_TEST(managed_ptr, static_pointer_cast) chai::ManagedArray results(3, chai::GPU); - forall(cuda(), 0, 1, [=] __device__ (int i) { + forall(gpu(), 0, 1, [=] __device__ (int i) { results[i] = derived->getValue(i); results[1] = base->getValue(i); results[2] = derivedFromBase->getValue(i); @@ -602,7 +602,7 @@ CUDA_TEST(managed_ptr, static_pointer_cast) 
ASSERT_EQ(results[2], expectedValue); } -CUDA_TEST(managed_ptr, dynamic_pointer_cast) +GPU_TEST(managed_ptr, dynamic_pointer_cast) { const int expectedValue = rand(); @@ -618,7 +618,7 @@ CUDA_TEST(managed_ptr, dynamic_pointer_cast) chai::ManagedArray results(3, chai::GPU); - forall(cuda(), 0, 1, [=] __device__ (int i) { + forall(gpu(), 0, 1, [=] __device__ (int i) { results[i] = derived->getValue(i); results[1] = base->getValue(i); results[2] = derivedFromBase->getValue(i); @@ -631,7 +631,7 @@ CUDA_TEST(managed_ptr, dynamic_pointer_cast) ASSERT_EQ(results[2], expectedValue); } -CUDA_TEST(managed_ptr, const_pointer_cast) +GPU_TEST(managed_ptr, const_pointer_cast) { const int expectedValue = rand(); @@ -647,7 +647,7 @@ CUDA_TEST(managed_ptr, const_pointer_cast) chai::ManagedArray results(3, chai::GPU); - forall(cuda(), 0, 1, [=] __device__ (int i) { + forall(gpu(), 0, 1, [=] __device__ (int i) { results[i] = derived->getValue(i); results[1] = constDerived->getValue(i); results[2] = derivedFromConst->getValue(i); @@ -660,7 +660,7 @@ CUDA_TEST(managed_ptr, const_pointer_cast) ASSERT_EQ(results[2], expectedValue); } -CUDA_TEST(managed_ptr, reinterpret_pointer_cast) +GPU_TEST(managed_ptr, reinterpret_pointer_cast) { const int expectedValue = rand(); @@ -676,7 +676,7 @@ CUDA_TEST(managed_ptr, reinterpret_pointer_cast) chai::ManagedArray results(3, chai::GPU); - forall(cuda(), 0, 1, [=] __device__ (int i) { + forall(gpu(), 0, 1, [=] __device__ (int i) { results[i] = derived->getValue(i); results[1] = base->getValue(i); results[2] = derivedFromBase->getValue(i); diff --git a/tests/unit/managed_ptr_unit_tests.cpp b/tests/unit/managed_ptr_unit_tests.cpp index 4bef0cef..f0fef889 100644 --- a/tests/unit/managed_ptr_unit_tests.cpp +++ b/tests/unit/managed_ptr_unit_tests.cpp @@ -42,10 +42,10 @@ // --------------------------------------------------------------------- #include "gtest/gtest.h" -#define CUDA_TEST(X, Y) \ - static void cuda_test_##X_##Y(); \ - TEST(X, Y) { cuda_test_##X_##Y(); } \ - static void cuda_test_##X_##Y() +#define GPU_TEST(X, Y) \ + static void gpu_test_##X_##Y(); \ + TEST(X, Y) { gpu_test_##X_##Y(); } \ + static void gpu_test_##X_##Y() #include "chai/config.hpp" #include "chai/ManagedArray.hpp" @@ -496,7 +496,7 @@ TEST(managed_ptr, reinterpret_pointer_cast) #ifdef __CUDACC__ -CUDA_TEST(managed_ptr, cuda_default_constructor) +GPU_TEST(managed_ptr, gpu_default_constructor) { chai::managed_ptr derived; chai::managed_ptr otherDerived; @@ -504,7 +504,7 @@ CUDA_TEST(managed_ptr, cuda_default_constructor) chai::ManagedArray array(1, chai::GPU); chai::ManagedArray array2(9, chai::GPU); - forall(cuda(), 0, 1, [=] __device__ (int i) { + forall(gpu(), 0, 1, [=] __device__ (int i) { array[i] = derived.get(); array2[0] = (bool) derived; array2[1] = derived == nullptr; @@ -532,7 +532,7 @@ CUDA_TEST(managed_ptr, cuda_default_constructor) EXPECT_FALSE(array2[8]); } -CUDA_TEST(managed_ptr, cuda_nullptr_constructor) +GPU_TEST(managed_ptr, gpu_nullptr_constructor) { chai::managed_ptr derived = nullptr; chai::managed_ptr otherDerived = nullptr; @@ -540,7 +540,7 @@ CUDA_TEST(managed_ptr, cuda_nullptr_constructor) chai::ManagedArray array(1, chai::GPU); chai::ManagedArray array2(9, chai::GPU); - forall(cuda(), 0, 1, [=] __device__ (int i) { + forall(gpu(), 0, 1, [=] __device__ (int i) { array[i] = derived.get(); array2[0] = (bool) derived; array2[1] = derived == nullptr; @@ -568,7 +568,7 @@ CUDA_TEST(managed_ptr, cuda_nullptr_constructor) EXPECT_FALSE(array2[8]); } -CUDA_TEST(managed_ptr, 
cuda_gpu_pointer_constructor) +GPU_TEST(managed_ptr, gpu_gpu_pointer_constructor) { TestDerived* gpuPointer = chai::detail::make_on_device(3); chai::managed_ptr derived({chai::GPU}, {gpuPointer}); @@ -584,7 +584,7 @@ CUDA_TEST(managed_ptr, cuda_gpu_pointer_constructor) chai::ManagedArray array2(1, chai::GPU); chai::ManagedArray array3(5, chai::GPU); - forall(cuda(), 0, 1, [=] __device__ (int i) { + forall(gpu(), 0, 1, [=] __device__ (int i) { array1[i] = derived->getValue(); array2[i] = derived.get(); array3[0] = (bool) derived; @@ -607,7 +607,7 @@ CUDA_TEST(managed_ptr, cuda_gpu_pointer_constructor) EXPECT_TRUE(array3[4]); } -CUDA_TEST(managed_ptr, cuda_new_and_delete_on_device) +GPU_TEST(managed_ptr, gpu_new_and_delete_on_device) { // Initialize host side memory to hold a pointer Simple** cpuPointerHolder = (Simple**) malloc(sizeof(Simple*)); @@ -654,7 +654,7 @@ CUDA_TEST(managed_ptr, cuda_new_and_delete_on_device) cudaFree(gpuPointerHolder2); } -CUDA_TEST(managed_ptr, cuda_new_and_delete_on_device_2) +GPU_TEST(managed_ptr, gpu_new_and_delete_on_device_2) { // Initialize host side memory to hold a pointer Simple** cpuPointerHolder = (Simple**) malloc(sizeof(Simple*)); @@ -683,7 +683,7 @@ CUDA_TEST(managed_ptr, cuda_new_and_delete_on_device_2) chai::managed_ptr test({chai::GPU}, {gpuPointer}); } -CUDA_TEST(managed_ptr, simple_cuda_cpu_and_gpu_pointer_constructor) +GPU_TEST(managed_ptr, simple_cuda_cpu_and_gpu_pointer_constructor) { Simple* gpuPointer = chai::detail::make_on_device(3); Simple* cpuPointer = new Simple(4); @@ -694,7 +694,7 @@ CUDA_TEST(managed_ptr, simple_cuda_cpu_and_gpu_pointer_constructor) chai::ManagedArray array1(1, chai::GPU); - forall(cuda(), 0, 1, [=] __device__ (int i) { + forall(gpu(), 0, 1, [=] __device__ (int i) { array1[i] = simple->getValue(); }); @@ -705,7 +705,7 @@ CUDA_TEST(managed_ptr, simple_cuda_cpu_and_gpu_pointer_constructor) EXPECT_EQ(array1[0], 3); } -CUDA_TEST(managed_ptr, cuda_cpu_and_gpu_pointer_constructor) +GPU_TEST(managed_ptr, gpu_cpu_and_gpu_pointer_constructor) { TestDerived* gpuPointer = chai::detail::make_on_device(3); TestDerived* cpuPointer = new TestDerived(4); @@ -724,7 +724,7 @@ CUDA_TEST(managed_ptr, cuda_cpu_and_gpu_pointer_constructor) chai::ManagedArray array2(1, chai::GPU); chai::ManagedArray array3(5, chai::GPU); - forall(cuda(), 0, 1, [=] __device__ (int i) { + forall(gpu(), 0, 1, [=] __device__ (int i) { array1[i] = derived->getValue(); array2[i] = derived.get(); array3[0] = (bool) derived; @@ -747,7 +747,7 @@ CUDA_TEST(managed_ptr, cuda_cpu_and_gpu_pointer_constructor) EXPECT_TRUE(array3[4]); } -CUDA_TEST(managed_ptr, cuda_make_managed) +GPU_TEST(managed_ptr, gpu_make_managed) { const int expectedValue = rand(); auto derived = chai::make_managed(expectedValue); @@ -756,7 +756,7 @@ CUDA_TEST(managed_ptr, cuda_make_managed) chai::ManagedArray array2(1, chai::GPU); chai::ManagedArray array3(7, chai::GPU); - forall(cuda(), 0, 1, [=] __device__ (int i) { + forall(gpu(), 0, 1, [=] __device__ (int i) { array[i] = derived->getValue(); array2[i] = derived.get(); array3[0] = (bool) derived; @@ -780,7 +780,7 @@ CUDA_TEST(managed_ptr, cuda_make_managed) EXPECT_TRUE(array3[4]); } -CUDA_TEST(managed_ptr, make_managed_from_factory_function) +GPU_TEST(managed_ptr, make_managed_from_factory_function) { const int expectedValue = rand(); @@ -800,7 +800,7 @@ CUDA_TEST(managed_ptr, make_managed_from_factory_function) EXPECT_TRUE(nullptr != derived); } -CUDA_TEST(managed_ptr, make_managed_from_factory_lambda) +GPU_TEST(managed_ptr, 
make_managed_from_factory_lambda) { const int expectedValue = rand(); @@ -820,7 +820,7 @@ CUDA_TEST(managed_ptr, make_managed_from_factory_lambda) EXPECT_TRUE(nullptr != derived); } -CUDA_TEST(managed_ptr, make_managed_from_overloaded_factory_function) +GPU_TEST(managed_ptr, make_managed_from_overloaded_factory_function) { const int expectedValue = rand(); @@ -840,7 +840,7 @@ CUDA_TEST(managed_ptr, make_managed_from_overloaded_factory_function) EXPECT_TRUE(nullptr != derived); } -CUDA_TEST(managed_ptr, make_managed_from_factory_static_member_function) +GPU_TEST(managed_ptr, make_managed_from_factory_static_member_function) { const int expectedValue = rand(); @@ -860,7 +860,7 @@ CUDA_TEST(managed_ptr, make_managed_from_factory_static_member_function) EXPECT_TRUE(nullptr != derived); } -CUDA_TEST(managed_ptr, cuda_copy_constructor) +GPU_TEST(managed_ptr, gpu_copy_constructor) { const int expectedValue = rand(); auto derived = chai::make_managed(expectedValue); @@ -870,7 +870,7 @@ CUDA_TEST(managed_ptr, cuda_copy_constructor) chai::ManagedArray array2(2, chai::GPU); chai::ManagedArray array3(14, chai::GPU); - forall(cuda(), 0, 1, [=] __device__ (int i) { + forall(gpu(), 0, 1, [=] __device__ (int i) { array[i] = derived->getValue(); array2[0] = derived.get(); array3[0] = (bool) derived; @@ -918,7 +918,7 @@ CUDA_TEST(managed_ptr, cuda_copy_constructor) EXPECT_FALSE(array3[13]); } -CUDA_TEST(managed_ptr, cuda_converting_constructor) +GPU_TEST(managed_ptr, gpu_converting_constructor) { const int expectedValue = rand(); auto derived = chai::make_managed(expectedValue); @@ -928,7 +928,7 @@ CUDA_TEST(managed_ptr, cuda_converting_constructor) chai::ManagedArray array2(2, chai::GPU); chai::ManagedArray array3(14, chai::GPU); - forall(cuda(), 0, 1, [=] __device__ (int i) { + forall(gpu(), 0, 1, [=] __device__ (int i) { array[i] = derived->getValue(); array2[0] = derived.get(); array3[0] = (bool) derived; @@ -976,7 +976,7 @@ CUDA_TEST(managed_ptr, cuda_converting_constructor) EXPECT_FALSE(array3[13]); } -CUDA_TEST(managed_ptr, cuda_copy_assignment_operator) +GPU_TEST(managed_ptr, gpu_copy_assignment_operator) { const int expectedValue = rand(); auto derived = chai::make_managed(expectedValue); @@ -987,7 +987,7 @@ CUDA_TEST(managed_ptr, cuda_copy_assignment_operator) chai::ManagedArray array2(2, chai::GPU); chai::ManagedArray array3(14, chai::GPU); - forall(cuda(), 0, 1, [=] __device__ (int i) { + forall(gpu(), 0, 1, [=] __device__ (int i) { array[i] = derived->getValue(); array2[0] = derived.get(); array3[0] = (bool) derived; From 6893e8206a02feeb0756f98a32a651666fd3fa43 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Tue, 1 Oct 2019 15:17:22 -0700 Subject: [PATCH 28/58] Fix managed_ptr unit tests in the cuda build --- tests/unit/managed_ptr_unit_tests.cpp | 69 +++++++++++++++++++-------- 1 file changed, 49 insertions(+), 20 deletions(-) diff --git a/tests/unit/managed_ptr_unit_tests.cpp b/tests/unit/managed_ptr_unit_tests.cpp index f0fef889..2dd12b9a 100644 --- a/tests/unit/managed_ptr_unit_tests.cpp +++ b/tests/unit/managed_ptr_unit_tests.cpp @@ -530,6 +530,9 @@ GPU_TEST(managed_ptr, gpu_default_constructor) EXPECT_TRUE(array2[6]); EXPECT_FALSE(array2[7]); EXPECT_FALSE(array2[8]); + + array.free(); + array2.free(); } GPU_TEST(managed_ptr, gpu_nullptr_constructor) @@ -566,9 +569,12 @@ GPU_TEST(managed_ptr, gpu_nullptr_constructor) EXPECT_TRUE(array2[6]); EXPECT_FALSE(array2[7]); EXPECT_FALSE(array2[8]); + + array.free(); + array2.free(); } -GPU_TEST(managed_ptr, gpu_gpu_pointer_constructor) 
+GPU_TEST(managed_ptr, gpu_pointer_constructor) { TestDerived* gpuPointer = chai::detail::make_on_device(3); chai::managed_ptr derived({chai::GPU}, {gpuPointer}); @@ -605,6 +611,10 @@ GPU_TEST(managed_ptr, gpu_gpu_pointer_constructor) EXPECT_FALSE(array3[2]); EXPECT_TRUE(array3[3]); EXPECT_TRUE(array3[4]); + + array1.free(); + array2.free(); + array3.free(); } GPU_TEST(managed_ptr, gpu_new_and_delete_on_device) @@ -633,25 +643,7 @@ GPU_TEST(managed_ptr, gpu_new_and_delete_on_device) // Free host side memory free(cpuPointerHolder); - // Initialize more host side memory - Simple** cpuPointerHolder2 = (Simple**) malloc(sizeof(Simple*)); - cpuPointerHolder2[0] = gpuPointer; - - // Initialize more device side memory - Simple** gpuPointerHolder2 = nullptr; - cudaMalloc(&gpuPointerHolder2, sizeof(Simple*)); - - // Copy pointer back to the device - cudaMemcpy(gpuPointerHolder2, cpuPointerHolder2, sizeof(Simple*), - cudaMemcpyHostToDevice); - - chai::detail::destroy_on_device<<<1, 1>>>(gpuPointerHolder2); - - // Free host memory - free(cpuPointerHolder2); - - // Free device memory - cudaFree(gpuPointerHolder2); + chai::detail::destroy_on_device<<<1, 1>>>(gpuPointer); } GPU_TEST(managed_ptr, gpu_new_and_delete_on_device_2) @@ -681,6 +673,7 @@ GPU_TEST(managed_ptr, gpu_new_and_delete_on_device_2) free(cpuPointerHolder); chai::managed_ptr test({chai::GPU}, {gpuPointer}); + test.free(); } GPU_TEST(managed_ptr, simple_cuda_cpu_and_gpu_pointer_constructor) @@ -703,6 +696,9 @@ GPU_TEST(managed_ptr, simple_cuda_cpu_and_gpu_pointer_constructor) cudaDeviceSynchronize(); EXPECT_EQ(array1[0], 3); + + array1.free(); + simple.free(); } GPU_TEST(managed_ptr, gpu_cpu_and_gpu_pointer_constructor) @@ -745,6 +741,11 @@ GPU_TEST(managed_ptr, gpu_cpu_and_gpu_pointer_constructor) EXPECT_FALSE(array3[2]); EXPECT_TRUE(array3[3]); EXPECT_TRUE(array3[4]); + + array1.free(); + array2.free(); + array3.free(); + derived.free(); } GPU_TEST(managed_ptr, gpu_make_managed) @@ -778,6 +779,11 @@ GPU_TEST(managed_ptr, gpu_make_managed) EXPECT_FALSE(array3[2]); EXPECT_TRUE(array3[3]); EXPECT_TRUE(array3[4]); + + array.free(); + array2.free(); + array3.free(); + derived.free(); } GPU_TEST(managed_ptr, make_managed_from_factory_function) @@ -798,6 +804,8 @@ GPU_TEST(managed_ptr, make_managed_from_factory_function) EXPECT_FALSE(nullptr == derived); EXPECT_TRUE(derived != nullptr); EXPECT_TRUE(nullptr != derived); + + derived.free(); } GPU_TEST(managed_ptr, make_managed_from_factory_lambda) @@ -818,6 +826,8 @@ GPU_TEST(managed_ptr, make_managed_from_factory_lambda) EXPECT_FALSE(nullptr == derived); EXPECT_TRUE(derived != nullptr); EXPECT_TRUE(nullptr != derived); + + derived.free(); } GPU_TEST(managed_ptr, make_managed_from_overloaded_factory_function) @@ -838,6 +848,8 @@ GPU_TEST(managed_ptr, make_managed_from_overloaded_factory_function) EXPECT_FALSE(nullptr == derived); EXPECT_TRUE(derived != nullptr); EXPECT_TRUE(nullptr != derived); + + derived.free(); } GPU_TEST(managed_ptr, make_managed_from_factory_static_member_function) @@ -858,6 +870,8 @@ GPU_TEST(managed_ptr, make_managed_from_factory_static_member_function) EXPECT_FALSE(nullptr == derived); EXPECT_TRUE(derived != nullptr); EXPECT_TRUE(nullptr != derived); + + derived.free(); } GPU_TEST(managed_ptr, gpu_copy_constructor) @@ -916,6 +930,11 @@ GPU_TEST(managed_ptr, gpu_copy_constructor) EXPECT_TRUE(array3[11]); EXPECT_TRUE(array3[12]); EXPECT_FALSE(array3[13]); + + array.free(); + array2.free(); + array3.free(); + otherDerived.free(); } GPU_TEST(managed_ptr, 
gpu_converting_constructor) @@ -974,6 +993,11 @@ GPU_TEST(managed_ptr, gpu_converting_constructor) EXPECT_TRUE(array3[11]); EXPECT_TRUE(array3[12]); EXPECT_FALSE(array3[13]); + + array.free(); + array2.free(); + array3.free(); + derived.free(); } GPU_TEST(managed_ptr, gpu_copy_assignment_operator) @@ -1033,6 +1057,11 @@ GPU_TEST(managed_ptr, gpu_copy_assignment_operator) EXPECT_TRUE(array3[11]); EXPECT_TRUE(array3[12]); EXPECT_FALSE(array3[13]); + + array.free(); + array2.free(); + array3.free(); + otherDerived.free(); } #endif From 1e8c44c11802e4f0a9e1946eaebdf02407926696 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Tue, 1 Oct 2019 15:25:21 -0700 Subject: [PATCH 29/58] Fix memory leaks in integration tests --- tests/integration/managed_ptr_tests.cpp | 44 +++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/tests/integration/managed_ptr_tests.cpp b/tests/integration/managed_ptr_tests.cpp index 94982121..b27ea4c0 100644 --- a/tests/integration/managed_ptr_tests.cpp +++ b/tests/integration/managed_ptr_tests.cpp @@ -407,6 +407,8 @@ GPU_TEST(managed_ptr, pass_object_to_kernel) array.move(chai::CPU); cudaDeviceSynchronize(); ASSERT_EQ(array[0], -1); + + array.free(); } GPU_TEST(managed_ptr, gpu_class_with_raw_array) @@ -428,6 +430,10 @@ GPU_TEST(managed_ptr, gpu_class_with_raw_array) results.move(chai::CPU); ASSERT_EQ(results[0], expectedValue); + + array.free(); + rawArrayClass.free(); + results.free(); } GPU_TEST(managed_ptr, gpu_class_with_raw_array_and_callback) @@ -470,6 +476,9 @@ GPU_TEST(managed_ptr, gpu_class_with_raw_array_and_callback) results.move(chai::CPU); ASSERT_EQ(results[0], expectedValue); + + results.free(); + managedPointer.free(); } GPU_TEST(managed_ptr, gpu_class_with_managed_array) @@ -493,6 +502,10 @@ GPU_TEST(managed_ptr, gpu_class_with_managed_array) results.move(chai::CPU); ASSERT_EQ(results[0], expectedValue); + + array.free(); + derived.free(); + results.free(); } GPU_TEST(managed_ptr, gpu_class_with_raw_ptr) @@ -516,6 +529,11 @@ GPU_TEST(managed_ptr, gpu_class_with_raw_ptr) results.move(chai::CPU); ASSERT_EQ(results[0], expectedValue); + + array.free(); + rawArrayClass.free(); + rawPointerClass.free(); + results.free(); } GPU_TEST(managed_ptr, gpu_class_with_managed_ptr) @@ -533,6 +551,9 @@ GPU_TEST(managed_ptr, gpu_class_with_managed_ptr) results.move(chai::CPU); ASSERT_EQ(results[0], expectedValue); + + derived.free(); + results.free(); } GPU_TEST(managed_ptr, gpu_nested_managed_ptr) @@ -550,6 +571,10 @@ GPU_TEST(managed_ptr, gpu_nested_managed_ptr) results.move(chai::CPU); ASSERT_EQ(results[0], expectedValue); + + derived.free(); + container.free(); + results.free(); } GPU_TEST(managed_ptr, gpu_multiple_inheritance) @@ -571,6 +596,9 @@ GPU_TEST(managed_ptr, gpu_multiple_inheritance) ASSERT_EQ(results[0], true); ASSERT_EQ(results[1], true); + + derived.free(); + results.free(); } GPU_TEST(managed_ptr, static_pointer_cast) @@ -600,6 +628,10 @@ GPU_TEST(managed_ptr, static_pointer_cast) ASSERT_EQ(results[0], expectedValue); ASSERT_EQ(results[1], expectedValue); ASSERT_EQ(results[2], expectedValue); + + array.free(); + derived.free(); + results.free(); } GPU_TEST(managed_ptr, dynamic_pointer_cast) @@ -629,6 +661,10 @@ GPU_TEST(managed_ptr, dynamic_pointer_cast) ASSERT_EQ(results[0], expectedValue); ASSERT_EQ(results[1], expectedValue); ASSERT_EQ(results[2], expectedValue); + + array.free(); + base.free(); + results.free(); } GPU_TEST(managed_ptr, const_pointer_cast) @@ -658,6 +694,10 @@ GPU_TEST(managed_ptr, const_pointer_cast) 
ASSERT_EQ(results[0], expectedValue); ASSERT_EQ(results[1], expectedValue); ASSERT_EQ(results[2], expectedValue); + + array.free(); + derivedFromConst.free(); + results.free(); } GPU_TEST(managed_ptr, reinterpret_pointer_cast) @@ -687,6 +727,10 @@ GPU_TEST(managed_ptr, reinterpret_pointer_cast) ASSERT_EQ(results[0], expectedValue); ASSERT_EQ(results[1], expectedValue); ASSERT_EQ(results[2], expectedValue); + + array.free(); + derived.free(); + results.free(); } #endif From 18a88009be4644374f8e8978c9fb732eccc722c1 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Tue, 1 Oct 2019 15:27:33 -0700 Subject: [PATCH 30/58] Remove unnecessary synchronizes --- tests/integration/managed_ptr_tests.cpp | 5 ----- tests/unit/managed_ptr_unit_tests.cpp | 2 -- 2 files changed, 7 deletions(-) diff --git a/tests/integration/managed_ptr_tests.cpp b/tests/integration/managed_ptr_tests.cpp index b27ea4c0..36036d96 100644 --- a/tests/integration/managed_ptr_tests.cpp +++ b/tests/integration/managed_ptr_tests.cpp @@ -320,12 +320,10 @@ GPU_TEST(managed_ptr, make_on_device) deviceNew<<<1, 1>>>(deviceArray); cudaMemcpy(hostArray, deviceArray, sizeof(int*), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); cudaMemcpy(deviceArray2, hostArray, sizeof(int*), cudaMemcpyHostToDevice); ASSERT_NE(hostArray[0], nullptr); deviceDelete<<<1, 1>>>(deviceArray2); - cudaDeviceSynchronize(); free(hostArray); cudaFree(deviceArray); cudaFree(deviceArray2); @@ -403,9 +401,7 @@ GPU_TEST(managed_ptr, pass_object_to_kernel) chai::ArrayManager* manager = chai::ArrayManager::getInstance(); manager->setExecutionSpace(chai::GPU); passObjectToKernel<<<1, 1>>>(array); - cudaDeviceSynchronize(); array.move(chai::CPU); - cudaDeviceSynchronize(); ASSERT_EQ(array[0], -1); array.free(); @@ -592,7 +588,6 @@ GPU_TEST(managed_ptr, gpu_multiple_inheritance) }); results.move(chai::CPU); - cudaDeviceSynchronize(); ASSERT_EQ(results[0], true); ASSERT_EQ(results[1], true); diff --git a/tests/unit/managed_ptr_unit_tests.cpp b/tests/unit/managed_ptr_unit_tests.cpp index 2dd12b9a..721e9b40 100644 --- a/tests/unit/managed_ptr_unit_tests.cpp +++ b/tests/unit/managed_ptr_unit_tests.cpp @@ -693,8 +693,6 @@ GPU_TEST(managed_ptr, simple_cuda_cpu_and_gpu_pointer_constructor) array1.move(chai::CPU); - cudaDeviceSynchronize(); - EXPECT_EQ(array1[0], 3); array1.free(); From 87676b4f8d0e2ebbe662706c173a77abccd33101 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Wed, 9 Oct 2019 17:15:26 -0700 Subject: [PATCH 31/58] Add benchmarks --- benchmarks/CMakeLists.txt | 10 ++ benchmarks/chai_managed_ptr_benchmarks.cpp | 111 +++++++++++++++++++++ 2 files changed, 121 insertions(+) create mode 100644 benchmarks/chai_managed_ptr_benchmarks.cpp diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 6795f860..726cbf77 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -73,3 +73,13 @@ blt_add_executable( blt_add_benchmark( NAME managedarray_benchmarks COMMAND managedarray_benchmarks) + +blt_add_executable( + NAME managed_ptr_benchmarks + SOURCES chai_managed_ptr_benchmarks.cpp + DEPENDS_ON ${chai_benchmark_depends}) + +blt_add_benchmark( + NAME managed_ptr_benchmarks + COMMAND managed_ptr_benchmarks) + diff --git a/benchmarks/chai_managed_ptr_benchmarks.cpp b/benchmarks/chai_managed_ptr_benchmarks.cpp new file mode 100644 index 00000000..e9e603ec --- /dev/null +++ b/benchmarks/chai_managed_ptr_benchmarks.cpp @@ -0,0 +1,111 @@ +// --------------------------------------------------------------------- +// Copyright (c) 2016-2018, Lawrence 
Livermore National Security, LLC. All +// rights reserved. +// +// Produced at the Lawrence Livermore National Laboratory. +// +// This file is part of CHAI. +// +// LLNL-CODE-705877 +// +// For details, see https:://github.com/LLNL/CHAI +// Please also see the NOTICE and LICENSE files. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the +// distribution. +// +// - Neither the name of the LLNS/LLNL nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS +// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY +// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. 
+// --------------------------------------------------------------------- +#include + +#include "benchmark/benchmark_api.h" + +#include "chai/config.hpp" +#include "chai/managed_ptr.hpp" + +#include "../src/util/forall.hpp" + +class Base { + public: + CHAI_HOST_DEVICE virtual int getValue() const = 0; +}; + +class Derived : public Base { + public: + CHAI_HOST_DEVICE Derived(int value) : Base(), m_value(value) {} + + CHAI_HOST_DEVICE int getValue() const override { return m_value; } + + private: + int m_value = -1; +}; + +void benchmark_managed_ptr_construction_and_destruction(benchmark::State& state) +{ + while (state.KeepRunning()) { + chai::managed_ptr temp = chai::make_managed(state.range(0)); + temp.free(); + } + + state.SetItemsProcessed(state.iterations()); +} + +BENCHMARK(benchmark_managed_ptr_construction_and_destruction)->Range(1, 1); + +static chai::managed_ptr helper1 = chai::make_managed(1); + +void benchmark_managed_ptr_use_cpu(benchmark::State& state) +{ + while (state.KeepRunning()) { + auto helper = helper1; + forall(sequential(), 0, 1, [=] (int i) { (void) helper->getValue(); }); + } + + state.SetItemsProcessed(state.iterations()); +} + +BENCHMARK(benchmark_managed_ptr_use_cpu)->Range(1, 1); + +#if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) + +static chai::managed_ptr helper2 = chai::make_managed(2); + +void benchmark_managed_ptr_use_gpu(benchmark::State& state) +{ + while (state.KeepRunning()) { + auto helper = helper2; + forall(gpu(), 0, 1, [=] __device__ (int i) { (void) helper->getValue(); }); + } + + state.SetItemsProcessed(state.iterations()); +} + +BENCHMARK(benchmark_managed_ptr_use_gpu)->Range(1, 1); + +#endif + +BENCHMARK_MAIN(); From 7bf07616e91e88e432f322d0599c377db351766d Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Thu, 10 Oct 2019 09:55:46 -0700 Subject: [PATCH 32/58] Add comparison benchmarks for managed_ptr --- benchmarks/chai_managed_ptr_benchmarks.cpp | 88 ++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/benchmarks/chai_managed_ptr_benchmarks.cpp b/benchmarks/chai_managed_ptr_benchmarks.cpp index e9e603ec..a36aa8b8 100644 --- a/benchmarks/chai_managed_ptr_benchmarks.cpp +++ b/benchmarks/chai_managed_ptr_benchmarks.cpp @@ -64,6 +64,34 @@ class Derived : public Base { int m_value = -1; }; +template +class BaseCRTP { + public: + CHAI_HOST_DEVICE int getValue() const { + return static_cast(this)->getValue(); + } +}; + +class DerivedCRTP : public BaseCRTP { + public: + CHAI_HOST_DEVICE DerivedCRTP(int value) : BaseCRTP(), m_value(value) {} + + CHAI_HOST_DEVICE int getValue() const { return m_value; } + + private: + int m_value = -1; +}; + +class NoInheritance { + public: + CHAI_HOST_DEVICE NoInheritance(int value) : m_value(value) {} + + CHAI_HOST_DEVICE int getValue() const { return m_value; } + + private: + int m_value = -1; +}; + void benchmark_managed_ptr_construction_and_destruction(benchmark::State& state) { while (state.KeepRunning()) { @@ -90,6 +118,36 @@ void benchmark_managed_ptr_use_cpu(benchmark::State& state) BENCHMARK(benchmark_managed_ptr_use_cpu)->Range(1, 1); +// Curiously recurring template pattern +static BaseCRTP* derivedCRTP = new DerivedCRTP(3); + +void benchmark_curiously_recurring_template_pattern_cpu(benchmark::State& state) +{ + while (state.KeepRunning()) { + auto helper = derivedCRTP; + forall(sequential(), 0, 1, [=] (int i) { (void) helper->getValue(); }); + } + + state.SetItemsProcessed(state.iterations()); +} + +BENCHMARK(benchmark_curiously_recurring_template_pattern_cpu)->Range(1, 1); + +// 
Class without inheritance +static NoInheritance* noInheritance = new NoInheritance(5); + +void benchmark_no_inheritance_cpu(benchmark::State& state) +{ + while (state.KeepRunning()) { + auto helper = noInheritance; + forall(sequential(), 0, 1, [=] (int i) { (void) helper->getValue(); }); + } + + state.SetItemsProcessed(state.iterations()); +} + +BENCHMARK(benchmark_no_inheritance_cpu)->Range(1, 1); + #if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) static chai::managed_ptr helper2 = chai::make_managed(2); @@ -106,6 +164,36 @@ void benchmark_managed_ptr_use_gpu(benchmark::State& state) BENCHMARK(benchmark_managed_ptr_use_gpu)->Range(1, 1); +// Curiously recurring template pattern +static BaseCRTP* derivedCRTP2 = new DerivedCRTP(4); + +void benchmark_curiously_recurring_template_pattern_gpu(benchmark::State& state) +{ + while (state.KeepRunning()) { + auto helper = *derivedCRTP2; + forall(gpu(), 0, 1, [=] __device__ (int i) { (void) helper.getValue(); }); + } + + state.SetItemsProcessed(state.iterations()); +} + +BENCHMARK(benchmark_curiously_recurring_template_pattern_gpu)->Range(1, 1); + +// Class without inheritance +static NoInheritance* noInheritance2 = new NoInheritance(5); + +void benchmark_no_inheritance_gpu(benchmark::State& state) +{ + while (state.KeepRunning()) { + auto helper = *noInheritance2; + forall(gpu(), 0, 1, [=] __device__ (int i) { (void) helper.getValue(); }); + } + + state.SetItemsProcessed(state.iterations()); +} + +BENCHMARK(benchmark_no_inheritance_gpu)->Range(1, 1); + #endif BENCHMARK_MAIN(); From 795947c53d947f36256640b22813cfc56c68b70c Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Thu, 10 Oct 2019 14:36:15 -0700 Subject: [PATCH 33/58] Add a lot more benchmarks to compare different approaches --- benchmarks/chai_managed_ptr_benchmarks.cpp | 555 +++++++++++++++++++-- 1 file changed, 517 insertions(+), 38 deletions(-) diff --git a/benchmarks/chai_managed_ptr_benchmarks.cpp b/benchmarks/chai_managed_ptr_benchmarks.cpp index a36aa8b8..3cdaaacf 100644 --- a/benchmarks/chai_managed_ptr_benchmarks.cpp +++ b/benchmarks/chai_managed_ptr_benchmarks.cpp @@ -42,7 +42,7 @@ // --------------------------------------------------------------------- #include -#include "benchmark/benchmark_api.h" +#include "benchmark/benchmark.h" #include "chai/config.hpp" #include "chai/managed_ptr.hpp" @@ -92,107 +92,586 @@ class NoInheritance { int m_value = -1; }; -void benchmark_managed_ptr_construction_and_destruction(benchmark::State& state) +template +class ClassWithSize { + private: + char m_values[N]; +}; + +static void benchmark_managed_ptr_construction_and_destruction(benchmark::State& state) { while (state.KeepRunning()) { - chai::managed_ptr temp = chai::make_managed(state.range(0)); + chai::managed_ptr temp = chai::make_managed(1); temp.free(); } - - state.SetItemsProcessed(state.iterations()); } -BENCHMARK(benchmark_managed_ptr_construction_and_destruction)->Range(1, 1); +BENCHMARK(benchmark_managed_ptr_construction_and_destruction); -static chai::managed_ptr helper1 = chai::make_managed(1); - -void benchmark_managed_ptr_use_cpu(benchmark::State& state) +// managed_ptr +static void benchmark_managed_ptr_use_cpu(benchmark::State& state) { + chai::managed_ptr helper = chai::make_managed(1); + while (state.KeepRunning()) { - auto helper = helper1; forall(sequential(), 0, 1, [=] (int i) { (void) helper->getValue(); }); } - state.SetItemsProcessed(state.iterations()); + helper.free(); } -BENCHMARK(benchmark_managed_ptr_use_cpu)->Range(1, 1); 
+BENCHMARK(benchmark_managed_ptr_use_cpu); // Curiously recurring template pattern -static BaseCRTP* derivedCRTP = new DerivedCRTP(3); - -void benchmark_curiously_recurring_template_pattern_cpu(benchmark::State& state) +static void benchmark_curiously_recurring_template_pattern_cpu(benchmark::State& state) { + BaseCRTP* helper = new DerivedCRTP(3); + while (state.KeepRunning()) { - auto helper = derivedCRTP; forall(sequential(), 0, 1, [=] (int i) { (void) helper->getValue(); }); } - state.SetItemsProcessed(state.iterations()); + delete helper; } -BENCHMARK(benchmark_curiously_recurring_template_pattern_cpu)->Range(1, 1); +BENCHMARK(benchmark_curiously_recurring_template_pattern_cpu); // Class without inheritance -static NoInheritance* noInheritance = new NoInheritance(5); - -void benchmark_no_inheritance_cpu(benchmark::State& state) +static void benchmark_no_inheritance_cpu(benchmark::State& state) { + NoInheritance* helper = new NoInheritance(5); + while (state.KeepRunning()) { - auto helper = noInheritance; forall(sequential(), 0, 1, [=] (int i) { (void) helper->getValue(); }); } - state.SetItemsProcessed(state.iterations()); + delete helper; } -BENCHMARK(benchmark_no_inheritance_cpu)->Range(1, 1); +BENCHMARK(benchmark_no_inheritance_cpu); #if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) -static chai::managed_ptr helper2 = chai::make_managed(2); +template +__global__ void copy_kernel(ClassWithSize) {} + +// Benchmark how long it takes to copy a class to the GPU +void benchmark_pass_copy_to_gpu_8(benchmark::State& state) +{ + ClassWithSize<8> helper; + + while (state.KeepRunning()) { + copy_kernel<<<1, 1>>>(helper); + cudaDeviceSynchronize(); + } +} + +BENCHMARK(benchmark_pass_copy_to_gpu_8); + +void benchmark_pass_copy_to_gpu_64(benchmark::State& state) +{ + ClassWithSize<64> helper; + + while (state.KeepRunning()) { + copy_kernel<<<1, 1>>>(helper); + cudaDeviceSynchronize(); + } +} + +BENCHMARK(benchmark_pass_copy_to_gpu_64); + +void benchmark_pass_copy_to_gpu_512(benchmark::State& state) +{ + ClassWithSize<512> helper; + + while (state.KeepRunning()) { + copy_kernel<<<1, 1>>>(helper); + cudaDeviceSynchronize(); + } +} + +BENCHMARK(benchmark_pass_copy_to_gpu_512); + +void benchmark_pass_copy_to_gpu_4096(benchmark::State& state) +{ + ClassWithSize<4096> helper; + + while (state.KeepRunning()) { + copy_kernel<<<1, 1>>>(helper); + cudaDeviceSynchronize(); + } +} + +BENCHMARK(benchmark_pass_copy_to_gpu_4096); void benchmark_managed_ptr_use_gpu(benchmark::State& state) { + chai::managed_ptr helper = chai::make_managed(2); + while (state.KeepRunning()) { - auto helper = helper2; forall(gpu(), 0, 1, [=] __device__ (int i) { (void) helper->getValue(); }); } - state.SetItemsProcessed(state.iterations()); + helper.free(); } -BENCHMARK(benchmark_managed_ptr_use_gpu)->Range(1, 1); +// Benchmark how long it takes to call placement new on the GPU +template +__global__ void placement_new_kernel(ClassWithSize* address) { + (void) new(address) ClassWithSize(); +} -// Curiously recurring template pattern -static BaseCRTP* derivedCRTP2 = new DerivedCRTP(4); +template +__global__ void placement_delete_kernel(ClassWithSize* address) { + address->~ClassWithSize(); +} + +void benchmark_placement_new_on_gpu_8(benchmark::State& state) +{ + while (state.KeepRunning()) { + ClassWithSize<8>* address; + cudaMalloc(&address, sizeof(ClassWithSize<8>)); + placement_new_kernel<<<1, 1>>>(address); + placement_delete_kernel<<<1, 1>>>(address); + cudaFree(address); + cudaDeviceSynchronize(); + } +} + 
+BENCHMARK(benchmark_placement_new_on_gpu_8); + +void benchmark_placement_new_on_gpu_64(benchmark::State& state) +{ + while (state.KeepRunning()) { + ClassWithSize<64>* address; + cudaMalloc(&address, sizeof(ClassWithSize<64>)); + placement_new_kernel<<<1, 1>>>(address); + placement_delete_kernel<<<1, 1>>>(address); + cudaFree(address); + cudaDeviceSynchronize(); + } +} + +BENCHMARK(benchmark_placement_new_on_gpu_64); + +void benchmark_placement_new_on_gpu_512(benchmark::State& state) +{ + while (state.KeepRunning()) { + ClassWithSize<512>* address; + cudaMalloc(&address, sizeof(ClassWithSize<512>)); + placement_new_kernel<<<1, 1>>>(address); + placement_delete_kernel<<<1, 1>>>(address); + cudaFree(address); + cudaDeviceSynchronize(); + } +} + +BENCHMARK(benchmark_placement_new_on_gpu_512); + +void benchmark_placement_new_on_gpu_4096(benchmark::State& state) +{ + while (state.KeepRunning()) { + ClassWithSize<4096>* address; + cudaMalloc(&address, sizeof(ClassWithSize<4096>)); + placement_new_kernel<<<1, 1>>>(address); + placement_delete_kernel<<<1, 1>>>(address); + cudaFree(address); + cudaDeviceSynchronize(); + } +} + +BENCHMARK(benchmark_placement_new_on_gpu_4096); + +void benchmark_placement_new_on_gpu_32768(benchmark::State& state) +{ + while (state.KeepRunning()) { + ClassWithSize<32768>* address; + cudaMalloc(&address, sizeof(ClassWithSize<32768>)); + placement_new_kernel<<<1, 1>>>(address); + placement_delete_kernel<<<1, 1>>>(address); + cudaFree(address); + cudaDeviceSynchronize(); + } +} + +BENCHMARK(benchmark_placement_new_on_gpu_32768); + +void benchmark_placement_new_on_gpu_262144(benchmark::State& state) +{ + while (state.KeepRunning()) { + ClassWithSize<262144>* address; + cudaMalloc(&address, sizeof(ClassWithSize<262144>)); + placement_new_kernel<<<1, 1>>>(address); + placement_delete_kernel<<<1, 1>>>(address); + cudaFree(address); + cudaDeviceSynchronize(); + } +} + +BENCHMARK(benchmark_placement_new_on_gpu_262144); + +void benchmark_placement_new_on_gpu_2097152(benchmark::State& state) +{ + while (state.KeepRunning()) { + ClassWithSize<2097152>* address; + cudaMalloc(&address, sizeof(ClassWithSize<2097152>)); + placement_new_kernel<<<1, 1>>>(address); + placement_delete_kernel<<<1, 1>>>(address); + cudaFree(address); + cudaDeviceSynchronize(); + } +} + +BENCHMARK(benchmark_placement_new_on_gpu_2097152); + +// Benchmark how long it takes to call new on the GPU +template +__global__ void create_kernel(ClassWithSize** address) { + *address = new ClassWithSize(); +} + +template +__global__ void delete_kernel(ClassWithSize** address) { + delete *address; +} + +void benchmark_new_on_gpu_8(benchmark::State& state) +{ + while (state.KeepRunning()) { + ClassWithSize<8>** buffer; + cudaMalloc(&buffer, sizeof(ClassWithSize<8>*)); + create_kernel<<<1, 1>>>(buffer); + delete_kernel<<<1, 1>>>(buffer); + cudaFree(buffer); + cudaDeviceSynchronize(); + } +} + +BENCHMARK(benchmark_new_on_gpu_8); + +void benchmark_new_on_gpu_64(benchmark::State& state) +{ + while (state.KeepRunning()) { + ClassWithSize<64>** buffer; + cudaMalloc(&buffer, sizeof(ClassWithSize<64>*)); + create_kernel<<<1, 1>>>(buffer); + delete_kernel<<<1, 1>>>(buffer); + cudaFree(buffer); + cudaDeviceSynchronize(); + } +} + +BENCHMARK(benchmark_new_on_gpu_64); + +void benchmark_new_on_gpu_512(benchmark::State& state) +{ + while (state.KeepRunning()) { + ClassWithSize<512>** buffer; + cudaMalloc(&buffer, sizeof(ClassWithSize<512>*)); + create_kernel<<<1, 1>>>(buffer); + delete_kernel<<<1, 1>>>(buffer); + cudaFree(buffer); + 
cudaDeviceSynchronize(); + } +} + +BENCHMARK(benchmark_new_on_gpu_512); + +void benchmark_new_on_gpu_4096(benchmark::State& state) +{ + while (state.KeepRunning()) { + ClassWithSize<4096>** buffer; + cudaMalloc(&buffer, sizeof(ClassWithSize<4096>*)); + create_kernel<<<1, 1>>>(buffer); + delete_kernel<<<1, 1>>>(buffer); + cudaFree(buffer); + cudaDeviceSynchronize(); + } +} + +BENCHMARK(benchmark_new_on_gpu_4096); +void benchmark_new_on_gpu_32768(benchmark::State& state) +{ + while (state.KeepRunning()) { + ClassWithSize<32768>** buffer; + cudaMalloc(&buffer, sizeof(ClassWithSize<32768>*)); + create_kernel<<<1, 1>>>(buffer); + delete_kernel<<<1, 1>>>(buffer); + cudaFree(buffer); + cudaDeviceSynchronize(); + } +} + +BENCHMARK(benchmark_new_on_gpu_32768); + +void benchmark_new_on_gpu_262144(benchmark::State& state) +{ + while (state.KeepRunning()) { + ClassWithSize<262144>** buffer; + cudaMalloc(&buffer, sizeof(ClassWithSize<262144>*)); + create_kernel<<<1, 1>>>(buffer); + delete_kernel<<<1, 1>>>(buffer); + cudaFree(buffer); + cudaDeviceSynchronize(); + } +} + +BENCHMARK(benchmark_new_on_gpu_262144); + +void benchmark_new_on_gpu_2097152(benchmark::State& state) +{ + while (state.KeepRunning()) { + ClassWithSize<2097152>** buffer; + cudaMalloc(&buffer, sizeof(ClassWithSize<2097152>*)); + create_kernel<<<1, 1>>>(buffer); + delete_kernel<<<1, 1>>>(buffer); + cudaFree(buffer); + cudaDeviceSynchronize(); + } +} + +BENCHMARK(benchmark_new_on_gpu_2097152); + +// Benchmark current approach +template +__global__ void delete_kernel_2(ClassWithSize* address) { + delete address; +} + +void benchmark_new_on_gpu_and_copy_to_host_8(benchmark::State& state) +{ + while (state.KeepRunning()) { + ClassWithSize<8>** gpuBuffer; + cudaMalloc(&gpuBuffer, sizeof(ClassWithSize<8>*)); + create_kernel<<<1, 1>>>(gpuBuffer); + ClassWithSize<8>** cpuBuffer = (ClassWithSize<8>**) malloc(sizeof(ClassWithSize<8>*)); + cudaMemcpy(cpuBuffer, gpuBuffer, sizeof(ClassWithSize<8>*), cudaMemcpyDeviceToHost); + cudaFree(gpuBuffer); + ClassWithSize<8>* gpuPointer = cpuBuffer[0]; + delete_kernel_2<<<1, 1>>>(gpuPointer); + free(cpuBuffer); + cudaDeviceSynchronize(); + } +} + +BENCHMARK(benchmark_new_on_gpu_and_copy_to_host_8); + +void benchmark_new_on_gpu_and_copy_to_host_64(benchmark::State& state) +{ + while (state.KeepRunning()) { + ClassWithSize<64>** gpuBuffer; + cudaMalloc(&gpuBuffer, sizeof(ClassWithSize<64>*)); + create_kernel<<<1, 1>>>(gpuBuffer); + ClassWithSize<64>** cpuBuffer = (ClassWithSize<64>**) malloc(sizeof(ClassWithSize<64>*)); + cudaMemcpy(cpuBuffer, gpuBuffer, sizeof(ClassWithSize<64>*), cudaMemcpyDeviceToHost); + cudaFree(gpuBuffer); + ClassWithSize<64>* gpuPointer = cpuBuffer[0]; + delete_kernel_2<<<1, 1>>>(gpuPointer); + free(cpuBuffer); + cudaDeviceSynchronize(); + } +} + +BENCHMARK(benchmark_new_on_gpu_and_copy_to_host_64); + +void benchmark_new_on_gpu_and_copy_to_host_512(benchmark::State& state) +{ + while (state.KeepRunning()) { + ClassWithSize<512>** gpuBuffer; + cudaMalloc(&gpuBuffer, sizeof(ClassWithSize<512>*)); + create_kernel<<<1, 1>>>(gpuBuffer); + ClassWithSize<512>** cpuBuffer = (ClassWithSize<512>**) malloc(sizeof(ClassWithSize<512>*)); + cudaMemcpy(cpuBuffer, gpuBuffer, sizeof(ClassWithSize<512>*), cudaMemcpyDeviceToHost); + cudaFree(gpuBuffer); + ClassWithSize<512>* gpuPointer = cpuBuffer[0]; + delete_kernel_2<<<1, 1>>>(gpuPointer); + free(cpuBuffer); + cudaDeviceSynchronize(); + } +} + +BENCHMARK(benchmark_new_on_gpu_and_copy_to_host_512); + +void 
benchmark_new_on_gpu_and_copy_to_host_4096(benchmark::State& state) +{ + while (state.KeepRunning()) { + ClassWithSize<4096>** gpuBuffer; + cudaMalloc(&gpuBuffer, sizeof(ClassWithSize<4096>*)); + create_kernel<<<1, 1>>>(gpuBuffer); + ClassWithSize<4096>** cpuBuffer = (ClassWithSize<4096>**) malloc(sizeof(ClassWithSize<4096>*)); + cudaMemcpy(cpuBuffer, gpuBuffer, sizeof(ClassWithSize<4096>*), cudaMemcpyDeviceToHost); + cudaFree(gpuBuffer); + ClassWithSize<4096>* gpuPointer = cpuBuffer[0]; + delete_kernel_2<<<1, 1>>>(gpuPointer); + free(cpuBuffer); + cudaDeviceSynchronize(); + } +} + +BENCHMARK(benchmark_new_on_gpu_and_copy_to_host_4096); + +void benchmark_new_on_gpu_and_copy_to_host_32768(benchmark::State& state) +{ + while (state.KeepRunning()) { + ClassWithSize<32768>** gpuBuffer; + cudaMalloc(&gpuBuffer, sizeof(ClassWithSize<32768>*)); + create_kernel<<<1, 1>>>(gpuBuffer); + ClassWithSize<32768>** cpuBuffer = (ClassWithSize<32768>**) malloc(sizeof(ClassWithSize<32768>*)); + cudaMemcpy(cpuBuffer, gpuBuffer, sizeof(ClassWithSize<32768>*), cudaMemcpyDeviceToHost); + cudaFree(gpuBuffer); + ClassWithSize<32768>* gpuPointer = cpuBuffer[0]; + delete_kernel_2<<<1, 1>>>(gpuPointer); + free(cpuBuffer); + cudaDeviceSynchronize(); + } +} + +BENCHMARK(benchmark_new_on_gpu_and_copy_to_host_32768); + +void benchmark_new_on_gpu_and_copy_to_host_262144(benchmark::State& state) +{ + while (state.KeepRunning()) { + ClassWithSize<262144>** gpuBuffer; + cudaMalloc(&gpuBuffer, sizeof(ClassWithSize<262144>*)); + create_kernel<<<1, 1>>>(gpuBuffer); + ClassWithSize<262144>** cpuBuffer = (ClassWithSize<262144>**) malloc(sizeof(ClassWithSize<262144>*)); + cudaMemcpy(cpuBuffer, gpuBuffer, sizeof(ClassWithSize<262144>*), cudaMemcpyDeviceToHost); + cudaFree(gpuBuffer); + ClassWithSize<262144>* gpuPointer = cpuBuffer[0]; + delete_kernel_2<<<1, 1>>>(gpuPointer); + free(cpuBuffer); + cudaDeviceSynchronize(); + } +} + +BENCHMARK(benchmark_new_on_gpu_and_copy_to_host_262144); + +void benchmark_new_on_gpu_and_copy_to_host_2097152(benchmark::State& state) +{ + while (state.KeepRunning()) { + ClassWithSize<2097152>** gpuBuffer; + cudaMalloc(&gpuBuffer, sizeof(ClassWithSize<2097152>*)); + create_kernel<<<1, 1>>>(gpuBuffer); + ClassWithSize<2097152>** cpuBuffer = (ClassWithSize<2097152>**) malloc(sizeof(ClassWithSize<2097152>*)); + cudaMemcpy(cpuBuffer, gpuBuffer, sizeof(ClassWithSize<2097152>*), cudaMemcpyDeviceToHost); + cudaFree(gpuBuffer); + ClassWithSize<2097152>* gpuPointer = cpuBuffer[0]; + delete_kernel_2<<<1, 1>>>(gpuPointer); + free(cpuBuffer); + cudaDeviceSynchronize(); + } +} + +BENCHMARK(benchmark_new_on_gpu_and_copy_to_host_2097152); + +// Benchmark how long it takes to create a stack object on the GPU +template +__global__ void create_on_stack_kernel() { + (void) ClassWithSize(); +} + +void benchmark_create_on_stack_on_gpu_8(benchmark::State& state) +{ + while (state.KeepRunning()) { + create_on_stack_kernel<8><<<1, 1>>>(); + cudaDeviceSynchronize(); + } +} + +BENCHMARK(benchmark_create_on_stack_on_gpu_8); + +void benchmark_create_on_stack_on_gpu_64(benchmark::State& state) +{ + while (state.KeepRunning()) { + create_on_stack_kernel<64><<<1, 1>>>(); + cudaDeviceSynchronize(); + } +} + +BENCHMARK(benchmark_create_on_stack_on_gpu_64); + +void benchmark_create_on_stack_on_gpu_512(benchmark::State& state) +{ + while (state.KeepRunning()) { + create_on_stack_kernel<512><<<1, 1>>>(); + cudaDeviceSynchronize(); + } +} + +BENCHMARK(benchmark_create_on_stack_on_gpu_512); + +void 
benchmark_create_on_stack_on_gpu_4096(benchmark::State& state) +{ + while (state.KeepRunning()) { + create_on_stack_kernel<4096><<<1, 1>>>(); + cudaDeviceSynchronize(); + } +} + +BENCHMARK(benchmark_create_on_stack_on_gpu_4096); + +void benchmark_create_on_stack_on_gpu_32768(benchmark::State& state) +{ + while (state.KeepRunning()) { + create_on_stack_kernel<32768><<<1, 1>>>(); + cudaDeviceSynchronize(); + } +} + +BENCHMARK(benchmark_create_on_stack_on_gpu_32768); + +void benchmark_create_on_stack_on_gpu_262144(benchmark::State& state) +{ + while (state.KeepRunning()) { + create_on_stack_kernel<262144><<<1, 1>>>(); + cudaDeviceSynchronize(); + } +} + +BENCHMARK(benchmark_create_on_stack_on_gpu_262144); + +void benchmark_create_on_stack_on_gpu_2097152(benchmark::State& state) +{ + while (state.KeepRunning()) { + create_on_stack_kernel<2097152><<<1, 1>>>(); + cudaDeviceSynchronize(); + } +} + +BENCHMARK(benchmark_create_on_stack_on_gpu_2097152); + +BENCHMARK(benchmark_managed_ptr_use_gpu); + +// Curiously recurring template pattern void benchmark_curiously_recurring_template_pattern_gpu(benchmark::State& state) { + BaseCRTP* derivedCRTP = new DerivedCRTP(4); + auto helper = *derivedCRTP; + while (state.KeepRunning()) { - auto helper = *derivedCRTP2; forall(gpu(), 0, 1, [=] __device__ (int i) { (void) helper.getValue(); }); } - state.SetItemsProcessed(state.iterations()); + delete derivedCRTP; } -BENCHMARK(benchmark_curiously_recurring_template_pattern_gpu)->Range(1, 1); +BENCHMARK(benchmark_curiously_recurring_template_pattern_gpu); // Class without inheritance -static NoInheritance* noInheritance2 = new NoInheritance(5); - void benchmark_no_inheritance_gpu(benchmark::State& state) { + NoInheritance* noInheritance = new NoInheritance(5); + auto helper = *noInheritance; + while (state.KeepRunning()) { - auto helper = *noInheritance2; forall(gpu(), 0, 1, [=] __device__ (int i) { (void) helper.getValue(); }); } - state.SetItemsProcessed(state.iterations()); + delete noInheritance; } -BENCHMARK(benchmark_no_inheritance_gpu)->Range(1, 1); +BENCHMARK(benchmark_no_inheritance_gpu); #endif From 349fbddc81127d003a2310025d11f5fff8b37fc1 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Thu, 10 Oct 2019 15:21:38 -0700 Subject: [PATCH 34/58] Use placement new for better performance and cleaner code --- src/chai/managed_ptr.hpp | 30 ++++--------- tests/integration/managed_ptr_tests.cpp | 57 ++++++++----------------- tests/unit/managed_ptr_unit_tests.cpp | 56 +++++++----------------- 3 files changed, 42 insertions(+), 101 deletions(-) diff --git a/src/chai/managed_ptr.hpp b/src/chai/managed_ptr.hpp index c9c0de66..190df7cf 100644 --- a/src/chai/managed_ptr.hpp +++ b/src/chai/managed_ptr.hpp @@ -818,8 +818,8 @@ namespace chai { template ::value, int>::type = 0> - CHAI_DEVICE void new_on_device(T** gpuPointer, Args&&... args) { - *gpuPointer = new T(args...); + CHAI_DEVICE void new_on_device(T* gpuPointer, Args&&... args) { + new(gpuPointer) T(args...); } /// @@ -836,8 +836,8 @@ namespace chai { template ::value, int>::type = 0> - CHAI_DEVICE void new_on_device(T** gpuPointer, Args&&... args) { - *gpuPointer = new T(getRawPointers(args)...); + CHAI_DEVICE void new_on_device(T* gpuPointer, Args&&... args) { + new(gpuPointer) T(getRawPointers(args)...); } /// @@ -853,7 +853,7 @@ namespace chai { /// template - __global__ void make_on_device(T** gpuPointer, Args... args) + __global__ void make_on_device(T* gpuPointer, Args... 
args) { new_on_device(gpuPointer, args...); } @@ -890,7 +890,7 @@ namespace chai { __global__ void destroy_on_device(T* gpuPointer) { if (gpuPointer) { - delete gpuPointer; + gpuPointer->~T(); } } @@ -917,25 +917,13 @@ namespace chai { #endif // Allocate space on the GPU to hold the pointer to the new object - T** gpuBuffer; - GPU_ERROR_CHECK(cudaMalloc(&gpuBuffer, sizeof(T*))); + T* gpuPointer; + GPU_ERROR_CHECK(cudaMalloc(&gpuPointer, sizeof(T))); // Create the object on the device - make_on_device<<<1, 1>>>(gpuBuffer, args...); + make_on_device<<<1, 1>>>(gpuPointer, args...); debug_cudaDeviceSynchronize(); - // Allocate space on the CPU for the pointer and copy the pointer to the CPU - T** cpuBuffer = (T**) malloc(sizeof(T*)); - GPU_ERROR_CHECK(cudaMemcpy(cpuBuffer, gpuBuffer, sizeof(T*), - cudaMemcpyDeviceToHost)); - - // Get the GPU pointer - T* gpuPointer = cpuBuffer[0]; - - // Free the host and device buffers - free(cpuBuffer); - GPU_ERROR_CHECK(cudaFree(gpuBuffer)); - #ifndef CHAI_DISABLE_RM // Set the execution space back to the previous value arrayManager->setExecutionSpace(currentSpace); diff --git a/tests/integration/managed_ptr_tests.cpp b/tests/integration/managed_ptr_tests.cpp index 36036d96..908074fa 100644 --- a/tests/integration/managed_ptr_tests.cpp +++ b/tests/integration/managed_ptr_tests.cpp @@ -331,60 +331,37 @@ GPU_TEST(managed_ptr, make_on_device) GPU_TEST(managed_ptr, gpu_new_and_delete_on_device) { - // Initialize host side memory to hold a pointer - RawArrayClass** cpuPointerHolder = (RawArrayClass**) malloc(sizeof(RawArrayClass*)); - cpuPointerHolder[0] = nullptr; - - // Initialize device side memory to hold a pointer - RawArrayClass** gpuPointerHolder = nullptr; - cudaMalloc(&gpuPointerHolder, sizeof(RawArrayClass*)); + // Initialize device side memory to hold the new object + RawArrayClass* gpuPointer = nullptr; + cudaMalloc(&gpuPointer, sizeof(RawArrayClass)); // Create on the device - chai::detail::make_on_device<<<1, 1>>>(gpuPointerHolder); - - // Copy to the host side memory - cudaMemcpy(cpuPointerHolder, gpuPointerHolder, sizeof(RawArrayClass*), cudaMemcpyDeviceToHost); - - // Free device side memory - cudaFree(gpuPointerHolder); + chai::detail::make_on_device<<<1, 1>>>(gpuPointer); - // Save the pointer - ASSERT_NE(cpuPointerHolder[0], nullptr); - RawArrayClass* gpuPointer = cpuPointerHolder[0]; - - // Free host side memory - free(cpuPointerHolder); + // Check the pointer + ASSERT_NE(gpuPointer, nullptr); + // Clean up on the device chai::detail::destroy_on_device<<<1, 1>>>(gpuPointer); } GPU_TEST(managed_ptr, gpu_build_managed_ptr) { - // Initialize host side memory to hold a pointer - RawArrayClass** cpuPointerHolder = (RawArrayClass**) malloc(sizeof(RawArrayClass*)); - cpuPointerHolder[0] = nullptr; - - // Initialize device side memory to hold a pointer - RawArrayClass** gpuPointerHolder = nullptr; - cudaMalloc(&gpuPointerHolder, sizeof(RawArrayClass*)); + // Initialize device side memory to hold the new object + RawArrayClass* gpuPointer = nullptr; + cudaMalloc(&gpuPointer, sizeof(RawArrayClass)); // Create on the device - chai::detail::make_on_device<<<1, 1>>>(gpuPointerHolder); + chai::detail::make_on_device<<<1, 1>>>(gpuPointer); - // Copy to the host side memory - cudaMemcpy(cpuPointerHolder, gpuPointerHolder, sizeof(RawArrayClass*), cudaMemcpyDeviceToHost); - - // Free device side memory - cudaFree(gpuPointerHolder); - - // Save the pointer - ASSERT_NE(cpuPointerHolder[0], nullptr); - RawArrayClass* gpuPointer = cpuPointerHolder[0]; - - // Free 
host side memory - free(cpuPointerHolder); + // Check the pointer + ASSERT_NE(gpuPointer, nullptr); + // Make a managed_ptr chai::managed_ptr managedPtr({chai::GPU}, {gpuPointer}); + + // Clean up the memory + managedPtr.free(); } diff --git a/tests/unit/managed_ptr_unit_tests.cpp b/tests/unit/managed_ptr_unit_tests.cpp index 721e9b40..b8ac4ec4 100644 --- a/tests/unit/managed_ptr_unit_tests.cpp +++ b/tests/unit/managed_ptr_unit_tests.cpp @@ -619,60 +619,36 @@ GPU_TEST(managed_ptr, gpu_pointer_constructor) GPU_TEST(managed_ptr, gpu_new_and_delete_on_device) { - // Initialize host side memory to hold a pointer - Simple** cpuPointerHolder = (Simple**) malloc(sizeof(Simple*)); - cpuPointerHolder[0] = nullptr; - - // Initialize device side memory to hold a pointer - Simple** gpuPointerHolder = nullptr; - cudaMalloc(&gpuPointerHolder, sizeof(Simple*)); + // Initialize device side memory to hold the new object + Simple* gpuPointer = nullptr; + cudaMalloc(&gpuPointer, sizeof(Simple)); // Create on the device - chai::detail::make_on_device<<<1, 1>>>(gpuPointerHolder); - - // Copy to the host side memory - cudaMemcpy(cpuPointerHolder, gpuPointerHolder, sizeof(Simple*), cudaMemcpyDeviceToHost); - - // Free device side memory - cudaFree(gpuPointerHolder); + chai::detail::make_on_device<<<1, 1>>>(gpuPointer); - // Save the pointer - ASSERT_NE(cpuPointerHolder[0], nullptr); - Simple* gpuPointer = cpuPointerHolder[0]; - - // Free host side memory - free(cpuPointerHolder); + // Check the pointer + ASSERT_NE(gpuPointer, nullptr); + // Clean up on the device chai::detail::destroy_on_device<<<1, 1>>>(gpuPointer); } GPU_TEST(managed_ptr, gpu_new_and_delete_on_device_2) { - // Initialize host side memory to hold a pointer - Simple** cpuPointerHolder = (Simple**) malloc(sizeof(Simple*)); - cpuPointerHolder[0] = nullptr; - - // Initialize device side memory to hold a pointer - Simple** gpuPointerHolder = nullptr; - cudaMalloc(&gpuPointerHolder, sizeof(Simple*)); + // Initialize device side memory to hold a the new object + Simple* gpuPointer = nullptr; + cudaMalloc(&gpuPointer, sizeof(Simple)); // Create on the device - chai::detail::make_on_device<<<1, 1>>>(gpuPointerHolder); + chai::detail::make_on_device<<<1, 1>>>(gpuPointer); - // Copy to the host side memory - cudaMemcpy(cpuPointerHolder, gpuPointerHolder, sizeof(Simple*), cudaMemcpyDeviceToHost); - - // Free device side memory - cudaFree(gpuPointerHolder); - - // Save the pointer - ASSERT_NE(cpuPointerHolder[0], nullptr); - Simple* gpuPointer = cpuPointerHolder[0]; - - // Free host side memory - free(cpuPointerHolder); + // Check the pointer + ASSERT_NE(gpuPointer, nullptr); + // Create a managed_ptr chai::managed_ptr test({chai::GPU}, {gpuPointer}); + + // Free the memory test.free(); } From ac3bc32ee60f00d9b6956bf78bccc35328f4f0ae Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Thu, 10 Oct 2019 15:36:50 -0700 Subject: [PATCH 35/58] Fix memory leaks --- src/chai/managed_ptr.hpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/chai/managed_ptr.hpp b/src/chai/managed_ptr.hpp index 190df7cf..a0e90966 100644 --- a/src/chai/managed_ptr.hpp +++ b/src/chai/managed_ptr.hpp @@ -554,6 +554,7 @@ namespace chai { if (pointer) { detail::destroy_on_device<<<1, 1>>>(temp); debug_cudaDeviceSynchronize(); + GPU_ERROR_CHECK(cudaFree(temp)); } break; @@ -581,6 +582,7 @@ namespace chai { if (pointer) { detail::destroy_on_device<<<1, 1>>>(pointer); debug_cudaDeviceSynchronize(); + GPU_ERROR_CHECK(cudaFree((void*) pointer)); } break; @@ -916,7 +918,7 
@@ namespace chai { arrayManager->setExecutionSpace(GPU); #endif - // Allocate space on the GPU to hold the pointer to the new object + // Allocate space on the GPU to hold the new object T* gpuPointer; GPU_ERROR_CHECK(cudaMalloc(&gpuPointer, sizeof(T))); From 552786c32b9a5a224d6f81a029d2c06f8e67f09a Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Thu, 10 Oct 2019 16:00:02 -0700 Subject: [PATCH 36/58] Remove make_managed_from_factory --- src/chai/managed_ptr.hpp | 203 ++++---------------------- tests/unit/managed_ptr_unit_tests.cpp | 132 ----------------- 2 files changed, 26 insertions(+), 309 deletions(-) diff --git a/src/chai/managed_ptr.hpp b/src/chai/managed_ptr.hpp index a0e90966..fb3999a8 100644 --- a/src/chai/managed_ptr.hpp +++ b/src/chai/managed_ptr.hpp @@ -122,10 +122,9 @@ namespace chai { /// /// This wrapper stores both host and device pointers so that polymorphism can be /// used in both contexts with a single API. - /// The make_managed and make_managed_from_factory functions call new on both the - /// host and device so that polymorphism is valid in both contexts. Simply copying - /// an object to the device will not copy the vtable, so new must be called on - /// the device. + /// The make_managed function calls new on both the host and device so that + /// polymorphism is valid in both contexts. Simply copying an object to the + /// device will not copy the vtable, so new must be called on the device. /// /// Usage Requirements: /// Methods that can be called on both the host and device must be declared @@ -137,28 +136,26 @@ namespace chai { /// you must explicitly modify the object in both the host context and the /// device context. /// Raw array members of T need to be initialized correctly with a host or - /// device pointer. If a ManagedArray is passed to the make_managed or - /// make_managed_from_factory methods in place of a raw array, it will be - /// cast to the appropriate host or device pointer when passed to T's - /// constructor on the host and on the device. If it is desired that these - /// host and device pointers be kept in sync, define a callback that maintains - /// a copy of the ManagedArray and upon the ACTION_MOVE event calls the copy - /// constructor of that ManagedArray. + /// device pointer. If a ManagedArray is passed to the make_managed function + /// in place of a raw array, it will be cast to the appropriate host or device + /// pointer when passed to T's constructor on the host and on the device. If it + /// is desired that these host and device pointers be kept in sync, define a + /// callback that maintains a copy of the ManagedArray and upon the ACTION_MOVE + /// event calls the copy constructor of that ManagedArray. /// If a raw array is passed to make_managed, accessing that member will be /// valid only in the correct context. To prevent the accidental use of that /// member in the wrong context, any methods that access it should be __host__ /// only or __device__ only. Special care should be taken when passing raw /// arrays as arguments to member functions. /// The same restrictions for raw array members also apply to raw pointer members. - /// A managed_ptr can be passed to the make_managed or make_managed_from_factory - /// methods in place of a raw pointer, and the host constructor of T will - /// be given the extracted host pointer, and likewise the device constructor - /// of T will be given the extracted device pointer. 
It is recommended that - /// a callback is defined that maintains a copy of the managed_ptr so that - /// the raw pointers are not accidentally destroyed prematurely. It is also - /// recommended that the callback calls the copy constructor of the managed_ptr - /// on the ACTION_MOVE event so that the ACTION_MOVE event is triggered also for - /// the inner managed_ptr. + /// A managed_ptr can be passed to the make_managed function in place of a raw + /// pointer, and the host constructor of T will be given the extracted host + /// pointer, and likewise the device constructor of T will be given the + /// extracted device pointer. It is recommended that a callback is defined that + /// maintains a copy of the managed_ptr and frees it on the ACTION_FREE event. + /// It is also recommended that the callback calls the copy constructor of the + /// managed_ptr on the ACTION_MOVE event so that the ACTION_MOVE event is + /// triggered also for the inner managed_ptr. /// Again, if a raw pointer is passed to make_managed, accessing that member will /// only be valid in the correct context. Take care when passing raw pointers /// as arguments to member functions. @@ -572,6 +569,12 @@ namespace chai { ExecutionSpace execSpace = static_cast(space); T* pointer = get(execSpace, false); + using T_non_const = typename std::remove_const::type; + + // We can use const_cast because can managed_ptr can only + // be constructed with non const pointers. + T_non_const* temp = const_cast(pointer); + switch (execSpace) { case CPU: delete pointer; @@ -580,9 +583,9 @@ namespace chai { case GPU: { if (pointer) { - detail::destroy_on_device<<<1, 1>>>(pointer); + detail::destroy_on_device<<<1, 1>>>(temp); debug_cudaDeviceSynchronize(); - GPU_ERROR_CHECK(cudaFree((void*) pointer)); + GPU_ERROR_CHECK(cudaFree(temp)); } break; @@ -767,44 +770,6 @@ namespace chai { return cpuPointer; } - /// - /// @author Alan Dayton - /// - /// Calls a factory method to create a new object on the host. - /// Sets the execution space to the CPU so that ManagedArrays and managed_ptrs - /// are moved to the host as necessary. - /// - /// @param[in] f The factory method - /// @param[in] args The arguments to the factory method - /// - /// @return The host pointer to the new object - /// - template - CHAI_HOST T* make_on_host_from_factory(F f, Args&&... args) { -#ifndef CHAI_DISABLE_RM - // Get the ArrayManager and save the current execution space - chai::ArrayManager* arrayManager = chai::ArrayManager::getInstance(); - ExecutionSpace currentSpace = arrayManager->getExecutionSpace(); - - // Set the execution space so that ManagedArrays and managed_ptrs - // are handled properly - arrayManager->setExecutionSpace(CPU); -#endif - - // Create the object on the device - T* cpuPointer = f(args...); - -#ifndef CHAI_DISABLE_RM - // Set the execution space back to the previous value - arrayManager->setExecutionSpace(currentSpace); -#endif - - // Return the GPU pointer - return cpuPointer; - } - #ifdef __CUDACC__ /// /// @author Alan Dayton @@ -860,27 +825,6 @@ namespace chai { new_on_device(gpuPointer, args...); } - /// - /// @author Alan Dayton - /// - /// Creates a new object on the device by calling the given factory method. 
- /// - /// @param[out] gpuPointer Used to return the device pointer to the new object - /// @param[in] f The factory method (must be a __device__ or __host__ __device__ - /// method - /// @param[in] args The arguments to the factory method - /// - /// @note Cannot capture argument packs in an extended device lambda, - /// so explicit kernel is needed. - /// - template - __global__ void make_on_device_from_factory(T** gpuPointer, F f, Args... args) - { - *gpuPointer = f(args...); - } - /// /// @author Alan Dayton /// @@ -891,9 +835,7 @@ namespace chai { template __global__ void destroy_on_device(T* gpuPointer) { - if (gpuPointer) { - gpuPointer->~T(); - } + gpuPointer->~T(); } /// @@ -926,59 +868,6 @@ namespace chai { make_on_device<<<1, 1>>>(gpuPointer, args...); debug_cudaDeviceSynchronize(); -#ifndef CHAI_DISABLE_RM - // Set the execution space back to the previous value - arrayManager->setExecutionSpace(currentSpace); -#endif - - // Return the GPU pointer - return gpuPointer; - } - - /// - /// @author Alan Dayton - /// - /// Calls a factory method to create a new object on the device. - /// - /// @param[in] f The factory method - /// @param[in] args The arguments to the factory method - /// - /// @return The device pointer to the new object - /// - template - CHAI_HOST T* make_on_device_from_factory(F f, Args&&... args) { -#ifndef CHAI_DISABLE_RM - // Get the ArrayManager and save the current execution space - chai::ArrayManager* arrayManager = chai::ArrayManager::getInstance(); - ExecutionSpace currentSpace = arrayManager->getExecutionSpace(); - - // Set the execution space so that chai::ManagedArrays and - // chai::managed_ptrs are handled properly - arrayManager->setExecutionSpace(GPU); -#endif - - // Allocate space on the GPU to hold the pointer to the new object - T** gpuBuffer; - GPU_ERROR_CHECK(cudaMalloc(&gpuBuffer, sizeof(T*))); - - // Create the object on the device - make_on_device_from_factory<<<1, 1>>>(gpuBuffer, f, args...); - debug_cudaDeviceSynchronize(); - - // Allocate space on the CPU for the pointer and copy the pointer to the CPU - T** cpuBuffer = (T**) malloc(sizeof(T*)); - GPU_ERROR_CHECK(cudaMemcpy(cpuBuffer, gpuBuffer, sizeof(T*), - cudaMemcpyDeviceToHost)); - - // Get the GPU pointer - T* gpuPointer = cpuBuffer[0]; - - // Free the host and device buffers - free(cpuBuffer); - GPU_ERROR_CHECK(cudaFree(gpuBuffer)); - #ifndef CHAI_DISABLE_RM // Set the execution space back to the previous value arrayManager->setExecutionSpace(currentSpace); @@ -1042,46 +931,6 @@ namespace chai { #endif } - /// - /// @author Alan Dayton - /// - /// Makes a managed_ptr. - /// Factory function to create managed_ptrs. - /// - /// @param[in] f The factory function that will create the object - /// @param[in] args The arguments to the factory function - /// - template - CHAI_HOST managed_ptr make_managed_from_factory(F&& f, Args&&... 
args) { - static_assert(detail::is_invocable::value, - "F is not invocable with the given arguments."); - - static_assert(std::is_pointer::type>::value, - "F does not return a pointer."); - - using R = typename std::remove_pointer::type>::type; - - static_assert(std::is_convertible::value, - "F does not return a pointer that is convertible to T*."); - -#ifdef __CUDACC__ - // Construct on the GPU first to take advantage of asynchrony - T* gpuPointer = detail::make_on_device_from_factory(f, args...); -#endif - - // Construct on the CPU - T* cpuPointer = detail::make_on_host_from_factory(f, args...); - - // Construct and return the managed_ptr -#ifdef __CUDACC__ - return managed_ptr({CPU, GPU}, {cpuPointer, gpuPointer}); -#else - return managed_ptr({CPU}, {cpuPointer}); -#endif - } - /// /// @author Alan Dayton /// diff --git a/tests/unit/managed_ptr_unit_tests.cpp b/tests/unit/managed_ptr_unit_tests.cpp index b8ac4ec4..281e88a5 100644 --- a/tests/unit/managed_ptr_unit_tests.cpp +++ b/tests/unit/managed_ptr_unit_tests.cpp @@ -760,94 +760,6 @@ GPU_TEST(managed_ptr, gpu_make_managed) derived.free(); } -GPU_TEST(managed_ptr, make_managed_from_factory_function) -{ - const int expectedValue = rand(); - - auto factory = [] CHAI_HOST_DEVICE (const int value) { - return Factory(value); - }; - - auto derived = chai::make_managed_from_factory(factory, expectedValue); - - EXPECT_EQ((*derived).getValue(), expectedValue); - - EXPECT_NE(derived.get(), nullptr); - EXPECT_TRUE(derived); - EXPECT_FALSE(derived == nullptr); - EXPECT_FALSE(nullptr == derived); - EXPECT_TRUE(derived != nullptr); - EXPECT_TRUE(nullptr != derived); - - derived.free(); -} - -GPU_TEST(managed_ptr, make_managed_from_factory_lambda) -{ - const int expectedValue = rand(); - - auto factory = [] CHAI_HOST_DEVICE (const int value) { - return new TestDerived(value); - }; - - auto derived = chai::make_managed_from_factory(factory, expectedValue); - - EXPECT_EQ((*derived).getValue(), expectedValue); - - EXPECT_NE(derived.get(), nullptr); - EXPECT_TRUE(derived); - EXPECT_FALSE(derived == nullptr); - EXPECT_FALSE(nullptr == derived); - EXPECT_TRUE(derived != nullptr); - EXPECT_TRUE(nullptr != derived); - - derived.free(); -} - -GPU_TEST(managed_ptr, make_managed_from_overloaded_factory_function) -{ - const int expectedValue = rand(); - - auto factory = [] CHAI_HOST_DEVICE (const int value) { - return OverloadedFactory(value); - }; - - auto derived = chai::make_managed_from_factory(factory, expectedValue); - - EXPECT_EQ((*derived).getValue(), expectedValue); - - EXPECT_NE(derived.get(), nullptr); - EXPECT_TRUE(derived); - EXPECT_FALSE(derived == nullptr); - EXPECT_FALSE(nullptr == derived); - EXPECT_TRUE(derived != nullptr); - EXPECT_TRUE(nullptr != derived); - - derived.free(); -} - -GPU_TEST(managed_ptr, make_managed_from_factory_static_member_function) -{ - const int expectedValue = rand(); - - auto factory = [] CHAI_HOST_DEVICE (const int value) { - return TestBase::Factory(value); - }; - - auto derived = chai::make_managed_from_factory(factory, expectedValue); - - EXPECT_EQ((*derived).getValue(), expectedValue); - - EXPECT_NE(derived.get(), nullptr); - EXPECT_TRUE(derived); - EXPECT_FALSE(derived == nullptr); - EXPECT_FALSE(nullptr == derived); - EXPECT_TRUE(derived != nullptr); - EXPECT_TRUE(nullptr != derived); - - derived.free(); -} - GPU_TEST(managed_ptr, gpu_copy_constructor) { const int expectedValue = rand(); @@ -1040,47 +952,3 @@ GPU_TEST(managed_ptr, gpu_copy_assignment_operator) #endif -// Enable the following tests to ensure 
that proper compiler errors are given -// for bad arguments since otherwise it is difficult to make sure the template -// metaprogramming is correct. - -#if 0 - -// Should give something like the following: -// error: static assertion failed: F is not invocable with the given arguments. - -TEST(managed_ptr, bad_function_to_make_managed_from_factory_function) -{ - const int expectedValue = rand(); - - auto factory = [] CHAI_HOST (const int value) { - return new TestDerived(value); - }; - - auto derived = chai::make_managed_from_factory(expectedValue, factory); - - EXPECT_EQ((*derived).getValue(), expectedValue); -} - -#endif - -#if 0 - -// Should give something like the following: -// error: static assertion failed: F is not invocable with the given arguments. - -TEST(managed_ptr, bad_arguments_to_make_managed_from_factory_function) -{ - const int expectedValue = rand(); - - auto factory = [] CHAI_HOST (const int value) { - return new TestDerived(value); - }; - - auto derived = chai::make_managed_from_factory(factory, expectedValue, 3); - - EXPECT_EQ((*derived).getValue(), expectedValue); -} - -#endif - From cc0e571e02fe58843bcc2aa4eca7dd9cf49b6391 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Fri, 11 Oct 2019 09:11:42 -0700 Subject: [PATCH 37/58] Eliminate duplicate code --- benchmarks/chai_managed_ptr_benchmarks.cpp | 449 +++------------------ 1 file changed, 63 insertions(+), 386 deletions(-) diff --git a/benchmarks/chai_managed_ptr_benchmarks.cpp b/benchmarks/chai_managed_ptr_benchmarks.cpp index 3cdaaacf..ec0ff71c 100644 --- a/benchmarks/chai_managed_ptr_benchmarks.cpp +++ b/benchmarks/chai_managed_ptr_benchmarks.cpp @@ -156,45 +156,10 @@ template __global__ void copy_kernel(ClassWithSize) {} // Benchmark how long it takes to copy a class to the GPU -void benchmark_pass_copy_to_gpu_8(benchmark::State& state) -{ - ClassWithSize<8> helper; - - while (state.KeepRunning()) { - copy_kernel<<<1, 1>>>(helper); - cudaDeviceSynchronize(); - } -} - -BENCHMARK(benchmark_pass_copy_to_gpu_8); - -void benchmark_pass_copy_to_gpu_64(benchmark::State& state) -{ - ClassWithSize<64> helper; - - while (state.KeepRunning()) { - copy_kernel<<<1, 1>>>(helper); - cudaDeviceSynchronize(); - } -} - -BENCHMARK(benchmark_pass_copy_to_gpu_64); - -void benchmark_pass_copy_to_gpu_512(benchmark::State& state) -{ - ClassWithSize<512> helper; - - while (state.KeepRunning()) { - copy_kernel<<<1, 1>>>(helper); - cudaDeviceSynchronize(); - } -} - -BENCHMARK(benchmark_pass_copy_to_gpu_512); - -void benchmark_pass_copy_to_gpu_4096(benchmark::State& state) +template +static void benchmark_pass_copy_to_gpu(benchmark::State& state) { - ClassWithSize<4096> helper; + ClassWithSize helper; while (state.KeepRunning()) { copy_kernel<<<1, 1>>>(helper); @@ -202,18 +167,10 @@ void benchmark_pass_copy_to_gpu_4096(benchmark::State& state) } } -BENCHMARK(benchmark_pass_copy_to_gpu_4096); - -void benchmark_managed_ptr_use_gpu(benchmark::State& state) -{ - chai::managed_ptr helper = chai::make_managed(2); - - while (state.KeepRunning()) { - forall(gpu(), 0, 1, [=] __device__ (int i) { (void) helper->getValue(); }); - } - - helper.free(); -} +BENCHMARK_TEMPLATE(benchmark_pass_copy_to_gpu, 8); +BENCHMARK_TEMPLATE(benchmark_pass_copy_to_gpu, 64); +BENCHMARK_TEMPLATE(benchmark_pass_copy_to_gpu, 512); +BENCHMARK_TEMPLATE(benchmark_pass_copy_to_gpu, 4096); // Benchmark how long it takes to call placement new on the GPU template @@ -226,103 +183,32 @@ __global__ void placement_delete_kernel(ClassWithSize* address) { address->~ClassWithSize(); 
} -void benchmark_placement_new_on_gpu_8(benchmark::State& state) -{ - while (state.KeepRunning()) { - ClassWithSize<8>* address; - cudaMalloc(&address, sizeof(ClassWithSize<8>)); - placement_new_kernel<<<1, 1>>>(address); - placement_delete_kernel<<<1, 1>>>(address); - cudaFree(address); - cudaDeviceSynchronize(); - } -} - -BENCHMARK(benchmark_placement_new_on_gpu_8); - -void benchmark_placement_new_on_gpu_64(benchmark::State& state) -{ - while (state.KeepRunning()) { - ClassWithSize<64>* address; - cudaMalloc(&address, sizeof(ClassWithSize<64>)); - placement_new_kernel<<<1, 1>>>(address); - placement_delete_kernel<<<1, 1>>>(address); - cudaFree(address); - cudaDeviceSynchronize(); - } -} - -BENCHMARK(benchmark_placement_new_on_gpu_64); - -void benchmark_placement_new_on_gpu_512(benchmark::State& state) -{ - while (state.KeepRunning()) { - ClassWithSize<512>* address; - cudaMalloc(&address, sizeof(ClassWithSize<512>)); - placement_new_kernel<<<1, 1>>>(address); - placement_delete_kernel<<<1, 1>>>(address); - cudaFree(address); - cudaDeviceSynchronize(); - } -} - -BENCHMARK(benchmark_placement_new_on_gpu_512); - -void benchmark_placement_new_on_gpu_4096(benchmark::State& state) -{ - while (state.KeepRunning()) { - ClassWithSize<4096>* address; - cudaMalloc(&address, sizeof(ClassWithSize<4096>)); - placement_new_kernel<<<1, 1>>>(address); - placement_delete_kernel<<<1, 1>>>(address); - cudaFree(address); - cudaDeviceSynchronize(); - } -} - -BENCHMARK(benchmark_placement_new_on_gpu_4096); - -void benchmark_placement_new_on_gpu_32768(benchmark::State& state) +template +static void benchmark_placement_new_on_gpu(benchmark::State& state) { while (state.KeepRunning()) { - ClassWithSize<32768>* address; - cudaMalloc(&address, sizeof(ClassWithSize<32768>)); + ClassWithSize* address; + cudaMalloc(&address, sizeof(ClassWithSize)); placement_new_kernel<<<1, 1>>>(address); - placement_delete_kernel<<<1, 1>>>(address); - cudaFree(address); cudaDeviceSynchronize(); - } -} -BENCHMARK(benchmark_placement_new_on_gpu_32768); + state.PauseTiming(); -void benchmark_placement_new_on_gpu_262144(benchmark::State& state) -{ - while (state.KeepRunning()) { - ClassWithSize<262144>* address; - cudaMalloc(&address, sizeof(ClassWithSize<262144>)); - placement_new_kernel<<<1, 1>>>(address); placement_delete_kernel<<<1, 1>>>(address); cudaFree(address); cudaDeviceSynchronize(); - } -} -BENCHMARK(benchmark_placement_new_on_gpu_262144); - -void benchmark_placement_new_on_gpu_2097152(benchmark::State& state) -{ - while (state.KeepRunning()) { - ClassWithSize<2097152>* address; - cudaMalloc(&address, sizeof(ClassWithSize<2097152>)); - placement_new_kernel<<<1, 1>>>(address); - placement_delete_kernel<<<1, 1>>>(address); - cudaFree(address); - cudaDeviceSynchronize(); + state.ResumeTiming(); } } -BENCHMARK(benchmark_placement_new_on_gpu_2097152); +BENCHMARK_TEMPLATE(benchmark_placement_new_on_gpu, 8); +BENCHMARK_TEMPLATE(benchmark_placement_new_on_gpu, 64); +BENCHMARK_TEMPLATE(benchmark_placement_new_on_gpu, 512); +BENCHMARK_TEMPLATE(benchmark_placement_new_on_gpu, 4096); +BENCHMARK_TEMPLATE(benchmark_placement_new_on_gpu, 32768); +BENCHMARK_TEMPLATE(benchmark_placement_new_on_gpu, 262144); +BENCHMARK_TEMPLATE(benchmark_placement_new_on_gpu, 2097152); // Benchmark how long it takes to call new on the GPU template @@ -335,103 +221,32 @@ __global__ void delete_kernel(ClassWithSize** address) { delete *address; } -void benchmark_new_on_gpu_8(benchmark::State& state) -{ - while (state.KeepRunning()) { - ClassWithSize<8>** 
buffer; - cudaMalloc(&buffer, sizeof(ClassWithSize<8>*)); - create_kernel<<<1, 1>>>(buffer); - delete_kernel<<<1, 1>>>(buffer); - cudaFree(buffer); - cudaDeviceSynchronize(); - } -} - -BENCHMARK(benchmark_new_on_gpu_8); - -void benchmark_new_on_gpu_64(benchmark::State& state) -{ - while (state.KeepRunning()) { - ClassWithSize<64>** buffer; - cudaMalloc(&buffer, sizeof(ClassWithSize<64>*)); - create_kernel<<<1, 1>>>(buffer); - delete_kernel<<<1, 1>>>(buffer); - cudaFree(buffer); - cudaDeviceSynchronize(); - } -} - -BENCHMARK(benchmark_new_on_gpu_64); - -void benchmark_new_on_gpu_512(benchmark::State& state) -{ - while (state.KeepRunning()) { - ClassWithSize<512>** buffer; - cudaMalloc(&buffer, sizeof(ClassWithSize<512>*)); - create_kernel<<<1, 1>>>(buffer); - delete_kernel<<<1, 1>>>(buffer); - cudaFree(buffer); - cudaDeviceSynchronize(); - } -} - -BENCHMARK(benchmark_new_on_gpu_512); - -void benchmark_new_on_gpu_4096(benchmark::State& state) -{ - while (state.KeepRunning()) { - ClassWithSize<4096>** buffer; - cudaMalloc(&buffer, sizeof(ClassWithSize<4096>*)); - create_kernel<<<1, 1>>>(buffer); - delete_kernel<<<1, 1>>>(buffer); - cudaFree(buffer); - cudaDeviceSynchronize(); - } -} - -BENCHMARK(benchmark_new_on_gpu_4096); - -void benchmark_new_on_gpu_32768(benchmark::State& state) +template +static void benchmark_new_on_gpu(benchmark::State& state) { while (state.KeepRunning()) { - ClassWithSize<32768>** buffer; - cudaMalloc(&buffer, sizeof(ClassWithSize<32768>*)); + ClassWithSize** buffer; + cudaMalloc(&buffer, sizeof(ClassWithSize*)); create_kernel<<<1, 1>>>(buffer); - delete_kernel<<<1, 1>>>(buffer); - cudaFree(buffer); cudaDeviceSynchronize(); - } -} -BENCHMARK(benchmark_new_on_gpu_32768); + state.PauseTiming(); -void benchmark_new_on_gpu_262144(benchmark::State& state) -{ - while (state.KeepRunning()) { - ClassWithSize<262144>** buffer; - cudaMalloc(&buffer, sizeof(ClassWithSize<262144>*)); - create_kernel<<<1, 1>>>(buffer); delete_kernel<<<1, 1>>>(buffer); cudaFree(buffer); cudaDeviceSynchronize(); - } -} - -BENCHMARK(benchmark_new_on_gpu_262144); -void benchmark_new_on_gpu_2097152(benchmark::State& state) -{ - while (state.KeepRunning()) { - ClassWithSize<2097152>** buffer; - cudaMalloc(&buffer, sizeof(ClassWithSize<2097152>*)); - create_kernel<<<1, 1>>>(buffer); - delete_kernel<<<1, 1>>>(buffer); - cudaFree(buffer); - cudaDeviceSynchronize(); + state.ResumeTiming(); } } -BENCHMARK(benchmark_new_on_gpu_2097152); +BENCHMARK_TEMPLATE(benchmark_new_on_gpu, 8); +BENCHMARK_TEMPLATE(benchmark_new_on_gpu, 64); +BENCHMARK_TEMPLATE(benchmark_new_on_gpu, 512); +BENCHMARK_TEMPLATE(benchmark_new_on_gpu, 4096); +BENCHMARK_TEMPLATE(benchmark_new_on_gpu, 32768); +BENCHMARK_TEMPLATE(benchmark_new_on_gpu, 262144); +BENCHMARK_TEMPLATE(benchmark_new_on_gpu, 2097152); // Benchmark current approach template @@ -439,131 +254,35 @@ __global__ void delete_kernel_2(ClassWithSize* address) { delete address; } -void benchmark_new_on_gpu_and_copy_to_host_8(benchmark::State& state) -{ - while (state.KeepRunning()) { - ClassWithSize<8>** gpuBuffer; - cudaMalloc(&gpuBuffer, sizeof(ClassWithSize<8>*)); - create_kernel<<<1, 1>>>(gpuBuffer); - ClassWithSize<8>** cpuBuffer = (ClassWithSize<8>**) malloc(sizeof(ClassWithSize<8>*)); - cudaMemcpy(cpuBuffer, gpuBuffer, sizeof(ClassWithSize<8>*), cudaMemcpyDeviceToHost); - cudaFree(gpuBuffer); - ClassWithSize<8>* gpuPointer = cpuBuffer[0]; - delete_kernel_2<<<1, 1>>>(gpuPointer); - free(cpuBuffer); - cudaDeviceSynchronize(); - } -} - 
-BENCHMARK(benchmark_new_on_gpu_and_copy_to_host_8); - -void benchmark_new_on_gpu_and_copy_to_host_64(benchmark::State& state) -{ - while (state.KeepRunning()) { - ClassWithSize<64>** gpuBuffer; - cudaMalloc(&gpuBuffer, sizeof(ClassWithSize<64>*)); - create_kernel<<<1, 1>>>(gpuBuffer); - ClassWithSize<64>** cpuBuffer = (ClassWithSize<64>**) malloc(sizeof(ClassWithSize<64>*)); - cudaMemcpy(cpuBuffer, gpuBuffer, sizeof(ClassWithSize<64>*), cudaMemcpyDeviceToHost); - cudaFree(gpuBuffer); - ClassWithSize<64>* gpuPointer = cpuBuffer[0]; - delete_kernel_2<<<1, 1>>>(gpuPointer); - free(cpuBuffer); - cudaDeviceSynchronize(); - } -} - -BENCHMARK(benchmark_new_on_gpu_and_copy_to_host_64); - -void benchmark_new_on_gpu_and_copy_to_host_512(benchmark::State& state) -{ - while (state.KeepRunning()) { - ClassWithSize<512>** gpuBuffer; - cudaMalloc(&gpuBuffer, sizeof(ClassWithSize<512>*)); - create_kernel<<<1, 1>>>(gpuBuffer); - ClassWithSize<512>** cpuBuffer = (ClassWithSize<512>**) malloc(sizeof(ClassWithSize<512>*)); - cudaMemcpy(cpuBuffer, gpuBuffer, sizeof(ClassWithSize<512>*), cudaMemcpyDeviceToHost); - cudaFree(gpuBuffer); - ClassWithSize<512>* gpuPointer = cpuBuffer[0]; - delete_kernel_2<<<1, 1>>>(gpuPointer); - free(cpuBuffer); - cudaDeviceSynchronize(); - } -} - -BENCHMARK(benchmark_new_on_gpu_and_copy_to_host_512); - -void benchmark_new_on_gpu_and_copy_to_host_4096(benchmark::State& state) -{ - while (state.KeepRunning()) { - ClassWithSize<4096>** gpuBuffer; - cudaMalloc(&gpuBuffer, sizeof(ClassWithSize<4096>*)); - create_kernel<<<1, 1>>>(gpuBuffer); - ClassWithSize<4096>** cpuBuffer = (ClassWithSize<4096>**) malloc(sizeof(ClassWithSize<4096>*)); - cudaMemcpy(cpuBuffer, gpuBuffer, sizeof(ClassWithSize<4096>*), cudaMemcpyDeviceToHost); - cudaFree(gpuBuffer); - ClassWithSize<4096>* gpuPointer = cpuBuffer[0]; - delete_kernel_2<<<1, 1>>>(gpuPointer); - free(cpuBuffer); - cudaDeviceSynchronize(); - } -} - -BENCHMARK(benchmark_new_on_gpu_and_copy_to_host_4096); - -void benchmark_new_on_gpu_and_copy_to_host_32768(benchmark::State& state) +template +static void benchmark_new_on_gpu_and_copy_to_host(benchmark::State& state) { while (state.KeepRunning()) { - ClassWithSize<32768>** gpuBuffer; - cudaMalloc(&gpuBuffer, sizeof(ClassWithSize<32768>*)); + ClassWithSize** gpuBuffer; + cudaMalloc(&gpuBuffer, sizeof(ClassWithSize*)); create_kernel<<<1, 1>>>(gpuBuffer); - ClassWithSize<32768>** cpuBuffer = (ClassWithSize<32768>**) malloc(sizeof(ClassWithSize<32768>*)); - cudaMemcpy(cpuBuffer, gpuBuffer, sizeof(ClassWithSize<32768>*), cudaMemcpyDeviceToHost); + ClassWithSize** cpuBuffer = (ClassWithSize**) malloc(sizeof(ClassWithSize*)); + cudaMemcpy(cpuBuffer, gpuBuffer, sizeof(ClassWithSize*), cudaMemcpyDeviceToHost); cudaFree(gpuBuffer); - ClassWithSize<32768>* gpuPointer = cpuBuffer[0]; - delete_kernel_2<<<1, 1>>>(gpuPointer); + ClassWithSize* gpuPointer = cpuBuffer[0]; free(cpuBuffer); - cudaDeviceSynchronize(); - } -} -BENCHMARK(benchmark_new_on_gpu_and_copy_to_host_32768); + state.PauseTiming(); -void benchmark_new_on_gpu_and_copy_to_host_262144(benchmark::State& state) -{ - while (state.KeepRunning()) { - ClassWithSize<262144>** gpuBuffer; - cudaMalloc(&gpuBuffer, sizeof(ClassWithSize<262144>*)); - create_kernel<<<1, 1>>>(gpuBuffer); - ClassWithSize<262144>** cpuBuffer = (ClassWithSize<262144>**) malloc(sizeof(ClassWithSize<262144>*)); - cudaMemcpy(cpuBuffer, gpuBuffer, sizeof(ClassWithSize<262144>*), cudaMemcpyDeviceToHost); - cudaFree(gpuBuffer); - ClassWithSize<262144>* gpuPointer = cpuBuffer[0]; 
delete_kernel_2<<<1, 1>>>(gpuPointer); - free(cpuBuffer); cudaDeviceSynchronize(); - } -} -BENCHMARK(benchmark_new_on_gpu_and_copy_to_host_262144); - -void benchmark_new_on_gpu_and_copy_to_host_2097152(benchmark::State& state) -{ - while (state.KeepRunning()) { - ClassWithSize<2097152>** gpuBuffer; - cudaMalloc(&gpuBuffer, sizeof(ClassWithSize<2097152>*)); - create_kernel<<<1, 1>>>(gpuBuffer); - ClassWithSize<2097152>** cpuBuffer = (ClassWithSize<2097152>**) malloc(sizeof(ClassWithSize<2097152>*)); - cudaMemcpy(cpuBuffer, gpuBuffer, sizeof(ClassWithSize<2097152>*), cudaMemcpyDeviceToHost); - cudaFree(gpuBuffer); - ClassWithSize<2097152>* gpuPointer = cpuBuffer[0]; - delete_kernel_2<<<1, 1>>>(gpuPointer); - free(cpuBuffer); - cudaDeviceSynchronize(); + state.ResumeTiming(); } } -BENCHMARK(benchmark_new_on_gpu_and_copy_to_host_2097152); +BENCHMARK_TEMPLATE(benchmark_new_on_gpu_and_copy_to_host, 8); +BENCHMARK_TEMPLATE(benchmark_new_on_gpu_and_copy_to_host, 64); +BENCHMARK_TEMPLATE(benchmark_new_on_gpu_and_copy_to_host, 512); +BENCHMARK_TEMPLATE(benchmark_new_on_gpu_and_copy_to_host, 4096); +BENCHMARK_TEMPLATE(benchmark_new_on_gpu_and_copy_to_host, 32768); +BENCHMARK_TEMPLATE(benchmark_new_on_gpu_and_copy_to_host, 262144); +BENCHMARK_TEMPLATE(benchmark_new_on_gpu_and_copy_to_host, 2097152); // Benchmark how long it takes to create a stack object on the GPU template @@ -571,76 +290,34 @@ __global__ void create_on_stack_kernel() { (void) ClassWithSize(); } -void benchmark_create_on_stack_on_gpu_8(benchmark::State& state) -{ - while (state.KeepRunning()) { - create_on_stack_kernel<8><<<1, 1>>>(); - cudaDeviceSynchronize(); - } -} - -BENCHMARK(benchmark_create_on_stack_on_gpu_8); - -void benchmark_create_on_stack_on_gpu_64(benchmark::State& state) -{ - while (state.KeepRunning()) { - create_on_stack_kernel<64><<<1, 1>>>(); - cudaDeviceSynchronize(); - } -} - -BENCHMARK(benchmark_create_on_stack_on_gpu_64); - -void benchmark_create_on_stack_on_gpu_512(benchmark::State& state) -{ - while (state.KeepRunning()) { - create_on_stack_kernel<512><<<1, 1>>>(); - cudaDeviceSynchronize(); - } -} - -BENCHMARK(benchmark_create_on_stack_on_gpu_512); - -void benchmark_create_on_stack_on_gpu_4096(benchmark::State& state) +template +static void benchmark_create_on_stack_on_gpu(benchmark::State& state) { while (state.KeepRunning()) { - create_on_stack_kernel<4096><<<1, 1>>>(); + create_on_stack_kernel<<<1, 1>>>(); cudaDeviceSynchronize(); } } -BENCHMARK(benchmark_create_on_stack_on_gpu_4096); +BENCHMARK_TEMPLATE(benchmark_create_on_stack_on_gpu, 8); +BENCHMARK_TEMPLATE(benchmark_create_on_stack_on_gpu, 64); +BENCHMARK_TEMPLATE(benchmark_create_on_stack_on_gpu, 512); +BENCHMARK_TEMPLATE(benchmark_create_on_stack_on_gpu, 4096); +BENCHMARK_TEMPLATE(benchmark_create_on_stack_on_gpu, 32768); +BENCHMARK_TEMPLATE(benchmark_create_on_stack_on_gpu, 262144); +BENCHMARK_TEMPLATE(benchmark_create_on_stack_on_gpu, 2097152); -void benchmark_create_on_stack_on_gpu_32768(benchmark::State& state) +void benchmark_managed_ptr_use_gpu(benchmark::State& state) { - while (state.KeepRunning()) { - create_on_stack_kernel<32768><<<1, 1>>>(); - cudaDeviceSynchronize(); - } -} - -BENCHMARK(benchmark_create_on_stack_on_gpu_32768); + chai::managed_ptr helper = chai::make_managed(2); -void benchmark_create_on_stack_on_gpu_262144(benchmark::State& state) -{ while (state.KeepRunning()) { - create_on_stack_kernel<262144><<<1, 1>>>(); - cudaDeviceSynchronize(); + forall(gpu(), 0, 1, [=] __device__ (int i) { (void) helper->getValue(); }); } -} - 
-BENCHMARK(benchmark_create_on_stack_on_gpu_262144); -void benchmark_create_on_stack_on_gpu_2097152(benchmark::State& state) -{ - while (state.KeepRunning()) { - create_on_stack_kernel<2097152><<<1, 1>>>(); - cudaDeviceSynchronize(); - } + helper.free(); } -BENCHMARK(benchmark_create_on_stack_on_gpu_2097152); - BENCHMARK(benchmark_managed_ptr_use_gpu); // Curiously recurring template pattern From 701cd76b153ca0d5b63c2e938d5f88fa19620255 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Fri, 11 Oct 2019 09:25:27 -0700 Subject: [PATCH 38/58] Add another benchmark --- benchmarks/chai_managed_ptr_benchmarks.cpp | 30 ++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/benchmarks/chai_managed_ptr_benchmarks.cpp b/benchmarks/chai_managed_ptr_benchmarks.cpp index ec0ff71c..ae268f20 100644 --- a/benchmarks/chai_managed_ptr_benchmarks.cpp +++ b/benchmarks/chai_managed_ptr_benchmarks.cpp @@ -172,6 +172,36 @@ BENCHMARK_TEMPLATE(benchmark_pass_copy_to_gpu, 64); BENCHMARK_TEMPLATE(benchmark_pass_copy_to_gpu, 512); BENCHMARK_TEMPLATE(benchmark_pass_copy_to_gpu, 4096); +template +static void benchmark_copy_to_gpu(benchmark::State& state) +{ + ClassWithSize* cpuPointer = new ClassWithSize(); + + while (state.KeepRunning()) { + ClassWithSize* gpuPointer; + cudaMalloc(&gpuPointer, sizeof(ClassWithSize)); + cudaMemcpy(gpuPointer, cpuPointer, sizeof(ClassWithSize), cudaMemcpyHostToDevice); + cudaDeviceSynchronize(); + + state.PauseTiming(); + + cudaFree(gpuPointer); + cudaDeviceSynchronize(); + + state.ResumeTiming(); + } + + delete cpuPointer; +} + +BENCHMARK_TEMPLATE(benchmark_copy_to_gpu, 8); +BENCHMARK_TEMPLATE(benchmark_copy_to_gpu, 64); +BENCHMARK_TEMPLATE(benchmark_copy_to_gpu, 512); +BENCHMARK_TEMPLATE(benchmark_copy_to_gpu, 4096); +BENCHMARK_TEMPLATE(benchmark_copy_to_gpu, 32768); +BENCHMARK_TEMPLATE(benchmark_copy_to_gpu, 262144); +BENCHMARK_TEMPLATE(benchmark_copy_to_gpu, 2097152); + // Benchmark how long it takes to call placement new on the GPU template __global__ void placement_new_kernel(ClassWithSize* address) { From 519dc1f52f24780fab40f3aeabd9fdc71d544939 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Fri, 11 Oct 2019 09:53:49 -0700 Subject: [PATCH 39/58] Remove PauseTiming since it completely messes up the benchmarks --- benchmarks/chai_managed_ptr_benchmarks.cpp | 23 ---------------------- 1 file changed, 23 deletions(-) diff --git a/benchmarks/chai_managed_ptr_benchmarks.cpp b/benchmarks/chai_managed_ptr_benchmarks.cpp index ae268f20..c82eb186 100644 --- a/benchmarks/chai_managed_ptr_benchmarks.cpp +++ b/benchmarks/chai_managed_ptr_benchmarks.cpp @@ -181,14 +181,8 @@ static void benchmark_copy_to_gpu(benchmark::State& state) ClassWithSize* gpuPointer; cudaMalloc(&gpuPointer, sizeof(ClassWithSize)); cudaMemcpy(gpuPointer, cpuPointer, sizeof(ClassWithSize), cudaMemcpyHostToDevice); - cudaDeviceSynchronize(); - - state.PauseTiming(); - cudaFree(gpuPointer); cudaDeviceSynchronize(); - - state.ResumeTiming(); } delete cpuPointer; @@ -220,15 +214,9 @@ static void benchmark_placement_new_on_gpu(benchmark::State& state) ClassWithSize* address; cudaMalloc(&address, sizeof(ClassWithSize)); placement_new_kernel<<<1, 1>>>(address); - cudaDeviceSynchronize(); - - state.PauseTiming(); - placement_delete_kernel<<<1, 1>>>(address); cudaFree(address); cudaDeviceSynchronize(); - - state.ResumeTiming(); } } @@ -258,15 +246,9 @@ static void benchmark_new_on_gpu(benchmark::State& state) ClassWithSize** buffer; cudaMalloc(&buffer, sizeof(ClassWithSize*)); create_kernel<<<1, 1>>>(buffer); - 
cudaDeviceSynchronize(); - - state.PauseTiming(); - delete_kernel<<<1, 1>>>(buffer); cudaFree(buffer); cudaDeviceSynchronize(); - - state.ResumeTiming(); } } @@ -296,13 +278,8 @@ static void benchmark_new_on_gpu_and_copy_to_host(benchmark::State& state) cudaFree(gpuBuffer); ClassWithSize* gpuPointer = cpuBuffer[0]; free(cpuBuffer); - - state.PauseTiming(); - delete_kernel_2<<<1, 1>>>(gpuPointer); cudaDeviceSynchronize(); - - state.ResumeTiming(); } } From 4dad7285822e951abc5e3036b8247704f09823ca Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Fri, 11 Oct 2019 14:48:52 -0700 Subject: [PATCH 40/58] Made benchmark non-trivial --- benchmarks/chai_managed_ptr_benchmarks.cpp | 147 +++++++++++++++++---- 1 file changed, 119 insertions(+), 28 deletions(-) diff --git a/benchmarks/chai_managed_ptr_benchmarks.cpp b/benchmarks/chai_managed_ptr_benchmarks.cpp index c82eb186..b47c2a48 100644 --- a/benchmarks/chai_managed_ptr_benchmarks.cpp +++ b/benchmarks/chai_managed_ptr_benchmarks.cpp @@ -51,14 +51,18 @@ class Base { public: - CHAI_HOST_DEVICE virtual int getValue() const = 0; + CHAI_HOST_DEVICE virtual void scale(size_t numValues, int* values) = 0; }; class Derived : public Base { public: CHAI_HOST_DEVICE Derived(int value) : Base(), m_value(value) {} - CHAI_HOST_DEVICE int getValue() const override { return m_value; } + CHAI_HOST_DEVICE virtual void scale(size_t numValues, int* values) override { + for (size_t i = 0; i < numValues; ++i) { + values[i] *= m_value; + } + } private: int m_value = -1; @@ -67,8 +71,8 @@ class Derived : public Base { template class BaseCRTP { public: - CHAI_HOST_DEVICE int getValue() const { - return static_cast(this)->getValue(); + CHAI_HOST_DEVICE void scale(size_t numValues, int* values) { + return static_cast(this)->scale(numValues, values); } }; @@ -76,7 +80,11 @@ class DerivedCRTP : public BaseCRTP { public: CHAI_HOST_DEVICE DerivedCRTP(int value) : BaseCRTP(), m_value(value) {} - CHAI_HOST_DEVICE int getValue() const { return m_value; } + CHAI_HOST_DEVICE void scale(size_t numValues, int* values) { + for (size_t i = 0; i < numValues; ++i) { + values[i] *= m_value; + } + } private: int m_value = -1; @@ -86,7 +94,11 @@ class NoInheritance { public: CHAI_HOST_DEVICE NoInheritance(int value) : m_value(value) {} - CHAI_HOST_DEVICE int getValue() const { return m_value; } + CHAI_HOST_DEVICE void scale(size_t numValues, int* values) { + for (size_t i = 0; i < numValues; ++i) { + values[i] *= m_value; + } + } private: int m_value = -1; @@ -109,29 +121,49 @@ static void benchmark_managed_ptr_construction_and_destruction(benchmark::State& BENCHMARK(benchmark_managed_ptr_construction_and_destruction); // managed_ptr -static void benchmark_managed_ptr_use_cpu(benchmark::State& state) +static void benchmark_use_managed_ptr_cpu(benchmark::State& state) { - chai::managed_ptr helper = chai::make_managed(1); + chai::managed_ptr object = chai::make_managed(2); + + size_t numValues = 100; + int* values = (int*) malloc(100 * sizeof(int)); + + for (size_t i = 0; i < numValues; ++i) { + values[i] = i * i; + } + +#ifdef __CUDACC__ + cudaDeviceSynchronize(); +#endif while (state.KeepRunning()) { - forall(sequential(), 0, 1, [=] (int i) { (void) helper->getValue(); }); + object->scale(numValues, values); } - helper.free(); + object.free(); + cudaDeviceSynchronize(); } -BENCHMARK(benchmark_managed_ptr_use_cpu); +BENCHMARK(benchmark_use_managed_ptr_cpu); // Curiously recurring template pattern static void benchmark_curiously_recurring_template_pattern_cpu(benchmark::State& state) { - 
BaseCRTP* helper = new DerivedCRTP(3); + BaseCRTP* object = new DerivedCRTP(2); + + size_t numValues = 100; + int* values = (int*) malloc(100 * sizeof(int)); + + for (size_t i = 0; i < numValues; ++i) { + values[i] = i * i; + } while (state.KeepRunning()) { - forall(sequential(), 0, 1, [=] (int i) { (void) helper->getValue(); }); + object->scale(numValues, values); } - delete helper; + free(values); + delete object; } BENCHMARK(benchmark_curiously_recurring_template_pattern_cpu); @@ -139,13 +171,21 @@ BENCHMARK(benchmark_curiously_recurring_template_pattern_cpu); // Class without inheritance static void benchmark_no_inheritance_cpu(benchmark::State& state) { - NoInheritance* helper = new NoInheritance(5); + NoInheritance* object = new NoInheritance(2); + + size_t numValues = 100; + int* values = (int*) malloc(100 * sizeof(int)); + + for (size_t i = 0; i < numValues; ++i) { + values[i] = i * i; + } while (state.KeepRunning()) { - forall(sequential(), 0, 1, [=] (int i) { (void) helper->getValue(); }); + object->scale(numValues, values); } - delete helper; + free(values); + delete object; } BENCHMARK(benchmark_no_inheritance_cpu); @@ -314,45 +354,96 @@ BENCHMARK_TEMPLATE(benchmark_create_on_stack_on_gpu, 32768); BENCHMARK_TEMPLATE(benchmark_create_on_stack_on_gpu, 262144); BENCHMARK_TEMPLATE(benchmark_create_on_stack_on_gpu, 2097152); -void benchmark_managed_ptr_use_gpu(benchmark::State& state) +// Use managed_ptr +__global__ void fill(size_t numValues, int* values) { + size_t i = blockIdx.x * blockDim.x + threadIdx.x; + + if (i < numValues) { + values[i] = i * i; + } +} + +__global__ void square(chai::managed_ptr object, size_t numValues, int* values) { + object->scale(numValues, values); +} + +void benchmark_use_managed_ptr_gpu(benchmark::State& state) { - chai::managed_ptr helper = chai::make_managed(2); + chai::managed_ptr object = chai::make_managed(2); + + size_t numValues = 100; + int* values; + cudaMalloc(&values, numValues * sizeof(int)); + fill<<<1, 100>>>(numValues, values); + + cudaDeviceSynchronize(); while (state.KeepRunning()) { - forall(gpu(), 0, 1, [=] __device__ (int i) { (void) helper->getValue(); }); + square<<<1, 1>>>(object, numValues, values); + cudaDeviceSynchronize(); } - helper.free(); + cudaFree(values); + object.free(); + cudaDeviceSynchronize(); } -BENCHMARK(benchmark_managed_ptr_use_gpu); +BENCHMARK(benchmark_use_managed_ptr_gpu); // Curiously recurring template pattern +__global__ void square(BaseCRTP object, size_t numValues, int* values) { + object.scale(numValues, values); +} + void benchmark_curiously_recurring_template_pattern_gpu(benchmark::State& state) { - BaseCRTP* derivedCRTP = new DerivedCRTP(4); - auto helper = *derivedCRTP; + BaseCRTP* derivedCRTP = new DerivedCRTP(2); + auto object = *derivedCRTP; + + size_t numValues = 100; + int* values; + cudaMalloc(&values, numValues * sizeof(int)); + fill<<<1, 100>>>(numValues, values); + + cudaDeviceSynchronize(); while (state.KeepRunning()) { - forall(gpu(), 0, 1, [=] __device__ (int i) { (void) helper.getValue(); }); + square<<<1, 1>>>(object, numValues, values); + cudaDeviceSynchronize(); } + cudaFree(values); delete derivedCRTP; + cudaDeviceSynchronize(); } BENCHMARK(benchmark_curiously_recurring_template_pattern_gpu); // Class without inheritance +__global__ void square(NoInheritance object, size_t numValues, int* values) { + object.scale(numValues, values); +} + void benchmark_no_inheritance_gpu(benchmark::State& state) { - NoInheritance* noInheritance = new NoInheritance(5); - auto helper = 
*noInheritance; + NoInheritance* noInheritance = new NoInheritance(2); + auto object = *noInheritance; + + size_t numValues = 100; + int* values; + cudaMalloc(&values, numValues * sizeof(int)); + fill<<<1, 100>>>(numValues, values); + + cudaDeviceSynchronize(); while (state.KeepRunning()) { - forall(gpu(), 0, 1, [=] __device__ (int i) { (void) helper.getValue(); }); + square<<<1, 1>>>(object, numValues, values); + cudaDeviceSynchronize(); } + cudaFree(values); delete noInheritance; + cudaDeviceSynchronize(); } BENCHMARK(benchmark_no_inheritance_gpu); From ea602f1e91a1f5ead66005d874dd17dafc03e7bf Mon Sep 17 00:00:00 2001 From: David Beckingsale Date: Wed, 16 Oct 2019 06:49:05 -0700 Subject: [PATCH 41/58] Switchout to use Umpire's logging macro for better log support --- src/chai/ArrayManager.cpp | 9 ++++----- src/chai/ArrayManager.inl | 2 +- src/chai/ChaiMacros.hpp | 22 ++++++++++++++++++---- src/chai/ManagedArray.inl | 14 +++++++------- src/chai/ManagedArray_thin.inl | 24 ++++++++++++------------ 5 files changed, 42 insertions(+), 29 deletions(-) diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index 54a32176..82dc3795 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -85,7 +85,7 @@ void ArrayManager::registerPointer( ExecutionSpace space, bool owned) { - CHAI_LOG("ArrayManager", "Registering " << pointer << " in space " << space); + CHAI_LOG(Debug, "Registering " << pointer << " in space " << space); std::lock_guard lock(m_mutex); @@ -113,7 +113,7 @@ void ArrayManager::deregisterPointer(PointerRecord* record) void ArrayManager::setExecutionSpace(ExecutionSpace space) { - CHAI_LOG("ArrayManager", "Setting execution space to " << space); + CHAI_LOG(Debug, "Setting execution space to " << space); std::lock_guard lock(m_mutex); m_current_execution_space = space; @@ -150,8 +150,7 @@ void ArrayManager::registerTouch(PointerRecord* pointer_record) void ArrayManager::registerTouch(PointerRecord* pointer_record, ExecutionSpace space) { - CHAI_LOG("ArrayManager", - pointer << " touched in space " << space); + CHAI_LOG(Debug, pointer << " touched in space " << space); if (space != NONE) { std::lock_guard lock(m_mutex); @@ -217,7 +216,7 @@ void ArrayManager::allocate( registerPointer(pointer_record, space); - CHAI_LOG("ArrayManager", "Allocated array at: " << ret); + CHAI_LOG(Debug, "Allocated array at: " << ret); } void ArrayManager::free(PointerRecord* pointer_record) diff --git a/src/chai/ArrayManager.inl b/src/chai/ArrayManager.inl index ff6c8cf9..7c160439 100644 --- a/src/chai/ArrayManager.inl +++ b/src/chai/ArrayManager.inl @@ -72,7 +72,7 @@ void* ArrayManager::reallocate(void* pointer, size_t elems, PointerRecord* point for (int space = CPU; space < NUM_EXECUTION_SPACES; ++space) { if(!pointer_record->m_owned[space]) { - CHAI_LOG("ArrayManager", "Cannot reallocate unowned pointer"); + CHAI_LOG(Debug, "Cannot reallocate unowned pointer"); return pointer_record->m_pointers[my_space]; } } diff --git a/src/chai/ChaiMacros.hpp b/src/chai/ChaiMacros.hpp index e05dac6d..ca0fd83d 100644 --- a/src/chai/ChaiMacros.hpp +++ b/src/chai/ChaiMacros.hpp @@ -45,6 +45,8 @@ #include "chai/config.hpp" +#include "umpire/util/Macros.hpp" + #if defined(CHAI_ENABLE_CUDA) && defined(__CUDACC__) #define CHAI_HOST __host__ @@ -71,11 +73,23 @@ #define CHAI_UNUSED_ARG(X) -#ifdef DEBUG -#define CHAI_LOG(file, msg) \ - std::cerr << "[" << file << "] " << msg << std::endl; +#if !defined(CHAI_DISABLE_RM) + +#define CHAI_LOG(level, msg) \ + UMPIRE_LOG(level, msg); + +#else + +#if 
defined(DEBUG) + +#define CHAI_LOG(level, msg) \ + std::cerr << "[" << __FILE__ << "] " << msg << std::endl; + #else -#define CHAI_LOG(file, msg) + +#define CHAI_LOG(level, msg) + +#endif #endif #endif // CHAI_ChaiMacros_HPP diff --git a/src/chai/ManagedArray.inl b/src/chai/ManagedArray.inl index 18f4db7a..04615406 100644 --- a/src/chai/ManagedArray.inl +++ b/src/chai/ManagedArray.inl @@ -197,7 +197,7 @@ CHAI_HOST ManagedArray ManagedArray::slice(size_t offset, size_t elems) { ManagedArray slice(nullptr); slice.m_resource_manager = m_resource_manager; if(offset + elems > size()) { - CHAI_LOG("ManagedArray", "Invalid slice. No active pointer or index out of bounds"); + CHAI_LOG(Debug, "Invalid slice. No active pointer or index out of bounds"); } else { slice.m_pointer_record = m_pointer_record; slice.m_active_base_pointer = m_active_base_pointer; @@ -216,7 +216,7 @@ CHAI_HOST void ManagedArray::allocate( const UserCallback& cback) { if(!m_is_slice) { - CHAI_LOG("ManagedArray", "Allocating array of size " << elems << " in space " << space); + CHAI_LOG(Debug, "Allocating array of size " << elems << " in space " << space); if (space == NONE) { space = m_resource_manager->getDefaultAllocationSpace(); @@ -231,7 +231,7 @@ CHAI_HOST void ManagedArray::allocate( m_active_base_pointer = static_cast(m_pointer_record->m_pointers[space]); m_active_pointer = m_active_base_pointer; // Cannot be a slice - CHAI_LOG("ManagedArray", "m_active_ptr allocated at address: " << m_active_pointer); + CHAI_LOG(Debug, "m_active_ptr allocated at address: " << m_active_pointer); } } @@ -240,7 +240,7 @@ CHAI_INLINE CHAI_HOST void ManagedArray::reallocate(size_t elems) { if(!m_is_slice) { - CHAI_LOG("ManagedArray", "Reallocating array of size " << m_elems << " with new size" << elems); + CHAI_LOG(Debug, "Reallocating array of size " << m_elems << " with new size" << elems); m_elems = elems; m_active_base_pointer = @@ -248,7 +248,7 @@ CHAI_HOST void ManagedArray::reallocate(size_t elems) m_pointer_record)); m_active_pointer = m_active_base_pointer; // Cannot be a slice - CHAI_LOG("ManagedArray", "m_active_ptr reallocated at address: " << m_active_pointer); + CHAI_LOG(Debug, "m_active_ptr reallocated at address: " << m_active_pointer); } } @@ -260,7 +260,7 @@ CHAI_HOST void ManagedArray::free() m_resource_manager->free(m_pointer_record); m_pointer_record = nullptr; } else { - CHAI_LOG("ManagedArray", "Cannot free a slice!"); + CHAI_LOG(Debug, "Cannot free a slice!"); } } @@ -371,7 +371,7 @@ void ManagedArray::move(ExecutionSpace space) m_active_pointer = m_active_base_pointer + m_offset; if (!std::is_const::value) { - CHAI_LOG("ManagedArray", "T is non-const, registering touch of pointer" << m_active_pointer); + CHAI_LOG(Debug, "T is non-const, registering touch of pointer" << m_active_pointer); m_resource_manager->registerTouch(m_pointer_record, space); } diff --git a/src/chai/ManagedArray_thin.inl b/src/chai/ManagedArray_thin.inl index 528751ee..09d09e73 100644 --- a/src/chai/ManagedArray_thin.inl +++ b/src/chai/ManagedArray_thin.inl @@ -121,7 +121,7 @@ CHAI_INLINE CHAI_HOST ManagedArray ManagedArray::slice(size_t offset, size_t elems) { ManagedArray slice; if (offset + elems > size()) { - CHAI_LOG("ManagedArray", "Invalid slice. No active pointer or index out of bounds"); + CHAI_LOG(Debug, "Invalid slice. 
No active pointer or index out of bounds"); } else { slice.m_active_pointer = m_active_pointer + offset; slice.m_elems = elems; @@ -137,9 +137,9 @@ CHAI_HOST void ManagedArray::allocate(size_t elems, UserCallback const &) { if (!m_is_slice) { (void) space; // Quiet compiler warning when CHAI_LOG does nothing - CHAI_LOG("ManagedArray", "Allocating array of size " << elems - << " in space " - << space); + CHAI_LOG(Debug, "Allocating array of size " << elems + << " in space " + << space); m_elems = elems; @@ -149,10 +149,10 @@ CHAI_HOST void ManagedArray::allocate(size_t elems, m_active_pointer = static_cast(malloc(sizeof(T) * elems)); #endif - CHAI_LOG("ManagedArray", "m_active_ptr allocated at address: " << m_active_pointer); + CHAI_LOG(Debug, "m_active_ptr allocated at address: " << m_active_pointer); } else { - CHAI_LOG("ManagedArray", "Attempted to allocate slice!"); + CHAI_LOG(Debug, "Attempted to allocate slice!"); } } @@ -161,9 +161,9 @@ CHAI_INLINE CHAI_HOST void ManagedArray::reallocate(size_t new_elems) { if (!m_is_slice) { - CHAI_LOG("ManagedArray", "Reallocating array of size " << m_elems - << " with new size" - << elems); + CHAI_LOG(Debug, "Reallocating array of size " << m_elems + << " with new size" + << elems); T* new_ptr; @@ -179,10 +179,10 @@ CHAI_HOST void ManagedArray::reallocate(size_t new_elems) m_active_pointer = new_ptr; m_active_base_pointer = m_active_pointer; - CHAI_LOG("ManagedArray", "m_active_ptr reallocated at address: " << m_active_pointer); + CHAI_LOG(Debug, "m_active_ptr reallocated at address: " << m_active_pointer); } else { - CHAI_LOG("ManagedArray", "Attempted to realloc slice!"); + CHAI_LOG(Debug, "Attempted to realloc slice!"); } } @@ -201,7 +201,7 @@ CHAI_HOST void ManagedArray::free() m_active_pointer = nullptr; } else { - CHAI_LOG("ManagedArray", "tried to free slice!"); + CHAI_LOG(Debug, "tried to free slice!"); } } From a051ee6889513933309add228543c701c8b7d7bb Mon Sep 17 00:00:00 2001 From: David Beckingsale Date: Wed, 16 Oct 2019 06:53:20 -0700 Subject: [PATCH 42/58] Update BLT version --- blt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/blt b/blt index fafdccc9..47089360 160000 --- a/blt +++ b/blt @@ -1 +1 @@ -Subproject commit fafdccc9a83dc293db1c0b678f859aa8a067b296 +Subproject commit 4708936054366585478d9c5430449358a0a3eb86 From df4dcb7deb74c3387899442965b42559a2369929 Mon Sep 17 00:00:00 2001 From: David Beckingsale Date: Wed, 16 Oct 2019 06:58:56 -0700 Subject: [PATCH 43/58] Fixup some incorrect log messages --- src/chai/ArrayManager.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index 82dc3795..e6c55628 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -85,12 +85,11 @@ void ArrayManager::registerPointer( ExecutionSpace space, bool owned) { - CHAI_LOG(Debug, "Registering " << pointer << " in space " << space); - std::lock_guard lock(m_mutex); - auto pointer = record->m_pointers[space]; + CHAI_LOG(Debug, "Registering " << pointer << " in space " << space); + m_pointer_map.insert(pointer, record); //record->m_last_space = space; @@ -150,7 +149,7 @@ void ArrayManager::registerTouch(PointerRecord* pointer_record) void ArrayManager::registerTouch(PointerRecord* pointer_record, ExecutionSpace space) { - CHAI_LOG(Debug, pointer << " touched in space " << space); + CHAI_LOG(Debug, pointer_record->m_pointers[space] << " touched in space " << space); if (space != NONE) { std::lock_guard lock(m_mutex); @@ -216,7 +215,7 @@ 
void ArrayManager::allocate( registerPointer(pointer_record, space); - CHAI_LOG(Debug, "Allocated array at: " << ret); + CHAI_LOG(Debug, "Allocated array at: " << pointer_record->m_pointers[space]); } void ArrayManager::free(PointerRecord* pointer_record) From 37f15347c98546b1a38bd5f1ebe909441dcccc92 Mon Sep 17 00:00:00 2001 From: David Beckingsale Date: Wed, 16 Oct 2019 06:59:09 -0700 Subject: [PATCH 44/58] Bump Umpire to v1.1.0 --- src/tpl/umpire | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tpl/umpire b/src/tpl/umpire index 82482fd7..3db26e6a 160000 --- a/src/tpl/umpire +++ b/src/tpl/umpire @@ -1 +1 @@ -Subproject commit 82482fd7450ab378db110f06f7e0302112c22c05 +Subproject commit 3db26e6a2626ee8c0cfa5c9769cfac6e33587122 From a0a897ea2480542d4fec5c25196dc1ba3fc26d2a Mon Sep 17 00:00:00 2001 From: David Beckingsale Date: Wed, 16 Oct 2019 07:25:22 -0700 Subject: [PATCH 45/58] Switch to SPDX license --- COPYRIGHT | 15 +++++++ LICENSE | 4 +- benchmarks/chai_arraymanager_benchmarks.cpp | 46 +++---------------- benchmarks/chai_benchmark_utils.hpp | 46 +++---------------- benchmarks/chai_managedarray_benchmarks.cpp | 46 +++---------------- docs/sphinx/conf.py | 49 +++------------------ examples/chai-umpire-allocators.cpp | 46 +++---------------- examples/ex1.cpp | 46 +++---------------- examples/example.cpp | 6 +++ scripts/apply-license-info.sh | 35 +++++++++++++++ scripts/format-source.sh | 6 +++ scripts/license.txt | 4 ++ scripts/make_release_tarball.sh | 48 +++----------------- scripts/travis/build_and_test.sh | 6 +++ scripts/travis/install_llvm.sh | 6 +++ scripts/update-copyright-year.sh | 49 +++------------------ src/chai/ArrayManager.cpp | 46 +++---------------- src/chai/ArrayManager.hpp | 46 +++---------------- src/chai/ChaiMacros.hpp | 46 +++---------------- src/chai/ExecutionSpaces.hpp | 46 +++---------------- src/chai/ManagedArray.hpp | 46 +++---------------- src/chai/PointerRecord.hpp | 46 +++---------------- src/chai/Types.hpp | 46 +++---------------- src/util/forall.hpp | 46 +++---------------- tests/integration/managed_array_tests.cpp | 48 +++----------------- tests/unit/array_manager_unit_tests.cpp | 46 +++---------------- tests/unit/managed_array_unit_tests.cpp | 46 +++---------------- 27 files changed, 178 insertions(+), 788 deletions(-) create mode 100644 COPYRIGHT create mode 100755 scripts/apply-license-info.sh create mode 100644 scripts/license.txt diff --git a/COPYRIGHT b/COPYRIGHT new file mode 100644 index 00000000..3bc5dea5 --- /dev/null +++ b/COPYRIGHT @@ -0,0 +1,15 @@ +Intellectual Property Notice +------------------------------ + +CHAI is licensed under the BSD 3 Clause license (LICENSE or +https://opensource.org/licenses/BSD-3-Clause). + +Copyrights and patents in the CHAI project are retained by contributors. No +copyright assignment is required to contribute to CHAI. + +SPDX usage +------------ + +Individual files contain SPDX tags instead of the full license text. +This enables machine processing of license information based on the SPDX +License Identifiers that are available here: https://spdx.org/licenses/ diff --git a/LICENSE b/LICENSE index 89442d9a..8f8fd45a 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,4 @@ -BSD 3-Clause License - -Copyright (c) 2018, Lawrence Livermore National Security, LLC +Copyright (c) 2016-2019, Lawrence Livermore National Security, LLC. All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/benchmarks/chai_arraymanager_benchmarks.cpp b/benchmarks/chai_arraymanager_benchmarks.cpp index 78430582..c58a4987 100644 --- a/benchmarks/chai_arraymanager_benchmarks.cpp +++ b/benchmarks/chai_arraymanager_benchmarks.cpp @@ -1,45 +1,9 @@ -// --------------------------------------------------------------------- -// Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All -// rights reserved. +////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +// project contributors. See the COPYRIGHT file for details. // -// Produced at the Lawrence Livermore National Laboratory. -// -// This file is part of CHAI. -// -// LLNL-CODE-705877 -// -// For details, see https:://github.com/LLNL/CHAI -// Please also see the NOTICE and LICENSE files. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// - Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the -// distribution. -// -// - Neither the name of the LLNS/LLNL nor the names of its contributors -// may be used to endorse or promote products derived from this -// software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY -// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// --------------------------------------------------------------------- +// SPDX-License-Identifier: BSD-3-Clause +////////////////////////////////////////////////////////////////////////////// #include #include "benchmark/benchmark_api.h" diff --git a/benchmarks/chai_benchmark_utils.hpp b/benchmarks/chai_benchmark_utils.hpp index 659ff197..977f5b65 100644 --- a/benchmarks/chai_benchmark_utils.hpp +++ b/benchmarks/chai_benchmark_utils.hpp @@ -1,45 +1,9 @@ -// --------------------------------------------------------------------- -// Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All -// rights reserved. +////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +// project contributors. See the COPYRIGHT file for details. // -// Produced at the Lawrence Livermore National Laboratory. -// -// This file is part of CHAI. -// -// LLNL-CODE-705877 -// -// For details, see https:://github.com/LLNL/CHAI -// Please also see the NOTICE and LICENSE files. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// - Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the -// distribution. -// -// - Neither the name of the LLNS/LLNL nor the names of its contributors -// may be used to endorse or promote products derived from this -// software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY -// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// --------------------------------------------------------------------- +// SPDX-License-Identifier: BSD-3-Clause +////////////////////////////////////////////////////////////////////////////// #ifndef CHAI_chai_benchmark_utils_HPP #define CHAI_chai_benchmark_utils_HPP diff --git a/benchmarks/chai_managedarray_benchmarks.cpp b/benchmarks/chai_managedarray_benchmarks.cpp index 74409725..4fcb33bf 100644 --- a/benchmarks/chai_managedarray_benchmarks.cpp +++ b/benchmarks/chai_managedarray_benchmarks.cpp @@ -1,45 +1,9 @@ -// --------------------------------------------------------------------- -// Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All -// rights reserved. +////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +// project contributors. See the COPYRIGHT file for details. // -// Produced at the Lawrence Livermore National Laboratory. -// -// This file is part of CHAI. -// -// LLNL-CODE-705877 -// -// For details, see https:://github.com/LLNL/CHAI -// Please also see the NOTICE and LICENSE files. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// - Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the -// distribution. -// -// - Neither the name of the LLNS/LLNL nor the names of its contributors -// may be used to endorse or promote products derived from this -// software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY -// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// --------------------------------------------------------------------- +// SPDX-License-Identifier: BSD-3-Clause +////////////////////////////////////////////////////////////////////////////// #include #include "benchmark/benchmark_api.h" diff --git a/docs/sphinx/conf.py b/docs/sphinx/conf.py index e7e4a883..bd3f0ee1 100644 --- a/docs/sphinx/conf.py +++ b/docs/sphinx/conf.py @@ -1,48 +1,11 @@ +############################################################################## +# Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +# project contributors. See the COPYRIGHT file for details. +# +# SPDX-License-Identifier: BSD-3-Clause +############################################################################## # -*- coding: utf-8 -*- -####################################################################### -# Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All -# rights reserved. -# -# Produced at the Lawrence Livermore National Laboratory -# -# This file is part of CHAI. -# -# LLNL-CODE-705877 -# -# For details, see https:://github.com/LLNL/CHAI -# Please also see the NOTICE and LICENSE files. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# - Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the -# distribution. -# -# - Neither the name of the LLNS/LLNL nor the names of its contributors -# may be used to endorse or promote products derived from this -# software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY -# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. 
-####################################################################### - # # CHAI documentation build configuration file, created by # sphinx-quickstart on Thu Mar 30 12:14:09 2017. diff --git a/examples/chai-umpire-allocators.cpp b/examples/chai-umpire-allocators.cpp index 4486a56c..15fa4fa9 100644 --- a/examples/chai-umpire-allocators.cpp +++ b/examples/chai-umpire-allocators.cpp @@ -1,45 +1,9 @@ -// --------------------------------------------------------------------- -// Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All -// rights reserved. +////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +// project contributors. See the COPYRIGHT file for details. // -// Produced at the Lawrence Livermore National Laboratory. -// -// This file is part of CHAI. -// -// LLNL-CODE-705877 -// -// For details, see https:://github.com/LLNL/CHAI -// Please also see the NOTICE and LICENSE files. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// - Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the -// distribution. -// -// - Neither the name of the LLNS/LLNL nor the names of its contributors -// may be used to endorse or promote products derived from this -// software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY -// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// --------------------------------------------------------------------- +// SPDX-License-Identifier: BSD-3-Clause +////////////////////////////////////////////////////////////////////////////// #include "umpire/ResourceManager.hpp" #include "umpire/strategy/DynamicPool.hpp" diff --git a/examples/ex1.cpp b/examples/ex1.cpp index 11479716..fdec4b3b 100644 --- a/examples/ex1.cpp +++ b/examples/ex1.cpp @@ -1,45 +1,9 @@ -// --------------------------------------------------------------------- -// Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All -// rights reserved. +////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +// project contributors. See the COPYRIGHT file for details. // -// Produced at the Lawrence Livermore National Laboratory. -// -// This file is part of CHAI. 
-// -// LLNL-CODE-705877 -// -// For details, see https:://github.com/LLNL/CHAI -// Please also see the NOTICE and LICENSE files. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// - Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the -// distribution. -// -// - Neither the name of the LLNS/LLNL nor the names of its contributors -// may be used to endorse or promote products derived from this -// software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY -// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// --------------------------------------------------------------------- +// SPDX-License-Identifier: BSD-3-Clause +////////////////////////////////////////////////////////////////////////////// #include "chai/ManagedArray.hpp" #include "chai/util/forall.hpp" diff --git a/examples/example.cpp b/examples/example.cpp index a7a3b2d6..e3405160 100644 --- a/examples/example.cpp +++ b/examples/example.cpp @@ -1,3 +1,9 @@ +////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +// project contributors. See the COPYRIGHT file for details. +// +// SPDX-License-Identifier: BSD-3-Clause +////////////////////////////////////////////////////////////////////////////// // --------------------------------------------------------------------- // Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All // rights reserved. diff --git a/scripts/apply-license-info.sh b/scripts/apply-license-info.sh new file mode 100755 index 00000000..99f7aea3 --- /dev/null +++ b/scripts/apply-license-info.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env zsh +############################################################################## +# Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +# project contributors. See the COPYRIGHT file for details. +# +# SPDX-License-Identifier: BSD-3-Clause +############################################################################## + +setopt extended_glob + +RED="\033[1;31m" +GREEN="\033[1;32m" +NOCOLOR="\033[0m" + +LIC_CMD=$(which lic) +if [ ! $LIC_CMD ]; then + echo "${RED} [!] This script requires the lic command." + exit 255 +fi + +echo "Applying licenses to files" + +files_no_license=$(grep -L 'This file is part of Umpire.' 
\ + benchmarks/**/*(^/) \ + cmake/**/*(^/) \ + docs/**/*~*rst(^/)\ + examples/**/*(^/) \ + scripts/**/*(^/) \ + src/**/*~*tpl*(^/) \ + tests/**/*(^/) \ + CMakeLists.txt) + +echo $files_no_license | xargs $LIC_CMD -f scripts/license.txt + +echo "${GREEN} [Ok] License text applied. ${NOCOLOR}" diff --git a/scripts/format-source.sh b/scripts/format-source.sh index f14ab078..0206cfbe 100755 --- a/scripts/format-source.sh +++ b/scripts/format-source.sh @@ -1,3 +1,9 @@ #!/usr/bin/env bash +############################################################################## +# Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +# project contributors. See the COPYRIGHT file for details. +# +# SPDX-License-Identifier: BSD-3-Clause +############################################################################## find . -type f -iname '*.hpp' -o -iname '*.cpp' | grep -v -e blt -e tpl | xargs clang-format -i diff --git a/scripts/license.txt b/scripts/license.txt new file mode 100644 index 00000000..5b1c9027 --- /dev/null +++ b/scripts/license.txt @@ -0,0 +1,4 @@ +Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +project contributors. See the COPYRIGHT file for details. + +SPDX-License-Identifier: BSD-3-Clause diff --git a/scripts/make_release_tarball.sh b/scripts/make_release_tarball.sh index a8deba89..6a062604 100755 --- a/scripts/make_release_tarball.sh +++ b/scripts/make_release_tarball.sh @@ -1,46 +1,10 @@ #!/bin/bash -####################################################################### -# Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All -# rights reserved. -# -# Produced at the Lawrence Livermore National Laboratory. -# -# This file is part of CHAI. -# -# LLNL-CODE-705877 -# -# For details, see https:://github.com/LLNL/CHAI -# Please also see the NOTICE and LICENSE files. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# - Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the -# distribution. -# -# - Neither the name of the LLNS/LLNL nor the names of its contributors -# may be used to endorse or promote products derived from this -# software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY -# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. 
-####################################################################### +############################################################################## +# Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +# project contributors. See the COPYRIGHT file for details. +# +# SPDX-License-Identifier: BSD-3-Clause +############################################################################## TAR_CMD=gtar VERSION=1.2.0 diff --git a/scripts/travis/build_and_test.sh b/scripts/travis/build_and_test.sh index 1dd79681..44eb570c 100755 --- a/scripts/travis/build_and_test.sh +++ b/scripts/travis/build_and_test.sh @@ -1,4 +1,10 @@ #!/bin/bash +############################################################################## +# Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +# project contributors. See the COPYRIGHT file for details. +# +# SPDX-License-Identifier: BSD-3-Clause +############################################################################## function or_die () { "$@" diff --git a/scripts/travis/install_llvm.sh b/scripts/travis/install_llvm.sh index fb6a87fb..2b6c69fd 100755 --- a/scripts/travis/install_llvm.sh +++ b/scripts/travis/install_llvm.sh @@ -1,3 +1,9 @@ +############################################################################## +# Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +# project contributors. See the COPYRIGHT file for details. +# +# SPDX-License-Identifier: BSD-3-Clause +############################################################################## # /bin/bash export LLVM_PATH=${HOME}/llvm/clang+llvm-${LLVM_VERSION}-x86_64-linux-gnu-ubuntu-14.04 diff --git a/scripts/update-copyright-year.sh b/scripts/update-copyright-year.sh index 4bf2492e..6b411e07 100755 --- a/scripts/update-copyright-year.sh +++ b/scripts/update-copyright-year.sh @@ -1,47 +1,10 @@ #!/usr/bin/env zsh -####################################################################### -# Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All -# rights reserved. -# -# Produced at the Lawrence Livermore National Laboratory. -# -# This file is part of CHAI. -# -# LLNL-CODE-705877 -# -# For details, see https:://github.com/LLNL/CHAI -# Please also see the NOTICE and LICENSE files. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# - Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the -# distribution. -# -# - Neither the name of the LLNS/LLNL nor the names of its contributors -# may be used to endorse or promote products derived from this -# software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY -# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. -####################################################################### - +############################################################################## +# Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +# project contributors. See the COPYRIGHT file for details. +# +# SPDX-License-Identifier: BSD-3-Clause +############################################################################## # This is used for the ~*tpl* line to ignore files in bundled tpls setopt extended_glob diff --git a/src/chai/ArrayManager.cpp b/src/chai/ArrayManager.cpp index 54a32176..5effcbc9 100644 --- a/src/chai/ArrayManager.cpp +++ b/src/chai/ArrayManager.cpp @@ -1,45 +1,9 @@ -// --------------------------------------------------------------------- -// Copyright (c) 2017, Lawrence Livermore National Security, LLC. All -// rights reserved. +////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +// project contributors. See the COPYRIGHT file for details. // -// Produced at the Lawrence Livermore National Laboratory. -// -// This file is part of CHAI. -// -// LLNL-CODE-705877 -// -// For details, see https:://github.com/LLNL/CHAI -// Please also see the NOTICE and LICENSE files. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// - Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the -// distribution. -// -// - Neither the name of the LLNS/LLNL nor the names of its contributors -// may be used to endorse or promote products derived from this -// software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY -// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. 
-// --------------------------------------------------------------------- +// SPDX-License-Identifier: BSD-3-Clause +////////////////////////////////////////////////////////////////////////////// #include "chai/ArrayManager.hpp" #include "chai/config.hpp" diff --git a/src/chai/ArrayManager.hpp b/src/chai/ArrayManager.hpp index 82221d24..14e4c420 100644 --- a/src/chai/ArrayManager.hpp +++ b/src/chai/ArrayManager.hpp @@ -1,45 +1,9 @@ -// --------------------------------------------------------------------- -// Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All -// rights reserved. +////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +// project contributors. See the COPYRIGHT file for details. // -// Produced at the Lawrence Livermore National Laboratory. -// -// This file is part of CHAI. -// -// LLNL-CODE-705877 -// -// For details, see https:://github.com/LLNL/CHAI -// Please also see the NOTICE and LICENSE files. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// - Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the -// distribution. -// -// - Neither the name of the LLNS/LLNL nor the names of its contributors -// may be used to endorse or promote products derived from this -// software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY -// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// --------------------------------------------------------------------- +// SPDX-License-Identifier: BSD-3-Clause +////////////////////////////////////////////////////////////////////////////// #ifndef CHAI_ArrayManager_HPP #define CHAI_ArrayManager_HPP diff --git a/src/chai/ChaiMacros.hpp b/src/chai/ChaiMacros.hpp index e05dac6d..14cbaae7 100644 --- a/src/chai/ChaiMacros.hpp +++ b/src/chai/ChaiMacros.hpp @@ -1,45 +1,9 @@ -// --------------------------------------------------------------------- -// Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All -// rights reserved. +////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +// project contributors. See the COPYRIGHT file for details. // -// Produced at the Lawrence Livermore National Laboratory. -// -// This file is part of CHAI. 
-// -// LLNL-CODE-705877 -// -// For details, see https:://github.com/LLNL/CHAI -// Please also see the NOTICE and LICENSE files. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// - Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the -// distribution. -// -// - Neither the name of the LLNS/LLNL nor the names of its contributors -// may be used to endorse or promote products derived from this -// software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY -// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// --------------------------------------------------------------------- +// SPDX-License-Identifier: BSD-3-Clause +////////////////////////////////////////////////////////////////////////////// #ifndef CHAI_ChaiMacros_HPP #define CHAI_ChaiMacros_HPP diff --git a/src/chai/ExecutionSpaces.hpp b/src/chai/ExecutionSpaces.hpp index f75ec0ec..02f1c889 100644 --- a/src/chai/ExecutionSpaces.hpp +++ b/src/chai/ExecutionSpaces.hpp @@ -1,45 +1,9 @@ -// --------------------------------------------------------------------- -// Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All -// rights reserved. +////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +// project contributors. See the COPYRIGHT file for details. // -// Produced at the Lawrence Livermore National Laboratory. -// -// This file is part of CHAI. -// -// LLNL-CODE-705877 -// -// For details, see https:://github.com/LLNL/CHAI -// Please also see the NOTICE and LICENSE files. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// - Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the -// distribution. -// -// - Neither the name of the LLNS/LLNL nor the names of its contributors -// may be used to endorse or promote products derived from this -// software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY -// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// --------------------------------------------------------------------- +// SPDX-License-Identifier: BSD-3-Clause +////////////////////////////////////////////////////////////////////////////// #ifndef CHAI_ExecutionSpaces_HPP #define CHAI_ExecutionSpaces_HPP diff --git a/src/chai/ManagedArray.hpp b/src/chai/ManagedArray.hpp index c4593549..37d16e91 100644 --- a/src/chai/ManagedArray.hpp +++ b/src/chai/ManagedArray.hpp @@ -1,45 +1,9 @@ -// --------------------------------------------------------------------- -// Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All -// rights reserved. +////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +// project contributors. See the COPYRIGHT file for details. // -// Produced at the Lawrence Livermore National Laboratory. -// -// This file is part of CHAI. -// -// LLNL-CODE-705877 -// -// For details, see https:://github.com/LLNL/CHAI -// Please also see the NOTICE and LICENSE files. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// - Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the -// distribution. -// -// - Neither the name of the LLNS/LLNL nor the names of its contributors -// may be used to endorse or promote products derived from this -// software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY -// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. 
-// --------------------------------------------------------------------- +// SPDX-License-Identifier: BSD-3-Clause +////////////////////////////////////////////////////////////////////////////// #ifndef CHAI_ManagedArray_HPP #define CHAI_ManagedArray_HPP diff --git a/src/chai/PointerRecord.hpp b/src/chai/PointerRecord.hpp index da4f344e..9250a8e0 100644 --- a/src/chai/PointerRecord.hpp +++ b/src/chai/PointerRecord.hpp @@ -1,45 +1,9 @@ -// --------------------------------------------------------------------- -// Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All -// rights reserved. +////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +// project contributors. See the COPYRIGHT file for details. // -// Produced at the Lawrence Livermore National Laboratory. -// -// This file is part of CHAI. -// -// LLNL-CODE-705877 -// -// For details, see https:://github.com/LLNL/CHAI -// Please also see the NOTICE and LICENSE files. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// - Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the -// distribution. -// -// - Neither the name of the LLNS/LLNL nor the names of its contributors -// may be used to endorse or promote products derived from this -// software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY -// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// --------------------------------------------------------------------- +// SPDX-License-Identifier: BSD-3-Clause +////////////////////////////////////////////////////////////////////////////// #ifndef CHAI_PointerRecord_HPP #define CHAI_PointerRecord_HPP diff --git a/src/chai/Types.hpp b/src/chai/Types.hpp index 0937e473..37a57b86 100644 --- a/src/chai/Types.hpp +++ b/src/chai/Types.hpp @@ -1,45 +1,9 @@ -// --------------------------------------------------------------------- -// Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All -// rights reserved. +////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +// project contributors. See the COPYRIGHT file for details. // -// Produced at the Lawrence Livermore National Laboratory. -// -// This file is part of CHAI. 
-// -// LLNL-CODE-705877 -// -// For details, see https:://github.com/LLNL/CHAI -// Please also see the NOTICE and LICENSE files. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// - Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the -// distribution. -// -// - Neither the name of the LLNS/LLNL nor the names of its contributors -// may be used to endorse or promote products derived from this -// software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY -// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// --------------------------------------------------------------------- +// SPDX-License-Identifier: BSD-3-Clause +////////////////////////////////////////////////////////////////////////////// #ifndef CHAI_Types_HPP #define CHAI_Types_HPP diff --git a/src/util/forall.hpp b/src/util/forall.hpp index 4cb537f0..f1ae1835 100644 --- a/src/util/forall.hpp +++ b/src/util/forall.hpp @@ -1,45 +1,9 @@ -// --------------------------------------------------------------------- -// Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All -// rights reserved. +////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +// project contributors. See the COPYRIGHT file for details. // -// Produced at the Lawrence Livermore National Laboratory. -// -// This file is part of CHAI. -// -// LLNL-CODE-705877 -// -// For details, see https:://github.com/LLNL/CHAI -// Please also see the NOTICE and LICENSE files. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// - Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the -// distribution. -// -// - Neither the name of the LLNS/LLNL nor the names of its contributors -// may be used to endorse or promote products derived from this -// software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY -// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// --------------------------------------------------------------------- +// SPDX-License-Identifier: BSD-3-Clause +////////////////////////////////////////////////////////////////////////////// #ifndef CHAI_forall_HPP #define CHAI_forall_HPP diff --git a/tests/integration/managed_array_tests.cpp b/tests/integration/managed_array_tests.cpp index 842293e7..506b7c70 100644 --- a/tests/integration/managed_array_tests.cpp +++ b/tests/integration/managed_array_tests.cpp @@ -1,45 +1,9 @@ -// --------------------------------------------------------------------- -// Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All -// rights reserved. +////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +// project contributors. See the COPYRIGHT file for details. // -// Produced at the Lawrence Livermore National Laboratory. -// -// This file is part of CHAI. -// -// LLNL-CODE-705877 -// -// For details, see https:://github.com/LLNL/CHAI -// Please also see the NOTICE and LICENSE files. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// - Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the -// distribution. -// -// - Neither the name of the LLNS/LLNL nor the names of its contributors -// may be used to endorse or promote products derived from this -// software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY -// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. 
-// --------------------------------------------------------------------- +// SPDX-License-Identifier: BSD-3-Clause +////////////////////////////////////////////////////////////////////////////// #include "gtest/gtest.h" #define GPU_TEST(X, Y) \ @@ -1394,4 +1358,4 @@ GPU_TEST(ManagedArray, CopyZero) array.free(); } -#endif \ No newline at end of file +#endif diff --git a/tests/unit/array_manager_unit_tests.cpp b/tests/unit/array_manager_unit_tests.cpp index 66edf586..b50d0cce 100644 --- a/tests/unit/array_manager_unit_tests.cpp +++ b/tests/unit/array_manager_unit_tests.cpp @@ -1,45 +1,9 @@ -// --------------------------------------------------------------------- -// Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All -// rights reserved. +////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +// project contributors. See the COPYRIGHT file for details. // -// Produced at the Lawrence Livermore National Laboratory. -// -// This file is part of CHAI. -// -// LLNL-CODE-705877 -// -// For details, see https:://github.com/LLNL/CHAI -// Please also see the NOTICE and LICENSE files. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// - Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the -// distribution. -// -// - Neither the name of the LLNS/LLNL nor the names of its contributors -// may be used to endorse or promote products derived from this -// software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY -// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// --------------------------------------------------------------------- +// SPDX-License-Identifier: BSD-3-Clause +////////////////////////////////////////////////////////////////////////////// #include "gtest/gtest.h" #include "chai/ArrayManager.hpp" diff --git a/tests/unit/managed_array_unit_tests.cpp b/tests/unit/managed_array_unit_tests.cpp index f893027d..50fb1696 100644 --- a/tests/unit/managed_array_unit_tests.cpp +++ b/tests/unit/managed_array_unit_tests.cpp @@ -1,45 +1,9 @@ -// --------------------------------------------------------------------- -// Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All -// rights reserved. 
+////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +// project contributors. See the COPYRIGHT file for details. // -// Produced at the Lawrence Livermore National Laboratory. -// -// This file is part of CHAI. -// -// LLNL-CODE-705877 -// -// For details, see https:://github.com/LLNL/CHAI -// Please also see the NOTICE and LICENSE files. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// - Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the -// distribution. -// -// - Neither the name of the LLNS/LLNL nor the names of its contributors -// may be used to endorse or promote products derived from this -// software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY -// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. 
-// --------------------------------------------------------------------- +// SPDX-License-Identifier: BSD-3-Clause +////////////////////////////////////////////////////////////////////////////// #include "gtest/gtest.h" #define GPU_TEST(X, Y) \ From 2e974f9d9eb1fbd85788a46d0c0874cc1580adde Mon Sep 17 00:00:00 2001 From: David Beckingsale Date: Wed, 16 Oct 2019 07:31:56 -0700 Subject: [PATCH 46/58] Finalize license style switch --- CMakeLists.txt | 49 +++------------------ benchmarks/CMakeLists.txt | 49 +++------------------ cmake/ChaiBasics.cmake | 49 +++------------------ cmake/thirdparty/SetupChaiThirdparty.cmake | 49 +++------------------ docs/CMakeLists.txt | 49 +++------------------ docs/doxygen/CMakeLists.txt | 49 +++------------------ docs/doxygen/Doxyfile.in | 49 +++------------------ docs/sphinx/CMakeLists.txt | 6 +++ docs/sphinx/conf.py.in | 50 +++------------------- examples/CMakeLists.txt | 49 +++------------------ src/CMakeLists.txt | 49 +++------------------ src/chai/ArrayManager.inl | 46 +++----------------- src/chai/CMakeLists.txt | 49 +++------------------ src/chai/ManagedArray.inl | 48 +++------------------ src/chai/ManagedArray_thin.inl | 48 +++------------------ src/chai/chai-config.cmake.in | 48 +++------------------ src/chai/config.hpp.in | 48 +++------------------ tests/CMakeLists.txt | 49 +++------------------ tests/integration/CMakeLists.txt | 6 +++ tests/unit/CMakeLists.txt | 49 +++------------------ 20 files changed, 119 insertions(+), 769 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fefd839f..a99a649c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,46 +1,9 @@ -####################################################################### -# Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All -# rights reserved. -# -# Produced at the Lawrence Livermore National Laboratory. -# -# This file is part of CHAI. -# -# LLNL-CODE-705877 -# -# For details, see https:://github.com/LLNL/CHAI -# Please also see the NOTICE and LICENSE files. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# - Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the -# distribution. -# -# - Neither the name of the LLNS/LLNL nor the names of its contributors -# may be used to endorse or promote products derived from this -# software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY -# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. 
-####################################################################### - +############################################################################## +# Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +# project contributors. See the COPYRIGHT file for details. +# +# SPDX-License-Identifier: BSD-3-Clause +############################################################################## cmake_policy(SET CMP0057 NEW) project(Chai LANGUAGES CXX) diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 6795f860..77dbe5fc 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -1,46 +1,9 @@ -####################################################################### -# Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All -# rights reserved. -# -# Produced at the Lawrence Livermore National Laboratory. -# -# This file is part of CHAI. -# -# LLNL-CODE-705877 -# -# For details, see https:://github.com/LLNL/CHAI -# Please also see the NOTICE and LICENSE files. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# - Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the -# distribution. -# -# - Neither the name of the LLNS/LLNL nor the names of its contributors -# may be used to endorse or promote products derived from this -# software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY -# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. -####################################################################### - +############################################################################## +# Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +# project contributors. See the COPYRIGHT file for details. +# +# SPDX-License-Identifier: BSD-3-Clause +############################################################################## set (chai_benchmark_depends chai gbenchmark) diff --git a/cmake/ChaiBasics.cmake b/cmake/ChaiBasics.cmake index 7cae65a6..93db482b 100644 --- a/cmake/ChaiBasics.cmake +++ b/cmake/ChaiBasics.cmake @@ -1,46 +1,9 @@ -###################################################################### -# Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All -# rights reserved. -# -# Produced at the Lawrence Livermore National Laboratory -# -# This file is part of CHAI. 
-# -# LLNL-CODE-705877 -# -# For details, see https:://github.com/LLNL/CHAI -# Please also see the NOTICE and LICENSE files. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# - Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the -# distribution. -# -# - Neither the name of the LLNS/LLNL nor the names of its contributors -# may be used to endorse or promote products derived from this -# software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY -# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. -####################################################################### - +############################################################################## +# Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +# project contributors. See the COPYRIGHT file for details. +# +# SPDX-License-Identifier: BSD-3-Clause +############################################################################## set (CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda") if (ENABLE_HIP) diff --git a/cmake/thirdparty/SetupChaiThirdparty.cmake b/cmake/thirdparty/SetupChaiThirdparty.cmake index ee8ffbb4..966221b9 100644 --- a/cmake/thirdparty/SetupChaiThirdparty.cmake +++ b/cmake/thirdparty/SetupChaiThirdparty.cmake @@ -1,46 +1,9 @@ -####################################################################### -# Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All -# rights reserved. -# -# Produced at the Lawrence Livermore National Laboratory -# -# This file is part of CHAI. -# -# LLNL-CODE-705877 -# -# For details, see https:://github.com/LLNL/CHAI -# Please also see the NOTICE and LICENSE files. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# - Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the -# distribution. -# -# - Neither the name of the LLNS/LLNL nor the names of its contributors -# may be used to endorse or promote products derived from this -# software without specific prior written permission. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY -# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. -####################################################################### - +############################################################################## +# Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +# project contributors. See the COPYRIGHT file for details. +# +# SPDX-License-Identifier: BSD-3-Clause +############################################################################## set(ENABLE_FORTRAN Off CACHE BOOL "Enable Fortran in Umpire") if (NOT TARGET umpire) diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt index de9200c0..c50020b5 100644 --- a/docs/CMakeLists.txt +++ b/docs/CMakeLists.txt @@ -1,46 +1,9 @@ -####################################################################### -# Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All -# rights reserved. -# -# Produced at the Lawrence Livermore National Laboratory. -# -# This file is part of CHAI. -# -# LLNL-CODE-705877 -# -# For details, see https:://github.com/LLNL/CHAI -# Please also see the NOTICE and LICENSE files. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# - Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the -# distribution. -# -# - Neither the name of the LLNS/LLNL nor the names of its contributors -# may be used to endorse or promote products derived from this -# software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY -# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. 
-####################################################################### - +############################################################################## +# Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +# project contributors. See the COPYRIGHT file for details. +# +# SPDX-License-Identifier: BSD-3-Clause +############################################################################## if (DOXYGEN_FOUND) add_subdirectory(doxygen) endif () diff --git a/docs/doxygen/CMakeLists.txt b/docs/doxygen/CMakeLists.txt index 1b2725a6..ac0fc239 100644 --- a/docs/doxygen/CMakeLists.txt +++ b/docs/doxygen/CMakeLists.txt @@ -1,44 +1,7 @@ -####################################################################### -# Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All -# rights reserved. -# -# Produced at the Lawrence Livermore National Laboratory -# -# This file is part of CHAI. -# -# LLNL-CODE-705877 -# -# For details, see https:://github.com/LLNL/CHAI -# Please also see the NOTICE and LICENSE files. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# - Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the -# distribution. -# -# - Neither the name of the LLNS/LLNL nor the names of its contributors -# may be used to endorse or promote products derived from this -# software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY -# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. -####################################################################### - +############################################################################## +# Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +# project contributors. See the COPYRIGHT file for details. +# +# SPDX-License-Identifier: BSD-3-Clause +############################################################################## blt_add_doxygen_target(chai_doxgen) diff --git a/docs/doxygen/Doxyfile.in b/docs/doxygen/Doxyfile.in index 5e13e82f..1bb28a90 100644 --- a/docs/doxygen/Doxyfile.in +++ b/docs/doxygen/Doxyfile.in @@ -1,46 +1,9 @@ -####################################################################### -# Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All -# rights reserved. -# -# Produced at the Lawrence Livermore National Laboratory -# -# This file is part of CHAI. 
-# -# LLNL-CODE-705877 -# -# For details, see https:://github.com/LLNL/CHAI -# Please also see the NOTICE and LICENSE files. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# - Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the -# distribution. -# -# - Neither the name of the LLNS/LLNL nor the names of its contributors -# may be used to endorse or promote products derived from this -# software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY -# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. -####################################################################### - +############################################################################## +# Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +# project contributors. See the COPYRIGHT file for details. +# +# SPDX-License-Identifier: BSD-3-Clause +############################################################################## #--------------------------------------------------------------------------- # Project related configuration options #--------------------------------------------------------------------------- diff --git a/docs/sphinx/CMakeLists.txt b/docs/sphinx/CMakeLists.txt index 1b14348d..d9f4aed8 100644 --- a/docs/sphinx/CMakeLists.txt +++ b/docs/sphinx/CMakeLists.txt @@ -1,3 +1,9 @@ +############################################################################## +# Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +# project contributors. See the COPYRIGHT file for details. +# +# SPDX-License-Identifier: BSD-3-Clause +############################################################################## ####################################################################### # Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All # rights reserved. diff --git a/docs/sphinx/conf.py.in b/docs/sphinx/conf.py.in index 0fd73ff2..1e870de5 100644 --- a/docs/sphinx/conf.py.in +++ b/docs/sphinx/conf.py.in @@ -1,48 +1,10 @@ +############################################################################## +# Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +# project contributors. See the COPYRIGHT file for details. 
+# +# SPDX-License-Identifier: BSD-3-Clause +############################################################################## # -*- coding: utf-8 -*- - -####################################################################### -# Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All -# rights reserved. -# -# Produced at the Lawrence Livermore National Laboratory -# -# This file is part of CHAI. -# -# LLNL-CODE-705877 -# -# For details, see https:://github.com/LLNL/CHAI -# Please also see the NOTICE and LICENSE files. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# - Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the -# distribution. -# -# - Neither the name of the LLNS/LLNL nor the names of its contributors -# may be used to endorse or promote products derived from this -# software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY -# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. -####################################################################### - # # CHAI documentation build configuration file, created by # sphinx-quickstart on Thu Mar 30 12:14:09 2017. diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 5abdc2e3..3902adae 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -1,46 +1,9 @@ -####################################################################### -# Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All -# rights reserved. -# -# Produced at the Lawrence Livermore National Laboratory. -# -# This file is part of CHAI. -# -# LLNL-CODE-705877 -# -# For details, see https:://github.com/LLNL/CHAI -# Please also see the NOTICE and LICENSE files. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# - Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the -# distribution. -# -# - Neither the name of the LLNS/LLNL nor the names of its contributors -# may be used to endorse or promote products derived from this -# software without specific prior written permission. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY -# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. -####################################################################### - +############################################################################## +# Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +# project contributors. See the COPYRIGHT file for details. +# +# SPDX-License-Identifier: BSD-3-Clause +############################################################################## set (chai_umpire_example_depends chai) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b38cf609..2b18fdec 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,44 +1,7 @@ -####################################################################### -# Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All -# rights reserved. -# -# Produced at the Lawrence Livermore National Laboratory. -# -# This file is part of CHAI. -# -# LLNL-CODE-705877 -# -# For details, see https:://github.com/LLNL/CHAI -# Please also see the NOTICE and LICENSE files. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# - Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the -# distribution. -# -# - Neither the name of the LLNS/LLNL nor the names of its contributors -# may be used to endorse or promote products derived from this -# software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY -# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. -####################################################################### - +############################################################################## +# Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +# project contributors. 
See the COPYRIGHT file for details. +# +# SPDX-License-Identifier: BSD-3-Clause +############################################################################## add_subdirectory(chai) diff --git a/src/chai/ArrayManager.inl b/src/chai/ArrayManager.inl index ff6c8cf9..c4505329 100644 --- a/src/chai/ArrayManager.inl +++ b/src/chai/ArrayManager.inl @@ -1,45 +1,9 @@ -// --------------------------------------------------------------------- -// Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All -// rights reserved. +////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +// project contributors. See the COPYRIGHT file for details. // -// Produced at the Lawrence Livermore National Laboratory. -// -// This file is part of CHAI. -// -// LLNL-CODE-705877 -// -// For details, see https:://github.com/LLNL/CHAI -// Please also see the NOTICE and LICENSE files. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// - Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the -// distribution. -// -// - Neither the name of the LLNS/LLNL nor the names of its contributors -// may be used to endorse or promote products derived from this -// software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY -// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// --------------------------------------------------------------------- +// SPDX-License-Identifier: BSD-3-Clause +////////////////////////////////////////////////////////////////////////////// #ifndef CHAI_ArrayManager_INL #define CHAI_ArrayManager_INL diff --git a/src/chai/CMakeLists.txt b/src/chai/CMakeLists.txt index 7821fa3f..365da02e 100644 --- a/src/chai/CMakeLists.txt +++ b/src/chai/CMakeLists.txt @@ -1,46 +1,9 @@ -####################################################################### -# Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All -# rights reserved. -# -# Produced at the Lawrence Livermore National Laboratory. -# -# This file is part of CHAI. -# -# LLNL-CODE-705877 -# -# For details, see https:://github.com/LLNL/CHAI -# Please also see the NOTICE and LICENSE files. 
-# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# - Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the -# distribution. -# -# - Neither the name of the LLNS/LLNL nor the names of its contributors -# may be used to endorse or promote products derived from this -# software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY -# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. -####################################################################### - +############################################################################## +# Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +# project contributors. See the COPYRIGHT file for details. +# +# SPDX-License-Identifier: BSD-3-Clause +############################################################################## set(CHAI_ENABLE_PICK ${ENABLE_PICK}) set(CHAI_ENABLE_CUDA ${ENABLE_CUDA}) set(CHAI_ENABLE_HIP ${ENABLE_HIP}) diff --git a/src/chai/ManagedArray.inl b/src/chai/ManagedArray.inl index 18f4db7a..deb55418 100644 --- a/src/chai/ManagedArray.inl +++ b/src/chai/ManagedArray.inl @@ -1,45 +1,9 @@ -// --------------------------------------------------------------------- -// Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All -// rights reserved. -// -// Produced at the Lawrence Livermore National Laboratory. -// -// This file is part of CHAI. -// -// LLNL-CODE-705877 -// -// For details, see https:://github.com/LLNL/CHAI -// Please also see the NOTICE and LICENSE files. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// - Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the -// distribution. -// -// - Neither the name of the LLNS/LLNL nor the names of its contributors -// may be used to endorse or promote products derived from this -// software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY -// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// --------------------------------------------------------------------- +////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +// project contributors. See the COPYRIGHT file for details. +// +// SPDX-License-Identifier: BSD-3-Clause +////////////////////////////////////////////////////////////////////////////// #ifndef CHAI_ManagedArray_INL #define CHAI_ManagedArray_INL diff --git a/src/chai/ManagedArray_thin.inl b/src/chai/ManagedArray_thin.inl index 528751ee..3c2e4232 100644 --- a/src/chai/ManagedArray_thin.inl +++ b/src/chai/ManagedArray_thin.inl @@ -1,45 +1,9 @@ -// --------------------------------------------------------------------- -// Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All -// rights reserved. -// -// Produced at the Lawrence Livermore National Laboratory. -// -// This file is part of CHAI. -// -// LLNL-CODE-705877 -// -// For details, see https:://github.com/LLNL/CHAI -// Please also see the NOTICE and LICENSE files. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// - Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the -// distribution. -// -// - Neither the name of the LLNS/LLNL nor the names of its contributors -// may be used to endorse or promote products derived from this -// software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY -// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. 
-// --------------------------------------------------------------------- +////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +// project contributors. See the COPYRIGHT file for details. +// +// SPDX-License-Identifier: BSD-3-Clause +////////////////////////////////////////////////////////////////////////////// #ifndef CHAI_ManagedArray_thin_INL #define CHAI_ManagedArray_thin_INL diff --git a/src/chai/chai-config.cmake.in b/src/chai/chai-config.cmake.in index 9413ddb7..1cc6198f 100644 --- a/src/chai/chai-config.cmake.in +++ b/src/chai/chai-config.cmake.in @@ -1,45 +1,9 @@ -####################################################################### -# Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All -# rights reserved. -# -# Produced at the Lawrence Livermore National Laboratory. -# -# This file is part of CHAI. -# -# LLNL-CODE-705877 -# -# For details, see https:://github.com/LLNL/CHAI -# Please also see the NOTICE and LICENSE files. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# - Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the -# distribution. -# -# - Neither the name of the LLNS/LLNL nor the names of its contributors -# may be used to endorse or promote products derived from this -# software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY -# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. -####################################################################### +############################################################################## +# Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +# project contributors. See the COPYRIGHT file for details. +# +# SPDX-License-Identifier: BSD-3-Clause +############################################################################## set (CHAI_INSTALL_PREFIX @CMAKE_INSTALL_PREFIX@) set (CHAI_INCLUDE_DIRS @CMAKE_INSTALL_PREFIX@/include) diff --git a/src/chai/config.hpp.in b/src/chai/config.hpp.in index a7a443fe..4c35a724 100644 --- a/src/chai/config.hpp.in +++ b/src/chai/config.hpp.in @@ -1,45 +1,9 @@ -// --------------------------------------------------------------------- -// Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All -// rights reserved. -// -// Produced at the Lawrence Livermore National Laboratory. -// -// This file is part of CHAI. 
-// -// LLNL-CODE-705877 -// -// For details, see https:://github.com/LLNL/CHAI -// Please also see the NOTICE and LICENSE files. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// - Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the -// distribution. -// -// - Neither the name of the LLNS/LLNL nor the names of its contributors -// may be used to endorse or promote products derived from this -// software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY -// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// --------------------------------------------------------------------- +////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +// project contributors. See the COPYRIGHT file for details. +// +// SPDX-License-Identifier: BSD-3-Clause +////////////////////////////////////////////////////////////////////////////// #ifndef CHAI_config_HPP #define CHAI_config_HPP diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 1e638b06..cc1c8705 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,46 +1,9 @@ -####################################################################### -# Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All -# rights reserved. -# -# Produced at the Lawrence Livermore National Laboratory. -# -# This file is part of CHAI. -# -# LLNL-CODE-705877 -# -# For details, see https:://github.com/LLNL/CHAI -# Please also see the NOTICE and LICENSE files. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# - Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the -# distribution. -# -# - Neither the name of the LLNS/LLNL nor the names of its contributors -# may be used to endorse or promote products derived from this -# software without specific prior written permission. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY -# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. -####################################################################### - +############################################################################## +# Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +# project contributors. See the COPYRIGHT file for details. +# +# SPDX-License-Identifier: BSD-3-Clause +############################################################################## add_subdirectory(unit) add_subdirectory(integration) diff --git a/tests/integration/CMakeLists.txt b/tests/integration/CMakeLists.txt index 22cbdd04..bd12651b 100644 --- a/tests/integration/CMakeLists.txt +++ b/tests/integration/CMakeLists.txt @@ -1,3 +1,9 @@ +############################################################################## +# Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +# project contributors. See the COPYRIGHT file for details. +# +# SPDX-License-Identifier: BSD-3-Clause +############################################################################## set (managed_array_test_depends chai umpire gtest) diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index fea76c55..16594bb4 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -1,46 +1,9 @@ -####################################################################### -# Copyright (c) 2016-2018, Lawrence Livermore National Security, LLC. All -# rights reserved. -# -# Produced at the Lawrence Livermore National Laboratory. -# -# This file is part of CHAI. -# -# LLNL-CODE-705877 -# -# For details, see https:://github.com/LLNL/CHAI -# Please also see the NOTICE and LICENSE files. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# - Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the -# distribution. -# -# - Neither the name of the LLNS/LLNL nor the names of its contributors -# may be used to endorse or promote products derived from this -# software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY -# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. -####################################################################### - +############################################################################## +# Copyright (c) 2016-19, Lawrence Livermore National Security, LLC and CHAI +# project contributors. See the COPYRIGHT file for details. +# +# SPDX-License-Identifier: BSD-3-Clause +############################################################################## set (managed_array_test_depends chai umpire gtest) From 95fe093888a66353984efca82efd9f7d46426108 Mon Sep 17 00:00:00 2001 From: David Beckingsale Date: Wed, 23 Oct 2019 19:15:11 -0700 Subject: [PATCH 47/58] RAJA no longer requires explicitly finding camp dependency --- cmake/thirdparty/SetupChaiThirdparty.cmake | 1 - 1 file changed, 1 deletion(-) diff --git a/cmake/thirdparty/SetupChaiThirdparty.cmake b/cmake/thirdparty/SetupChaiThirdparty.cmake index a2cd708e..68526bf5 100644 --- a/cmake/thirdparty/SetupChaiThirdparty.cmake +++ b/cmake/thirdparty/SetupChaiThirdparty.cmake @@ -25,7 +25,6 @@ blt_register_library( LIBRARIES umpire) if (ENABLE_RAJA_PLUGIN) - find_package(camp REQUIRED) find_package(RAJA REQUIRED) blt_register_library( From ca638167b953831966f49832bbcf585b82174fcf Mon Sep 17 00:00:00 2001 From: David Beckingsale Date: Thu, 24 Oct 2019 12:01:39 -0700 Subject: [PATCH 48/58] Disable RAJA plugin by default --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 06a19dfe..c519e53a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,7 +16,7 @@ option(ENABLE_IMPLICIT_CONVERSIONS "Enable implicit conversions to-from raw poin option(DISABLE_RM "Make ManagedArray a thin wrapper" Off) mark_as_advanced(DISABLE_RM) option(ENABLE_UM "Use CUDA unified (managed) memory" Off) -option(ENABLE_RAJA_PLUGIN "Build plugin to set RAJA execution spaces" On) +option(ENABLE_RAJA_PLUGIN "Build plugin to set RAJA execution spaces" Off) set(ENABLE_TESTS On CACHE BOOL "") set(ENABLE_EXAMPLES On CACHE BOOL "") From d4dcc1a90813ae69f9a8c76c831955be4fed8804 Mon Sep 17 00:00:00 2001 From: David Beckingsale Date: Thu, 7 Nov 2019 09:35:50 -0800 Subject: [PATCH 49/58] Updates to bring in RAJA as a submodule --- .gitmodules | 3 +++ CMakeLists.txt | 2 +- blt | 2 +- cmake/thirdparty/SetupChaiThirdparty.cmake | 29 +++++++++++----------- src/chai/ArrayManager.hpp | 3 +++ src/chai/CMakeLists.txt | 4 +-- src/tpl/raja | 1 + tests/integration/CMakeLists.txt | 2 +- 8 files changed, 26 insertions(+), 20 deletions(-) create mode 160000 src/tpl/raja diff --git a/.gitmodules b/.gitmodules index 8564a4ae..d77cfd4e 100644 --- a/.gitmodules +++ b/.gitmodules @@ -4,3 +4,6 @@ [submodule "src/tpl/umpire"] path = src/tpl/umpire url = https://github.com/LLNL/Umpire.git +[submodule "src/tpl/raja"] + path = src/tpl/raja + url = https://github.com/LLNL/RAJA.git diff --git a/CMakeLists.txt b/CMakeLists.txt index c519e53a..fa0cb77b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,7 +20,7 @@ option(ENABLE_RAJA_PLUGIN "Build 
plugin to set RAJA execution spaces" Off) set(ENABLE_TESTS On CACHE BOOL "") set(ENABLE_EXAMPLES On CACHE BOOL "") -set(ENABLE_DOCUMENTATION On CACHE BOOL "") +set(ENABLE_DOCS Off CACHE BOOL "") # options for Umpire as TPL set(ENABLE_GMOCK On CACHE BOOL "") diff --git a/blt b/blt index 47089360..30ccea5a 160000 --- a/blt +++ b/blt @@ -1 +1 @@ -Subproject commit 4708936054366585478d9c5430449358a0a3eb86 +Subproject commit 30ccea5ad9853bd6397d8c67deed88b55916d2be diff --git a/cmake/thirdparty/SetupChaiThirdparty.cmake b/cmake/thirdparty/SetupChaiThirdparty.cmake index 68526bf5..c3f44c3d 100644 --- a/cmake/thirdparty/SetupChaiThirdparty.cmake +++ b/cmake/thirdparty/SetupChaiThirdparty.cmake @@ -4,8 +4,6 @@ # # SPDX-License-Identifier: BSD-3-Clause ############################################################################## -set(ENABLE_FORTRAN Off CACHE BOOL "Enable Fortran in Umpire") - if (NOT TARGET umpire) if (DEFINED umpire_DIR) find_package(umpire REQUIRED) @@ -15,22 +13,23 @@ if (NOT TARGET umpire) INCLUDES ${UMPIRE_INCLUDE_DIRS} LIBRARIES umpire) else () + set(OLD_ENABLE_FORTRAN ${ENABLE_FORTRAN}) + set(ENABLE_FORTRAN Off CACHE BOOL "Enable Fortran in Umpire") add_subdirectory(${PROJECT_SOURCE_DIR}/src/tpl/umpire) + set(ENABLE_FORTRAN ${OLD_ENABLE_FORTRAN}) endif() endif() -blt_register_library( - NAME umpire - INCLUDES ${UMPIRE_INCLUDE_DIRS} - LIBRARIES umpire) - if (ENABLE_RAJA_PLUGIN) - find_package(RAJA REQUIRED) - - blt_register_library( - NAME raja - INCLUDES ${RAJA_INCLUDE_DIR} - LIBRARIES RAJA) - - message(STATUS "RAJA: ${RAJA_INCLUDE_DIR}") + if (NOT TARGET RAJA) + if (DEFINED raja_DIR) + message(STATUS "CHAI: using external RAJA via find_package") + find_package(RAJA REQUIRED) + else() + message(STATUS "CHAI: using builtin RAJA submodule") + add_subdirectory(${PROJECT_SOURCE_DIR}/src/tpl/raja) + endif() + else() + message(STATUS "CHAI: using existing RAJA target") + endif() endif () diff --git a/src/chai/ArrayManager.hpp b/src/chai/ArrayManager.hpp index 10702429..beb38205 100644 --- a/src/chai/ArrayManager.hpp +++ b/src/chai/ArrayManager.hpp @@ -7,12 +7,15 @@ #ifndef CHAI_ArrayManager_HPP #define CHAI_ArrayManager_HPP +#include "chai/config.hpp" #include "chai/ChaiMacros.hpp" #include "chai/ExecutionSpaces.hpp" #include "chai/PointerRecord.hpp" #include "chai/Types.hpp" +#if defined(CHAI_ENABLE_RAJA_PLUGIN) #include "chai/pluginLinker.hpp" +#endif #include diff --git a/src/chai/CMakeLists.txt b/src/chai/CMakeLists.txt index bf5caf4c..8a4966d7 100644 --- a/src/chai/CMakeLists.txt +++ b/src/chai/CMakeLists.txt @@ -20,7 +20,6 @@ set (chai_headers ArrayManager.hpp ArrayManager.inl ChaiMacros.hpp - pluginLinker.hpp ExecutionSpaces.hpp ManagedArray.hpp ManagedArray.inl @@ -53,6 +52,7 @@ endif () if (ENABLE_RAJA_PLUGIN) set (chai_headers ${chai_headers} + pluginLinker.hpp ManagedArrayView.hpp RajaExecutionSpacePlugin.hpp) @@ -62,7 +62,7 @@ if (ENABLE_RAJA_PLUGIN) set (chai_depends ${chai_depends} - raja) + RAJA) endif () blt_add_library( diff --git a/src/tpl/raja b/src/tpl/raja new file mode 160000 index 00000000..53cb89cf --- /dev/null +++ b/src/tpl/raja @@ -0,0 +1 @@ +Subproject commit 53cb89cf788d28bc4ed2b4e6f75483fdd26024aa diff --git a/tests/integration/CMakeLists.txt b/tests/integration/CMakeLists.txt index fc9da2df..0f1bc586 100644 --- a/tests/integration/CMakeLists.txt +++ b/tests/integration/CMakeLists.txt @@ -41,7 +41,7 @@ blt_add_test( if (ENABLE_RAJA_PLUGIN) set(raja_test_depends ${managed_array_test_depends} - raja) + RAJA) blt_add_executable( NAME raja-chai-tests 
From 4de461caebc0db66411a2768f48a7372d42c8937 Mon Sep 17 00:00:00 2001 From: David Beckingsale Date: Thu, 7 Nov 2019 09:49:27 -0800 Subject: [PATCH 50/58] Tweak for external RAJA --- cmake/thirdparty/SetupChaiThirdparty.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/thirdparty/SetupChaiThirdparty.cmake b/cmake/thirdparty/SetupChaiThirdparty.cmake index c3f44c3d..21a63f9a 100644 --- a/cmake/thirdparty/SetupChaiThirdparty.cmake +++ b/cmake/thirdparty/SetupChaiThirdparty.cmake @@ -22,7 +22,7 @@ endif() if (ENABLE_RAJA_PLUGIN) if (NOT TARGET RAJA) - if (DEFINED raja_DIR) + if (DEFINED RAJA_DIR) message(STATUS "CHAI: using external RAJA via find_package") find_package(RAJA REQUIRED) else() From 66baa9f5c335a0ada819208161840e173002e616 Mon Sep 17 00:00:00 2001 From: David Beckingsale Date: Wed, 13 Nov 2019 15:29:19 -0800 Subject: [PATCH 51/58] Bump version number --- CMakeLists.txt | 2 +- README.md | 2 +- docs/sphinx/conf.py | 4 ++-- docs/sphinx/conf.py.in | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fa0cb77b..7e01caa4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,7 @@ ############################################################################## cmake_policy(SET CMP0057 NEW) -project(Chai LANGUAGES CXX) +project(Chai LANGUAGES CXX VERSION 2.0.0) set(ENABLE_CUDA Off CACHE BOOL "Enable CUDA") set(ENABLE_HIP Off CACHE BOOL "Enable HIP") diff --git a/README.md b/README.md index b45a98c6..93a62f23 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# CHAI v1.2 +# CHAI v2.0 [![Azure Build Status](https://dev.azure.com/davidbeckingsale/CHAI/_apis/build/status/LLNL.CHAI?branchName=develop)](https://dev.azure.com/davidbeckingsale/CHAI/_build/latest?definitionId=2&branchName=develop) [![Build Status](https://travis-ci.org/LLNL/CHAI.svg?branch=develop)](https://travis-ci.org/LLNL/CHAI) diff --git a/docs/sphinx/conf.py b/docs/sphinx/conf.py index bd3f0ee1..cdf48d8b 100644 --- a/docs/sphinx/conf.py +++ b/docs/sphinx/conf.py @@ -61,9 +61,9 @@ # built documents. # # The short X.Y version. -version = u'1.2' +version = u'2.0' # The full version, including alpha/beta/rc tags. -release = u'1.2.0' +release = u'2.0.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/sphinx/conf.py.in b/docs/sphinx/conf.py.in index 1e870de5..8e1585b0 100644 --- a/docs/sphinx/conf.py.in +++ b/docs/sphinx/conf.py.in @@ -60,9 +60,9 @@ author = u'' # built documents. # # The short X.Y version. -version = u'1.2' +version = u'2.0' # The full version, including alpha/beta/rc tags. -release = u'1.2.0' +release = u'2.0.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
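Taken together, patches 47-50 above change how the RAJA plugin is wired up: ENABLE_RAJA_PLUGIN is now Off by default, an external RAJA is picked up through RAJA_DIR via find_package, and otherwise the bundled src/tpl/raja submodule is built. A minimal host-config sketch for a build that opts in is shown below; it is illustrative only, and the install path is hypothetical, not part of these patches.

# Hypothetical CMake cache entries for configuring CHAI with the RAJA plugin.
# ENABLE_RAJA_PLUGIN and RAJA_DIR are the variables consulted by
# cmake/thirdparty/SetupChaiThirdparty.cmake in the patches above.
set(ENABLE_RAJA_PLUGIN On CACHE BOOL "Build plugin to set RAJA execution spaces")
# Point at an external RAJA install (path is made up). Omit this entry to
# build the src/tpl/raja submodule instead.
set(RAJA_DIR "/path/to/raja/install/lib/cmake/raja" CACHE PATH "External RAJA")

When RAJA_DIR is not defined, SetupChaiThirdparty.cmake falls back to add_subdirectory on the bundled submodule, so the submodules must be checked out (git submodule update --init --recursive) before configuring.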
From 9b0ee0ecc571b2ae93b57d2a3e4493e0d33343b7 Mon Sep 17 00:00:00 2001 From: David Beckingsale Date: Wed, 13 Nov 2019 15:38:54 -0800 Subject: [PATCH 52/58] Set CMP0048 to NEW to enable VERSION in project command (cmake) --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7e01caa4..a9e5a6c4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,6 +5,7 @@ # SPDX-License-Identifier: BSD-3-Clause ############################################################################## cmake_policy(SET CMP0057 NEW) +cmake_policy(SET CMP0048 NEW) project(Chai LANGUAGES CXX VERSION 2.0.0) From 56f58fe9138372f54175a00aa852751d8c5196b8 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Thu, 5 Dec 2019 12:45:28 -0800 Subject: [PATCH 53/58] More stable implementation --- CMakeLists.txt | 1 + src/chai/managed_ptr.hpp | 348 ++++++++++++++++++------ tests/integration/managed_ptr_tests.cpp | 176 +++++++++--- tests/unit/managed_ptr_unit_tests.cpp | 258 +++++++++++++++--- 4 files changed, 618 insertions(+), 165 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fefd839f..b7f27440 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -53,6 +53,7 @@ option(ENABLE_IMPLICIT_CONVERSIONS "Enable implicit conversions to-from raw poin option(DISABLE_RM "Make ManagedArray a thin wrapper" Off) mark_as_advanced(DISABLE_RM) option(ENABLE_UM "Use CUDA unified (managed) memory" Off) +option(CHAI_ENABLE_GPU_ERROR_CHECKING "Enable GPU error checking" On) set(ENABLE_TESTS On CACHE BOOL "") set(ENABLE_EXAMPLES On CACHE BOOL "") diff --git a/src/chai/managed_ptr.hpp b/src/chai/managed_ptr.hpp index fb3999a8..49ffa0e8 100644 --- a/src/chai/managed_ptr.hpp +++ b/src/chai/managed_ptr.hpp @@ -61,6 +61,8 @@ #ifdef __CUDACC__ +#ifdef CHAI_ENABLE_GPU_ERROR_CHECKING + inline void gpuErrorCheck(cudaError_t code, const char *file, int line, bool abort=true) { if (code != cudaSuccess) { @@ -71,17 +73,10 @@ inline void gpuErrorCheck(cudaError_t code, const char *file, int line, bool abo } } -#if DEBUG #define GPU_ERROR_CHECK(code) { gpuErrorCheck((code), __FILE__, __LINE__); } -#else +#else // CHAI_ENABLE_GPU_ERROR_CHECKING #define GPU_ERROR_CHECK(code) code -#endif - -inline void debug_cudaDeviceSynchronize() { -#if DEBUG - GPU_ERROR_CHECK(cudaDeviceSynchronize()); -#endif -} +#endif // CHAI_ENABLE_GPU_ERROR_CHECKING #endif // __CUDACC__ @@ -94,10 +89,7 @@ namespace chai { } struct managed_ptr_record { - managed_ptr_record() : - m_callback() - { - } + managed_ptr_record() = default; managed_ptr_record(std::function callback) : m_callback(callback) @@ -122,13 +114,14 @@ namespace chai { /// /// This wrapper stores both host and device pointers so that polymorphism can be /// used in both contexts with a single API. - /// The make_managed function calls new on both the host and device so that - /// polymorphism is valid in both contexts. Simply copying an object to the - /// device will not copy the vtable, so new must be called on the device. + /// The make_managed and make_managed_from_factory functions call new on both the + /// host and device so that polymorphism is valid in both contexts. Simply copying + /// an object to the device will not copy the vtable, so new must be called on + /// the device. /// /// Usage Requirements: - /// Methods that can be called on both the host and device must be declared - /// with the __host__ __device__ specifiers. 
This includes constructors + /// Methods that can be called on the host and/or device must be declared + /// with the __host__ and/or __device__ specifiers. This includes constructors /// and destructors. Furthermore, destructors of base and child classes /// must all be declared virtual. /// This wrapper does NOT automatically sync the device object if the host object @@ -136,37 +129,39 @@ namespace chai { /// you must explicitly modify the object in both the host context and the /// device context. /// Raw array members of T need to be initialized correctly with a host or - /// device pointer. If a ManagedArray is passed to the make_managed function - /// in place of a raw array, it will be cast to the appropriate host or device - /// pointer when passed to T's constructor on the host and on the device. If it - /// is desired that these host and device pointers be kept in sync, define a - /// callback that maintains a copy of the ManagedArray and upon the ACTION_MOVE - /// event calls the copy constructor of that ManagedArray. + /// device array. If a ManagedArray is passed to the make_managed or + /// make_managed_from_factory methods in place of a raw array, it will be + /// cast to the appropriate host or device pointer when passed to T's + /// constructor on the host and on the device. If it is desired that these + /// host and device pointers be kept in sync, define a callback that maintains + /// a copy of the ManagedArray and upon the ACTION_MOVE event calls the copy + /// constructor of that ManagedArray. /// If a raw array is passed to make_managed, accessing that member will be /// valid only in the correct context. To prevent the accidental use of that /// member in the wrong context, any methods that access it should be __host__ /// only or __device__ only. Special care should be taken when passing raw /// arrays as arguments to member functions. /// The same restrictions for raw array members also apply to raw pointer members. - /// A managed_ptr can be passed to the make_managed function in place of a raw - /// pointer, and the host constructor of T will be given the extracted host - /// pointer, and likewise the device constructor of T will be given the - /// extracted device pointer. It is recommended that a callback is defined that - /// maintains a copy of the managed_ptr and frees it on the ACTION_FREE event. - /// It is also recommended that the callback calls the copy constructor of the - /// managed_ptr on the ACTION_MOVE event so that the ACTION_MOVE event is - /// triggered also for the inner managed_ptr. + /// A managed_ptr can be passed to the make_managed or make_managed_from_factory + /// methods in place of a raw pointer, and the host constructor of T will + /// be given the extracted host pointer, and likewise the device constructor + /// of T will be given the extracted device pointer. If it is desired that these + /// host and device pointers be kept in sync, define a callback that maintains + /// a copy of the managed_ptr and upon the ACTION_MOVE event calls the copy + /// constructor of that managed_ptr. /// Again, if a raw pointer is passed to make_managed, accessing that member will /// only be valid in the correct context. Take care when passing raw pointers /// as arguments to member functions. - /// Be aware that only the debug version of CHAI will check for GPU errors. So - /// if you are seeing strange behavior and/or your code crashes in the - /// constructor/destructor of T, then build CHAI as debug to see what is - /// going on. 
For example, the constructor of T might run out of per-thread - /// stack space on the GPU. If that happens, you can increase the device - /// limit of per-thread stack space. Alternatively, you could add a call - /// to cudaDeviceSynchronize after calling make_managed and check the return - /// code of cudaDeviceSynchronize. + /// Be aware that CHAI checks every CUDA API call for GPU errors by default. To + /// turn off GPU error checking, pass -DCHAI_ENABLE_GPU_ERROR_CHECKING=OFF as + /// an argument to cmake when building CHAI. To turn on synchronization after + /// every kernel, call ArrayManager::getInstance()->enableDeviceSynchronize(). + /// Alternatively, call cudaDeviceSynchronize() after any call to make_managed, + /// make_managed_from_factory, or managed_ptr::free, and check the return code + /// for errors. If your code crashes in the constructor/destructor of T, then it + /// is recommended to turn on this synchronization. For example, the constructor + /// of T might run out of per-thread stack space on the GPU. If that happens, + /// you can increase the device limit of per-thread stack space. /// template class managed_ptr { @@ -178,7 +173,7 @@ namespace chai { /// /// Default constructor. /// - CHAI_HOST_DEVICE constexpr managed_ptr() noexcept {} + CHAI_HOST_DEVICE constexpr managed_ptr() noexcept = default; /// /// @author Alan Dayton @@ -286,8 +281,9 @@ namespace chai { /// /// Copy constructor. /// Constructs a copy of the given managed_ptr and if the execution space is - /// different, calls the user defined callback with ACTION_MOVE for each - /// of the execution spaces. + /// different from the last space the given managed_ptr was used in, calls + /// the user defined callback with ACTION_MOVE for each of the execution + /// spaces. /// /// @param[in] other The managed_ptr to copy /// @@ -306,8 +302,9 @@ namespace chai { /// /// Converting constructor. /// Constructs a copy of the given managed_ptr and if the execution space is - /// different, calls the user defined callback with ACTION_MOVE for each - /// of the execution spaces. U* must be convertible to T*. + /// different from the last space the given managed_ptr was used in, calls + /// the user defined callback with ACTION_MOVE for each of the execution + /// spaces. U* must be convertible to T*. /// /// @param[in] other The managed_ptr to copy /// @@ -373,14 +370,10 @@ namespace chai { /// /// @author Alan Dayton /// - /// Destructor - /// - CHAI_HOST_DEVICE ~managed_ptr() {} - - /// - /// @author Alan Dayton - /// - /// Copy assignment operator. Does a shallow copy. + /// Copy assignment operator. + /// Copies the given managed_ptr and if the execution space is different from + /// the last space the given managed_ptr was used in, calls the user defined + /// callback with ACTION_MOVE for each of the execution spaces. /// /// @param[in] other The managed_ptr to copy /// @@ -402,8 +395,10 @@ namespace chai { /// @author Alan Dayton /// /// Conversion copy assignment operator. - /// Copies the given managed_ptr. Does a shallow copy. U* must be convertible - /// to T*. + /// Copies the given managed_ptr and if the execution space is different from + /// the last space the given managed_ptr was used in, calls the user defined + /// callback with ACTION_MOVE for each of the execution spaces. U* must be + /// convertible to T*. 
/// /// @param[in] other The managed_ptr to copy /// @@ -501,10 +496,17 @@ namespace chai { /// @author Alan Dayton /// /// Sets the callback, which can be used to handle specific actions. - /// ACTION_MOVE can be used to call the copy constructor for ManagedArrays. - /// ACTION_FREE can be used to provide a custom deleter operation. Use - /// ExecutionSpace::NONE if freeing anything other than the actual object - /// pointers. + /// The copy constructors and copy assignment operators call the callback with + /// ACTION_MOVE if the execution space has changed since the managed_ptr was + /// last used. A common use case for this is to call the copy constructor + /// of class members that are ManagedArrays to trigger data movement. The + /// free method calls the user provided callback with ACTION_FREE in each of + /// the execution spaces with the pointers from each space. This can be used + /// to provide a custom deleter operation. If freeing anything other than the + /// actual object pointers, do that when the ExecutionSpace is NONE. The + /// callback should return true if the event has been handled (i.e. if a + /// callback is provided that only cleans up the device pointer, it should + /// return true in that case and false in every other case). /// /// @param[in] callback The callback to call when certain actions occur /// @@ -520,8 +522,10 @@ namespace chai { /// /// @author Alan Dayton /// - /// If a user callback is provided, calls the callback with the ACTION_FREE - /// event. Otherwise calls delete on the CPU and GPU pointers. + /// If a user defined callback has been provided, calls it with the ACTION_FREE + /// event in each execution space. If the callback does not handle an event + /// or a callback is not provided, this method calls delete on the host + /// and device pointers. /// CHAI_HOST void free() { if (m_pointer_record) { @@ -550,8 +554,12 @@ namespace chai { { if (pointer) { detail::destroy_on_device<<<1, 1>>>(temp); - debug_cudaDeviceSynchronize(); - GPU_ERROR_CHECK(cudaFree(temp)); + +#ifndef CHAI_DISABLE_RM + if (ArrayManager::getInstance()->deviceSynchronize()) { + GPU_ERROR_CHECK(cudaDeviceSynchronize()); + } +#endif } break; @@ -569,12 +577,6 @@ namespace chai { ExecutionSpace execSpace = static_cast(space); T* pointer = get(execSpace, false); - using T_non_const = typename std::remove_const::type; - - // We can use const_cast because can managed_ptr can only - // be constructed with non const pointers. - T_non_const* temp = const_cast(pointer); - switch (execSpace) { case CPU: delete pointer; @@ -583,9 +585,13 @@ namespace chai { case GPU: { if (pointer) { - detail::destroy_on_device<<<1, 1>>>(temp); - debug_cudaDeviceSynchronize(); - GPU_ERROR_CHECK(cudaFree(temp)); + detail::destroy_on_device<<<1, 1>>>(pointer); + +#ifndef CHAI_DISABLE_RM + if (ArrayManager::getInstance()->deviceSynchronize()) { + GPU_ERROR_CHECK(cudaDeviceSynchronize()); + } +#endif } break; @@ -770,6 +776,44 @@ namespace chai { return cpuPointer; } + /// + /// @author Alan Dayton + /// + /// Calls a factory method to create a new object on the host. + /// Sets the execution space to the CPU so that ManagedArrays and managed_ptrs + /// are moved to the host as necessary. + /// + /// @param[in] f The factory method + /// @param[in] args The arguments to the factory method + /// + /// @return The host pointer to the new object + /// + template + CHAI_HOST T* make_on_host_from_factory(F f, Args&&... 
args) { +#ifndef CHAI_DISABLE_RM + // Get the ArrayManager and save the current execution space + chai::ArrayManager* arrayManager = chai::ArrayManager::getInstance(); + ExecutionSpace currentSpace = arrayManager->getExecutionSpace(); + + // Set the execution space so that ManagedArrays and managed_ptrs + // are handled properly + arrayManager->setExecutionSpace(CPU); +#endif + + // Create the object on the device + T* cpuPointer = f(args...); + +#ifndef CHAI_DISABLE_RM + // Set the execution space back to the previous value + arrayManager->setExecutionSpace(currentSpace); +#endif + + // Return the GPU pointer + return cpuPointer; + } + #ifdef __CUDACC__ /// /// @author Alan Dayton @@ -785,8 +829,8 @@ namespace chai { template ::value, int>::type = 0> - CHAI_DEVICE void new_on_device(T* gpuPointer, Args&&... args) { - new(gpuPointer) T(args...); + CHAI_DEVICE void new_on_device(T** gpuPointer, Args&&... args) { + *gpuPointer = new T(args...); } /// @@ -803,8 +847,8 @@ namespace chai { template ::value, int>::type = 0> - CHAI_DEVICE void new_on_device(T* gpuPointer, Args&&... args) { - new(gpuPointer) T(getRawPointers(args)...); + CHAI_DEVICE void new_on_device(T** gpuPointer, Args&&... args) { + *gpuPointer = new T(getRawPointers(args)...); } /// @@ -820,11 +864,32 @@ namespace chai { /// template - __global__ void make_on_device(T* gpuPointer, Args... args) + __global__ void make_on_device(T** gpuPointer, Args... args) { new_on_device(gpuPointer, args...); } + /// + /// @author Alan Dayton + /// + /// Creates a new object on the device by calling the given factory method. + /// + /// @param[out] gpuPointer Used to return the device pointer to the new object + /// @param[in] f The factory method (must be a __device__ or __host__ __device__ + /// method + /// @param[in] args The arguments to the factory method + /// + /// @note Cannot capture argument packs in an extended device lambda, + /// so explicit kernel is needed. + /// + template + __global__ void make_on_device_from_factory(T** gpuPointer, F f, Args... args) + { + *gpuPointer = f(args...); + } + /// /// @author Alan Dayton /// @@ -835,7 +900,9 @@ namespace chai { template __global__ void destroy_on_device(T* gpuPointer) { - gpuPointer->~T(); + if (gpuPointer) { + delete gpuPointer; + } } /// @@ -860,13 +927,88 @@ namespace chai { arrayManager->setExecutionSpace(GPU); #endif - // Allocate space on the GPU to hold the new object - T* gpuPointer; - GPU_ERROR_CHECK(cudaMalloc(&gpuPointer, sizeof(T))); + // Allocate space on the GPU to hold the pointer to the new object + T** gpuBuffer; + GPU_ERROR_CHECK(cudaMalloc(&gpuBuffer, sizeof(T*))); + + // Create the object on the device + make_on_device<<<1, 1>>>(gpuBuffer, args...); + +#ifndef CHAI_DISABLE_RM + if (ArrayManager::getInstance()->deviceSynchronize()) { + GPU_ERROR_CHECK(cudaDeviceSynchronize()); + } +#endif + + // Allocate space on the CPU for the pointer and copy the pointer to the CPU + T** cpuBuffer = (T**) malloc(sizeof(T*)); + GPU_ERROR_CHECK(cudaMemcpy(cpuBuffer, gpuBuffer, sizeof(T*), + cudaMemcpyDeviceToHost)); + + // Get the GPU pointer + T* gpuPointer = cpuBuffer[0]; + + // Free the host and device buffers + free(cpuBuffer); + GPU_ERROR_CHECK(cudaFree(gpuBuffer)); + +#ifndef CHAI_DISABLE_RM + // Set the execution space back to the previous value + arrayManager->setExecutionSpace(currentSpace); +#endif + + // Return the GPU pointer + return gpuPointer; + } + + /// + /// @author Alan Dayton + /// + /// Calls a factory method to create a new object on the device. 
+ /// + /// @param[in] f The factory method + /// @param[in] args The arguments to the factory method + /// + /// @return The device pointer to the new object + /// + template + CHAI_HOST T* make_on_device_from_factory(F f, Args&&... args) { +#ifndef CHAI_DISABLE_RM + // Get the ArrayManager and save the current execution space + chai::ArrayManager* arrayManager = chai::ArrayManager::getInstance(); + ExecutionSpace currentSpace = arrayManager->getExecutionSpace(); + + // Set the execution space so that chai::ManagedArrays and + // chai::managed_ptrs are handled properly + arrayManager->setExecutionSpace(GPU); +#endif + + // Allocate space on the GPU to hold the pointer to the new object + T** gpuBuffer; + GPU_ERROR_CHECK(cudaMalloc(&gpuBuffer, sizeof(T*))); // Create the object on the device - make_on_device<<<1, 1>>>(gpuPointer, args...); - debug_cudaDeviceSynchronize(); + make_on_device_from_factory<<<1, 1>>>(gpuBuffer, f, args...); + +#ifndef CHAI_DISABLE_RM + if (ArrayManager::getInstance()->deviceSynchronize()) { + GPU_ERROR_CHECK(cudaDeviceSynchronize()); + } +#endif + + // Allocate space on the CPU for the pointer and copy the pointer to the CPU + T** cpuBuffer = (T**) malloc(sizeof(T*)); + GPU_ERROR_CHECK(cudaMemcpy(cpuBuffer, gpuBuffer, sizeof(T*), + cudaMemcpyDeviceToHost)); + + // Get the GPU pointer + T* gpuPointer = cpuBuffer[0]; + + // Free the host and device buffers + free(cpuBuffer); + GPU_ERROR_CHECK(cudaFree(gpuBuffer)); #ifndef CHAI_DISABLE_RM // Set the execution space back to the previous value @@ -931,6 +1073,46 @@ namespace chai { #endif } + /// + /// @author Alan Dayton + /// + /// Makes a managed_ptr. + /// Factory function to create managed_ptrs. + /// + /// @param[in] f The factory function that will create the object + /// @param[in] args The arguments to the factory function + /// + template + CHAI_HOST managed_ptr make_managed_from_factory(F&& f, Args&&... 
args) { + static_assert(detail::is_invocable::value, + "F is not invocable with the given arguments."); + + static_assert(std::is_pointer::type>::value, + "F does not return a pointer."); + + using R = typename std::remove_pointer::type>::type; + + static_assert(std::is_convertible::value, + "F does not return a pointer that is convertible to T*."); + +#ifdef __CUDACC__ + // Construct on the GPU first to take advantage of asynchrony + T* gpuPointer = detail::make_on_device_from_factory(f, args...); +#endif + + // Construct on the CPU + T* cpuPointer = detail::make_on_host_from_factory(f, args...); + + // Construct and return the managed_ptr +#ifdef __CUDACC__ + return managed_ptr({CPU, GPU}, {cpuPointer, gpuPointer}); +#else + return managed_ptr({CPU}, {cpuPointer}); +#endif + } + /// /// @author Alan Dayton /// diff --git a/tests/integration/managed_ptr_tests.cpp b/tests/integration/managed_ptr_tests.cpp index 908074fa..105135f6 100644 --- a/tests/integration/managed_ptr_tests.cpp +++ b/tests/integration/managed_ptr_tests.cpp @@ -200,8 +200,8 @@ TEST(managed_ptr, class_with_raw_array) ASSERT_EQ(rawArrayClass->getValue(0), expectedValue); - array.free(); rawArrayClass.free(); + array.free(); } TEST(managed_ptr, class_with_multiple_raw_arrays) @@ -222,9 +222,9 @@ TEST(managed_ptr, class_with_multiple_raw_arrays) ASSERT_EQ(multipleRawArrayClass->getValue(0, 0), expectedValue1); ASSERT_EQ(multipleRawArrayClass->getValue(1, 0), expectedValue2); - array1.free(); - array2.free(); multipleRawArrayClass.free(); + array2.free(); + array1.free(); } TEST(managed_ptr, class_with_managed_array) @@ -241,8 +241,8 @@ TEST(managed_ptr, class_with_managed_array) ASSERT_EQ(derived->getValue(0), expectedValue); - array.free(); derived.free(); + array.free(); } TEST(managed_ptr, class_with_raw_ptr) @@ -260,9 +260,9 @@ TEST(managed_ptr, class_with_raw_ptr) ASSERT_EQ((*rawPointerClass).getValue(0), expectedValue); - array.free(); - rawArrayClass.free(); rawPointerClass.free(); + rawArrayClass.free(); + array.free(); } TEST(managed_ptr, class_with_managed_ptr) @@ -286,8 +286,77 @@ TEST(managed_ptr, nested_managed_ptr) ASSERT_EQ(container->getValue(), expectedValue); - derived.free(); container.free(); + derived.free(); +} + +TEST(managed_ptr, array_of_managed_ptr) +{ + int numManagedPointers = 10; + + int* expectedValues = new int[numManagedPointers]; + + chai::managed_ptr* managedPointers = new chai::managed_ptr[numManagedPointers]; + + for (int i = 0; i < numManagedPointers; ++i) { + const int expectedValue = rand(); + expectedValues[i] = expectedValue; + managedPointers[i] = chai::make_managed(expectedValue); + } + + for (int i = 0; i < numManagedPointers; ++i) { + ASSERT_EQ(managedPointers[i]->getValue(), expectedValues[i]); + managedPointers[i].free(); + } + + delete[] managedPointers; + delete[] expectedValues; +} + +TEST(managed_ptr, c_array_of_managed_ptr) +{ + int numManagedPointers = 10; + + int* expectedValues = new int[numManagedPointers]; + + chai::managed_ptr* managedPointers = (chai::managed_ptr*) malloc(numManagedPointers*sizeof(chai::managed_ptr)); + + for (int i = 0; i < numManagedPointers; ++i) { + const int expectedValue = rand(); + expectedValues[i] = expectedValue; + managedPointers[i] = chai::make_managed(expectedValue); + } + + for (int i = 0; i < numManagedPointers; ++i) { + ASSERT_EQ(managedPointers[i]->getValue(), expectedValues[i]); + managedPointers[i].free(); + } + + free(managedPointers); + delete[] expectedValues; +} + +TEST(managed_ptr, managed_array_of_managed_ptr) +{ + int 
numManagedPointers = 10; + + int* expectedValues = new int[numManagedPointers]; + + chai::ManagedArray> managedPointers(numManagedPointers, chai::CPU); + + forall(sequential(), 0, numManagedPointers, [=] (int i) { + const int expectedValue = rand(); + expectedValues[i] = expectedValue; + managedPointers[i] = chai::make_managed(expectedValue); + }); + + forall(sequential(), 0, numManagedPointers, [=] (int i) { + ASSERT_EQ(managedPointers[i]->getValue(), expectedValues[i]); + managedPointers[i].free(); + }); + + managedPointers.free(); + delete[] expectedValues; } #ifdef __CUDACC__ @@ -331,36 +400,61 @@ GPU_TEST(managed_ptr, make_on_device) GPU_TEST(managed_ptr, gpu_new_and_delete_on_device) { - // Initialize device side memory to hold the new object - RawArrayClass* gpuPointer = nullptr; - cudaMalloc(&gpuPointer, sizeof(RawArrayClass)); + // Initialize host side memory to hold a pointer + RawArrayClass** cpuPointerHolder = (RawArrayClass**) malloc(sizeof(RawArrayClass*)); + cpuPointerHolder[0] = nullptr; + + // Initialize device side memory to hold a pointer + RawArrayClass** gpuPointerHolder = nullptr; + cudaMalloc(&gpuPointerHolder, sizeof(RawArrayClass*)); // Create on the device - chai::detail::make_on_device<<<1, 1>>>(gpuPointer); + chai::detail::make_on_device<<<1, 1>>>(gpuPointerHolder); + + // Copy to the host side memory + cudaMemcpy(cpuPointerHolder, gpuPointerHolder, sizeof(RawArrayClass*), cudaMemcpyDeviceToHost); + + // Free device side memory + cudaFree(gpuPointerHolder); - // Check the pointer - ASSERT_NE(gpuPointer, nullptr); + // Save the pointer + ASSERT_NE(cpuPointerHolder[0], nullptr); + RawArrayClass* gpuPointer = cpuPointerHolder[0]; + + // Free host side memory + free(cpuPointerHolder); - // Clean up on the device chai::detail::destroy_on_device<<<1, 1>>>(gpuPointer); } GPU_TEST(managed_ptr, gpu_build_managed_ptr) { - // Initialize device side memory to hold the new object - RawArrayClass* gpuPointer = nullptr; - cudaMalloc(&gpuPointer, sizeof(RawArrayClass)); + // Initialize host side memory to hold a pointer + RawArrayClass** cpuPointerHolder = (RawArrayClass**) malloc(sizeof(RawArrayClass*)); + cpuPointerHolder[0] = nullptr; + + // Initialize device side memory to hold a pointer + RawArrayClass** gpuPointerHolder = nullptr; + cudaMalloc(&gpuPointerHolder, sizeof(RawArrayClass*)); // Create on the device - chai::detail::make_on_device<<<1, 1>>>(gpuPointer); + chai::detail::make_on_device<<<1, 1>>>(gpuPointerHolder); - // Check the pointer - ASSERT_NE(gpuPointer, nullptr); + // Copy to the host side memory + cudaMemcpy(cpuPointerHolder, gpuPointerHolder, sizeof(RawArrayClass*), cudaMemcpyDeviceToHost); + + // Free device side memory + cudaFree(gpuPointerHolder); + + // Save the pointer + ASSERT_NE(cpuPointerHolder[0], nullptr); + RawArrayClass* gpuPointer = cpuPointerHolder[0]; + + // Free host side memory + free(cpuPointerHolder); - // Make a managed_ptr chai::managed_ptr managedPtr({chai::GPU}, {gpuPointer}); - // Clean up the memory managedPtr.free(); } @@ -404,9 +498,9 @@ GPU_TEST(managed_ptr, gpu_class_with_raw_array) results.move(chai::CPU); ASSERT_EQ(results[0], expectedValue); - array.free(); - rawArrayClass.free(); results.free(); + rawArrayClass.free(); + array.free(); } GPU_TEST(managed_ptr, gpu_class_with_raw_array_and_callback) @@ -476,9 +570,9 @@ GPU_TEST(managed_ptr, gpu_class_with_managed_array) ASSERT_EQ(results[0], expectedValue); - array.free(); - derived.free(); results.free(); + derived.free(); + array.free(); } GPU_TEST(managed_ptr, 
gpu_class_with_raw_ptr) @@ -503,10 +597,10 @@ GPU_TEST(managed_ptr, gpu_class_with_raw_ptr) results.move(chai::CPU); ASSERT_EQ(results[0], expectedValue); - array.free(); - rawArrayClass.free(); - rawPointerClass.free(); results.free(); + rawPointerClass.free(); + rawArrayClass.free(); + array.free(); } GPU_TEST(managed_ptr, gpu_class_with_managed_ptr) @@ -525,8 +619,8 @@ GPU_TEST(managed_ptr, gpu_class_with_managed_ptr) results.move(chai::CPU); ASSERT_EQ(results[0], expectedValue); - derived.free(); results.free(); + derived.free(); } GPU_TEST(managed_ptr, gpu_nested_managed_ptr) @@ -545,9 +639,9 @@ GPU_TEST(managed_ptr, gpu_nested_managed_ptr) results.move(chai::CPU); ASSERT_EQ(results[0], expectedValue); - derived.free(); - container.free(); results.free(); + container.free(); + derived.free(); } GPU_TEST(managed_ptr, gpu_multiple_inheritance) @@ -569,8 +663,8 @@ GPU_TEST(managed_ptr, gpu_multiple_inheritance) ASSERT_EQ(results[0], true); ASSERT_EQ(results[1], true); - derived.free(); results.free(); + base2.free(); } GPU_TEST(managed_ptr, static_pointer_cast) @@ -601,9 +695,9 @@ GPU_TEST(managed_ptr, static_pointer_cast) ASSERT_EQ(results[1], expectedValue); ASSERT_EQ(results[2], expectedValue); - array.free(); - derived.free(); results.free(); + derivedFromBase.free(); + array.free(); } GPU_TEST(managed_ptr, dynamic_pointer_cast) @@ -634,9 +728,9 @@ GPU_TEST(managed_ptr, dynamic_pointer_cast) ASSERT_EQ(results[1], expectedValue); ASSERT_EQ(results[2], expectedValue); - array.free(); - base.free(); results.free(); + derivedFromBase.free(); + array.free(); } GPU_TEST(managed_ptr, const_pointer_cast) @@ -667,9 +761,9 @@ GPU_TEST(managed_ptr, const_pointer_cast) ASSERT_EQ(results[1], expectedValue); ASSERT_EQ(results[2], expectedValue); - array.free(); - derivedFromConst.free(); results.free(); + constDerived.free(); + array.free(); } GPU_TEST(managed_ptr, reinterpret_pointer_cast) @@ -700,9 +794,9 @@ GPU_TEST(managed_ptr, reinterpret_pointer_cast) ASSERT_EQ(results[1], expectedValue); ASSERT_EQ(results[2], expectedValue); - array.free(); - derived.free(); results.free(); + derivedFromBase.free(); + array.free(); } #endif diff --git a/tests/unit/managed_ptr_unit_tests.cpp b/tests/unit/managed_ptr_unit_tests.cpp index 281e88a5..5e7bb17b 100644 --- a/tests/unit/managed_ptr_unit_tests.cpp +++ b/tests/unit/managed_ptr_unit_tests.cpp @@ -141,7 +141,6 @@ TEST(managed_ptr, default_constructor) EXPECT_FALSE(derived != otherDerived); EXPECT_FALSE(otherDerived != derived); - // Make sure free is a no-op derived.free(); otherDerived.free(); } @@ -162,9 +161,8 @@ TEST(managed_ptr, nullptr_constructor) EXPECT_FALSE(derived != otherDerived); EXPECT_FALSE(otherDerived != derived); - // Make sure free is a no-op - derived.free(); otherDerived.free(); + derived.free(); } TEST(managed_ptr, cpu_pointer_constructor) @@ -289,7 +287,7 @@ TEST(managed_ptr, copy_assignment_operator) EXPECT_TRUE(otherDerived == derived); EXPECT_FALSE(otherDerived != derived); - derived.free(); + otherDerived.free(); } TEST(managed_ptr, copy_constructor_from_default_constructed) @@ -306,6 +304,8 @@ TEST(managed_ptr, copy_constructor_from_default_constructed) EXPECT_EQ(bool(otherDerived), false); EXPECT_EQ(otherDerived, nullptr); EXPECT_EQ(nullptr, otherDerived); + + derived.free(); } TEST(managed_ptr, copy_assignment_operator_from_default_constructed) @@ -323,6 +323,8 @@ TEST(managed_ptr, copy_assignment_operator_from_default_constructed) EXPECT_EQ(bool(otherDerived), false); EXPECT_EQ(otherDerived, nullptr); EXPECT_EQ(nullptr, 
otherDerived); + + otherDerived.free(); } TEST(managed_ptr, conversion_copy_constructor_from_default_constructed) @@ -339,6 +341,8 @@ TEST(managed_ptr, conversion_copy_constructor_from_default_constructed) EXPECT_EQ(bool(otherDerived), false); EXPECT_EQ(otherDerived, nullptr); EXPECT_EQ(nullptr, otherDerived); + + otherDerived.free(); } TEST(managed_ptr, conversion_copy_assignment_operator_from_default_constructed) @@ -356,6 +360,8 @@ TEST(managed_ptr, conversion_copy_assignment_operator_from_default_constructed) EXPECT_EQ(bool(otherDerived), false); EXPECT_EQ(otherDerived, nullptr); EXPECT_EQ(nullptr, otherDerived); + + derived.free(); } TEST(managed_ptr, copy_assignment_operator_from_host_ptr_constructed) @@ -384,8 +390,8 @@ TEST(managed_ptr, copy_assignment_operator_from_host_ptr_constructed) EXPECT_NE(thirdDerived, nullptr); EXPECT_NE(nullptr, thirdDerived); + derived.free(); otherDerived.free(); - thirdDerived.free(); } TEST(managed_ptr, conversion_copy_assignment_operator_from_host_ptr_constructed) @@ -434,7 +440,7 @@ TEST(managed_ptr, static_pointer_cast) EXPECT_TRUE(base != nullptr); EXPECT_TRUE(nullptr != base); - derived.free(); + base.free(); } TEST(managed_ptr, dynamic_pointer_cast) @@ -531,8 +537,11 @@ GPU_TEST(managed_ptr, gpu_default_constructor) EXPECT_FALSE(array2[7]); EXPECT_FALSE(array2[8]); - array.free(); array2.free(); + array.free(); + + derived.free(); + otherDerived.free(); } GPU_TEST(managed_ptr, gpu_nullptr_constructor) @@ -570,11 +579,14 @@ GPU_TEST(managed_ptr, gpu_nullptr_constructor) EXPECT_FALSE(array2[7]); EXPECT_FALSE(array2[8]); - array.free(); array2.free(); + array.free(); + + derived.free(); + otherDerived.free(); } -GPU_TEST(managed_ptr, gpu_pointer_constructor) +GPU_TEST(managed_ptr, gpu_gpu_pointer_constructor) { TestDerived* gpuPointer = chai::detail::make_on_device(3); chai::managed_ptr derived({chai::GPU}, {gpuPointer}); @@ -612,47 +624,73 @@ GPU_TEST(managed_ptr, gpu_pointer_constructor) EXPECT_TRUE(array3[3]); EXPECT_TRUE(array3[4]); - array1.free(); - array2.free(); array3.free(); + array2.free(); + array1.free(); + + derived.free(); } GPU_TEST(managed_ptr, gpu_new_and_delete_on_device) { - // Initialize device side memory to hold the new object - Simple* gpuPointer = nullptr; - cudaMalloc(&gpuPointer, sizeof(Simple)); + // Initialize host side memory to hold a pointer + Simple** cpuPointerHolder = (Simple**) malloc(sizeof(Simple*)); + cpuPointerHolder[0] = nullptr; + + // Initialize device side memory to hold a pointer + Simple** gpuPointerHolder = nullptr; + cudaMalloc(&gpuPointerHolder, sizeof(Simple*)); // Create on the device - chai::detail::make_on_device<<<1, 1>>>(gpuPointer); + chai::detail::make_on_device<<<1, 1>>>(gpuPointerHolder); + + // Copy to the host side memory + cudaMemcpy(cpuPointerHolder, gpuPointerHolder, sizeof(Simple*), cudaMemcpyDeviceToHost); + + // Free device side memory + cudaFree(gpuPointerHolder); - // Check the pointer - ASSERT_NE(gpuPointer, nullptr); + // Save the pointer + ASSERT_NE(cpuPointerHolder[0], nullptr); + Simple* gpuPointer = cpuPointerHolder[0]; + + // Free host side memory + free(cpuPointerHolder); - // Clean up on the device chai::detail::destroy_on_device<<<1, 1>>>(gpuPointer); } GPU_TEST(managed_ptr, gpu_new_and_delete_on_device_2) { - // Initialize device side memory to hold a the new object - Simple* gpuPointer = nullptr; - cudaMalloc(&gpuPointer, sizeof(Simple)); + // Initialize host side memory to hold a pointer + Simple** cpuPointerHolder = (Simple**) malloc(sizeof(Simple*)); + 
cpuPointerHolder[0] = nullptr; + + // Initialize device side memory to hold a pointer + Simple** gpuPointerHolder = nullptr; + cudaMalloc(&gpuPointerHolder, sizeof(Simple*)); // Create on the device - chai::detail::make_on_device<<<1, 1>>>(gpuPointer); + chai::detail::make_on_device<<<1, 1>>>(gpuPointerHolder); - // Check the pointer - ASSERT_NE(gpuPointer, nullptr); + // Copy to the host side memory + cudaMemcpy(cpuPointerHolder, gpuPointerHolder, sizeof(Simple*), cudaMemcpyDeviceToHost); - // Create a managed_ptr - chai::managed_ptr test({chai::GPU}, {gpuPointer}); + // Free device side memory + cudaFree(gpuPointerHolder); + + // Save the pointer + ASSERT_NE(cpuPointerHolder[0], nullptr); + Simple* gpuPointer = cpuPointerHolder[0]; + + // Free host side memory + free(cpuPointerHolder); - // Free the memory + chai::managed_ptr test({chai::GPU}, {gpuPointer}); test.free(); } -GPU_TEST(managed_ptr, simple_cuda_cpu_and_gpu_pointer_constructor) +GPU_TEST(managed_ptr, simple_gpu_cpu_and_gpu_pointer_constructor) { Simple* gpuPointer = chai::detail::make_on_device(3); Simple* cpuPointer = new Simple(4); @@ -672,6 +710,7 @@ GPU_TEST(managed_ptr, simple_cuda_cpu_and_gpu_pointer_constructor) EXPECT_EQ(array1[0], 3); array1.free(); + simple.free(); } @@ -716,9 +755,10 @@ GPU_TEST(managed_ptr, gpu_cpu_and_gpu_pointer_constructor) EXPECT_TRUE(array3[3]); EXPECT_TRUE(array3[4]); - array1.free(); - array2.free(); array3.free(); + array2.free(); + array1.free(); + derived.free(); } @@ -754,9 +794,98 @@ GPU_TEST(managed_ptr, gpu_make_managed) EXPECT_TRUE(array3[3]); EXPECT_TRUE(array3[4]); - array.free(); - array2.free(); array3.free(); + array2.free(); + array.free(); + + derived.free(); +} + +GPU_TEST(managed_ptr, make_managed_from_factory_function) +{ + const int expectedValue = rand(); + + auto factory = [] CHAI_HOST_DEVICE (const int value) { + return Factory(value); + }; + + auto derived = chai::make_managed_from_factory(factory, expectedValue); + + EXPECT_EQ((*derived).getValue(), expectedValue); + + EXPECT_NE(derived.get(), nullptr); + EXPECT_TRUE(derived); + EXPECT_FALSE(derived == nullptr); + EXPECT_FALSE(nullptr == derived); + EXPECT_TRUE(derived != nullptr); + EXPECT_TRUE(nullptr != derived); + + derived.free(); +} + +GPU_TEST(managed_ptr, make_managed_from_factory_lambda) +{ + const int expectedValue = rand(); + + auto factory = [] CHAI_HOST_DEVICE (const int value) { + return new TestDerived(value); + }; + + auto derived = chai::make_managed_from_factory(factory, expectedValue); + + EXPECT_EQ((*derived).getValue(), expectedValue); + + EXPECT_NE(derived.get(), nullptr); + EXPECT_TRUE(derived); + EXPECT_FALSE(derived == nullptr); + EXPECT_FALSE(nullptr == derived); + EXPECT_TRUE(derived != nullptr); + EXPECT_TRUE(nullptr != derived); + + derived.free(); +} + +GPU_TEST(managed_ptr, make_managed_from_overloaded_factory_function) +{ + const int expectedValue = rand(); + + auto factory = [] CHAI_HOST_DEVICE (const int value) { + return OverloadedFactory(value); + }; + + auto derived = chai::make_managed_from_factory(factory, expectedValue); + + EXPECT_EQ((*derived).getValue(), expectedValue); + + EXPECT_NE(derived.get(), nullptr); + EXPECT_TRUE(derived); + EXPECT_FALSE(derived == nullptr); + EXPECT_FALSE(nullptr == derived); + EXPECT_TRUE(derived != nullptr); + EXPECT_TRUE(nullptr != derived); + + derived.free(); +} + +GPU_TEST(managed_ptr, make_managed_from_factory_static_member_function) +{ + const int expectedValue = rand(); + + auto factory = [] CHAI_HOST_DEVICE (const int value) { + return 
TestBase::Factory(value); + }; + + auto derived = chai::make_managed_from_factory(factory, expectedValue); + + EXPECT_EQ((*derived).getValue(), expectedValue); + + EXPECT_NE(derived.get(), nullptr); + EXPECT_TRUE(derived); + EXPECT_FALSE(derived == nullptr); + EXPECT_FALSE(nullptr == derived); + EXPECT_TRUE(derived != nullptr); + EXPECT_TRUE(nullptr != derived); + derived.free(); } @@ -817,10 +946,11 @@ GPU_TEST(managed_ptr, gpu_copy_constructor) EXPECT_TRUE(array3[12]); EXPECT_FALSE(array3[13]); - array.free(); - array2.free(); array3.free(); - otherDerived.free(); + array2.free(); + array.free(); + + derived.free(); } GPU_TEST(managed_ptr, gpu_converting_constructor) @@ -880,10 +1010,11 @@ GPU_TEST(managed_ptr, gpu_converting_constructor) EXPECT_TRUE(array3[12]); EXPECT_FALSE(array3[13]); - array.free(); - array2.free(); array3.free(); - derived.free(); + array2.free(); + array.free(); + + base.free(); } GPU_TEST(managed_ptr, gpu_copy_assignment_operator) @@ -944,10 +1075,55 @@ GPU_TEST(managed_ptr, gpu_copy_assignment_operator) EXPECT_TRUE(array3[12]); EXPECT_FALSE(array3[13]); - array.free(); - array2.free(); array3.free(); - otherDerived.free(); + array2.free(); + array.free(); + + derived.free(); +} + +#endif + +// Enable the following tests to ensure that proper compiler errors are given +// for bad arguments since otherwise it is difficult to make sure the template +// metaprogramming is correct. + +#if 0 + +// Should give something like the following: +// error: static assertion failed: F is not invocable with the given arguments. + +TEST(managed_ptr, bad_function_to_make_managed_from_factory_function) +{ + const int expectedValue = rand(); + + auto factory = [] CHAI_HOST (const int value) { + return new TestDerived(value); + }; + + auto derived = chai::make_managed_from_factory(expectedValue, factory); + + EXPECT_EQ((*derived).getValue(), expectedValue); +} + +#endif + +#if 0 + +// Should give something like the following: +// error: static assertion failed: F is not invocable with the given arguments. 
+ +TEST(managed_ptr, bad_arguments_to_make_managed_from_factory_function) +{ + const int expectedValue = rand(); + + auto factory = [] CHAI_HOST (const int value) { + return new TestDerived(value); + }; + + auto derived = chai::make_managed_from_factory(factory, expectedValue, 3); + + EXPECT_EQ((*derived).getValue(), expectedValue); } #endif From d6db9218c3f1616b97f55187a48a9891b6173766 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Thu, 5 Dec 2019 12:49:59 -0800 Subject: [PATCH 54/58] Reduce duplication in benchmarks --- benchmarks/chai_managed_ptr_benchmarks.cpp | 347 +++++++++++++++++++++ 1 file changed, 347 insertions(+) diff --git a/benchmarks/chai_managed_ptr_benchmarks.cpp b/benchmarks/chai_managed_ptr_benchmarks.cpp index b47c2a48..fc2b82b2 100644 --- a/benchmarks/chai_managed_ptr_benchmarks.cpp +++ b/benchmarks/chai_managed_ptr_benchmarks.cpp @@ -52,6 +52,8 @@ class Base { public: CHAI_HOST_DEVICE virtual void scale(size_t numValues, int* values) = 0; + + CHAI_HOST_DEVICE virtual void sumAndScale(size_t numValues, int* values, int& value) = 0; }; class Derived : public Base { @@ -64,6 +66,16 @@ class Derived : public Base { } } + CHAI_HOST_DEVICE virtual void sumAndScale(size_t numValues, int* values, int& value) override { + int result = 0; + + for (size_t i = 0; i < numValues; ++i) { + result += values[i]; + } + + value *= m_value * result; + } + private: int m_value = -1; }; @@ -74,6 +86,10 @@ class BaseCRTP { CHAI_HOST_DEVICE void scale(size_t numValues, int* values) { return static_cast(this)->scale(numValues, values); } + + CHAI_HOST_DEVICE void sumAndScale(size_t numValues, int* values, int& value) { + return static_cast(this)->sumAndScale(numValues, values, value); + } }; class DerivedCRTP : public BaseCRTP { @@ -86,6 +102,16 @@ class DerivedCRTP : public BaseCRTP { } } + CHAI_HOST_DEVICE void sumAndScale(size_t numValues, int* values, int& value) { + int result = 0; + + for (size_t i = 0; i < numValues; ++i) { + result += values[i]; + } + + value *= m_value * result; + } + private: int m_value = -1; }; @@ -100,6 +126,16 @@ class NoInheritance { } } + CHAI_HOST_DEVICE void sumAndScale(size_t numValues, int* values, int& value) { + int result = 0; + + for (size_t i = 0; i < numValues; ++i) { + result += values[i]; + } + + value *= m_value * result; + } + private: int m_value = -1; }; @@ -390,6 +426,7 @@ void benchmark_use_managed_ptr_gpu(benchmark::State& state) BENCHMARK(benchmark_use_managed_ptr_gpu); + // Curiously recurring template pattern __global__ void square(BaseCRTP object, size_t numValues, int* values) { object.scale(numValues, values); @@ -448,6 +485,316 @@ void benchmark_no_inheritance_gpu(benchmark::State& state) BENCHMARK(benchmark_no_inheritance_gpu); +__global__ void square(size_t numValues, int* values, chai::managed_ptr object) { + int i = blockIdx.x * blockDim.x + threadIdx.x; + + if (i < numValues) { + int temp[4] = {i, i+1, i+2, i+3}; + object->sumAndScale(4, temp, values[i]); + } +} + +// managed_ptr (bulk) +template +void benchmark_bulk_use_managed_ptr_gpu(benchmark::State& state) +{ + chai::managed_ptr object = chai::make_managed(2); + + int* values; + cudaMalloc(&values, N * sizeof(int)); + fill<<<(N+255)/256, 256>>>(N, values); + + cudaDeviceSynchronize(); + + while (state.KeepRunning()) { + square<<<(N+255)/256, 256>>>(N, values, object); + cudaDeviceSynchronize(); + } + + cudaFree(values); + object.free(); + cudaDeviceSynchronize(); +} + +BENCHMARK_TEMPLATE(benchmark_bulk_use_managed_ptr_gpu, 1); 
+BENCHMARK_TEMPLATE(benchmark_bulk_use_managed_ptr_gpu, 256); +BENCHMARK_TEMPLATE(benchmark_bulk_use_managed_ptr_gpu, 512); +BENCHMARK_TEMPLATE(benchmark_bulk_use_managed_ptr_gpu, 1024); +BENCHMARK_TEMPLATE(benchmark_bulk_use_managed_ptr_gpu, 2048); +BENCHMARK_TEMPLATE(benchmark_bulk_use_managed_ptr_gpu, 4096); +BENCHMARK_TEMPLATE(benchmark_bulk_use_managed_ptr_gpu, 8192); +BENCHMARK_TEMPLATE(benchmark_bulk_use_managed_ptr_gpu, 16384); +BENCHMARK_TEMPLATE(benchmark_bulk_use_managed_ptr_gpu, 32768); +BENCHMARK_TEMPLATE(benchmark_bulk_use_managed_ptr_gpu, 65536); +BENCHMARK_TEMPLATE(benchmark_bulk_use_managed_ptr_gpu, 131072); +BENCHMARK_TEMPLATE(benchmark_bulk_use_managed_ptr_gpu, 262144); +BENCHMARK_TEMPLATE(benchmark_bulk_use_managed_ptr_gpu, 524288); +BENCHMARK_TEMPLATE(benchmark_bulk_use_managed_ptr_gpu, 1048576); +BENCHMARK_TEMPLATE(benchmark_bulk_use_managed_ptr_gpu, 2097152); + +// Curiously recurring template pattern +__global__ void square(size_t numValues, int* values, BaseCRTP object) { + int i = blockIdx.x * blockDim.x + threadIdx.x; + + if (i < numValues) { + int temp[4] = {i, i+1, i+2, i+3}; + object.sumAndScale(4, temp, values[i]); + } +} + +template +void benchmark_bulk_curiously_recurring_template_pattern_gpu(benchmark::State& state) +{ + BaseCRTP* derivedCRTP = new DerivedCRTP(2); + auto object = *derivedCRTP; + + int* values; + cudaMalloc(&values, N * sizeof(int)); + fill<<<(N+255)/256, 256>>>(N, values); + + cudaDeviceSynchronize(); + + while (state.KeepRunning()) { + square<<<(N+255)/256, 256>>>(N, values, object); + cudaDeviceSynchronize(); + } + + cudaFree(values); + delete derivedCRTP; + cudaDeviceSynchronize(); +} + +BENCHMARK_TEMPLATE(benchmark_bulk_curiously_recurring_template_pattern_gpu, 1); +BENCHMARK_TEMPLATE(benchmark_bulk_curiously_recurring_template_pattern_gpu, 256); +BENCHMARK_TEMPLATE(benchmark_bulk_curiously_recurring_template_pattern_gpu, 512); +BENCHMARK_TEMPLATE(benchmark_bulk_curiously_recurring_template_pattern_gpu, 1024); +BENCHMARK_TEMPLATE(benchmark_bulk_curiously_recurring_template_pattern_gpu, 2048); +BENCHMARK_TEMPLATE(benchmark_bulk_curiously_recurring_template_pattern_gpu, 4096); +BENCHMARK_TEMPLATE(benchmark_bulk_curiously_recurring_template_pattern_gpu, 8192); +BENCHMARK_TEMPLATE(benchmark_bulk_curiously_recurring_template_pattern_gpu, 16384); +BENCHMARK_TEMPLATE(benchmark_bulk_curiously_recurring_template_pattern_gpu, 32768); +BENCHMARK_TEMPLATE(benchmark_bulk_curiously_recurring_template_pattern_gpu, 65536); +BENCHMARK_TEMPLATE(benchmark_bulk_curiously_recurring_template_pattern_gpu, 131072); +BENCHMARK_TEMPLATE(benchmark_bulk_curiously_recurring_template_pattern_gpu, 262144); +BENCHMARK_TEMPLATE(benchmark_bulk_curiously_recurring_template_pattern_gpu, 524288); +BENCHMARK_TEMPLATE(benchmark_bulk_curiously_recurring_template_pattern_gpu, 1048576); +BENCHMARK_TEMPLATE(benchmark_bulk_curiously_recurring_template_pattern_gpu, 2097152); + +// Class without inheritance +__global__ void square(size_t numValues, int* values, NoInheritance object) { + int i = blockIdx.x * blockDim.x + threadIdx.x; + + if (i < numValues) { + int temp[4] = {i, i+1, i+2, i+3}; + object.sumAndScale(4, temp, values[i]); + } +} + +template +void benchmark_bulk_no_inheritance_gpu(benchmark::State& state) +{ + NoInheritance* noInheritance = new NoInheritance(2); + auto object = *noInheritance; + + int* values; + cudaMalloc(&values, N * sizeof(int)); + fill<<<(N+255)/256, 256>>>(N, values); + + cudaDeviceSynchronize(); + + while (state.KeepRunning()) { + 
square<<<(N+255)/256, 256>>>(N, values, object); + cudaDeviceSynchronize(); + } + + cudaFree(values); + delete noInheritance; + cudaDeviceSynchronize(); +} + +BENCHMARK_TEMPLATE(benchmark_bulk_no_inheritance_gpu, 1); +BENCHMARK_TEMPLATE(benchmark_bulk_no_inheritance_gpu, 256); +BENCHMARK_TEMPLATE(benchmark_bulk_no_inheritance_gpu, 512); +BENCHMARK_TEMPLATE(benchmark_bulk_no_inheritance_gpu, 1024); +BENCHMARK_TEMPLATE(benchmark_bulk_no_inheritance_gpu, 2048); +BENCHMARK_TEMPLATE(benchmark_bulk_no_inheritance_gpu, 4096); +BENCHMARK_TEMPLATE(benchmark_bulk_no_inheritance_gpu, 8192); +BENCHMARK_TEMPLATE(benchmark_bulk_no_inheritance_gpu, 16384); +BENCHMARK_TEMPLATE(benchmark_bulk_no_inheritance_gpu, 32768); +BENCHMARK_TEMPLATE(benchmark_bulk_no_inheritance_gpu, 65536); +BENCHMARK_TEMPLATE(benchmark_bulk_no_inheritance_gpu, 131072); +BENCHMARK_TEMPLATE(benchmark_bulk_no_inheritance_gpu, 262144); +BENCHMARK_TEMPLATE(benchmark_bulk_no_inheritance_gpu, 524288); +BENCHMARK_TEMPLATE(benchmark_bulk_no_inheritance_gpu, 1048576); +BENCHMARK_TEMPLATE(benchmark_bulk_no_inheritance_gpu, 2097152); + +#endif + +// managed_ptr +template +static void benchmark_bulk_polymorphism_cpu(benchmark::State& state) +{ + Base* object = new Derived(2); + + int* values = (int*) malloc(N * sizeof(int)); + + for (size_t i = 0; i < N; ++i) { + values[i] = i * i; + } + +#ifdef __CUDACC__ + cudaDeviceSynchronize(); +#endif + + while (state.KeepRunning()) { + for (int i = 0; i < N; ++i) { + int temp[4] = {i, i+1, i+2, i+3}; + object->sumAndScale(4, temp, values[i]); + } + } + + delete object; +#ifdef __CUDACC__ + cudaDeviceSynchronize(); +#endif +} + +BENCHMARK_TEMPLATE(benchmark_bulk_polymorphism_cpu, 1); +BENCHMARK_TEMPLATE(benchmark_bulk_polymorphism_cpu, 256); +BENCHMARK_TEMPLATE(benchmark_bulk_polymorphism_cpu, 512); +BENCHMARK_TEMPLATE(benchmark_bulk_polymorphism_cpu, 1024); +BENCHMARK_TEMPLATE(benchmark_bulk_polymorphism_cpu, 2048); +BENCHMARK_TEMPLATE(benchmark_bulk_polymorphism_cpu, 4096); +BENCHMARK_TEMPLATE(benchmark_bulk_polymorphism_cpu, 8192); +BENCHMARK_TEMPLATE(benchmark_bulk_polymorphism_cpu, 16384); +BENCHMARK_TEMPLATE(benchmark_bulk_polymorphism_cpu, 32768); +BENCHMARK_TEMPLATE(benchmark_bulk_polymorphism_cpu, 65536); +BENCHMARK_TEMPLATE(benchmark_bulk_polymorphism_cpu, 131072); +BENCHMARK_TEMPLATE(benchmark_bulk_polymorphism_cpu, 262144); +BENCHMARK_TEMPLATE(benchmark_bulk_polymorphism_cpu, 524288); +BENCHMARK_TEMPLATE(benchmark_bulk_polymorphism_cpu, 1048576); +BENCHMARK_TEMPLATE(benchmark_bulk_polymorphism_cpu, 2097152); + +// managed_ptr +template +static void benchmark_bulk_use_managed_ptr_cpu(benchmark::State& state) +{ + chai::managed_ptr object = chai::make_managed(2); + + int* values = (int*) malloc(N * sizeof(int)); + + for (size_t i = 0; i < N; ++i) { + values[i] = i * i; + } + +#ifdef __CUDACC__ + cudaDeviceSynchronize(); #endif + while (state.KeepRunning()) { + for (int i = 0; i < N; ++i) { + int temp[4] = {i, i+1, i+2, i+3}; + object->sumAndScale(4, temp, values[i]); + } + } + + object.free(); + cudaDeviceSynchronize(); +} + +BENCHMARK_TEMPLATE(benchmark_bulk_use_managed_ptr_cpu, 1); +BENCHMARK_TEMPLATE(benchmark_bulk_use_managed_ptr_cpu, 256); +BENCHMARK_TEMPLATE(benchmark_bulk_use_managed_ptr_cpu, 512); +BENCHMARK_TEMPLATE(benchmark_bulk_use_managed_ptr_cpu, 1024); +BENCHMARK_TEMPLATE(benchmark_bulk_use_managed_ptr_cpu, 2048); +BENCHMARK_TEMPLATE(benchmark_bulk_use_managed_ptr_cpu, 4096); +BENCHMARK_TEMPLATE(benchmark_bulk_use_managed_ptr_cpu, 8192); 
+BENCHMARK_TEMPLATE(benchmark_bulk_use_managed_ptr_cpu, 16384); +BENCHMARK_TEMPLATE(benchmark_bulk_use_managed_ptr_cpu, 32768); +BENCHMARK_TEMPLATE(benchmark_bulk_use_managed_ptr_cpu, 65536); +BENCHMARK_TEMPLATE(benchmark_bulk_use_managed_ptr_cpu, 131072); +BENCHMARK_TEMPLATE(benchmark_bulk_use_managed_ptr_cpu, 262144); +BENCHMARK_TEMPLATE(benchmark_bulk_use_managed_ptr_cpu, 524288); +BENCHMARK_TEMPLATE(benchmark_bulk_use_managed_ptr_cpu, 1048576); +BENCHMARK_TEMPLATE(benchmark_bulk_use_managed_ptr_cpu, 2097152); + +// Curiously recurring template pattern +template +static void benchmark_bulk_curiously_recurring_template_pattern_cpu(benchmark::State& state) +{ + BaseCRTP* object = new DerivedCRTP(2); + + int* values = (int*) malloc(N * sizeof(int)); + + for (size_t i = 0; i < N; ++i) { + values[i] = i * i; + } + + while (state.KeepRunning()) { + for (int i = 0; i < N; ++i) { + int temp[4] = {i, i+1, i+2, i+3}; + object->sumAndScale(4, temp, values[i]); + } + } + + free(values); + delete object; +} + +BENCHMARK_TEMPLATE(benchmark_bulk_curiously_recurring_template_pattern_cpu, 1); +BENCHMARK_TEMPLATE(benchmark_bulk_curiously_recurring_template_pattern_cpu, 256); +BENCHMARK_TEMPLATE(benchmark_bulk_curiously_recurring_template_pattern_cpu, 512); +BENCHMARK_TEMPLATE(benchmark_bulk_curiously_recurring_template_pattern_cpu, 1024); +BENCHMARK_TEMPLATE(benchmark_bulk_curiously_recurring_template_pattern_cpu, 2048); +BENCHMARK_TEMPLATE(benchmark_bulk_curiously_recurring_template_pattern_cpu, 4096); +BENCHMARK_TEMPLATE(benchmark_bulk_curiously_recurring_template_pattern_cpu, 8192); +BENCHMARK_TEMPLATE(benchmark_bulk_curiously_recurring_template_pattern_cpu, 16384); +BENCHMARK_TEMPLATE(benchmark_bulk_curiously_recurring_template_pattern_cpu, 32768); +BENCHMARK_TEMPLATE(benchmark_bulk_curiously_recurring_template_pattern_cpu, 65536); +BENCHMARK_TEMPLATE(benchmark_bulk_curiously_recurring_template_pattern_cpu, 131072); +BENCHMARK_TEMPLATE(benchmark_bulk_curiously_recurring_template_pattern_cpu, 262144); +BENCHMARK_TEMPLATE(benchmark_bulk_curiously_recurring_template_pattern_cpu, 524288); +BENCHMARK_TEMPLATE(benchmark_bulk_curiously_recurring_template_pattern_cpu, 1048576); +BENCHMARK_TEMPLATE(benchmark_bulk_curiously_recurring_template_pattern_cpu, 2097152); + +// Class without inheritance +template +static void benchmark_bulk_no_inheritance_cpu(benchmark::State& state) +{ + NoInheritance* object = new NoInheritance(2); + + int* values = (int*) malloc(N * sizeof(int)); + + for (size_t i = 0; i < N; ++i) { + values[i] = i * i; + } + + while (state.KeepRunning()) { + for (int i = 0; i < N; ++i) { + int temp[4] = {i, i+1, i+2, i+3}; + object->sumAndScale(4, temp, values[i]); + } + } + + free(values); + delete object; +} + +BENCHMARK_TEMPLATE(benchmark_bulk_no_inheritance_cpu, 1); +BENCHMARK_TEMPLATE(benchmark_bulk_no_inheritance_cpu, 256); +BENCHMARK_TEMPLATE(benchmark_bulk_no_inheritance_cpu, 512); +BENCHMARK_TEMPLATE(benchmark_bulk_no_inheritance_cpu, 1024); +BENCHMARK_TEMPLATE(benchmark_bulk_no_inheritance_cpu, 2048); +BENCHMARK_TEMPLATE(benchmark_bulk_no_inheritance_cpu, 4096); +BENCHMARK_TEMPLATE(benchmark_bulk_no_inheritance_cpu, 8192); +BENCHMARK_TEMPLATE(benchmark_bulk_no_inheritance_cpu, 16384); +BENCHMARK_TEMPLATE(benchmark_bulk_no_inheritance_cpu, 32768); +BENCHMARK_TEMPLATE(benchmark_bulk_no_inheritance_cpu, 65536); +BENCHMARK_TEMPLATE(benchmark_bulk_no_inheritance_cpu, 131072); +BENCHMARK_TEMPLATE(benchmark_bulk_no_inheritance_cpu, 262144); 
+BENCHMARK_TEMPLATE(benchmark_bulk_no_inheritance_cpu, 524288); +BENCHMARK_TEMPLATE(benchmark_bulk_no_inheritance_cpu, 1048576); +BENCHMARK_TEMPLATE(benchmark_bulk_no_inheritance_cpu, 2097152); + BENCHMARK_MAIN(); + From 403f9ba97e973e7a98da60d11740252cfbc6a34b Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Thu, 5 Dec 2019 15:49:23 -0800 Subject: [PATCH 55/58] Fix benchmarks --- benchmarks/chai_managed_ptr_benchmarks.cpp | 122 +++++++++++---------- 1 file changed, 64 insertions(+), 58 deletions(-) diff --git a/benchmarks/chai_managed_ptr_benchmarks.cpp b/benchmarks/chai_managed_ptr_benchmarks.cpp index fc2b82b2..f23611e3 100644 --- a/benchmarks/chai_managed_ptr_benchmarks.cpp +++ b/benchmarks/chai_managed_ptr_benchmarks.cpp @@ -51,25 +51,27 @@ class Base { public: - CHAI_HOST_DEVICE virtual void scale(size_t numValues, int* values) = 0; + CHAI_HOST_DEVICE virtual void scale(int numValues, int* values) = 0; - CHAI_HOST_DEVICE virtual void sumAndScale(size_t numValues, int* values, int& value) = 0; + CHAI_HOST_DEVICE virtual void sumAndScale(int numValues, int* values, int& value) = 0; + + CHAI_HOST_DEVICE virtual ~Base() = default; }; class Derived : public Base { public: CHAI_HOST_DEVICE Derived(int value) : Base(), m_value(value) {} - CHAI_HOST_DEVICE virtual void scale(size_t numValues, int* values) override { - for (size_t i = 0; i < numValues; ++i) { + CHAI_HOST_DEVICE virtual void scale(int numValues, int* values) override { + for (int i = 0; i < numValues; ++i) { values[i] *= m_value; } } - CHAI_HOST_DEVICE virtual void sumAndScale(size_t numValues, int* values, int& value) override { + CHAI_HOST_DEVICE virtual void sumAndScale(int numValues, int* values, int& value) override { int result = 0; - for (size_t i = 0; i < numValues; ++i) { + for (int i = 0; i < numValues; ++i) { result += values[i]; } @@ -83,11 +85,11 @@ class Derived : public Base { template class BaseCRTP { public: - CHAI_HOST_DEVICE void scale(size_t numValues, int* values) { + CHAI_HOST_DEVICE void scale(int numValues, int* values) { return static_cast(this)->scale(numValues, values); } - CHAI_HOST_DEVICE void sumAndScale(size_t numValues, int* values, int& value) { + CHAI_HOST_DEVICE void sumAndScale(int numValues, int* values, int& value) { return static_cast(this)->sumAndScale(numValues, values, value); } }; @@ -96,16 +98,16 @@ class DerivedCRTP : public BaseCRTP { public: CHAI_HOST_DEVICE DerivedCRTP(int value) : BaseCRTP(), m_value(value) {} - CHAI_HOST_DEVICE void scale(size_t numValues, int* values) { - for (size_t i = 0; i < numValues; ++i) { + CHAI_HOST_DEVICE void scale(int numValues, int* values) { + for (int i = 0; i < numValues; ++i) { values[i] *= m_value; } } - CHAI_HOST_DEVICE void sumAndScale(size_t numValues, int* values, int& value) { + CHAI_HOST_DEVICE void sumAndScale(int numValues, int* values, int& value) { int result = 0; - for (size_t i = 0; i < numValues; ++i) { + for (int i = 0; i < numValues; ++i) { result += values[i]; } @@ -120,16 +122,16 @@ class NoInheritance { public: CHAI_HOST_DEVICE NoInheritance(int value) : m_value(value) {} - CHAI_HOST_DEVICE void scale(size_t numValues, int* values) { - for (size_t i = 0; i < numValues; ++i) { + CHAI_HOST_DEVICE void scale(int numValues, int* values) { + for (int i = 0; i < numValues; ++i) { values[i] *= m_value; } } - CHAI_HOST_DEVICE void sumAndScale(size_t numValues, int* values, int& value) { + CHAI_HOST_DEVICE void sumAndScale(int numValues, int* values, int& value) { int result = 0; - for (size_t i = 0; i < numValues; ++i) { + for (int i 
= 0; i < numValues; ++i) { result += values[i]; } @@ -140,7 +142,7 @@ class NoInheritance { int m_value = -1; }; -template +template class ClassWithSize { private: char m_values[N]; @@ -161,10 +163,10 @@ static void benchmark_use_managed_ptr_cpu(benchmark::State& state) { chai::managed_ptr object = chai::make_managed(2); - size_t numValues = 100; + int numValues = 100; int* values = (int*) malloc(100 * sizeof(int)); - for (size_t i = 0; i < numValues; ++i) { + for (int i = 0; i < numValues; ++i) { values[i] = i * i; } @@ -177,7 +179,9 @@ static void benchmark_use_managed_ptr_cpu(benchmark::State& state) } object.free(); +#ifdef __CUDACC__ cudaDeviceSynchronize(); +#endif } BENCHMARK(benchmark_use_managed_ptr_cpu); @@ -187,10 +191,10 @@ static void benchmark_curiously_recurring_template_pattern_cpu(benchmark::State& { BaseCRTP* object = new DerivedCRTP(2); - size_t numValues = 100; + int numValues = 100; int* values = (int*) malloc(100 * sizeof(int)); - for (size_t i = 0; i < numValues; ++i) { + for (int i = 0; i < numValues; ++i) { values[i] = i * i; } @@ -209,10 +213,10 @@ static void benchmark_no_inheritance_cpu(benchmark::State& state) { NoInheritance* object = new NoInheritance(2); - size_t numValues = 100; + int numValues = 100; int* values = (int*) malloc(100 * sizeof(int)); - for (size_t i = 0; i < numValues; ++i) { + for (int i = 0; i < numValues; ++i) { values[i] = i * i; } @@ -228,11 +232,11 @@ BENCHMARK(benchmark_no_inheritance_cpu); #if defined(CHAI_ENABLE_CUDA) || defined(CHAI_ENABLE_HIP) -template +template __global__ void copy_kernel(ClassWithSize) {} // Benchmark how long it takes to copy a class to the GPU -template +template static void benchmark_pass_copy_to_gpu(benchmark::State& state) { ClassWithSize helper; @@ -248,7 +252,7 @@ BENCHMARK_TEMPLATE(benchmark_pass_copy_to_gpu, 64); BENCHMARK_TEMPLATE(benchmark_pass_copy_to_gpu, 512); BENCHMARK_TEMPLATE(benchmark_pass_copy_to_gpu, 4096); -template +template static void benchmark_copy_to_gpu(benchmark::State& state) { ClassWithSize* cpuPointer = new ClassWithSize(); @@ -273,17 +277,17 @@ BENCHMARK_TEMPLATE(benchmark_copy_to_gpu, 262144); BENCHMARK_TEMPLATE(benchmark_copy_to_gpu, 2097152); // Benchmark how long it takes to call placement new on the GPU -template +template __global__ void placement_new_kernel(ClassWithSize* address) { (void) new(address) ClassWithSize(); } -template +template __global__ void placement_delete_kernel(ClassWithSize* address) { address->~ClassWithSize(); } -template +template static void benchmark_placement_new_on_gpu(benchmark::State& state) { while (state.KeepRunning()) { @@ -305,17 +309,17 @@ BENCHMARK_TEMPLATE(benchmark_placement_new_on_gpu, 262144); BENCHMARK_TEMPLATE(benchmark_placement_new_on_gpu, 2097152); // Benchmark how long it takes to call new on the GPU -template +template __global__ void create_kernel(ClassWithSize** address) { *address = new ClassWithSize(); } -template +template __global__ void delete_kernel(ClassWithSize** address) { delete *address; } -template +template static void benchmark_new_on_gpu(benchmark::State& state) { while (state.KeepRunning()) { @@ -337,12 +341,12 @@ BENCHMARK_TEMPLATE(benchmark_new_on_gpu, 262144); BENCHMARK_TEMPLATE(benchmark_new_on_gpu, 2097152); // Benchmark current approach -template +template __global__ void delete_kernel_2(ClassWithSize* address) { delete address; } -template +template static void benchmark_new_on_gpu_and_copy_to_host(benchmark::State& state) { while (state.KeepRunning()) { @@ -368,12 +372,12 @@ 
BENCHMARK_TEMPLATE(benchmark_new_on_gpu_and_copy_to_host, 262144); BENCHMARK_TEMPLATE(benchmark_new_on_gpu_and_copy_to_host, 2097152); // Benchmark how long it takes to create a stack object on the GPU -template +template __global__ void create_on_stack_kernel() { (void) ClassWithSize(); } -template +template static void benchmark_create_on_stack_on_gpu(benchmark::State& state) { while (state.KeepRunning()) { @@ -391,15 +395,15 @@ BENCHMARK_TEMPLATE(benchmark_create_on_stack_on_gpu, 262144); BENCHMARK_TEMPLATE(benchmark_create_on_stack_on_gpu, 2097152); // Use managed_ptr -__global__ void fill(size_t numValues, int* values) { - size_t i = blockIdx.x * blockDim.x + threadIdx.x; +__global__ void fill(int numValues, int* values) { + int i = blockIdx.x * blockDim.x + threadIdx.x; if (i < numValues) { values[i] = i * i; } } -__global__ void square(chai::managed_ptr object, size_t numValues, int* values) { +__global__ void square(chai::managed_ptr object, int numValues, int* values) { object->scale(numValues, values); } @@ -407,7 +411,7 @@ void benchmark_use_managed_ptr_gpu(benchmark::State& state) { chai::managed_ptr object = chai::make_managed(2); - size_t numValues = 100; + int numValues = 100; int* values; cudaMalloc(&values, numValues * sizeof(int)); fill<<<1, 100>>>(numValues, values); @@ -428,7 +432,7 @@ BENCHMARK(benchmark_use_managed_ptr_gpu); // Curiously recurring template pattern -__global__ void square(BaseCRTP object, size_t numValues, int* values) { +__global__ void square(BaseCRTP object, int numValues, int* values) { object.scale(numValues, values); } @@ -437,7 +441,7 @@ void benchmark_curiously_recurring_template_pattern_gpu(benchmark::State& state) BaseCRTP* derivedCRTP = new DerivedCRTP(2); auto object = *derivedCRTP; - size_t numValues = 100; + int numValues = 100; int* values; cudaMalloc(&values, numValues * sizeof(int)); fill<<<1, 100>>>(numValues, values); @@ -457,7 +461,7 @@ void benchmark_curiously_recurring_template_pattern_gpu(benchmark::State& state) BENCHMARK(benchmark_curiously_recurring_template_pattern_gpu); // Class without inheritance -__global__ void square(NoInheritance object, size_t numValues, int* values) { +__global__ void square(NoInheritance object, int numValues, int* values) { object.scale(numValues, values); } @@ -466,7 +470,7 @@ void benchmark_no_inheritance_gpu(benchmark::State& state) NoInheritance* noInheritance = new NoInheritance(2); auto object = *noInheritance; - size_t numValues = 100; + int numValues = 100; int* values; cudaMalloc(&values, numValues * sizeof(int)); fill<<<1, 100>>>(numValues, values); @@ -485,7 +489,7 @@ void benchmark_no_inheritance_gpu(benchmark::State& state) BENCHMARK(benchmark_no_inheritance_gpu); -__global__ void square(size_t numValues, int* values, chai::managed_ptr object) { +__global__ void square(int numValues, int* values, chai::managed_ptr object) { int i = blockIdx.x * blockDim.x + threadIdx.x; if (i < numValues) { @@ -495,7 +499,7 @@ __global__ void square(size_t numValues, int* values, chai::managed_ptr ob } // managed_ptr (bulk) -template +template void benchmark_bulk_use_managed_ptr_gpu(benchmark::State& state) { chai::managed_ptr object = chai::make_managed(2); @@ -533,7 +537,7 @@ BENCHMARK_TEMPLATE(benchmark_bulk_use_managed_ptr_gpu, 1048576); BENCHMARK_TEMPLATE(benchmark_bulk_use_managed_ptr_gpu, 2097152); // Curiously recurring template pattern -__global__ void square(size_t numValues, int* values, BaseCRTP object) { +__global__ void square(int numValues, int* values, BaseCRTP object) { int i = 
blockIdx.x * blockDim.x + threadIdx.x; if (i < numValues) { @@ -542,7 +546,7 @@ __global__ void square(size_t numValues, int* values, BaseCRTP obje } } -template +template void benchmark_bulk_curiously_recurring_template_pattern_gpu(benchmark::State& state) { BaseCRTP* derivedCRTP = new DerivedCRTP(2); @@ -581,7 +585,7 @@ BENCHMARK_TEMPLATE(benchmark_bulk_curiously_recurring_template_pattern_gpu, 1048 BENCHMARK_TEMPLATE(benchmark_bulk_curiously_recurring_template_pattern_gpu, 2097152); // Class without inheritance -__global__ void square(size_t numValues, int* values, NoInheritance object) { +__global__ void square(int numValues, int* values, NoInheritance object) { int i = blockIdx.x * blockDim.x + threadIdx.x; if (i < numValues) { @@ -590,7 +594,7 @@ __global__ void square(size_t numValues, int* values, NoInheritance object) { } } -template +template void benchmark_bulk_no_inheritance_gpu(benchmark::State& state) { NoInheritance* noInheritance = new NoInheritance(2); @@ -631,14 +635,14 @@ BENCHMARK_TEMPLATE(benchmark_bulk_no_inheritance_gpu, 2097152); #endif // managed_ptr -template +template static void benchmark_bulk_polymorphism_cpu(benchmark::State& state) { Base* object = new Derived(2); int* values = (int*) malloc(N * sizeof(int)); - for (size_t i = 0; i < N; ++i) { + for (int i = 0; i < N; ++i) { values[i] = i * i; } @@ -676,14 +680,14 @@ BENCHMARK_TEMPLATE(benchmark_bulk_polymorphism_cpu, 1048576); BENCHMARK_TEMPLATE(benchmark_bulk_polymorphism_cpu, 2097152); // managed_ptr -template +template static void benchmark_bulk_use_managed_ptr_cpu(benchmark::State& state) { chai::managed_ptr object = chai::make_managed(2); int* values = (int*) malloc(N * sizeof(int)); - for (size_t i = 0; i < N; ++i) { + for (int i = 0; i < N; ++i) { values[i] = i * i; } @@ -699,7 +703,9 @@ static void benchmark_bulk_use_managed_ptr_cpu(benchmark::State& state) } object.free(); +#ifdef __CUDACC__ cudaDeviceSynchronize(); +#endif } BENCHMARK_TEMPLATE(benchmark_bulk_use_managed_ptr_cpu, 1); @@ -719,14 +725,14 @@ BENCHMARK_TEMPLATE(benchmark_bulk_use_managed_ptr_cpu, 1048576); BENCHMARK_TEMPLATE(benchmark_bulk_use_managed_ptr_cpu, 2097152); // Curiously recurring template pattern -template +template static void benchmark_bulk_curiously_recurring_template_pattern_cpu(benchmark::State& state) { BaseCRTP* object = new DerivedCRTP(2); int* values = (int*) malloc(N * sizeof(int)); - for (size_t i = 0; i < N; ++i) { + for (int i = 0; i < N; ++i) { values[i] = i * i; } @@ -758,14 +764,14 @@ BENCHMARK_TEMPLATE(benchmark_bulk_curiously_recurring_template_pattern_cpu, 1048 BENCHMARK_TEMPLATE(benchmark_bulk_curiously_recurring_template_pattern_cpu, 2097152); // Class without inheritance -template +template static void benchmark_bulk_no_inheritance_cpu(benchmark::State& state) { NoInheritance* object = new NoInheritance(2); int* values = (int*) malloc(N * sizeof(int)); - for (size_t i = 0; i < N; ++i) { + for (int i = 0; i < N; ++i) { values[i] = i * i; } From b77b805f4848dc1d1830d1fa1768ae862b0215c7 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Thu, 5 Dec 2019 16:19:31 -0800 Subject: [PATCH 56/58] Fix memory leaks in benchmarks --- benchmarks/chai_managed_ptr_benchmarks.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/benchmarks/chai_managed_ptr_benchmarks.cpp b/benchmarks/chai_managed_ptr_benchmarks.cpp index f23611e3..57f95db1 100644 --- a/benchmarks/chai_managed_ptr_benchmarks.cpp +++ b/benchmarks/chai_managed_ptr_benchmarks.cpp @@ -178,7 +178,9 @@ static void 
benchmark_use_managed_ptr_cpu(benchmark::State& state) object->scale(numValues, values); } + free(values); object.free(); + #ifdef __CUDACC__ cudaDeviceSynchronize(); #endif @@ -657,7 +659,9 @@ static void benchmark_bulk_polymorphism_cpu(benchmark::State& state) } } + free(values); delete object; + #ifdef __CUDACC__ cudaDeviceSynchronize(); #endif @@ -702,7 +706,9 @@ static void benchmark_bulk_use_managed_ptr_cpu(benchmark::State& state) } } + free(values); object.free(); + #ifdef __CUDACC__ cudaDeviceSynchronize(); #endif From 804ae035295e061b32dee4099ee7953d41223774 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Thu, 5 Dec 2019 16:33:45 -0800 Subject: [PATCH 57/58] Add capability to turn on/off cuda synchronizes --- src/chai/ArrayManager.hpp | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/chai/ArrayManager.hpp b/src/chai/ArrayManager.hpp index beb38205..ddca8220 100644 --- a/src/chai/ArrayManager.hpp +++ b/src/chai/ArrayManager.hpp @@ -237,6 +237,21 @@ class ArrayManager */ void disableCallbacks() { m_callbacks_active = false; } + /*! + * \brief Turn on device synchronization after every kernel. + */ + void enableDeviceSynchronize() { m_device_synchronize = true; } + + /*! + * \brief Turn off device synchronization after every kernel. + */ + void disableDeviceSynchronize() { m_device_synchronize = false; } + + /*! + * \brief Turn on device synchronization after every kernel. + */ + bool deviceSynchronize() { return m_device_synchronize; } + protected: /*! * \brief Construct a new ArrayManager. @@ -318,6 +333,11 @@ class ArrayManager * \brief Controls whether or not callbacks are called. */ bool m_callbacks_active; + + /*! + * Whether or not to synchronize on device after every CHAI kernel. + */ + bool m_device_synchronize = false; }; } // end of namespace chai From a1a46e546bdeadeb7b40ed32d96a2423bbfea689 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Thu, 5 Dec 2019 16:45:34 -0800 Subject: [PATCH 58/58] Fix nvcc build warnings --- benchmarks/chai_managed_ptr_benchmarks.cpp | 2 +- src/chai/managed_ptr.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/chai_managed_ptr_benchmarks.cpp b/benchmarks/chai_managed_ptr_benchmarks.cpp index 57f95db1..47bd5a88 100644 --- a/benchmarks/chai_managed_ptr_benchmarks.cpp +++ b/benchmarks/chai_managed_ptr_benchmarks.cpp @@ -55,7 +55,7 @@ class Base { CHAI_HOST_DEVICE virtual void sumAndScale(int numValues, int* values, int& value) = 0; - CHAI_HOST_DEVICE virtual ~Base() = default; + virtual ~Base() = default; }; class Derived : public Base { diff --git a/src/chai/managed_ptr.hpp b/src/chai/managed_ptr.hpp index 49ffa0e8..39dca4b8 100644 --- a/src/chai/managed_ptr.hpp +++ b/src/chai/managed_ptr.hpp @@ -173,7 +173,7 @@ namespace chai { /// /// Default constructor. /// - CHAI_HOST_DEVICE constexpr managed_ptr() noexcept = default; + constexpr managed_ptr() noexcept = default; /// /// @author Alan Dayton