diff --git a/.clang-format b/.clang-format new file mode 100644 index 00000000..891303ca --- /dev/null +++ b/.clang-format @@ -0,0 +1,5 @@ +Language: Cpp +IndentWidth: 4 +ColumnLimit: 80 +UseTab: Never +BreakBeforeBraces: Attach diff --git a/CMakeLists.txt b/CMakeLists.txt index bacc1ad5..f4c6ad27 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,9 +16,9 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR CHEETAH_STANDALONE_BUI set_property(GLOBAL PROPERTY USE_FOLDERS ON) set(PACKAGE_NAME Cheetah) - set(PACKAGE_VERSION 12.0.0) + set(PACKAGE_VERSION 14.0.6) set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}") - set(PACKAGE_BUGREPORT "bugs@opencilk.org") + set(PACKAGE_BUGREPORT "https://github.com/OpenCilk/cheetah/issues/") endif() # Require out of source build. @@ -58,6 +58,8 @@ option(CHEETAH_ENABLE_SHARED "Build cheetah as a shared library." ON) option(CHEETAH_ENABLE_STATIC "Build cheetah as a static library." ON) option(CHEETAH_ENABLE_ASAN "Build cheetah with ASan support." ON) +option(CHEETAH_EMULATE_TLS "Build Cheetah with emulated TLS. Necessary for some JITs." OFF) + set(CHEETAH_ABI_VERSION "1" CACHE STRING "ABI version of cheetah. 
Defaults to 1.") if (NOT CHEETAH_ENABLE_SHARED AND NOT CHEETAH_ENABLE_STATIC) @@ -109,6 +111,9 @@ if (CHEETAH_STANDALONE_BUILD) endif() set(LLVM_LIT_ARGS "${LIT_ARGS_DEFAULT}" CACHE STRING "Default options for lit") set(LLVM_LIT_OUTPUT_DIR "${CHEETAH_EXEC_OUTPUT_DIR}") + + # Define llvm-link path + set(LLVM_LINK_PATH "${LLVM_TOOLS_BINARY_DIR}/llvm-link") endif() construct_cheetah_default_triple() @@ -119,8 +124,13 @@ if ("${CHEETAH_DEFAULT_TARGET_TRIPLE}" MATCHES ".*hf$") endif() set(CHEETAH_C_FLAGS "") -set(CHEETAH_CXX_FLAGS "") -set(CHEETAH_COMPILE_FLAGS "") +if (CHEETAH_EMULATE_TLS) + set(CHEETAH_CXX_FLAGS "-femulated-tls") + set(CHEETAH_COMPILE_FLAGS "-femulated-tls") +else() + set(CHEETAH_CXX_FLAGS "") + set(CHEETAH_COMPILE_FLAGS "") +endif() set(CHEETAH_COMPILE_DEFS "") set(CHEETAH_LINK_FLAGS "") set(CHEETAH_COMMON_LIBS "") @@ -187,3 +197,32 @@ if (CHEETAH_INCLUDE_TESTS) # add_subdirectory(handcomp_test) # add_subdirectory(bench) endif() + +#=============================================================================== +# Setup CMAKE CONFIG PACKAGE +#=============================================================================== +make_directory(${CHEETAH_CMAKE_BUILDDIR}) +set(CHEETHA_LIBRARY_HOST "/lib/${LLVM_HOST_TARGET}") + +if(CHEETAH_INSTALL_LIBRARY) + set(CHEETAH_LIBRARY_DIR "") + if (LLVM_TREE_AVAILABLE) + set(CHEETAH_LIBRARY_DIR "${CMAKE_INSTALL_PREFIX}/${CHEETAH_LIBRARY_INSTALL_DIR}") + else() + set(CHEETAH_LIBRARY_DIR "${CMAKE_INSTALL_PREFIX}") + endif() + configure_file( + ${CMAKE_CURRENT_SOURCE_DIR}/cmake/CilkThreadsConfig.cmake.in + ${CHEETAH_CMAKE_BUILDDIR}/CilkThreadsConfig.cmake + NO_SOURCE_PERMISSIONS + @ONLY) +else() + set(CHEETAH_LIBRARY_DIR ${CHEETAH_LIBRARY_OUTPUT_DIR}) + configure_file( + ${CMAKE_CURRENT_SOURCE_DIR}/cmake/CilkThreadsConfig.cmake.in + ${CHEETAH_CMAKE_BUILDDIR}/CilkThreadsConfig.cmake + NO_SOURCE_PERMISSIONS + @ONLY) +endif() + +install(FILES ${CHEETAH_CMAKE_BUILDDIR}/CilkThreadsConfig.cmake DESTINATION 
${CHEETAH_CMAKE_INSTALLDIR}) diff --git a/cmake/CilkThreadsConfig.cmake.in b/cmake/CilkThreadsConfig.cmake.in new file mode 100644 index 00000000..fffa9c32 --- /dev/null +++ b/cmake/CilkThreadsConfig.cmake.in @@ -0,0 +1,5 @@ +add_library(CilkThreads::Threads INTERFACE IMPORTED) +set_property(TARGET CilkThreads::Threads PROPERTY INTERFACE_LINK_LIBRARIES "-lopencilk -lopencilk-personality-cpp -L@CHEETAH_LIBRARY_DIR@@CHEETHA_LIBRARY_HOST@") +set_property(TARGET CilkThreads::Threads + PROPERTY INTERFACE_COMPILE_OPTIONS "$<$:SHELL:-Xcompiler -lopencilk -lopencilk-personality-cpp -L@CHEETAH_LIBRARY_DIR@@CHEETHA_LIBRARY_HOST@>" + "$<$>:-lopencilk -lopencilk-personality-cpp -L@CHEETAH_LIBRARY_DIR@@CHEETHA_LIBRARY_HOST@>") diff --git a/cmake/Modules/AddCheetah.cmake b/cmake/Modules/AddCheetah.cmake index 34b95339..8ee51d51 100644 --- a/cmake/Modules/AddCheetah.cmake +++ b/cmake/Modules/AddCheetah.cmake @@ -466,10 +466,10 @@ function(add_cheetah_bitcode name) set(output_file_${libname} lib${output_name_${libname}}.bc) add_custom_command( OUTPUT ${output_dir_${libname}}/${output_file_${libname}} - COMMAND cp $ ${output_dir_${libname}}/${output_file_${libname}} + COMMAND ${LLVM_LINK_PATH} -o ${output_dir_${libname}}/${output_file_${libname}} $ DEPENDS ${libname}_compile $ COMMENT "Building bitcode ${output_file_${libname}}" - VERBATIM) + VERBATIM COMMAND_EXPAND_LISTS) add_custom_target(${libname} DEPENDS ${output_dir_${libname}}/${output_file_${libname}}) install(FILES ${output_dir_${libname}}/${output_file_${libname}} DESTINATION ${install_dir_${libname}} diff --git a/cmake/Modules/CheetahUtils.cmake b/cmake/Modules/CheetahUtils.cmake index 021627dd..56dda7ea 100644 --- a/cmake/Modules/CheetahUtils.cmake +++ b/cmake/Modules/CheetahUtils.cmake @@ -219,7 +219,7 @@ macro(load_llvm_config) endif() if (LLVM_CONFIG_PATH) execute_process( - COMMAND ${LLVM_CONFIG_PATH} "--obj-root" "--bindir" "--libdir" "--src-root" "--includedir" + COMMAND ${LLVM_CONFIG_PATH} "--obj-root" "--bindir" 
"--libdir" "--src-root" "--includedir" "--host-target" RESULT_VARIABLE HAD_ERROR OUTPUT_VARIABLE CONFIG_OUTPUT) if (HAD_ERROR) @@ -231,12 +231,14 @@ macro(load_llvm_config) list(GET CONFIG_OUTPUT 2 LIBRARY_DIR) list(GET CONFIG_OUTPUT 3 MAIN_SRC_DIR) list(GET CONFIG_OUTPUT 4 INCLUDE_DIR) + list(GET CONFIG_OUTPUT 5 HOST_TARGET) set(LLVM_BINARY_DIR ${BINARY_DIR} CACHE PATH "Path to LLVM build tree") set(LLVM_LIBRARY_DIR ${LIBRARY_DIR} CACHE PATH "Path to llvm/lib") set(LLVM_MAIN_SRC_DIR ${MAIN_SRC_DIR} CACHE PATH "Path to LLVM source tree") set(LLVM_TOOLS_BINARY_DIR ${TOOLS_BINARY_DIR} CACHE PATH "Path to llvm/bin") set(LLVM_INCLUDE_DIR ${INCLUDE_DIR} CACHE PATH "Paths to LLVM headers") + set(LLVM_HOST_TARGET ${HOST_TARGET} CACHE PATH "Host target from LLVM") # Detect if we have the LLVMXRay and TestingSupport library installed and # available from llvm-config. diff --git a/cmake/base-config-ix.cmake b/cmake/base-config-ix.cmake index 798affc3..7a54e93f 100644 --- a/cmake/base-config-ix.cmake +++ b/cmake/base-config-ix.cmake @@ -6,6 +6,7 @@ include(CheckIncludeFile) include(CheckCXXSourceCompiles) include(TestBigEndian) +include(CMakePushCheckState) check_include_file(unwind.h HAVE_UNWIND_H) @@ -44,6 +45,9 @@ if (LLVM_TREE_AVAILABLE) set(CHEETAH_OUTPUT_DIR ${LLVM_LIBRARY_OUTPUT_INTDIR}/clang/${CLANG_VERSION}) set(CHEETAH_EXEC_OUTPUT_DIR ${LLVM_RUNTIME_OUTPUT_INTDIR}) set(CHEETAH_INSTALL_PATH lib${LLVM_LIBDIR_SUFFIX}/clang/${CLANG_VERSION}) + set(CHEETAH_CMAKE_BUILDDIR ${LLVM_LIBRARY_OUTPUT_INTDIR}/cmake/clang) + set(CHEETAH_CMAKE_INSTALLDIR lib${LLVM_LIBDIR_SUFFIX}/cmake/clang) + option(CHEETAH_INCLUDE_TESTS "Generate and build cheetah unit tests." 
${LLVM_INCLUDE_TESTS}) option(CHEETAH_ENABLE_WERROR "Fail and stop if warning is triggered" @@ -71,6 +75,8 @@ else() "Path where built cheetah executables should be stored.") set(CHEETAH_INSTALL_PATH ${CMAKE_INSTALL_PREFIX} CACHE PATH "Path where built cheetah libraries should be installed.") + set(CHEETAH_CMAKE_BUILDDIR ${CMAKE_CURRENT_BINARY_DIR}/cmake) + set(CHEETAH_CMAKE_INSTALLDIR ${CMAKE_INSTALL_PREFIX}/cmake) option(CHEETAH_INCLUDE_TESTS "Generate and build cheetah unit tests." OFF) option(CHEETAH_ENABLE_WERROR "Fail and stop if warning is triggered" OFF) # Use a host compiler to compile/link tests. diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake index 87eafc3b..cab331f7 100644 --- a/cmake/config-ix.cmake +++ b/cmake/config-ix.cmake @@ -1,6 +1,7 @@ include(CheckLibraryExists) include(CheckCCompilerFlag) include(CheckCXXCompilerFlag) +include(CMakePushCheckState) function(check_linker_flag flag out_var) cmake_push_check_state() diff --git a/config.mk b/config.mk index 417e4863..514b6877 100644 --- a/config.mk +++ b/config.mk @@ -11,6 +11,7 @@ RTS_DIR=../runtime RTS_LIB=libopencilk RTS_C_PERSONALITY_LIB=libopencilk-personality-c RTS_CXX_PERSONALITY_LIB=libopencilk-personality-cpp +RTS_PEDIGREE_LIB=libopencilk-pedigrees # All runtime libraries and associated files will be placed in # `/oath/to/cheetah/lib/`, so that the compiler can easily find diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt index f0a4722f..8dbda7b8 100644 --- a/include/CMakeLists.txt +++ b/include/CMakeLists.txt @@ -3,23 +3,8 @@ set(cilk_header_files cilk/cilk_api.h cilk/cilk_stub.h cilk/holder.h - cilk/hyperobject_base.h - cilk/metaprogramming.h - cilk/reducer.h - cilk/reducer_file.h - cilk/reducer_list.h - cilk/reducer_max.h - cilk/reducer_min.h - cilk/reducer_min_max.h - cilk/reducer_opadd.h - cilk/reducer_opand.h - cilk/reducer_opmul.h - cilk/reducer_opor.h - cilk/reducer_opxor.h - cilk/reducer_ostream.h - cilk/reducer_string.h - cilk/reducer_vector.h -) + 
cilk/opadd_reducer.h + cilk/ostream_reducer.h) set(output_dir ${CHEETAH_OUTPUT_DIR}/include) set(out_files) diff --git a/include/cilk/cilk.h b/include/cilk/cilk.h index 900169a4..7014987a 100644 --- a/include/cilk/cilk.h +++ b/include/cilk/cilk.h @@ -6,4 +6,6 @@ #define cilk_for _Cilk_for #define cilk_scope _Cilk_scope +#define cilk_reducer _Hyperobject + #endif /* _CILK_H */ diff --git a/include/cilk/cilk_api.h b/include/cilk/cilk_api.h index 4f23dd5d..112c4471 100644 --- a/include/cilk/cilk_api.h +++ b/include/cilk/cilk_api.h @@ -1,5 +1,8 @@ #ifndef _CILK_API_H #define _CILK_API_H + +#include /* size_t */ + #ifdef __cplusplus extern "C" { #endif @@ -11,7 +14,6 @@ extern unsigned __cilkrts_get_nworkers(void); extern unsigned __cilkrts_get_worker_number(void) __attribute__((deprecated)); extern int __cilkrts_running_on_workers(void); -#if defined(__cilk_pedigrees__) || defined(ENABLE_CILKRTS_PEDIGREE) #include typedef struct __cilkrts_pedigree { uint64_t rank; @@ -19,10 +21,20 @@ typedef struct __cilkrts_pedigree { } __cilkrts_pedigree; extern __cilkrts_pedigree __cilkrts_get_pedigree(void); extern void __cilkrts_bump_worker_rank(void); +extern void __cilkrts_dprand_set_seed(uint64_t seed); +extern void __cilkrts_init_dprng(void); extern uint64_t __cilkrts_get_dprand(void); -#endif // defined(__cilk_pedigrees__) || defined(ENABLE_CILKRTS_PEDIGREE) -#undef VISIBILITY +typedef void (*__cilk_identity_fn)(void *); +typedef void (*__cilk_reduce_fn)(void *, void *); + +extern void *__cilkrts_reducer_lookup(void *key); +extern void __cilkrts_reducer_register(void *key, size_t size, + __cilk_identity_fn id, + __cilk_reduce_fn reduce) + __attribute__((deprecated)); +extern void __cilkrts_reducer_unregister(void *key) + __attribute__((deprecated)); #ifdef __cplusplus } diff --git a/include/cilk/holder.h b/include/cilk/holder.h index 80bd79da..2f36e770 100644 --- a/include/cilk/holder.h +++ b/include/cilk/holder.h @@ -1,1007 +1,25 @@ -/* - * Copyright (C) 2011-2018, Intel 
Corporation - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY - * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * ********************************************************************* - * - * PLEASE NOTE: This file is a downstream copy of a file maintained in - * a repository at cilkplus.org. Changes made to this file that are not - * submitted through the contribution process detailed at - * http://www.cilkplus.org/submit-cilk-contribution will be lost the next - * time that a new version is released. 
Changes only submitted to the - * GNU compiler collection or posted to the git repository at - * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime are - * not tracked. - * - * We welcome your contributions to this open source project. Thank you - * for your assistance in helping us improve Cilk Plus. - * - */ - -/* - * holder.h - * - * Purpose: hyperobject to provide different views of an object to each - * parallel strand. - */ - -#ifndef HOLDER_H_INCLUDED -#define HOLDER_H_INCLUDED - -#include -#include -#include +#ifndef _HOLDER_H +#define _HOLDER_H #ifdef __cplusplus -/* C++ Interface - * - * Classes: holder - * - * Description: - * ============ - * This component provides a hyperobject that isolates a parallel uses of a - * common variable where it is not necessary to preserve changes from - * different parallel strands. In effect, a holder acts a bit like - * thread-local storage, but has qualities that work better with the - * fork-join structure of Intel(R) Cilk(TM) Plus. In particular, a holder has the - * following qualities: - * - * - The view of a holder before the first spawn within a function is the same - * as the view after each sync (as in the case of a reducer). - * - The view of a holder within the first spawned child of a function (or the - * first child spawned after a sync) is the same as the view on entry to the - * function. - * - The view of a holder before entering a _Cilk_for loop is the same as the - * view during the first iteration of the loop and the view at the end of - * the loop. - * - The view of a holder in the continuation of a spawn or in an arbitrary - * iteration of a _Cilk_for loop is *non-deterministic*. It is generally - * recommended that the holder be explicitly put into a known state in these - * situations. - * - * A holder can be used as an alternative to parameter-passing. They are most - * useful for replacing non-local variables without massive refactoring. 
A - * holder takes advantage of the fact that, most of the time, a holder view - * does not change after a spawn or from one iteration of a parallel for loop - * to the next (i.e., stealing is the exception, not the rule). When the - * holder view is a large object that is expensive to construct, this - * optimization can save significant time versus creating a separate local - * object for each view. In addition, a holder using the "keep last" policy - * will have the same value after a sync as the serialization of the same - * program. The last quality will often allow the program to avoid - * recomputing a value. - * - * Usage Example: - * ============== - * Function 'compute()' is a complex function that computes a value using a - * memoized algorithm, storing intermediate results in a hash table. Compute - * calls several other functions, each of which calls several other functions, - * all of which share a global hash table. In all, there are over a dozen - * functions with a total of about 60 references to the hash table. - *.. - * hash_table memos; - * - * void h(const X& x); // Uses memos - * - * double compute(const X& x) - * { - * memos.clear(); - * // ... - * memos[i] = x; - * ... - * g(i); // Uses memos - * // ... - * std::for_each(c.begin(), c.end(), h); // Call h for each element of c - * } - * - * int main() - * { - * const std::size_t ARRAY_SIZE = 1000000; - * extern X myArray[ARRAY_SIZE]; - * - * for (std::size_t i = 0; i < ARRAY_SIZE; ++i) - * { - * compute(myArray[i]); - * } - * } - *.. - * We would like to replace the 'for' loop in 'main' with a 'cilk_for'. - * Although the hash table is cleared on entry to each call to 'compute()', - * and although the values stored in the hash table are no longer used after - * 'compute()' returns, the use of the hash table as a global variable - * prevents 'compute()' from being called safely in parallel. 
One way to do - * this would be to make 'memos' a private variable within the cilk_for loop - * and pass it down to the actual computation, so that each loop iteration has - * its own private copy: - *.. - * cilk_for (std::size_t i = 0; i < ARRAY_SIZE; ++i) - * { - * hash_table memos; - * compute(myArray[i], memos); - * } - *.. - * The problem with this approach is that it requires changing the signature - * of 'compute', 'h', 'g', and every one of the dozen or so functions that - * reference 'memos' as well as any function that calls those functions. This - * may break the abstraction of 'compute' and other functions, exposing an - * implementation detail that was not part of the interface. In addition, the - * function 'h' is called through a templated algorithm, 'for_each', which - * requires a fixed interface. Finally, there is constructor and destructor - * overhead for 'hash_table' each time through the loop. - * - * The alternative approach is to replace 'memos' with a holder. The holder - * would be available to all of the functions involved, but would not cause a - * race between parallel loop iterations. In order to make this work, each - * use of the 'memos' variable must be (mechanically) replaced by a use of the - * holder: - *.. - * cilk::holder > memos_h; - * - * void h(const X& x); // Uses memos_h - * - * double compute(const X& x) - * { - * memos_h().clear(); // operator() used to "dereference" the holder - * // ... - * memos_h()[i] = x; // operator() used to "dereference" the holder - * ... - * g(i); // Uses memos_h - * // ... - * std::for_each(c.begin(), c.end(), h); // Call h for each element of c - * } - *.. - * Note that each reference to the holder must be modified with an empty pair - * of parenthesis. This syntax is needed because there is no facility in C++ - * for a "smart reference" that would allow 'memos_h' to be a perfect - * replacement for 'memos'. 
One way that a user can avoid this syntax change - * is to wrap the holder in a class that has the same inteface as - * 'hash_table' but redirects all calls to the holder: - *.. - * template - * class hash_table_holder - * { - * private: - * cilk::holder > m_holder; - * public: - * void clear() { m_holder().clear(); } - * V& operator[](const K& x) { return m_holder()[x]; } - * std::size_t size() const { return m_holder().size(); } - * // etc. ... - * }; - *.. - * Using the above wrapper, the original code can be left unchanged except for - * replacing 'hash_table' with 'hash_table_holder' and replacing 'for' with - * 'cilk_for': - *.. - * hash_table_holder memos; - * - * void h(const X& x); // Uses memos - * - * double compute(const X& x) - * { - * memos.clear(); // Calls hash_table_holder::clear(). - * // ... - * } - *.. - * The above changes have no benefit over the use of thread-local storage. - * What if one of the functions has a 'cilk_spawn', however? - *.. - * void h(const X& x) - * { - * Y y = x.nested(); - * double d, w; - * if (y) - * { - * w = cilk_spawn compute_width(y); // May use 'memos' - * d = compute_depth(y); // Does not use 'memos' - * cilk_sync; - * compute(y); // recursive call. Uses 'memos'. - * } - * } - *.. - * In the above example, the view of the holder within 'compute_width' is the - * same as the view on entry to 'h'. More importantly, the view of the holder - * within the recursive call to 'compute' is the same as the view on entry to - * 'h', even if a different worker is executing the recursive call. Thus, the - * holder view within a Intel Cilk Plus program has useful qualities not found in - * thread-local storage. - */ +#include namespace cilk { - - /** - * After a sync, the value stored in a holder matches the most recent - * value stored into the holder by one of the starnds entering the sync. 
- * The holder policy used to instantiate the holder determines which of - * the entering strands determines the final value of the holder. A policy - * of 'holder_keep_indeterminate' (the default) is the most efficient, and - * results in an indeterminate value depending on the runtime schedule - * (see below for more specifics). An indeterminate value after a sync is - * often acceptable, especially if the value of the holder is not reused - * after the sync. All of the remaining policies retain the value of the - * last strand that would be executed in the serialization of the program. - * They differ in the mechanism used to move the value from one view to - * another. A policy of 'holder_keep_last_copy' moves values by - * copy-assignment. A policy of 'holder_keep_last_swap' moves values by - * calling 'swap'. A policy of 'holder_keep_last_move' is available only - * for compilers that support C++0x rvalue references and moves values by - * move-assignment. A policy of 'holder_keep_last' attempts to choose the - * most efficient mechanism: member-function 'swap' if the view type - * supports it, otherwise move-assignment if supported, otherwise - * copy-assignment. (The swap member function for a class that provides - * one is almost always as fast or faster than move-assignment or - * copy-assignment.) - * - * The behavior of 'holder_keep_indeterminate', while indeterminate, is - * not random and can be used for advanced programming or debugging. With - * a policy of 'holder_keep_intermediate', values are never copied or - * moved between views. The value of the view after a sync is the same as - * the value set in the last spawned child before a steal occurs or the - * last value set in the continuation if no steal occurs. Using this - * knowledge, a programmer can use a holder to detect the earliest steal - * in a piece of code. 
An indeterminate holder is also useful for keeping - * cached data similar to the way some applications might use thread-local - * storage. - */ - enum holder_policy { - holder_keep_indeterminate, - holder_keep_last, - holder_keep_last_copy, - holder_keep_last_swap, -#ifdef __CILKRTS_RVALUE_REFERENCES - holder_keep_last_move -#endif - }; - - namespace internal { - - // Private special-case holder policy using the swap member-function - const holder_policy holder_keep_last_member_swap = - (holder_policy) (holder_keep_last_swap | 0x10); - - /* The constant, 'has_member_swap::value', will be 'true' if 'T' - * has a non-static member function with prototype 'void swap(T&)'. - * The mechanism used to detect 'swap' is the most portable among - * present-day compilers, but is not the most robust. Specifically, - * the prototype for 'swap' must exactly match 'void swap(T&)'. - * Near-matches like a 'swap' function that returns 'int' instead of - * 'void' will not be detected. Detection will also fail if 'T' - * inherits 'swap' from a base class. - */ - template - class has_member_swap - { - // This technique for detecting member functions was described by - // Rani Sharoni in comp.lang.c++.moderated: - // http://groups.google.com/group/comp.lang.c++.moderated/msg/2b06b2432fddfb60 - - // sizeof(notchar) is guaranteed larger than 1 - struct notchar { char x[2]; }; - - // Instantiationg Q will fail unless U contains a - // non-static member with prototype 'void swap(U&)'. - template struct Q { }; - - // First 'test' is preferred overload if U::swap exists with the - // correct prototype. Second 'test' is preferred overload - // otherwise. - template static char test(Q*); - template static notchar test(...); - - public: - /// 'value' will be true if T has a non-static member function - /// with prototype 'void swap(T&)'. 
- static const bool value = (1 == sizeof(test(0))); - }; - - template const bool has_member_swap::value; - - /** - * @brief Utility class for exception safety. - * - * The constuctor for this class takes a pointer and an allocator and - * holds on to them. The destructor deallocates the pointed-to - * object, without calling its destructor, typically to recover memory - * in case an exception is thrown. The release member clears the - * pointer so that the deallocation is prevented, i.e., when the - * exception danger has passed. The behavior of this class is similar - * to auto_ptr and unique_ptr. - */ - template > - class auto_deallocator - { - Allocator m_alloc; - Type* m_ptr; - - // Non-copiable - auto_deallocator(const auto_deallocator&); - auto_deallocator& operator=(const auto_deallocator&); - - public: - /// Constructor - explicit auto_deallocator(Type* p, const Allocator& a = Allocator()) - : m_alloc(a), m_ptr(p) { } - - /// Destructor - free allocated resources - ~auto_deallocator() { if (m_ptr) m_alloc.deallocate(m_ptr, 1); } - - /// Remove reference to resource - void release() { m_ptr = 0; } - }; - - /** - * Pure-abstract base class to initialize holder views - */ - template - class init_base - { - public: - virtual ~init_base() { } - virtual init_base* clone_self(Allocator& a) const = 0; - virtual void delete_self(Allocator& a) = 0; - virtual void construct_view(Type* p, Allocator& a) const = 0; - }; - - /** - * Class to default-initialize a holder view - */ - template - class default_init : public init_base - { - typedef init_base base; - - /// Private constructor (called from static make() function). 
- default_init() { } - - // Non-copiable - default_init(const default_init&); - default_init& operator=(const default_init&); - - public: - // Static factory function - static default_init* make(Allocator& a); - - // Virtual function overrides - virtual ~default_init(); - virtual base* clone_self(Allocator& a) const; - virtual void delete_self(Allocator& a); - virtual void construct_view(Type* p, Allocator& a) const; - }; - - template - default_init* - default_init::make(Allocator&) - { - // Return a pointer to a singleton. All instances of this class - // are identical, so we need only one. - static default_init self; - return &self; - } - - template - default_init::~default_init() - { - } - - template - init_base* - default_init::clone_self(Allocator& a) const - { - return make(a); - } - - template - void default_init::delete_self(Allocator&) - { - // Since make() returned a shared singleton, there is nothing to - // delete here. - } - - template - void - default_init::construct_view(Type* p, - Allocator&) const - { - ::new((void*) p) Type(); - // TBD: In a C++0x library, this should be rewritten - // std::allocator_traits::construct(a, p); - } - - /** - * Class to copy-construct a view from a stored exemplar. - */ - template - class exemplar_init : public init_base - { - typedef init_base base; - - Type* m_exemplar; - - // Private constructors (called from make() functions). 
- exemplar_init(const Type& val, Allocator& a); -#ifdef __CILKRTS_RVALUE_REFERENCES - exemplar_init(Type&& val, Allocator& a); -#endif - - // Non-copyiable - exemplar_init(const exemplar_init&); - exemplar_init& operator=(const exemplar_init&); - - public: - // Static factory functions - static exemplar_init* make(const Type& val, - Allocator& a = Allocator()); -#ifdef __CILKRTS_RVALUE_REFERENCES - static exemplar_init* make(Type&& val, - Allocator& a = Allocator()); -#endif - - // Virtual function overrides - virtual ~exemplar_init(); - virtual base* clone_self(Allocator& a) const; - virtual void delete_self(Allocator& a); - virtual void construct_view(Type* p, Allocator& a) const; - }; - - template - exemplar_init::exemplar_init(const Type& val, - Allocator& a) - { - m_exemplar = a.allocate(1); - auto_deallocator guard(m_exemplar, a); - a.construct(m_exemplar, val); - guard.release(); - } - -#ifdef __CILKRTS_RVALUE_REFERENCES - template - exemplar_init::exemplar_init(Type&& val, - Allocator& a) - { - m_exemplar = a.allocate(1); - auto_deallocator guard(m_exemplar, a); - a.construct(m_exemplar, std::forward(val)); - guard.release(); - } -#endif - - template - exemplar_init* - exemplar_init::make(const Type& val, - Allocator& a) - { - typedef typename Allocator::template rebind::other - self_alloc_t; - self_alloc_t alloc(a); - - exemplar_init *self = alloc.allocate(1); - auto_deallocator guard(self, alloc); - - // Don't use allocator to construct self. Allocator should be - // used only on elements of type 'Type'. - ::new((void*) self) exemplar_init(val, a); - - guard.release(); - - return self; - } - -#ifdef __CILKRTS_RVALUE_REFERENCES - template - exemplar_init* - exemplar_init::make(Type&& val, - Allocator& a) - { - typedef typename Allocator::template rebind::other - self_alloc_t; - self_alloc_t alloc(a); - - exemplar_init *self = alloc.allocate(1); - auto_deallocator guard(self, alloc); - - // Don't use allocator to construct self. 
Allocator should be - // used only on elements of type 'Type'. - ::new((void*) self) exemplar_init(std::forward(val), a); - - guard.release(); - - return self; - } -#endif - - template - exemplar_init::~exemplar_init() - { - // Called only by delete_self, which deleted the exemplar using an - // allocator. - } - - template - init_base* - exemplar_init::clone_self(Allocator& a) const - { - return make(*m_exemplar, a); - } - - template - void exemplar_init::delete_self(Allocator& a) - { - typename Allocator::template rebind::other alloc(a); - - a.destroy(m_exemplar); - a.deallocate(m_exemplar, 1); - m_exemplar = 0; - - this->~exemplar_init(); - alloc.deallocate(this, 1); - } - - template - void - exemplar_init::construct_view(Type* p, - Allocator& a) const - { - a.construct(p, *m_exemplar); - // TBD: In a C++0x library, this should be rewritten - // std::allocator_traits::construct(a, p, *m_exemplar); - } - - /** - * Class to construct a view using a stored functor. The functor, - * 'f', must be be invokable using the expression 'Type x = f()'. 
- */ - template - class functor_init : - public init_base - { - typedef typename Allocator::value_type value_type; - typedef init_base base; - typedef typename Allocator::template rebind::other f_alloc; - - Func *m_functor; - - /// Private constructors (called from make() functions - functor_init(const Func& f, Allocator& a); -#ifdef __CILKRTS_RVALUE_REFERENCES - functor_init(Func&& f, Allocator& a); -#endif - - // Non-copiable - functor_init(const functor_init&); - functor_init& operator=(const functor_init&); - - public: - // Static factory functions - static functor_init* make(const Func& val, - Allocator& a = Allocator()); -#ifdef __CILKRTS_RVALUE_REFERENCES - static functor_init* make(Func&& val, - Allocator& a = Allocator()); -#endif - - // Virtual function overrides - virtual ~functor_init(); - virtual base* clone_self(Allocator& a) const; - virtual void delete_self(Allocator& a); - virtual void - construct_view(value_type* p, Allocator& a) const; - }; - - /// Specialization to strip off reference from 'Func&'. - template - struct functor_init - : functor_init { }; - - /// Specialization to strip off reference and cvq from 'const Func&'. 
- template - struct functor_init - : functor_init { }; - - template - functor_init::functor_init(const Func& f, - Allocator& a) - { - f_alloc alloc(a); - - m_functor = alloc.allocate(1); - auto_deallocator guard(m_functor, alloc); - alloc.construct(m_functor, f); - guard.release(); - } - -#ifdef __CILKRTS_RVALUE_REFERENCES - template - functor_init::functor_init(Func&& f, - Allocator& a) - { - f_alloc alloc(a); - - m_functor = alloc.allocate(1); - auto_deallocator guard(m_functor, alloc); - alloc.construct(m_functor, std::forward(f)); - guard.release(); - } -#endif - - template - functor_init* - functor_init::make(const Func& f, Allocator& a) - { - typedef typename Allocator::template rebind::other - self_alloc_t; - self_alloc_t alloc(a); - - functor_init *self = alloc.allocate(1); - auto_deallocator guard(self, alloc); - - // Don't use allocator to construct self. Allocator should be - // used only on elements of type 'Func'. - ::new((void*) self) functor_init(f, a); - - guard.release(); - - return self; - } - -#ifdef __CILKRTS_RVALUE_REFERENCES - template - functor_init* - functor_init::make(Func&& f, Allocator& a) - { - typedef typename Allocator::template rebind::other - self_alloc_t; - self_alloc_t alloc(a); - - functor_init *self = alloc.allocate(1); - auto_deallocator guard(self, alloc); - - // Don't use allocator to construct self. Allocator should be - // used only on elements of type 'Func'. - ::new((void*) self) functor_init(std::forward(f), a); - - guard.release(); - - return self; - } -#endif - - template - functor_init::~functor_init() - { - // Called only by delete_self, which deleted the functor using an - // allocator. 
- } - - template - init_base* - functor_init::clone_self(Allocator& a) const - { - return make(*m_functor, a); - } - - template - inline - void functor_init::delete_self(Allocator& a) - { - typename Allocator::template rebind::other alloc(a); - f_alloc fa(a); - - fa.destroy(m_functor); - fa.deallocate(m_functor, 1); - m_functor = 0; - - this->~functor_init(); - alloc.deallocate(this, 1); - } - - template - void functor_init::construct_view(value_type* p, - Allocator& a) const - { - a.construct(p, (*m_functor)()); - // In C++0x, the above should be written - // std::allocator_traits::construct(a, p, m_functor()); - } - - /** - * Functor called to reduce a holder - */ - template - struct holder_reduce_functor; - - /** - * Specialization to keep the left (first) value. - */ - template - struct holder_reduce_functor - { - void operator()(Type* left, Type* right) const { } - }; - - /** - * Specialization to copy-assign from the right (last) value. - */ - template - struct holder_reduce_functor - { - void operator()(Type* left, Type* right) const { - *left = *right; - } - }; - - /* - * Specialization to keep the right (last) value via swap. - */ - template - struct holder_reduce_functor - { - void operator()(Type* left, Type* right) const { - using std::swap; - swap(*left, *right); - } - }; - -#ifdef __CILKRTS_RVALUE_REFERENCES - /* - * Specialization to move-assign from the right (last) value. - */ - template - struct holder_reduce_functor - { - void operator()(Type* left, Type* right) const { - *left = std::move(*right); - } - }; -#endif - - /* - * Specialization to keep the right (last) value via the swap member - * function. - */ - template - struct holder_reduce_functor - { - void operator()(Type* left, Type* right) const { - left->swap(*right); - } - }; - - /* - * Specialization to keep the right (last) value by the most efficient - * means detectable. - */ - template - struct holder_reduce_functor : - holder_reduce_functor::value ? 
- holder_keep_last_member_swap : -#ifdef __CILKRTS_RVALUE_REFERENCES - holder_keep_last_move -#else - holder_keep_last_copy -#endif - )> - { - }; - } // end namespace internal - - /** - * Monoid for holders. - * Allocator type is required to be thread-safe. - */ - template > - class holder_monoid : public monoid_base - { - // Allocator is mutable because the copy of the monoid inside the - // reducer is const (to avoid races on the shared state). However, - // the allocator is required to be thread-safe, so it is ok (and - // necessary) to modify. - mutable Allocator m_allocator; - internal::init_base *m_initializer; - - public: - /// This constructor uses default-initialization for both the leftmost - /// view and each identity view. - holder_monoid(const Allocator& a = Allocator()) - : m_allocator(a) - , m_initializer( - internal::default_init::make(m_allocator)) - { } - - /// These constructors use 'val' as an exemplar to copy-construct both - /// the leftmost view and each identity view. - holder_monoid(const Type& val, const Allocator& a = Allocator()) - : m_allocator(a) - , m_initializer(internal::exemplar_init::make( - val, m_allocator)) { } - /// This constructor uses 'f' as a functor to construct both - /// the leftmost view and each identity view. 
- template - holder_monoid(const Func& f, const Allocator& a = Allocator()) - : m_allocator(a) - , m_initializer( - internal::functor_init::make(f,m_allocator)) - { } - - /// Copy constructor - holder_monoid(const holder_monoid& rhs) - : m_allocator(rhs.m_allocator) - , m_initializer(rhs.m_initializer->clone_self(m_allocator)) { } - - /// "Extended" copy constructor with allocator - holder_monoid(const holder_monoid& rhs, const Allocator& a) - : m_allocator(a) - , m_initializer(rhs.m_initializer->clone_self(m_allocator)) { } - -#ifdef __CILKRTS_RVALUE_REFERENCES - /// Move constructor - holder_monoid(holder_monoid&& rhs) - : m_allocator(rhs.m_allocator) - , m_initializer(rhs.m_initializer) { - rhs.m_initializer = - internal::default_init::make(m_allocator); - } - - /// "Extended" move constructor with allocator - holder_monoid(holder_monoid&& rhs, const Allocator& a) - : m_allocator(a) - , m_initializer(0) { - if (a != rhs.m_allocator) - m_initializer = rhs.m_initializer->clone_self(a); - else { - m_initializer = rhs.m_initializer; - rhs.m_initializer = - internal::default_init::make(m_allocator); - } - } -#endif - /// Destructor - ~holder_monoid() { m_initializer->delete_self(m_allocator); } - - holder_monoid& operator=(const holder_monoid& rhs) { - if (this == &rhs) return *this; - m_initializer->delete_self(m_allocator); - m_initializer = rhs.m_initializer->clone_self(m_allocator); - } - -#ifdef __CILKRTS_RVALUE_REFERENCES - holder_monoid& operator=(holder_monoid&& rhs) { - if (m_allocator != rhs.m_allocator) - // Delegate to copy-assignment on unequal allocators - return operator=(static_cast(rhs)); - std::swap(m_initializer, rhs.m_initializer); - return *this; - } -#endif - - /// Constructs IDENTITY value into the uninitilized '*p' - void identity(Type* p) const - { m_initializer->construct_view(p, m_allocator); } - - /// Calls the destructor on the object pointed-to by 'p' - void destroy(Type* p) const - { m_allocator.destroy(p); } - - /// Return a pointer to 
size bytes of raw memory - void* allocate(std::size_t s) const { - return m_allocator.allocate(1); - } - - /// Deallocate the raw memory at p - void deallocate(void* p) const { - m_allocator.deallocate(static_cast(p), sizeof(Type)); - } - - void reduce(Type* left, Type* right) const { - internal::holder_reduce_functor()(left, right); - } - - void swap(holder_monoid& other) { - std::swap(m_initializer, other.m_initializer); - } - - Allocator get_allocator() const { - return m_allocator; - } - }; - - // Namespace-scope swap - template - inline void swap(holder_monoid& a, - holder_monoid& b) - { - a.swap(b); - } - - /** - * Hyperobject to provide different views of an object to each - * parallel strand. - */ - template > - class holder : public reducer > - { - typedef holder_monoid monoid_type; - typedef reducer imp; - - // Return a value of Type constructed using the functor Func. - template - Type make_value(const Func& f) const { - struct obj { - union { - char buf[sizeof(Type)]; - void* align1; - double align2; - }; - - obj(const Func& f) { f(static_cast(buf)); } - ~obj() { static_cast(buf)->~Type(); } - - operator Type&() { return *static_cast(buf); } - }; - - return obj(f); - } - - public: - /// Default constructor uses default-initialization for both the - /// leftmost view and each identity view. - holder(const Allocator& alloc = Allocator()) - : imp(monoid_type(alloc)) { } - /// Construct from an exemplar that is used to initialize both the - /// leftmost view and each identity view. - holder(const Type& v, const Allocator& alloc = Allocator()) - // Alas, cannot use an rvalue reference for 'v' because it is used - // twice in the same expression for initializing imp. 
- : imp(monoid_type(v, alloc), v) { } +template static void init(void *view) { + new(view) A; +} +template static void reduce(void *left, void *right) { + if (std::is_destructible::value) + static_cast(right)->~A(); +} - /// Construct from a functor that is used to initialize both the - /// leftmost view and each identity view. The functor, 'f', must be be - /// invokable using the expression 'Type x = f()'. - template - holder(const Func& f, const Allocator& alloc = Allocator()) - // Alas, cannot use an rvalue for 'f' because it is used twice in - // the same expression for initializing imp. - : imp(monoid_type(f, alloc), make_value(f)) { } - }; +template +using holder = A _Hyperobject(init, reduce); -} // end namespace cilk +} // namespace cilk -#else /* C */ -# error Holders are currently available only for C++ -#endif /* __cplusplus */ +#endif // __cplusplus -#endif /* HOLDER_H_INCLUDED */ +#endif // _HOLDER_H diff --git a/include/cilk/hyperobject_base.h b/include/cilk/hyperobject_base.h deleted file mode 100644 index 006aa721..00000000 --- a/include/cilk/hyperobject_base.h +++ /dev/null @@ -1,54 +0,0 @@ -#ifndef _CILK_HYPEROBJECT_BASE -#define _CILK_HYPEROBJECT_BASE - -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -struct __cilkrts_hyperobject_base; - -/* Callback function signatures. The first argument always points to the - * reducer itself and is commonly ignored. 
*/ -typedef void (*cilk_reduce_fn_t)(void *r, void *lhs, void *rhs); -typedef void (*cilk_identity_fn_t)(void *r, void *view); -typedef void (*cilk_destroy_fn_t)(void *r, void *view); -typedef void *(*cilk_allocate_fn_t)(struct __cilkrts_hyperobject_base *r, size_t bytes); -typedef void (*cilk_deallocate_fn_t)(struct __cilkrts_hyperobject_base *r, void *view); - -/** Representation of the monoid */ -typedef struct cilk_c_monoid { - cilk_reduce_fn_t reduce_fn; - cilk_identity_fn_t identity_fn; - cilk_destroy_fn_t destroy_fn; - cilk_allocate_fn_t allocate_fn; - cilk_deallocate_fn_t deallocate_fn; -} cilk_c_monoid; - -/** Base of the hyperobject */ -typedef struct __cilkrts_hyperobject_base { - cilk_c_monoid __c_monoid; - uint32_t __id_num; /* for runtime use only, initialize to 0 */ - uint32_t __view_offset; /* offset (in bytes) to leftmost view */ - size_t __view_size; /* Size of each view */ -} __cilkrts_hyperobject_base; - -/* Library interface. - TODO: Add optimization hints like "strand pure" as in Cilk Plus. */ -void __cilkrts_hyper_create(__cilkrts_hyperobject_base *key); -void __cilkrts_hyper_destroy(__cilkrts_hyperobject_base *key); -#if defined __clang__ && defined __cilk && __cilk >= 300 -__attribute__((strand_pure, strand_malloc)) -#endif -void *__cilkrts_hyper_lookup(__cilkrts_hyperobject_base *key); -void *__cilkrts_hyper_alloc(__cilkrts_hyperobject_base *key, size_t bytes); -void __cilkrts_hyper_dealloc(__cilkrts_hyperobject_base *key, void *view); - -#ifdef __cplusplus -} /* end extern "C" */ -#endif - -#endif /* _CILK_HYPEROBJECT_BASE */ diff --git a/include/cilk/metaprogramming.h b/include/cilk/metaprogramming.h deleted file mode 100644 index 4f8a69a6..00000000 --- a/include/cilk/metaprogramming.h +++ /dev/null @@ -1,587 +0,0 @@ -/* metaprogramming.h -*- C++ -*- - * - * Copyright (C) 2012-2018, Intel Corporation - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY - * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * ********************************************************************* - * - * PLEASE NOTE: This file is a downstream copy of a file maintained in - * a repository at cilkplus.org. Changes made to this file that are not - * submitted through the contribution process detailed at - * http://www.cilkplus.org/submit-cilk-contribution will be lost the next - * time that a new version is released. 
Changes only submitted to the - * GNU compiler collection or posted to the git repository at - * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime are - * not tracked. - * - * We welcome your contributions to this open source project. Thank you - * for your assistance in helping us improve Cilk Plus. - */ - -/** @file metaprogramming.h - * - * @brief Defines metaprogramming utility classes used in the Intel(R) Cilk(TM) Plus library. - * - * @ingroup common - */ - -#ifndef METAPROGRAMMING_H_INCLUDED -#define METAPROGRAMMING_H_INCLUDED - -#ifdef __cplusplus - -#include -#include -#include -#ifdef _WIN32 -#include -#endif -#include - -namespace cilk { - -namespace internal { - -/** Test if a class is empty. - * - * If @a Class is an empty (and therefore necessarily stateless) class, then - * the "empty base-class optimization" guarantees that - * `sizeof(check_for_empty_class) == sizeof(char)`. Conversely, if - * `sizeof(check_for_empty_class) > sizeof(char)`, then @a Class is not - * empty, and we must discriminate distinct instances of @a Class. - * - * Typical usage: - * - * // General definition of A for non-empty B: - * template ::value> > - * class A { ... }; - * - * // Specialized definition of A for empty B: - * template - * class A { ... }; - * - * @tparam Class The class to be tested for emptiness. - * - * @result The `value` member will be `true` if @a Class is empty, - * `false` otherwise. - * - * @ingroup common - */ -template -class class_is_empty { - class check_for_empty_class : public Class - { - char m_data; - public: - // Declared but not defined - check_for_empty_class(); - check_for_empty_class(const check_for_empty_class&); - check_for_empty_class& operator=(const check_for_empty_class&); - ~check_for_empty_class(); - }; -public: - - /** Constant is true if and only if @a Class is empty. 
- */ - static const bool value = (sizeof(check_for_empty_class) == sizeof(char)); -}; - - -/** A class containing raw bytes with a specified alignment and size. - * - * An object of type `aligned_storage` will have alignment `A` and - * size at least `S`. Its contents will be uninitialized bytes. - * - * @tparam Size The required minimum size of the resulting class. - * @tparam Alignment The required alignment of the resulting class. - * - * @pre @a Alignment shall be a power of 2 no greater than 64. - * - * @note This is implemented using the `CILK_ALIGNAS` macro, which uses - * the non-standard, implementation-specific features - * `__declspec(align(N))` on Windows, and - * `__attribute__((__aligned__(N)))` on Unix. The `gcc` implementation - * of `__attribute__((__aligned__(N)))` requires a numeric literal `N` - * (_not_ an arbitrary compile-time constant expression). Therefore, - * this class is implemented using specialization on the required - * alignment. - * - * @note The template class is specialized only for the supported - * alignments. An attempt to instantiate it for an unsupported - * alignment will result in a compilation error. - */ -template -struct aligned_storage; - -#define CILK_ALIGNAS(A) __attribute__((aligned(A))) -/// @cond -template class aligned_storage - { CILK_ALIGNAS( 1) char m_bytes[Size]; }; -template class aligned_storage - { CILK_ALIGNAS( 2) char m_bytes[Size]; }; -template class aligned_storage - { CILK_ALIGNAS( 4) char m_bytes[Size]; }; -template class aligned_storage - { CILK_ALIGNAS( 8) char m_bytes[Size]; }; -template class aligned_storage - { CILK_ALIGNAS(16) char m_bytes[Size]; }; -template class aligned_storage - { CILK_ALIGNAS(32) char m_bytes[Size]; }; -template class aligned_storage - { CILK_ALIGNAS(64) char m_bytes[Size]; }; -/// @endcond -#undef CILK_ALIGNAS - -/** A buffer of uninitialized bytes with the same size and alignment as a - * specified type. 
- * - * The class `storage_for_object` will have the same size and alignment - * properties as `Type`, but it will contain only raw (uninitialized) bytes. - * This allows the definition of a data member which can contain a `Type` - * object which is initialized explicitly under program control, rather - * than implicitly as part of the initialization of the containing class. - * For example: - * - * class C { - * storage_for_object _member; - * public: - * C() ... // Does NOT initialize _member - * void initialize(args) - * { new (_member.pointer()) MemberClass(args); } - * const MemberClass& member() const { return _member.object(); } - * MemberClass& member() { return _member.object(); } - * - * @tparam Type The type whose size and alignment are to be reflected - * by this class. - */ -template -class storage_for_object : - aligned_storage< sizeof(Type), __alignof(Type) > -{ -public: - /// Return a typed reference to the buffer. - const Type& object() const { return *reinterpret_cast(this); } - /// Return a typed reference to the buffer. - Type& object() { return *reinterpret_cast(this); } -}; - - -/** Get the functor class corresponding to a binary function type. - * - * The `binary_functor` template class can be instantiated with a binary - * functor class or with a real binary function, and will yield an equivalent - * binary functor class in either case. - * - * @tparam F A binary functor class, a binary function type, or a pointer to - * binary function type. - * - * @result `binary_functor::%type` will be the same as @a F if @a F is - * a class. It will be a `std::pointer_to_binary_function` wrapper - * if @a F is a binary function or binary function pointer type. - * (It will _not_ necessarily be an `Adaptable Binary Function` - * class, since @a F might be a non-adaptable binary functor - * class.) - * - * @ingroup common - */ -template -struct binary_functor { - /// The binary functor class equivalent to @a F. 
- typedef F type; -}; - -/// @copydoc binary_functor -/// Specialization for binary function. -template -struct binary_functor { - /// The binary functor class equivalent to @a F. - typedef std::pointer_to_binary_function type; -}; - -/// @copydoc binary_functor -/// Specialization for pointer to binary function. -template -struct binary_functor { - /// The binary functor class equivalent to @a F. - typedef std::pointer_to_binary_function type; -}; - - -/** Indirect binary function class with specified types. - * - * `typed_indirect_binary_function` is an `Adaptable Binary Function` class - * based on an existing binary functor class or binary function type @a F. If - * @a F is a stateless class, then this class will be empty, and its - * `operator()` will invoke @a F's `operator()`. Otherwise, an object of this - * class will hold a pointer to an object of type @a F, and will refer its - * `operator()` calls to the pointed-to @a F object. - * - * That is, suppose that we have the declarations: - * - * F *p; - * typed_indirect_binary_function ibf(p); - * - * Then: - * - * - `ibf(x, y) == (*p)(x, y)`. - * - `ibf(x, y)` will not do a pointer dereference if `F` is an empty class. - * - * @note Just to repeat: if `F` is an empty class, then - * `typed_indirect_binary_function\' is also an empty class. - * This is critical for its use in the - * @ref cilk::cilk_lib_1_1::min_max_internal::view_base - * "min/max reducer view classes", where it allows the view to - * call a comparison functor in the monoid without actually - * having to allocate a pointer in the view class when the - * comparison class is empty. - * - * @note If you have an `Adaptable Binary Function` class or a binary - * function type, then you can use the - * @ref indirect_binary_function class, which derives the - * argument and result types parameter type instead of requiring - * you to specify them as template arguments. 
- * - * @tparam F A binary functor class, a binary function type, or a pointer to - * binary function type. - * @param A1 The first argument type. - * @param A2 The second argument type. - * @param R The result type. - * - * @see min_max::comparator_base - * @see indirect_binary_function - * - * @ingroup common - */ -template < typename F - , typename A1 - , typename A2 - , typename R - , typename Functor = typename binary_functor::type - , bool FunctorIsEmpty = class_is_empty::value - > -class typed_indirect_binary_function : std::binary_function -{ - const F* f; -public: - /// Constructor captures a pointer to the wrapped function. - typed_indirect_binary_function(const F* f) : f(f) {} - - /// Return the comparator pointer, or `NULL` if the comparator is stateless. - const F* pointer() const { return f; } - - /// Apply the pointed-to functor to the arguments. - R operator()(const A1& a1, const A2& a2) const { return (*f)(a1, a2); } -}; - - -/// @copydoc typed_indirect_binary_function -/// Specialization for an empty functor class. (This is only possible if @a F -/// itself is an empty class. If @a F is a function or pointer-to-function -/// type, then the functor will contain a pointer.) -template -class typed_indirect_binary_function : - std::binary_function -{ -public: - /// Return `NULL` for the comparator pointer of a stateless comparator. - const F* pointer() const { return 0; } - - /// Constructor discards the pointer to a stateless functor class. - typed_indirect_binary_function(const F* f) {} - - /// Create an instance of the stateless functor class and apply it to the arguments. - R operator()(const A1& a1, const A2& a2) const { return F()(a1, a2); } -}; - - -/** Indirect binary function class with inferred types. 
- * - * This is identical to @ref cilk::internal::typed_indirect_binary_function, - * except that it derives the binary function argument and result types from - * the parameter type @a F instead of taking them as additional template - * parameters. If @a F is a class type, then it must be an `Adaptable Binary - * Function`. - * - * @see typed_indirect_binary_function - * - * @ingroup common - */ -template ::type> -class indirect_binary_function : - typed_indirect_binary_function< F - , typename Functor::first_argument_type - , typename Functor::second_argument_type - , typename Functor::result_type - > -{ - typedef typed_indirect_binary_function< F - , typename Functor::first_argument_type - , typename Functor::second_argument_type - , typename Functor::result_type - > - base; -public: - indirect_binary_function(const F* f) : base(f) {} ///< Constructor -}; - - -/** Choose a type based on a boolean constant. - * - * This metafunction is identical to C++11's condition metafunction. - * It needs to be here until we can reasonably assume that users will be - * compiling with C++11. - * - * @tparam Cond A boolean constant. - * @tparam IfTrue A type. - * @tparam IfFalse A type. - * @result The `type` member will be a typedef of @a IfTrue if @a Cond - * is true, and a typedef of @a IfFalse if @a Cond is false. - * - * @ingroup common - */ -template -struct condition -{ - typedef IfTrue type; ///< The type selected by the condition. -}; - -/// @copydoc condition -/// Specialization for @a Cond == `false`. -template -struct condition -{ - typedef IfFalse type; ///< The type selected by the condition. -}; - - -/** @def __CILKRTS_STATIC_ASSERT - * - * @brief Compile-time assertion. - * - * Causes a compilation error if a compile-time constant expression is false. - * - * @par Usage example. 
- * This assertion is used in reducer_min_max.h to avoid defining - * legacy reducer classes that would not be binary-compatible with the - * same classes compiled with earlier versions of the reducer library. - * - * __CILKRTS_STATIC_ASSERT( - * internal::class_is_empty< internal::binary_functor >::value, - * "cilk::reducer_max only works with an empty Compare class"); - * - * @note In a C++11 compiler, this is just the language predefined - * `static_assert` macro. - * - * @note In a non-C++11 compiler, the @a Msg string is not directly included - * in the compiler error message, but it may appear if the compiler - * prints the source line that the error occurred on. - * - * @param Cond The expression to test. - * @param Msg A string explaining the failure. - * - * @ingroup common - */ -#if defined(__INTEL_CXX11_MODE__) || defined(__GXX_EXPERIMENTAL_CXX0X__) -# define __CILKRTS_STATIC_ASSERT(Cond, Msg) static_assert(Cond, Msg) -#else -# define __CILKRTS_STATIC_ASSERT(Cond, Msg) \ - typedef int __CILKRTS_STATIC_ASSERT_DUMMY_TYPE \ - [::cilk::internal::static_assert_failure<(Cond)>::Success] - -/// @cond internal - template struct static_assert_failure { }; - template <> struct static_assert_failure { enum { Success = 1 }; }; - -# define __CILKRTS_STATIC_ASSERT_DUMMY_TYPE \ - __CILKRTS_STATIC_ASSERT_DUMMY_TYPE1(__cilkrts_static_assert_, __LINE__) -# define __CILKRTS_STATIC_ASSERT_DUMMY_TYPE1(a, b) \ - __CILKRTS_STATIC_ASSERT_DUMMY_TYPE2(a, b) -# define __CILKRTS_STATIC_ASSERT_DUMMY_TYPE2(a, b) a ## b -/// @endcond - -#endif - -/// @cond internal - -/** @name Aligned heap management. - */ -//@{ - -/** Implementation-specific aligned memory allocation function. - * - * @param size The minimum number of bytes to allocate. - * @param alignment The required alignment (must be a power of 2). - * @return The address of a block of memory of at least @a size - * bytes. The address will be a multiple of @a alignment. - * `NULL` if the allocation fails. 
- * - * @see deallocate_aligned() - */ -inline void* allocate_aligned(std::size_t size, std::size_t alignment) -{ -#ifdef _WIN32 - return _aligned_malloc(size, alignment); -#else -#if defined(__ANDROID__) || defined(__VXWORKS__) - return memalign(std::max(alignment, sizeof(void*)), size); -#else - void* ptr; - return (posix_memalign(&ptr, std::max(alignment, sizeof(void*)), size) == 0) ? ptr : 0; -#endif -#endif -} - -/** Implementation-specific aligned memory deallocation function. - * - * @param ptr A pointer which was returned by a call to alloc_aligned(). - */ -inline void deallocate_aligned(void* ptr) -{ -#ifdef _WIN32 - _aligned_free(ptr); -#else - std::free(ptr); -#endif -} - -/** Class to allocate and guard an aligned pointer. - * - * A new_aligned_pointer object allocates aligned heap-allocated memory when - * it is created, and automatically deallocates it when it is destroyed - * unless its `ok()` function is called. - * - * @tparam T The type of the object to allocate on the heap. The allocated - * will have the size and alignment of an object of type T. - */ -template -class new_aligned_pointer { - void* m_ptr; -public: - /// Constructor allocates the pointer. - new_aligned_pointer() : - m_ptr(allocate_aligned(sizeof(T), __alignof(T))) {} - /// Destructor deallocates the pointer. - ~new_aligned_pointer() { if (m_ptr) deallocate_aligned(m_ptr); } - /// Get the pointer. - operator void*() { return m_ptr; } - /// Return the pointer and release the guard. - T* ok() { - T* ptr = static_cast(m_ptr); - m_ptr = 0; - return ptr; - } -}; - -//@} - -/// @endcond - -} // namespace internal - -//@{ - -/** Allocate an aligned data structure on the heap. - * - * `cilk::aligned_new([args])` is equivalent to `new T([args])`, except - * that it guarantees that the returned pointer will be at least as aligned - * as the alignment requirements of type `T`. 
- * - * @ingroup common - */ -template -T* aligned_new() -{ - internal::new_aligned_pointer ptr; - new (ptr) T(); - return ptr.ok(); -} - -template -T* aligned_new(const T1& x1) -{ - internal::new_aligned_pointer ptr; - new (ptr) T(x1); - return ptr.ok(); -} - -template -T* aligned_new(const T1& x1, const T2& x2) -{ - internal::new_aligned_pointer ptr; - new (ptr) T(x1, x2); - return ptr.ok(); -} - -template -T* aligned_new(const T1& x1, const T2& x2, const T3& x3) -{ - internal::new_aligned_pointer ptr; - new (ptr) T(x1, x2, x3); - return ptr.ok(); -} - -template -T* aligned_new(const T1& x1, const T2& x2, const T3& x3, const T4& x4) -{ - internal::new_aligned_pointer ptr; - new (ptr) T(x1, x2, x3, x4); - return ptr.ok(); -} - -template -T* aligned_new(const T1& x1, const T2& x2, const T3& x3, const T4& x4, const T5& x5) -{ - internal::new_aligned_pointer ptr; - new (ptr) T(x1, x2, x3, x4, x5); - return ptr.ok(); -} - -//@} - - -/** Deallocate an aligned data structure on the heap. - * - * `cilk::aligned_delete(ptr)` is equivalent to `delete ptr`, except that it - * operates on a pointer that was allocated by aligned_new(). 
- * - * @ingroup common - */ -template -void aligned_delete(const T* ptr) -{ - ptr->~T(); - internal::deallocate_aligned((void*)ptr); -} - -} // namespace cilk - -#endif // __cplusplus - -#endif // METAPROGRAMMING_H_INCLUDED diff --git a/include/cilk/opadd_reducer.h b/include/cilk/opadd_reducer.h new file mode 100644 index 00000000..be7c2696 --- /dev/null +++ b/include/cilk/opadd_reducer.h @@ -0,0 +1,22 @@ +#ifndef _OPADD_REDUCER_H +#define _OPADD_REDUCER_H + +#ifdef __cplusplus + +namespace cilk { + +template static void zero(void *v) { + *static_cast(v) = static_cast(0); +} + +template static void plus(void *l, void *r) { + *static_cast(l) += *static_cast(r); +} + +template using opadd_reducer = T _Hyperobject(zero, plus); + +} // namespace cilk + +#endif // #ifdef __cplusplus + +#endif // _OPADD_REDUCER_H diff --git a/include/cilk/ostream_reducer.h b/include/cilk/ostream_reducer.h new file mode 100644 index 00000000..1528abd6 --- /dev/null +++ b/include/cilk/ostream_reducer.h @@ -0,0 +1,74 @@ +#ifndef _OSTREAM_REDUCER_H +#define _OSTREAM_REDUCER_H + +#ifdef __cplusplus + +#include +#include + +/* Adapted from Intel Cilk Plus */ + +namespace cilk { + +template +class ostream_view : public std::basic_ostream +{ + typedef std::basic_ostream base; + typedef std::basic_ostream ostream_type; + + // A non-leftmost view is associated with a private string buffer. (The + // leftmost view is associated with the buffer of the reducer's associated + // ostream, so its private buffer is unused.) + // + std::basic_stringbuf m_buffer; + +public: + void reduce(ostream_view* other) + { + // Writing an empty buffer results in failure. Testing `sgetc()` is the + // easiest way of checking for an empty buffer. 
+ if (other->m_buffer.sgetc() != Traits::eof()) { + *this << (&other->m_buffer); + } + } + + static void reduce(void *left_v, void *right_v) { + ostream_view *left = + static_cast *>(left_v); + ostream_view *right = + static_cast *>(right_v); + left->reduce(right); + right->~ostream_view(); + } + + static void identity(void *view) { + new (view) ostream_view(); + } + + /** Non-leftmost (identity) view constructor. The view is associated with + * its internal buffer. Required by @ref monoid_base. + */ + ostream_view() : base(&m_buffer) {} + + /** Leftmost view constructor. The view is associated with an existing + * ostream. + */ + ostream_view(const ostream_type& os) : base(0) + { + base::rdbuf(os.rdbuf()); // Copy stream buffer + base::flags(os.flags()); // Copy formatting flags + base::setstate(os.rdstate()); // Copy error state + } + +}; + +template> + using ostream_reducer = ostream_view + _Hyperobject(&ostream_view>::identity, + &ostream_view>::reduce); + +} // namespace cilk + +#endif // __cplusplus + +#endif // _OSTREAM_REDUCER_H diff --git a/include/cilk/reducer.h b/include/cilk/reducer.h deleted file mode 100644 index 8f984b80..00000000 --- a/include/cilk/reducer.h +++ /dev/null @@ -1,1866 +0,0 @@ -/* reducer.h -*- C++ -*- - * - * Copyright (C) 2009-2018, Intel Corporation - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. 
- * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY - * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * ********************************************************************* - * - * PLEASE NOTE: This file is a downstream copy of a file maintained in - * a repository at cilkplus.org. Changes made to this file that are not - * submitted through the contribution process detailed at - * http://www.cilkplus.org/submit-cilk-contribution will be lost the next - * time that a new version is released. Changes only submitted to the - * GNU compiler collection or posted to the git repository at - * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime are - * not tracked. - * - * We welcome your contributions to this open source project. Thank you - * for your assistance in helping us improve Cilk Plus. - */ - -/** @file reducer.h - * - * @brief Defines foundation classes for creating Intel(R) Cilk(TM) Plus - * reducers. 
- * - * @ingroup Reducers - * - * @see @ref pagereducers - * - * @defgroup Reducers Reducers - */ - -#ifndef REDUCER_H_INCLUDED -#define REDUCER_H_INCLUDED - -#include "cilk/hyperobject_base.h" -#include "cilk/metaprogramming.h" -#include - -#define __CILKRTS_STRAND_STALE(fn) fn - -#ifdef __cplusplus - -//===================== C++ interfaces =================================== - -#include - -namespace cilk { - -/** Class for provisionally constructed objects. - * - * The monoid_base::construct() functions manually construct both a - * monoid and a view. If one of these is constructed successfully, and the - * construction of the other (or some other initialization) fails, then the - * first one must be destroyed to avoid a memory leak. Because the - * construction is explicit, the destruction must be explicit, too. - * - * A provisional_guard object wraps a pointer to a newly constructed - * object. A call to its confirm() function confirms that the object is - * really going to be used. If the guard is destroyed without being - * confirmed, then the pointed-to object is destroyed (but not - * deallocated). - * - * Expected usage: - * - * provisional_guard x1_provisional( new (x1) T1 ); - * … more initialization … - * x1_provisional.confirm(); - * - * or - * - * provisional_guard x1_provisional( new (x1) T1 ); - * x1_provisional.confirm_if( new (x2) T2 ); - * - * If an exception is thrown in the "more initialization" code in the - * first example, or in the `T2` constructor in the second example, then - * `x1_provisional` will not be confirmed, so when its destructor is - * called during exception unwinding, the `T1` object that was constructed - * in `x1` will be destroyed. 
- * - * **NOTE**: Do *not* be tempted to chain a `provisional_guard` - * constructor with `confirm_if` as in this example: - * - * // BAD IDEA - * provisional_guard( new (x1) T1 ).confirm_if( new (x2) T2 ); - * - * The code above is problematic because the evaluation of the T2 - * constructor is unsequenced with respect to the call to the - * `provisional_guard` constructor (and with respect the T1 constructor). - * Thus, the compiler may choose to evaluate `new (x2) T2` before - * constructing the guard and leak the T1 object if the `T2` constructor - * throws. - * - * @tparam Type The type of the provisionally constructed object. - */ -template class provisional_guard { - Type *m_ptr; - - public: - /** Constructor. Creates a guard for a provisionally constructed object. - * - * @param ptr A pointer to the provisionally constructed object. - */ - provisional_guard(Type *ptr) : m_ptr(ptr) {} - - /** Destructor. Destroy the object pointed to by the contained pointer - * if it has not been confirmed. - */ - ~provisional_guard() { - if (m_ptr) - m_ptr->~Type(); - } - - /** Confirm the provisional construction. Do *not* delete the contained - * pointer when the guard is destroyed. - */ - void confirm() { m_ptr = 0; } - - /** Confirm provisional construction if argument is non-null. Note that - * if an exception is thrown during evaluation of the argument - * expression, then this function will not be called, and the - * provisional object will not be confirmed. This allows the usage: - * - * x1_provisional.confirm_if( new (x2) T2() ); - * - * @param cond An arbitrary pointer. The provisional object will be - * confirmed if @a cond is not null. - * - * @returns The value of the @a cond argument. - */ - template Cond *confirm_if(Cond *cond) { - if (cond) - m_ptr = 0; - return cond; - } -}; - -/** Base class for defining monoids. - * - * The monoid_base class template is useful for creating classes that model - * the monoid concept. 
It provides the core type and memory management - * functionality. A subclass of monoid_base need only declare and implement - * the `identity` and `reduce` functions. - * - * The monoid_base class also manages the integration between the monoid, the - * reducer class that is based on it, and an optional view class which wraps - * value objects and restricts access to their operations. - * - * @tparam Value The value type for the monoid. - * @tparam View An optional view class that serves as a proxy for the value - * type. - * - * @see monoid_with_view - */ -template class monoid_base { - - public: - /** Value type of the monoid. - */ - typedef Value value_type; - - /** View type of the monoid. Defaults to be the same as the value type. - * @see monoid_with_view - */ - typedef View view_type; - - enum { - /** Should reducers created with this monoid be aligned? - * - * @details - * "Aligned" means that the view is allocated at a cache-line aligned - * offset in the reducer, and the reducer must be cache-line aligned. - * "Unaligned" means that the reducer as a whole is just naturally - * aligned, but it contains a large enough block of uninitialized - * storage for a cache-line aligned view to be allocated in it at - * reducer construction time. - * - * Since the standard heap allocator (new reducer) does not allocate - * cache-line aligned storage, only unaligned reducers can be safely - * allocated on the heap. - * - * Default is false (unaligned) unless overridden in a subclass. - * - * @since 1.02 - * (In Intel Cilk Plus library versions 1.0 and 1.01, the default was - * true. In Intel Cilk Plus library versions prior to 1.0, reducers were - * always aligned, and this data member did not exist.) - */ - align_reducer = false - }; - - /** Destroys a view. Destroys (without deallocating) the @a View object - * pointed to by @a p. - * - * @param p The address of the @a View object to be destroyed. 
- */ - void destroy(view_type *p) const { p->~view_type(); } - - /** Allocates raw memory. Allocate @a s bytes of memory with no - * initialization. - * - * @param s The number of bytes of memory to allocate. - * @return An untyped pointer to the allocated memory. - */ - void *allocate(size_t s) const { return operator new(s); } - - /** Deallocates raw memory pointed to by @a p - * without doing any destruction. - * - * @param p Pointer to the memory to be deallocated. - * - * @pre @a p points to a block of memory that was allocated by a - * call to allocate(). - */ - void deallocate(void *p) const { operator delete(p); } - - /** Creates the identity value. Constructs (without allocating) a @a View - * object representing the default value of the @a Value type. - * - * @param p A pointer to a block of raw memory large enough to hold a - * @a View object. - * - * @post The memory pointed to by @a p contains a @a View object that - * represents the default value of the @a View type. - * - * @deprecated This function constructs the @a View object with its default - * constructor, which will often, but not always, yield the - * appropriate identity value. Monoid classes should declare - * their identity function explicitly, rather than relying on - * this default definition. - */ - void identity(View *p) const { new ((void *)p) View(); } - - /** @name Constructs the monoid and the view with arbitrary arguments. - * - * A @ref reducer object contains monoid and view data members, which are - * declared as raw storage (byte arrays), so that they are not implicitly - * constructed when the reducer is constructed. Instead, a reducer - * constructor calls one of the monoid class's static construct() - * functions with the addresses of the monoid and the view, and the - * construct() function uses placement `new` to construct them. 
- * This allows the monoid to determine the order in which the monoid and - * view are constructed, and to make one of them dependent on the other. - * - * Any arguments to the reducer constructor are just passed on as - * additional arguments to the construct() function (after the monoid - * and view addresses are set). - * - * A monoid whose needs are satisfied by the suite of construct() - * functions below, such as @ref monoid_with_view, can just inherit them - * from monoid_base. Other monoids will need to provide their own versions - * to override the monoid_base functions. - */ - //@{ - - /** Default-constructs the monoid, identity-constructs the view. - * - * @param monoid Address of uninitialized monoid object. - * @param view Address of uninitialized initial view object. - */ - //@{ - template - static void construct(Monoid *monoid, View *view) { - provisional_guard guard(new ((void *)monoid) Monoid()); - monoid->identity(view); - guard.confirm(); - } - //@} - - /** Default-constructs the monoid, and passes one to five const reference - * arguments to the view constructor. 
- */ - //@{ - - template - static void construct(Monoid *monoid, View *view, const T1 &x1) { - provisional_guard guard(new ((void *)monoid) Monoid()); - guard.confirm_if(new ((void *)view) View(x1)); - } - - template - static void construct(Monoid *monoid, View *view, const T1 &x1, - const T2 &x2) { - provisional_guard guard(new ((void *)monoid) Monoid()); - guard.confirm_if(new ((void *)view) View(x1, x2)); - } - - template - static void construct(Monoid *monoid, View *view, const T1 &x1, - const T2 &x2, const T3 &x3) { - provisional_guard guard(new ((void *)monoid) Monoid()); - guard.confirm_if(new ((void *)view) View(x1, x2, x3)); - } - - template - static void construct(Monoid *monoid, View *view, const T1 &x1, - const T2 &x2, const T3 &x3, const T4 &x4) { - provisional_guard guard(new ((void *)monoid) Monoid()); - guard.confirm_if(new ((void *)view) View(x1, x2, x3, x4)); - } - - template - static void construct(Monoid *monoid, View *view, const T1 &x1, - const T2 &x2, const T3 &x3, const T4 &x4, - const T5 &x5) { - provisional_guard guard(new ((void *)monoid) Monoid()); - guard.confirm_if(new ((void *)view) View(x1, x2, x3, x4, x5)); - } - - //@} - - /** Default-constructs the monoid, and passes one non-const reference - * argument to the view constructor. - */ - //@{ - template - static void construct(Monoid *monoid, View *view, T1 &x1) { - provisional_guard guard(new ((void *)monoid) Monoid()); - guard.confirm_if(new ((void *)view) View(x1)); - } - //@} - - /** Copy-constructs the monoid, and identity-constructs the view - * constructor. - * - * @param monoid Address of uninitialized monoid object. - * @param view Address of uninitialized initial view object. 
- * @param m Object to be copied into `*monoid` - */ - //@{ - template - static void construct(Monoid *monoid, View *view, const Monoid &m) { - provisional_guard guard(new ((void *)monoid) Monoid(m)); - monoid->identity(view); - guard.confirm(); - } - //@} - - /** Copy-constructs the monoid, and passes one to four const reference - * arguments to the view constructor. - */ - //@{ - - template - static void construct(Monoid *monoid, View *view, const Monoid &m, - const T1 &x1) { - provisional_guard guard(new ((void *)monoid) Monoid(m)); - guard.confirm_if(new ((void *)view) View(x1)); - } - - template - static void construct(Monoid *monoid, View *view, const Monoid &m, - const T1 &x1, const T2 &x2) { - provisional_guard guard(new ((void *)monoid) Monoid(m)); - guard.confirm_if(new ((void *)view) View(x1, x2)); - } - - template - static void construct(Monoid *monoid, View *view, const Monoid &m, - const T1 &x1, const T2 &x2, const T3 &x3) { - provisional_guard guard(new ((void *)monoid) Monoid(m)); - guard.confirm_if(new ((void *)view) View(x1, x2, x3)); - } - - template - static void construct(Monoid *monoid, View *view, const Monoid &m, - const T1 &x1, const T2 &x2, const T3 &x3, - const T4 &x4) { - provisional_guard guard(new ((void *)monoid) Monoid(m)); - guard.confirm_if(new ((void *)view) View(x1, x2, x3, x4)); - } - - //@} - - //@} -}; - -/** Monoid class that gets its value type and identity and reduce operations - * from its view. - * - * A simple implementation of the monoid-view-reducer architecture would - * distribute knowledge about the type and operations for the reduction - * between the monoid and the view - the identity and reduction operations are - * specified in the monoid, the reduction operations are implemented in the - * view, and the value type is specified in both the monoid and the view. - * This is inelegant. 
- * - * monoid_with_view is a subclass of @ref monoid_base that gets its value type - * and its identity and reduction operations from its view class. No - * customization of the monoid_with_view class itself is needed beyond - * instantiating it with an appropriate view class. (Customized subclasses of - * monoid_with_view may be needed for other reasons, such as to keep some - * state for the reducer.) All of the Intel Cilk Plus predefined reducers use - * monoid_with_view or one of its subclasses. - * - * The view class `View` of a monoid_with_view must provide the following - * public definitions: - * - * Definition | Meaning - * ---------------------------------|-------- - * `value_type` | a typedef of the value type for the - * | reduction - * `View()` | a default constructor which constructs - * | the identity value for the reduction - * `void reduce(const View* other)` | a member function which applies the - * | reduction operation to the values of - * | `this` view and the `other` view, - * | leaving the result as the value of - * | `this` view, and leaving the value of - * | the `other` view undefined (but valid) - * - * @tparam View The view class for the monoid. - * @tparam Align If true, reducers instantiated on this monoid will be - * cache-aligned. By default, library reducers (unlike legacy - * library reducer _wrappers_) are aligned only as required by - * contents. - */ -template -class monoid_with_view : public monoid_base { - public: - /** Should reducers created with this monoid be aligned? - */ - enum { align_reducer = Align }; - - /** Create the identity value. - * - * Implements the monoid `identity` operation by using the @a View class's - * default constructor. - * - * @param p A pointer to a block of raw memory large enough to hold a - * @p View object. - */ - void identity(View *p) const { new ((void *)p) View(); } - - /** Reduce the values of two views. 
- * - * Implements the monoid `reduce` operation by calling the left view's - * `%reduce()` function with the right view as an operand. - * - * @param left The left operand of the reduce operation. - * @param right The right operand of the reduce operation. - * @post The left view contains the result of the reduce - * operation, and the right view is undefined. - */ - void reduce(View *left, View *right) const { left->reduce(right); } -}; - -/** Base class for simple views with (usually) scalar values. - * - * The scalar_view class is intended as a base class which provides about half - * of the required definitions for simple views. It defines the `value_type` - * required by a @ref monoid_with_view (but not the identity constructor and - * reduce operation, which are inherently specific to a particular kind of - * reduction). It also defines the value access functions which will be called - * by the corresponding @ref reducer functions. (It uses copy semantics for - * the view_move_in() and view_move_out() functions, which is appropriate - * for simple scalar types, but not necessarily for more complex types like - * STL containers. - * - * @tparam Type The type of value wrapped by the view. - */ -template class scalar_view { - protected: - Type m_value; ///< The wrapped accumulator variable. - - public: - /** Value type definition required by @ref monoid_with_view. - */ - typedef Type value_type; - - /** Default constructor. - */ - scalar_view() : m_value() {} - - /** Value constructor. - */ - scalar_view(const Type &v) : m_value(v) {} - - /** @name Value functions required by the reducer class. - * - * Note that the move in/out functions use simple assignment semantics. - */ - //@{ - - /** Set the value of the view. - */ - void view_move_in(Type &v) { m_value = v; } - - /** Get the value of the view. - */ - void view_move_out(Type &v) { v = m_value; } - - /** Set the value of the view. 
- */ - void view_set_value(const Type &v) { m_value = v; } - - /** Get the value of the view. - */ - Type const &view_get_value() const { return m_value; } - - /** Type returned by view_get_value. - */ - typedef Type const &return_type_for_get_value; - - /** Get a reference to the value contained in the view. For legacy - * reducer support only. - */ - Type &view_get_reference() { return m_value; } - - /** Get a reference to the value contained in the view. For legacy - * reducer support only. - */ - Type const &view_get_reference() const { return m_value; } - //@} -}; - -/** Wrapper class for move-in construction. - * - * Some types allow their values to be _moved_ as an alternative to copying. - * Moving a value may be much faster than copying it, but may leave the value - * of the move's source undefined. Consider the `swap` operation provided by - * many STL container classes: - * - * list x, y; - * x = y; // Copy - * x.swap(y); // Move - * - * The assignment _copies_ the value of `y` into `x` in time linear in the - * size of `y`, leaving `y` unchanged. The `swap` _moves_ the value of `y` - * into `x` in constant time, but it also moves the value of `x` into `y`, - * potentially leaving `y` undefined. - * - * A move_in_wrapper simply wraps a pointer to an object. It is created by a - * call to cilk::move_in(). Passing a move_in_wrapper to a view constructor - * (actually, passing it to a reducer constructor, which passes it to the - * monoid `construct()` function, which passes it to the view constructor) - * allows, but does not require, the value pointed to by the wrapper to be - * moved into the view instead of copied. - * - * A view class exercises this option by defining a _move-in constructor_, - * i.e., a constructor with a move_in_wrapper parameter. The constructor calls - * the wrapper's `value()` function to get a reference to its pointed-to - * value, and can then use that reference in a move operation. 
- * - * A move_in_wrapper also has an implicit conversion to its pointed-to value, - * so if a view class does not define a move-in constructor, its ordinary - * value constructor will be called with the wrapped value. For example, an - * @ref ReducersAdd "op_add" view does not have a move-in constructor, so - * - * int x; - * reducer< op_add > xr(move_in(x)); - * - * will simply call the `op_add_view(const int &)` constructor. But an - * @ref ReducersList "op_list_append" view does have a move-in constructor, - * so - * - * list x; - * reducer< op_list_append > xr(move_in(x)); - * - * will call the `op_list_append_view(move_in_wrapper)` constructor, - * which can `swap` the value of `x` into the view. - * - * @note Remember that passing the value of a variable to a reducer - * constructor using a move_in_wrapper leaves the variable undefined. - * You cannot assume that the constructor either will or will not copy - * or move the value. - * - * @tparam Type The type of the wrapped value. - * - * @see cilk::move_in() - */ -template class move_in_wrapper { - Type *m_pointer; - - public: - /** Constructor that captures the address of its argument. This is almost - * always called from the @ref move_in function. - */ - explicit move_in_wrapper(Type &ref) : m_pointer(&ref) {} - - /** Implicit conversion to the wrapped value. This allows a move_in_wrapper - * to be used where a value of the wrapped type is expected, in which case - * the wrapper is completely transparent. - */ - operator Type &() const { return *m_pointer; } - - /** Get a reference to the pointed-to value. This has the same effect as - * the implicit conversion, but makes the intent clearer in a move-in - * constructor. - */ - Type &value() const { return *m_pointer; } -}; - -/** Function to create a move_in_wrapper for a value. - * - * @tparam Type The type of the argument, which will be the `type` of the - * created wrapper. 
- * - * @see move_in_wrapper - */ -template inline move_in_wrapper move_in(Type &ref) { - return move_in_wrapper(ref); -} - -/** @copydoc move_in(Type&) - * - * @note Applying a function that is explicitly specified as modifying its - * argument to a const argument is obviously an irrational thing to - * do. This move_in() variant is just provided to allow calling a - * move-in constructor with a function return value, which the - * language treats as a const. Using it for any other purpose will - * probably end in tears. - */ -template inline move_in_wrapper move_in(const Type &ref) { - return move_in_wrapper(ref); -} - -/** Wrapper class to allow implicit downcasts to reducer subclasses. - * - * The Intel Cilk Plus library contains a collection of reducer wrapper classes - * which were created before the `cilk::reducer` style was developed. - * For example, `cilk::reducer_opadd` provided essentially the same - * functionality that is now provided by - * `cilk::reducer< cilk::op_add >`. These legacy reducer classes are - * deprecated, but still supported, and they have been reimplemented as - * subclasses of the corresponding `cilk::reducer` classes. For example: - * - * template - * reducer_opadd : public reducer< op_add > { ... }; - * - * This reimplementation allows transparent conversion between legacy and - * new reducers. That is, a `reducer*` or `reducer&` can be - * used anywhere that a `reducer_opadd*` or `reducer_opadd&` is expected, - * and vice versa. - * - * The conversion from the legacy reducer to the new reducer is just an - * up-cast, which is provided for free by C++. The conversion from the new - * reducer to the legacy reducer is a down-cast, though, which requires an - * explicit conversion member function in the `reducer` class. The challenge - * is to define a function in the reducer template class which will convert - * each cilk::reducer specialization to the corresponding legacy reducer, - * if there is one. 
- * - * The trick is in the legacy_reducer_downcast template class, which provides - * a mapping from `cilk::reducer` specializations to legacy reducer classes. - * `reducer` has a conversion function to convert itself to - * `legacy_reducer_downcast< reducer >::%type`. By default, - * `legacy_reducer_downcast::%type` is just a trivial subclass of - * `Reducer`, which is uninteresting, but a reducer with a legacy counterpart - * will have a specialization of `legacy_reducer_downcast` whose `type` is - * the corresponding legacy reducer. For example: - * - * template - * struct legacy_reducer_downcast< reducer< op_add > > - * { - * typedef reducer_opadd type; - * }; - * - * - * @tparam Reducer The new-style reducer class whose corresponding legacy - * reducer class is `type`, if there is such a legacy reducer - * class. - */ -template struct legacy_reducer_downcast { - /** The related legacy reducer class. - * - * By default, this is just a trivial subclass of Reducer, but it can be - * overridden in the specialization of legacy_reducer_downcast for - * a reducer that has a corresponding legacy reducers. - */ - struct type : Reducer {}; -}; - -namespace internal { -/// @cond internal - -template struct reducer_set_get { - // sizeof(notchar) != sizeof(char) - struct notchar { - char x[2]; - }; - - // `does_view_define_return_type_for_get_value(View*)` returns `char` if - // `View` defines `return_type_for_get_value`, and `notchar` if it doesn't. - - template struct using_type {}; - - template - static char does_view_define_return_type_for_get_value( - using_type *); - - template - static notchar does_view_define_return_type_for_get_value(...); - - // `VIEW_DOES_DEFINE_RETURN_TYPE_FOR_GET_VALUE` is true if `View` defines - // `return_type_for_get_value`. 
- - enum { - VIEW_DOES_DEFINE_RETURN_TYPE_FOR_GET_VALUE = - sizeof(does_view_define_return_type_for_get_value(0)) == - sizeof(char) - }; - - // `return_type_for_get_value` is `View::return_type_for_get_value` - // if it is defined, and just `Value` otherwise. - - template - struct return_type_for_view_get_value { - typedef Value type; - }; - - template - struct return_type_for_view_get_value { - typedef typename InnerView::return_type_for_get_value type; - }; - - public: - typedef typename return_type_for_view_get_value< - View, VIEW_DOES_DEFINE_RETURN_TYPE_FOR_GET_VALUE>::type - return_type_for_get_value; - - static void move_in(View &view, Value &v) { view.view_move_in(v); } - static void move_out(View &view, Value &v) { view.view_move_out(v); } - - static void set_value(View &view, const Value &v) { - view.view_set_value(v); - } - - static return_type_for_get_value get_value(const View &view) { - return view.view_get_value(); - } -}; - -template struct reducer_set_get { - typedef const Value &return_type_for_get_value; - - static void move_in(Value &view, Value &v) { view = v; } - static void move_out(Value &view, Value &v) { v = view; } - - static void set_value(Value &view, const Value &v) { view = v; } - - static return_type_for_get_value get_value(const Value &view) { - return view; - } -}; - -/// @endcond - -/** Base class defining the data layout that is common to all reducers. - */ -template class reducer_base { - typedef typename Monoid::view_type view_type; - - // This makes the reducer a hyper-object. (Partially initialized in - // the derived reducer_content class.) - // - __cilkrts_hyperobject_base m_base; - - // The monoid is allocated here as raw bytes, and is constructed explicitly - // by a call to the monoid_type::construct() function in the constructor of - // the `reducer` subclass. - // - storage_for_object m_monoid; - - // Used for sanity checking at destruction. - // - void *m_initialThis; - - // The leftmost view comes next. 
It is defined in the derived - // reducer_content class. - - /** @name C-callable wrappers for the C++-coded monoid dispatch functions. - */ - //@{ - - static void reduce_wrapper(void *r, void *lhs, void *rhs); - static void identity_wrapper(void *r, void *view); - static void destroy_wrapper(void *r, void *view); - static void *allocate_wrapper(void *r, size_t bytes); - static void deallocate_wrapper(void *r, void *view); - - //@} - - protected: - /** Constructor. - * - * @param leftmost The address of the leftmost view in the reducer. - */ - reducer_base(char* leftmost) - : m_base{{ - (cilk_reduce_fn_t) &reduce_wrapper, - (cilk_identity_fn_t) &identity_wrapper, - (cilk_destroy_fn_t) &destroy_wrapper, - (cilk_allocate_fn_t) &allocate_wrapper, - (cilk_deallocate_fn_t) &deallocate_wrapper - }, - 0, /* Cilk Plus flags or OpenCilk ID */ - (char*)leftmost - (char*)this, /* __view_offset */ - sizeof(view_type) /* __view_size */ - }, - m_initialThis(this) - { - __cilkrts_hyper_create(&m_base); - } - - /** Destructor. - */ - __CILKRTS_STRAND_STALE(~reducer_base()) { - // Make sure we haven't been memcopy'd or corrupted - assert(this == m_initialThis); - __cilkrts_hyper_destroy(&m_base); - } - - /** Monoid data member. - * - * @return A pointer to the reducer's monoid data member. - */ - Monoid *monoid_ptr() { return &m_monoid.object(); } - - /** Leftmost view data member. - * - * @return A pointer to the reducer's leftmost view data member. - * - * @note This function returns the address of the *leftmost* view, - * which is unique for the lifetime of the reducer. It is - * intended to be used in constructors and destructors. - * Use the reducer::view() function to access the per-strand - * view instance. - */ - view_type *leftmost_ptr() { - char *view_addr = (char *)this + m_base.__view_offset; - return reinterpret_cast(view_addr); - } - - public: - /** @name Access the current view. 
- * - * These functions return a reference to the instance of the reducer's - * view that was created for the current strand of a parallel computation - * (and create it if it doesn't already exist). Note the difference from - * the (private) leftmost_ptr() function, which returns a pointer to the - * _leftmost_ view, which is the same in all strands. - */ - //@{ - - /** Per-strand view instance. - * - * @return A reference to the per-strand view instance. - */ - view_type &view() { - return *static_cast(__cilkrts_hyper_lookup(&m_base)); - } - - /** @copydoc view() - */ - const view_type &view() const { - return const_cast(this)->view(); - } - - //@} - - /** Initial view pointer field. - * - * @internal - * - * @return a reference to the m_initialThis field. - * - * @note This function is provided for "white-box" testing of the - * reducer layout code. There is never any reason for user code - * to call it. - */ - const void *const &initial_this() const { return m_initialThis; } -}; - -template -void reducer_base::reduce_wrapper(void *r, void *lhs, void *rhs) { - Monoid *monoid = static_cast(r)->monoid_ptr(); - monoid->reduce(static_cast(lhs), - static_cast(rhs)); -} - -template -void reducer_base::identity_wrapper(void *r, void *view) { - Monoid *monoid = static_cast(r)->monoid_ptr(); - monoid->identity(static_cast(view)); -} - -template -void reducer_base::destroy_wrapper(void *r, void *view) { - Monoid *monoid = static_cast(r)->monoid_ptr(); - monoid->destroy(static_cast(view)); -} - -template -void *reducer_base::allocate_wrapper(void *r, size_t bytes) { - Monoid *monoid = static_cast(r)->monoid_ptr(); - return monoid->allocate(bytes); -} - -template -void reducer_base::deallocate_wrapper(void *r, void *view) { - Monoid *monoid = static_cast(r)->monoid_ptr(); - monoid->deallocate(static_cast(view)); -} - -/** Base class defining the data members of a reducer. 
- * - * @tparam Aligned The `m_view` data member, and therefore the entire - * structure, are cache-line aligned if this parameter - * is `true'. - */ -template -class reducer_content; - -/** Base class defining the data members of an aligned reducer. - */ -template -class reducer_content : public reducer_base { - typedef typename Monoid::view_type view_type; - - // The leftmost view is defined as raw bytes. It will be constructed - // by the monoid `construct` function. It is cache-aligned, which - // will push it into a new cache line. Furthermore, its alignment causes - // the reducer as a whole to be cache-aligned, which makes the reducer - // size a multiple of a cache line. Since there is nothing in the reducer - // after the view, all this means that the leftmost view gets one or more - // cache lines all to itself, which prevents false sharing. - // - __attribute__((aligned((64)))) char m_leftmost[sizeof(view_type)]; - - protected: - reducer_content() : reducer_base((char *)&m_leftmost) {} -}; - -/** Base class defining the data members of an unaligned reducer. - */ -template -class reducer_content : public reducer_base { - typedef typename Monoid::view_type view_type; ///< The view type. - - // Reserve space for the leftmost view. The view will be allocated at an - // aligned offset in this space at runtime, to guarantee that the view - // will get one or more cache lines all to itself, to prevent false - // sharing. - // - // The number of bytes to reserve is determined as follows: - // * Start with the view size. - // * Round up to a multiple of the cache line size, to get the total size - // of the cache lines that will be dedicated to the view. - // * Add (cache line size - 1) filler bytes to guarantee that the reserved - // area will contain a cache-aligned block of the required cache lines, - // no matter where the reserved area starts. 
- // - char m_leftmost[((sizeof(view_type) + 63UL) & ~63UL) + 63U]; - // View size rounded up to multiple cache lines - - protected: - /** Constructor. Find the first cache-aligned position in the reserved - * area, and pass it to the base constructor as the leftmost view - * address. - */ - reducer_content() - : reducer_base( - (char *)(((std::size_t)&m_leftmost + 63UL) & ~63UL)) {} -}; - -} // namespace internal - -// The __cilkrts_hyperobject_ functions are defined differently depending on -// whether a file is compiled with or without the CILK_STUB option. Therefore, -// reducers compiled in the two modes should be link-time incompatible, so that -// object files compiled with stubbed reducers won't be linked into an -// unstubbed program, or vice versa. We achieve this by putting the reducer -// class definition into the cilk::stub namespace in a stubbed compilation. - -#ifdef CILK_STUB -namespace stub { -#endif - -/** Reducer class. - * - * A reducer is instantiated on a Monoid. The Monoid provides the value - * type, associative reduce function, and identity for the reducer. - * - * @tparam Monoid The monoid class that the reducer is instantiated on. It - * must model the @ref reducers_monoid_concept "monoid - * concept". - * - * @see @ref pagereducers - */ -template -class reducer : public internal::reducer_content { - typedef internal::reducer_content base; - using base::leftmost_ptr; - using base::monoid_ptr; - - public: - typedef Monoid monoid_type; ///< The monoid type. - typedef typename Monoid::value_type value_type; ///< The value type. - typedef typename Monoid::view_type view_type; ///< The view type. - - private: - typedef internal::reducer_set_get set_get; - - reducer(const reducer &); ///< Disallow copying. - reducer &operator=(const reducer &); ///< Disallow assignment. 
- - public: - /** @name Constructors - * - * All reducer constructors call the static `construct()` function of the - * monoid class to construct the reducer's monoid and leftmost view. - * - * The reducer constructor arguments are simply passed through to the - * construct() function. Thus, the constructor parameters accepted by a - * particular reducer class are determined by its monoid class. - */ - //@{ - - /** 0 – 6 const reference parameters. - */ - //@{ - - reducer() { monoid_type::construct(monoid_ptr(), leftmost_ptr()); } - - template reducer(const T1 &x1) { - monoid_type::construct(monoid_ptr(), leftmost_ptr(), x1); - } - - template reducer(const T1 &x1, const T2 &x2) { - monoid_type::construct(monoid_ptr(), leftmost_ptr(), x1, x2); - } - - template - reducer(const T1 &x1, const T2 &x2, const T3 &x3) { - monoid_type::construct(monoid_ptr(), leftmost_ptr(), x1, x2, x3); - } - - template - reducer(const T1 &x1, const T2 &x2, const T3 &x3, const T4 &x4) { - monoid_type::construct(monoid_ptr(), leftmost_ptr(), x1, x2, x3, x4); - } - - template - reducer(const T1 &x1, const T2 &x2, const T3 &x3, const T4 &x4, - const T5 &x5) { - monoid_type::construct(monoid_ptr(), leftmost_ptr(), x1, x2, x3, x4, - x5); - } - - template - reducer(const T1 &x1, const T2 &x2, const T3 &x3, const T4 &x4, - const T5 &x5, const T6 &x6) { - monoid_type::construct(monoid_ptr(), leftmost_ptr(), x1, x2, x3, x4, x5, - x6); - } - - //@} - - /** 1 non-const reference parameter. - */ - //@{ - - template reducer(T1 &x1) { - monoid_type::construct(monoid_ptr(), leftmost_ptr(), x1); - } - - //@} - - /** Destructor. - */ - __CILKRTS_STRAND_STALE(~reducer()) { - leftmost_ptr()->~view_type(); - monoid_ptr()->~monoid_type(); - } - - //@{ - /** Get the monoid. - * - * @return A reference to the monoid object belonging to this reducer. 
- */ - Monoid &monoid() { return *monoid_ptr(); } - - const Monoid &monoid() const { - return const_cast(this)->monoid(); - } - //@} - - //@{ - /** Access the current view. - * - * Return a reference to the instance of the reducer's view that was - * created for the current strand of a parallel computation (and create - * it if it doesn't already exist). - */ - view_type &view() { return base::view(); } - const view_type &view() const { return base::view(); } - //@} - - /** @name Dereference the reducer to get the view. - * - * "Dereferencing" a reducer yields the view for the current strand. The - * view, in turn, acts as a proxy for its contained value, exposing only - * those operations which are consistent with the reducer's monoid. Thus, - * all modifications of the reducer's accumulator variable are written as - * - * *reducer OP ... - * - * or - * - * reducer->func(...) - * - * (The permitted operations on a reducer's accumulator are listed in the - * documentation for that particular kind of reducer.) - * - * @note `*r` is a synonym for `r.view()`. Recommended style is to use - * `*r` (or `r->`) in the common case where code is simply - * updating the accumulator variable wrapped in the view, and to - * use `r.view()` in the unusual case where it is desirable to - * call attention to the view itself. - */ - //@{ - - //@{ - /** Dereference operator. - * - * @return A reference to the per-strand view instance. - */ - view_type &operator*() { return view(); } - view_type const &operator*() const { return view(); } - //@} - - //@{ - /** Pointer operator. - * - * @return A pointer to the per-strand view instance. - */ - view_type *operator->() { return &view(); } - view_type const *operator->() const { return &view(); } - //@} - - //@{ - /** Deprecated view access. - * - * `r()` is a synonym for `*r` which was used with early versions of - * Intel Cilk Plus reducers. `*r` is now the preferred usage. - * - * @deprecated Use operator*() instead of operator()(). 
- * - * @return A reference to the per-strand view instance. - */ - view_type &operator()() { return view(); } - view_type const &operator()() const { return view(); } - //@} - - //@} - - /** @name Set and get the value. - * - * These functions are used to set an initial value for the reducer before - * starting the reduction, or to get the final value after the reduction - * is complete. - * - * @note These functions are completely different from the view - * operations that are made available via operator*() and - * operator->(), which are used to _modify_ the reducer's value - * _during_ the reduction. - * - * @warning These functions _can_ be called at any time, and in - * general, they will refer to the value contained in the view - * for the current strand. However, using them other than to - * set the reduction's initial value or get its final value - * will almost always result in undefined behavior. - */ - //@{ - - /** Move a value into the reducer. - * - * This function is used to set the initial value of the reducer's - * accumulator variable by either copying or _moving_ the value of @a obj - * into it. Moving a value can often be performed in constant time, even - * for large container objects, but has the side effect of leaving the - * value of @a obj undefined. (See the description of the - * @ref move_in_wrapper class for a discussion of moving values.) - * - * @par Usage - * A move_in() call to initialize a reducer is often paired with a - * move_out() call to get its final value: - * - * reducer xr; - * xr.move_in(x); - * … do the reduction … - * xr.move_out(x); - * - * @par Assumptions - * - You cannot assume either that this will function will copy its - * value or that it will move it. - * - You must assume that the value of @a obj will be undefined - * after the call to move_in(). 
- * - You can assume that move_in() will be at least as efficient as - * set_value(), and you should therefore prefer move_in() unless - * you need the value of @a obj to be unchanged after the call. - * (But you should usually prefer the move-in constructor over a - * move_in() call - see the note below.) - * - * @note The behavior of a default constructor followed by move-in - * initialization: - * - * reducer xr; - * xr.move_in(x); - * - * @note is not necessarily the same as a move-in constructor: - * - * reducer xr(move_in(x)); - * - * @note In particular, when @a Type is a container type with a - * non-empty allocator, the move-in constructor will create the - * accumulator variable with the same allocator as the input - * argument @a x, while the default constructor will create the - * accumulator variable with a default allocator. The mismatch of - * allocators in the latter case means that the input argument - * @a x may have to be copied in linear time instead of being - * moved in constant time. - * - * @note Best practice is to prefer the move-in constructor over the - * move-in function unless the move-in function is required for - * some specific reason. - * - * @warning Calling this function other than to set the initial value - * for a reduction will almost always result in undefined - * behavior. - * - * @param obj The object containing the value that will be moved into the - * reducer. - * - * @post The reducer contains the value that was initially in @a obj. - * @post The value of @a obj is undefined. - * - * @see set_value() - */ - void move_in(value_type &obj) { set_get::move_in(view(), obj); } - - /** Move the value out of the reducer. - * - * This function is used to retrieve the final value of the reducer's - * accumulator variable by either copying or _moving_ the value of @a obj - * into it. 
Moving a value can often be performed in constant time, even - * for large container objects, but has the side effect of leaving the - * value of the reducer's accumulator variable undefined. (See the - * description of the @ref move_in_wrapper class for a discussion of - * moving values.) - * - * @par Usage - * A move_in() call to initialize a reducer is often paired with a - * move_out() call to get its final value: - * - * reducer xr; - * xr.move_in(x); - * … do the reduction … - * xr.move_out(x); - * - * @par Assumptions - * - You cannot assume either that this will function will copy its - * value or that it will move it. - * - You must assume that the value of the reducer's accumulator - * variable will be undefined after the call to move_out(). - * - You can assume that move_out() will be at least as efficient as - * get_value(), and you should therefore prefer move_out() unless - * you need the accumulator variable to be preserved after the - * call. - * - * @warning Calling this function other than to retrieve the final - * value of a reduction will almost always result in undefined - * behavior. - * - * @param obj The object that the value of the reducer will be moved into. - * - * @post @a obj contains the value that was initially in the reducer. - * @post The value of the reducer is undefined. - * - * @see get_value() - */ - void move_out(value_type &obj) { set_get::move_out(view(), obj); } - - /** Set the value of the reducer. - * - * This function sets the initial value of the reducer's accumulator - * variable to the value of @a obj. 
- * - * @note The behavior of a default constructor followed by - * initialization: - * - * reducer xr; - * xr.set_value(x); - * - * @note is not necessarily the same as a value constructor: - * - * reducer xr(x); - * - * @note In particular, when @a Type is a container type with a - * non-empty allocator, the value constructor will create the - * accumulator variable with the same allocator as the input - * argument @a x, while the default constructor will create the - * accumulator variable with a default allocator. - * - * @warning Calling this function other than to set the initial value - * for a reduction will almost always result in undefined - * behavior. - * - * @param obj The object containing the value that will be copied into - * the reducer. - * - * @post The reducer contains a copy of the value in @a obj. - * - * @see move_in() - */ - void set_value(const value_type &obj) { set_get::set_value(view(), obj); } - - /** Get the value of the reducer. - * - * This function gets the final value of the reducer's accumulator - * variable. - * - * @warning Calling this function other than to retrieve the final - * value of a reduction will almost always result in undefined - * behavior. - * - * @return A reference to the value contained in the reducer. - * - * @see move_out() - */ - typename set_get::return_type_for_get_value get_value() const { - return set_get::get_value(view()); - } - - //@} - - /** Implicit downcast to legacy reducer wrapper, if any. - * - * @see legacy_reducer_downcast - */ - operator typename legacy_reducer_downcast::type &() { - typedef typename legacy_reducer_downcast::type downcast_type; - return *reinterpret_cast(this); - } - - /** Implicit downcast to legacy reducer wrapper, if any. 
- * - * @see legacy_reducer_downcast - */ - operator const typename legacy_reducer_downcast::type &() const { - typedef typename legacy_reducer_downcast::type downcast_type; - return *reinterpret_cast(this); - } -}; - -#ifdef CILK_STUB -} // namespace stub -using stub::reducer; -#endif - -} // end namespace cilk - -#endif /* __cplusplus */ - -/** @page page_reducers_in_c Creating and Using Reducers in C - * - * @tableofcontents - * - * The Intel Cilk Plus runtime supports reducers written in C as well as in - * C++. The basic logic is the same, but the implementation details are very - * different. The C++ reducer implementation uses templates heavily to create - * very generic components. The C reducer implementation uses macros, which - * are a much blunter instrument. The most immediate consequence is that the - * monoid/view/reducer architecture is mostly implicit rather than explicit - * in C reducers. - * - * @section reducers_c_overview Overview of Using Reducers in C - * - * The basic usage pattern for C reducers is: - * - * 1. Create and initialize a reducer object. - * 2. Tell the Intel Cilk Plus runtime about the reducer. - * 3. Update the value contained in the reducer in a parallel computation. - * 4. Tell the Intel Cilk Plus runtime that you are done with the reducer. - * 5. Retrieve the value from the reducer. - * - * @subsection reducers_c_creation Creating and Initializing a C Reducer - * - * The basic pattern for creating and initializing a reducer object in C is - * - * CILK_C_DECLARE_REDUCER(value-type) reducer-name = - * CILK_C_INIT_REDUCER(value-type, - * reduce-function, - * identity-function, - * destroy-function, - * initial-value); - * - * This is simply an initialized definition of a variable named - * _reducer-name_. 
The @ref CILK_C_DECLARE_REDUCER macro expands to an - * anonymous `struct` declaration for a reducer object containing a view of - * type _value-type_, and the @ref CILK_C_INIT_REDUCER macro expands to a - * struct initializer. - * - * @subsection reducers_c_reduce_func Reduce Functions - * - * The reduce function for a reducer is called when a parallel execution - * strand terminates, to combine the values computed by the terminating - * strand and the strand to its left. It takes three arguments: - * - * - `void* reducer` - the address of the reducer. - * - `void* left` - the address of the value for the left strand. - * - `void* right` - the address of the value for the right (terminating) - * strand. - * - * It must apply the reducer's reduction operation to the `left` and `right` - * values, leaving the result in the `left` value. The `right` value is - * undefined after the reduce function call. - * - * @subsection reducers_c_identity_func Identity Functions - * - * The identity function for a reducer is called when a parallel execution - * strand begins, to initialize its value to the reducer's identity value. It - * takes two arguments: - * - * - `void* reducer` - the address of the reducer. - * - `void* v` - the address of a freshly allocated block of memory of size - * `sizeof(value-type)`. - * - * It must initialize the memory pointed to by `v` so that it contains the - * reducer's identity value. - * - * @subsection reducers_c_destroy_func Destroy Functions - * - * The destroy function for a reducer is called when a parallel execution - * strand terminates, to do any necessary cleanup before its value is - * deallocated. It takes two arguments: - * - * - `void* reducer` - the address of the reducer. - * - `void* p` - the address of the value for the terminating strand. - * - * It must release any resources belonging to the value pointed to by `p`, to - * avoid a resource leak when the memory containing the value is deallocated. 
- * - * A null pointer can be used for the destructor function if the reducer's - * values do not need any cleanup. - * - * @subsection reducers_c_register Tell the Intel Cilk Plus Runtime About the - * Reducer - * - * Call the @ref CILK_C_REGISTER_REDUCER macro to register the reducer with - * the Intel Cilk Plus runtime: - * - * CILK_C_REGISTER_REDUCER(reducer-name); - * - * The runtime will manage reducer values for all registered reducers when - * parallel execution strands begin and end. - * - * @subsection reducers_c_update Update the Value Contained in the Reducer - * - * The @ref REDUCER_VIEW macro returns a reference to the reducer's value for - * the current parallel strand: - * - * REDUCER_VIEW(reducer-name) = REDUCER_VIEW(reducer-name) OP x; - * - * C++ reducer views restrict access to the wrapped value so that it can only - * be modified in ways consistent with the reducer's operation. No such - * protection is provided for C reducers. It is entirely the responsibility - * of the user to avoid modifying the value in any inappropriate way. - * - * @subsection c_reducers_unregister Tell the Intel Cilk Plus Runtime That You - * Are Done with the Reducer - * - * When the parallel computation is complete, call the @ref - * CILK_C_UNREGISTER_REDUCER macro to unregister the reducer with the - * Intel Cilk Plus runtime: - * - * CILK_C_UNREGISTER_REDUCER(reducer-name); - * - * The runtime will stop managing reducer values for the reducer. - * - * @subsection c_reducers_retrieve Retrieve the Value from the Reducer - * - * When the parallel computation is complete, use the @ref REDUCER_VIEW macro - * to retrieve the final value computed by the reducer. - * - * @subsection reducers_c_example_custom Example - Creating and Using a - * Custom C Reducer - * - * The `IntList` type represents a simple list of integers. 
- * - * struct _intListNode { - * int value; - * _intListNode* next; - * } IntListNode; - * typedef struct { IntListNode* head; IntListNode* tail; } IntList; - * - * // Initialize a list to be empty - * void IntList_init(IntList* list) { list->head = list->tail = 0; } - * - * // Append an integer to the list - * void IntList_append(IntList* list, int x) - * { - * IntListNode* node = (IntListNode*) malloc(sizeof(IntListNode)); - * if (list->tail) list->tail->next = node; else list->head = node; - * list->tail = node; - * } - * - * // Append the right list to the left list, and leave the right list - * // empty - * void IntList_concat(IntList* left, IntList* right) - * { - * if (left->head) { - * left->tail->next = right->head; - * if (right->tail) left->tail = right->tail; - * } - * else { - * *left = *right; - * } - * IntList_init(*right); - * } - * - * This code creates a reducer that supports creating an `IntList` by - * appending values to it. - * - * void identity_IntList(void* reducer, void* list) - * { - * IntList_init((IntList*)list); - * } - * - * void reduce_IntList(void* reducer, void* left, void* right) - * { - * IntList_concat((IntList*)left, (IntList*)right); - * } - * - * CILK_C_DECLARE_REDUCER(IntList) my_list_int_reducer = - * CILK_C_INIT_REDUCER(IntList, - * reduce_int_list, - * identity_int_list, - * 0); - * // Initial value omitted // - * ListInt_init(&REDUCER_VIEW(my_int_list_reducer)); - * - * CILK_C_REGISTER_REDUCER(my_int_list_reducer); - * cilk_for (int i = 0; i != n; ++i) { - * IntList_append(&REDUCER_VIEW(my_int_list_reducer), a[i]); - * } - * CILK_C_UNREGISTER_REDUCER(my_int_list_reducer); - * - * IntList result = REDUCER_VIEW(my_int_list_reducer); - * - * @section reducers_c_predefined Predefined C Reducers - * - * Some of the predefined reducer classes in the Intel Cilk Plus library come - * with a set of predefined macros to provide the same capabilities in C. 
In - * general, two macros are provided for each predefined reducer family: - * - * - `CILK_C_REDUCER_operation(reducer-name, type-name, initial-value)` - - * Declares a reducer object named _reducer-name_ with initial value - * _initial-value_ to perform a reduction using the _operation_ on values - * of the type specified by _type-name_. This is the equivalent of the - * general code described in @ref reducers_c_creation : - * - * CILK_C_DECLARE_REDUCER(type) reducer-name = - * CILK_C_INIT_REDUCER(type, ..., initial-value); - * - * where _type_ is the C type corresponding to _type_name_. See @ref - * reducers_c_type_names below for the _type-names_ that you can use. - * - * - `CILK_C_REDUCER_operation_TYPE(type-name)` - Expands to the `typedef` - * name for the type of the reducer object declared by - * `CILK_C_REDUCER_operation(reducer-name, type-name, initial-value)`. - * - * See @ref reducers_c_example_predefined. - * - * The predefined C reducers are: - * - * | Operation | Name | Documentation | - * |-------------------|---------------|-------------------------------| - * | addition | `OPADD` | @ref ReducersAdd | - * | bitwise AND | `OPAND` | @ref ReducersAnd | - * | bitwise OR | `OPOR` | @ref ReducersOr | - * | bitwise XOR | `OPXOR` | @ref ReducersXor | - * | multiplication | `OPMUL` | @ref ReducersMul | - * | minimum | `MIN` | @ref ReducersMinMax | - * | minimum & index | `MIN_INDEX` | @ref ReducersMinMax | - * | maximum | `MAX` | @ref ReducersMinMax | - * | maximum & index | `MAX_INDEX` | @ref ReducersMinMax | - * - * @subsection reducers_c_type_names Numeric Type Names - * - * The type and function names created by the C reducer definition macros - * incorporate both the reducer kind (`opadd`, `opxor`, etc.) and the value - * type of the reducer (`int`, `double`, etc.). The value type is represented - * by a _numeric type name_ string. 
The types supported in C reducers, and - * their corresponding numeric type names, are given in the following table: - * - * | Type | Numeric Type Name | - * |-----------------------|-------------------------------| - * | `char` | `char` | - * | `unsigned char` | `uchar` | - * | `signed char` | `schar` | - * | `wchar_t` | `wchar_t` | - * | `short` | `short` | - * | `unsigned short` | `ushort` | - * | `int` | `int` | - * | `unsigned int` | `uint` | - * | `unsigned int` | `unsigned` (alternate name) | - * | `long` | `long` | - * | `unsigned long` | `ulong` | - * | `long long` | `longlong` | - * | `unsigned long long` | `ulonglong` | - * | `float` | `float` | - * | `double` | `double` | - * | `long double` | `longdouble` | - * - * @subsection reducers_c_example_predefined Example - Using a Predefined C - * Reducer - * - * To compute the sum of all the values in an array of `unsigned int`: - * - * CILK_C_REDUCER_OPADD(sum, uint, 0); - * CILK_C_REGISTER_REDUCER(sum); - * cilk_for(int i = 0; i != n; ++i) { - * REDUCER_VIEW(sum) += a[i]; - * } - * CILK_C_UNREGISTER_REDUCER(sum); - * printf("The sum is %u\n", REDUCER_VIEW(sum)); - */ - -/** @name C language reducer macros - * - * These macros are used to declare and work with reducers in C code. - * - * @see @ref page_reducers_in_c - */ -//@{ - -/// @cond internal - -/** @name Compound identifier macros. - * - * These macros are used to construct an identifier by concatenating two or - * three identifiers. - */ -//@{ - -/** Expand to an identifier formed by concatenating two identifiers. - */ -#define __CILKRTS_MKIDENT(a, b) __CILKRTS_MKIDENT_IMP(a, b, ) - -/** Expand to an identifier formed by concatenating three identifiers. - */ -#define __CILKRTS_MKIDENT3(a, b, c) __CILKRTS_MKIDENT_IMP(a, b, c) - -/** Helper macro to do the concatenation. - */ -#define __CILKRTS_MKIDENT_IMP(a, b, c) a##b##c - -//@} - -/** Compiler-specific keyword for the "type of" operator. 
- */ -#if defined(__GNUC__) && !defined(__INTEL_COMPILER) -#define _Typeof __typeof__ -#endif - -/** @name Predefined reducer function declaration macros. - * - * These macros are used to create the function headers for the identity, - * reduction, and destructor functions for a builtin reducer family. The - * macro can be followed by a semicolon to create a declaration, or by a - * brace-enclosed body to create a definition. - */ -//@{ - -/** Create an identity function header. - * - * @note The name of the function's value pointer parameter will always be `v`. - * - * @param name The reducer family name. - * @param tn The type name. - */ -#define __CILKRTS_DECLARE_REDUCER_IDENTITY(name, tn) \ - void __CILKRTS_MKIDENT3(name, _identity_, tn)(void *key, void *v) - -/** Create a reduction function header. - * - * @param name The reducer family name. - * @param tn The type name. - * @param l The name to use for the function's left value pointer parameter. - * @param r The name to use for the function's right value pointer - * parameter. - */ -#define __CILKRTS_DECLARE_REDUCER_REDUCE(name, tn, l, r) \ - void __CILKRTS_MKIDENT3(name, _reduce_, tn)(void *key, void *l, void *r) - -/** Create a destructor function header. - * - * @param name The reducer family name. - * @param tn The type name. - * @param p The name to use for the function's value pointer parameter. - */ -#define __CILKRTS_DECLARE_REDUCER_DESTROY(name, tn, p) \ - void __CILKRTS_MKIDENT3(name, _destroy_, tn)(void *key, void *p) - -//@} - -/// @endcond - -/*************************************************************************** - * Real implementation - ***************************************************************************/ - -/** Declaration of a C reducer structure type. - * - * This macro expands into an anonymous structure declaration for a C reducer - * structure which contains a @a Type value. 
For example: - * - * CILK_C_DECLARE_REDUCER(int) my_add_int_reducer = - * CILK_C_INIT_REDUCER(int, …); - * - * @param Type The type of the value contained in the reducer object. - * - * @see @ref reducers_c_creation - */ -#define CILK_C_DECLARE_REDUCER(Type) \ - struct { \ - __cilkrts_hyperobject_base __cilkrts_hyperbase; \ - Type __attribute__((aligned(64))) value; \ - } - -/** Initializer for a C reducer structure. - * - * This macro expands into a brace-enclosed structure initializer for a C - * reducer structure that was declared with - * `CILK_C_DECLARE_REDUCER(Type)`. For example: - * - * CILK_C_DECLARE_REDUCER(int) my_add_int_reducer = - * CILK_C_INIT_REDUCER(int, - * add_int_reduce, - * add_int_identity, - * 0, - * 0); - * - * @param Type The type of the value contained in the reducer object. Must - * be the same as the @a Type argument of the - * CILK_C_DECLARE_REDUCER macro call that created the - * reducer. - * @param Reduce The address of the @ref reducers_c_reduce_func - * "reduce function" for the reducer. - * @param Identity The address of the @ref reducers_c_identity_func - * "identity function" for the reducer. - * @param Destroy The address of the @ref reducers_c_destroy_func - * "destroy function" for the reducer. - * @param ... The initial value for the reducer. (A single expression if - * @a Type is a scalar type; a list of values if @a Type is a - * struct or array type.) - * - * @see @ref reducers_c_creation - */ - -#define CILK_C_INIT_REDUCER(Type, Reduce, Identity, Destroy, ...) \ - { \ - {{Reduce, Identity, Destroy, __cilkrts_hyper_alloc, \ - __cilkrts_hyper_dealloc}, \ - 0, \ - 64, /* TODO: Assert that this really is 64. */ \ - sizeof(Type)}, \ - __VA_ARGS__ \ - } - -/** Register a reducer with the Intel Cilk Plus runtime. - * - * The runtime will manage reducer values for all registered reducers when - * parallel execution strands begin and end. 
For example: - * - * CILK_C_REGISTER_REDUCER(my_add_int_reducer); - * cilk_for (int i = 0; i != n; ++i) { - * … - * } - * - * @param Expr The reducer to be registered. - * - * @see @ref page_reducers_in_c - */ -#define CILK_C_REGISTER_REDUCER(Expr) \ - __cilkrts_hyper_create(&(Expr).__cilkrts_hyperbase) - -/** Unregister a reducer with the Intel Cilk Plus runtime. - * - * The runtime will stop managing reducer values for a reducer after it is - * unregistered. For example: - * - * cilk_for (int i = 0; i != n; ++i) { - * … - * } - * CILK_C_UNREGISTER_REDUCER(my_add_int_reducer); - * - * @param Expr The reducer to be unregistered. - * - * @see @ref page_reducers_in_c - */ -#define CILK_C_UNREGISTER_REDUCER(Expr) \ - __cilkrts_hyper_destroy(&(Expr).__cilkrts_hyperbase) - -/** Get the current view for a reducer. - * - * The `REDUCER_VIEW(reducer-name)` returns a reference to the reducer's - * value for the current parallel strand. This can be used to initialize the - * value of the reducer before it is used, to modify the value of the reducer - * on the current parallel strand, or to retrieve the final value of the - * reducer at the end of the parallel computation. - * - * REDUCER_VIEW(my_add_int_reducer) = REDUCER_VIEW(my_add_int_reducer) + x; - * - * @note C++ reducer views restrict access to the wrapped value so that it - * can only be modified in ways consistent with the reducer's operation. No - * such protection is provided for C reducers. It is entirely the - * responsibility of the user to refrain from modifying the value in any - * inappropriate way. - * - * @param Expr The reducer whose value is to be returned. 
- * - * @see @ref page_reducers_in_c - */ -#define REDUCER_VIEW(Expr) \ - (*(_Typeof((Expr).value) *)__cilkrts_hyper_lookup( \ - &(Expr).__cilkrts_hyperbase)) - -//@} C language reducer macros - -#undef __CILKRTS_STRAND_STALE - -#endif // CILK_REDUCER_H_INCLUDED diff --git a/include/cilk/reducer_file.h b/include/cilk/reducer_file.h deleted file mode 100644 index 73aacda8..00000000 --- a/include/cilk/reducer_file.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (C) 2009-2018, Intel Corporation - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY - * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * ********************************************************************* - * - * PLEASE NOTE: This file is a downstream copy of a file maintained in - * a repository at cilkplus.org. Changes made to this file that are not - * submitted through the contribution process detailed at - * http://www.cilkplus.org/submit-cilk-contribution will be lost the next - * time that a new version is released. Changes only submitted to the - * GNU compiler collection or posted to the git repository at - * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime are - * not tracked. - * - * We welcome your contributions to this open source project. Thank you - * for your assistance in helping us improve Cilk Plus. - * - */ - - diff --git a/include/cilk/reducer_list.h b/include/cilk/reducer_list.h deleted file mode 100644 index 73ff2247..00000000 --- a/include/cilk/reducer_list.h +++ /dev/null @@ -1,1146 +0,0 @@ -/* reducer_list.h -*- C++ -*- - * - * Copyright (C) 2009-2018, Intel Corporation - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY - * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * ********************************************************************* - * - * PLEASE NOTE: This file is a downstream copy of a file maintained in - * a repository at cilkplus.org. Changes made to this file that are not - * submitted through the contribution process detailed at - * http://www.cilkplus.org/submit-cilk-contribution will be lost the next - * time that a new version is released. Changes only submitted to the - * GNU compiler collection or posted to the git repository at - * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime are - * not tracked. - * - * We welcome your contributions to this open source project. Thank you - * for your assistance in helping us improve Cilk Plus. 
- */ - -/** @file reducer_list.h - * - * @brief Defines classes for parallel list creation by appending or - * prepending reducers. - * - * @ingroup ReducersList - * - * @see ReducersList - */ - -#ifndef REDUCER_LIST_H_INCLUDED -#define REDUCER_LIST_H_INCLUDED - -#include -#include - -/** @defgroup ReducersList List Reducers - * - * List-append and list-prepend reducers create standard lists by - * concatenating a set of lists or values in parallel. - * - * @ingroup Reducers - * - * You should be familiar with @ref pagereducers "Intel(R) Cilk(TM) Plus reducers" - * (from file `reducers.md`) and particularly with @ref reducers_using, before - * trying to use the information in this file. - * - * @section redlist_usage Usage Example - * - * // Create a list containing the labels of the nodes of a tree in - * // "inorder" (left subtree, root, right subtree). - * - * struct Tree { Tree* left; Tree* right; string label; ... }; - * - * list x; - * cilk::reducer< cilk::op_list_append > xr(cilk::move_in(x)); - * collect_labels(tree, xr); - * xr.move_out(x); - * - * void collect_labels(Tree* node, - * cilk::reducer< cilk::op_list_append >& xr) - * { - * if (node) { - * cilk_spawn collect_labels(node->left, xr); - * xr->push_back(node->label); - * collect_labels(node->right, xr); - * cilk_sync; - * } - * } - * - * @section redlist_monoid The Monoid - * - * @subsection redlist_monoid_values Value Set - * - * The __value set__ of a list reducer is the set of values of the class - * `std::list`, which we refer to as the reducer's _list - * type_. 
- * - * @subsection redlist_monoid_operator Operator - * - * The operator of a list-append reducer is defined as - * - * x CAT y == (every element of x, followed by every element of y) - * - * The operator of a list-prepend reducer is defined as - * - * x RCAT y == (every element of y, followed by every element of x) - * - * @subsection redlist_monoid_identity Identity - * - * The identity value of a list reducer is the empty list, which is the value - * of the expression `std::list([allocator])`. - * - * @section redlist_operations Operations - * - * In the operation descriptions below, the type name `List` refers to the - * reducer's string type, `std::list`. - * - * @subsection redlist_constructors Constructors - * - * Any argument list which is valid for a `std::list` constructor is valid for - * a list reducer constructor. The usual move-in constructor is also provided: - * - * reducer(move_in(List& variable)) - * - * A list reducer with no constructor arguments, or with only an allocator - * argument, will initially contain the identity value, an empty list. 
- * - * @subsection redlist_get_set Set and Get - * - * r.set_value(const List& value) - * const List& = r.get_value() const - * r.move_in(List& variable) - * r.move_out(List& variable) - * - * @subsection redlist_view_ops View Operations - * - * The view of a list-append reducer provides the following member functions: - * - * void push_back(const Type& element) - * void insert_back(List::size_type n, const Type& element) - * template void insert_back(Iter first, Iter last) - * void splice_back(List& x) - * void splice_back(List& x, List::iterator i) - * void splice_back(List& x, List::iterator first, List::iterator last) - * - * The view of a list-prepend reducer provides the following member functions: - * - * void push_front(const Type& element) - * void insert_front(List::size_type n, const Type& element) - * template void insert_front(Iter first, Iter last) - * void splice_front(List& x) - * void splice_front(List& x, List::iterator i) - * void splice_front(List& x, List::iterator first, List::iterator last) - * - * The `push_back` and `push_front` functions are the same as the - * corresponding `std::list` functions. The `insert_back`, `splice_back`, - * `insert_front`, and `splice_front` functions are the same as the - * `std::list` `insert` and `splice` functions, with the first parameter - * fixed to the end or beginning of the list, respectively. - * - * @section redlist_performance Performance Considerations - * - * An efficient reducer requires that combining the values of two views (using - * the view `reduce()` function) be a constant-time operations. Two lists can - * be merged in constant time using the `splice()` function if they have the - * same allocator. Therefore, the lists for new views are created (by the view - * identity constructor) using the same allocator as the list that was created - * when the reducer was constructed. 
- * - * The performance of adding elements to a list reducer depends on the view - * operations that are used: - * - * * The `push` functions add a single element to the list, and therefore - * take constant time. - * * An `insert` function that inserts _N_ elements adds each of them - * individually, and therefore takes _O(N)_ time. - * * A `splice` function that inserts _N_ elements just adjusts a couple of - * pointers, and therefore takes constant time, _if the splice is from a - * list with the same allocator as the reducer_. Otherwise, it is - * equivalent to an `insert`, and takes _O(N)_ time. - * - * This means that for best performance, if you will be adding elements to a - * list reducer in batches, you should `splice` them from a list having the - * same allocator as the reducer. - * - * The reducer `move_in` and `move_out` functions do a constant-time `swap` if - * the variable has the same allocator as the reducer, and a linear-time copy - * otherwise. - * - * Note that the allocator of a list reducer is determined when the reducer is - * constructed. The following two examples may have very different behavior: - * - * list a_list; - * - * reducer< list_append reducer1(move_in(a_list)); - * ... parallel computation ... - * reducer1.move_out(a_list); - * - * reducer< list_append reducer2; - * reducer2.move_in(a_list); - * ... parallel computation ... - * reducer2.move_out(a_list); - * - * * `reducer1` will be constructed with the same allocator as `a_list`, - * because the list was specified in the constructor. The `move_in` - * and `move_out` can therefore be done with a `swap` in constant time. - * * `reducer2` will be constructed with a _default_ allocator, - * "`Allocator()`", which may or may not be the same as the allocator of - * `a_list`. Therefore, the `move_in` and `move_out` may have to be done - * with a copy in _O(N)_ time. - * - * (All instances of an allocator type with no internal state (like - * `std::allocator`) are "the same". 
You only need to worry about the "same - * allocator" issue when you create list reducers with custom allocator types.) - * - * @section redlist_types Type and Operator Requirements - * - * `std::list` must be a valid type. - */ - - -namespace cilk { - -namespace internal { - -/** @ingroup ReducersList */ -//@{ - -/** Base class for list-append and prepend view classes. - * - * @note This class provides the definitions that are required for a class - * that will be used as the parameter of a @ref list_monoid_base - * specialization. - * - * @tparam Type The list element type (not the list type). - * @tparam Allocator The list's allocator class. - * - * @see ReducersList - * @see list_monoid_base - */ -template -class list_view_base -{ -protected: - /// The type of the contained list. - typedef std::list list_type; - - /// The list accumulator variable. - list_type m_value; - -public: - - /** @name Monoid support. - */ - //@{ - - /// Required by @ref monoid_with_view - typedef list_type value_type; - - /// Required by @ref list_monoid_base - Allocator get_allocator() const - { - return m_value.get_allocator(); - } - - //@} - - - /** @name Constructors. - */ - //@{ - - /// Standard list constructor. - explicit list_view_base(const Allocator& a = Allocator()) : m_value(a) {} - explicit list_view_base( - typename list_type::size_type n, - const Type& value = Type(), - const Allocator& a = Allocator() ) : m_value(n, value, a) {} - template - list_view_base(Iter first, Iter last, const Allocator& a = Allocator()) : - m_value(first, last, a) {} - list_view_base(const list_type& list) : m_value(list) {} - - /// Move-in constructor. - explicit list_view_base(move_in_wrapper w) - : m_value(w.value().get_allocator()) - { - m_value.swap(w.value()); - } - - //@} - - /** @name Reducer support. - */ - //@{ - - /// Required by reducer::move_in() - void view_move_in(value_type& v) - { - if (m_value.get_allocator() == v.get_allocator()) - // Equal allocators. Do a (fast) swap. 
- m_value.swap(v); - else - // Unequal allocators. Do a (slow) copy. - m_value = v; - v.clear(); - } - - /// Required by reducer::move_out() - void view_move_out(value_type& v) - { - if (m_value.get_allocator() == v.get_allocator()) - // Equal allocators. Do a (fast) swap. - m_value.swap(v); - else - // Unequal allocators. Do a (slow) copy. - v = m_value; - m_value.clear(); - } - - /// Required by reducer::set_value() - void view_set_value(const value_type& v) { m_value = v; } - - /// Required by reducer::get_value() - value_type const& view_get_value() const { return m_value; } - - /// Type returned by view_get_value. - typedef value_type const& return_type_for_get_value; - - // Required by legacy wrapper get_reference() - value_type & view_get_reference() { return m_value; } - value_type const& view_get_reference() const { return m_value; } - - //@} -}; - - -/** Base class for list-append and prepend monoid classes. - * - * The key to efficient reducers is that the `identity` operation, which - * creates a new per-strand view, and the `reduce` operation, which combines - * two per-strand views, must be constant-time operations. Two lists can be - * concatenated in constant time only if they have the same allocator. - * Therefore, all the per-strand list accumulator variables must be created - * with the same allocator as the leftmost view list. - * - * This means that a list reduction monoid must have a copy of the allocator - * of the leftmost view's list, so that it can use it in the `identity` - * operation. This, in turn, requires that list reduction monoids have a - * specialized `construct()` function, which constructs the leftmost view - * before the monoid, and then passes the leftmost view's allocator to the - * monoid constructor. - * - * @tparam View The list-append or prepend view class. - * @tparam Align If `false` (the default), reducers instantiated on this - * monoid will be naturally aligned (the Intel Cilk Plus library 1.0 - * behavior). 
If `true`, reducers instantiated on this monoid - * will be cache-aligned for binary compatibility with - * reducers in Intel Cilk Plus library version 0.9. - * - * @see ReducersList - * @see list_view_base - */ -template -class list_monoid_base : public monoid_with_view -{ - typedef typename View::value_type list_type; - typedef typename list_type::allocator_type allocator_type; - typedef provisional_guard view_guard; - - allocator_type m_allocator; - -public: - - /** Constructor. - * - * There is no default constructor for list monoids, because the allocator - * must always be specified. - * - * @param allocator The list allocator to be used when - * identity-constructing new views. - */ - list_monoid_base(const allocator_type& allocator = allocator_type()) : - m_allocator(allocator) {} - - /** Creates an identity view. - * - * List view identity constructors take the list allocator as an argument. - * - * @param v The address of the uninitialized memory in which the view - * will be constructed. - */ - void identity(View *v) const { ::new((void*) v) View(m_allocator); } - - /** @name construct functions - * - * All `construct()` functions first construct the leftmost view, using - * the optional @a x1, @a x2, and @a x3 arguments that were passed in from - * the reducer constructor. They then call the view's `get_allocator()` - * function to get the list allocator from its contained list, and pass it - * to the monoid constructor. 
- */ - //@{ - - template - static void construct(Monoid* monoid, View* view) - { - view_guard vg( new((void*) view) View() ); - vg.confirm_if( new((void*) monoid) Monoid(view->get_allocator()) ); - } - - template - static void construct(Monoid* monoid, View* view, const T1& x1) - { - view_guard vg( new((void*) view) View(x1) ); - vg.confirm_if( new((void*) monoid) Monoid(view->get_allocator()) ); - } - - template - static void construct(Monoid* monoid, View* view, - const T1& x1, const T2& x2) - { - view_guard vg( new((void*) view) View(x1, x2) ); - vg.confirm_if( new((void*) monoid) Monoid(view->get_allocator()) ); - } - - template - static void construct(Monoid* monoid, View* view, - const T1& x1, const T2& x2, const T3& x3) - { - view_guard vg( new((void*) view) View(x1, x2, x3) ); - vg.confirm_if( new((void*) monoid) Monoid(view->get_allocator()) ); - } - - //@} -}; - -//@} - -} // namespace internal - - -/** @ingroup ReducersList */ -//@{ - -/** The list-append reducer view class. - * - * This is the view class for reducers created with - * `cilk::reducer< cilk::op_list_append >`. It holds the - * accumulator variable for the reduction, and allows only append operations - * to be performed on it. - * - * @note The reducer "dereference" operation (`reducer::operator *()`) - * yields a reference to the view. Thus, for example, the view class's - * `push_back` operation would be used in an expression like - * `r->push_back(a)`, where `r` is a list-append reducer variable. - * - * @tparam Type The list element type (not the list type). - * @tparam Allocator The list allocator type. - * - * @see ReducersList - * @see op_list_append - */ -template ::allocator_type> -class op_list_append_view : public internal::list_view_base -{ - typedef internal::list_view_base base; - typedef std::list list_type; - typedef typename list_type::iterator iterator; - - iterator end() { return this->m_value.end(); } - -public: - - /** @name Constructors. 
- * - * All op_list_append_view constructors simply pass their arguments on to - * the @ref internal::list_view_base base class constructor. - * - * @ref internal::list_view_base supports all the std::list constructor - * forms, as well as the reducer move_in constructor form. - */ - //@{ - - op_list_append_view() : base() {} - - template - op_list_append_view(const T1& x1) : base(x1) {} - - template - op_list_append_view(const T1& x1, const T2& x2) : base(x1, x2) {} - - template - op_list_append_view(const T1& x1, const T2& x2, const T3& x3) : - base(x1, x2, x3) {} - - //@} - - /** @name View modifier operations. - */ - //@{ - - /** Adds an element at the end of the list. - * - * This is equivalent to `list.push_back(element)` - */ - void push_back(const Type& element) - { this->m_value.push_back(element); } - - /** Inserts elements at the end of the list. - * - * This is equivalent to `list.insert(list.end(), n, element)` - */ - void insert_back(typename list_type::size_type n, const Type& element) - { this->m_value.insert(end(), n, element); } - - /** Inserts elements at the end of the list. - * - * This is equivalent to `list.insert(list.end(), first, last)` - */ - template - void insert_back(Iter first, Iter last) - { this->m_value.insert(end(), first, last); } - - /** Splices elements at the end of the list. - * - * This is equivalent to `list.splice(list.end(), x)` - */ - void splice_back(list_type& x) { - if (x.get_allocator() == this->m_value.get_allocator()) - this->m_value.splice(end(), x); - else { - insert_back(x.begin(), x.end()); - x.clear(); - } - } - - /** Splices elements at the end of the list. - * - * This is equivalent to `list.splice(list.end(), x, i)` - */ - void splice_back(list_type& x, iterator i) { - if (x.get_allocator() == this->m_value.get_allocator()) - this->m_value.splice(end(), x, i); - else { - push_back(*i); - x.erase(i); - } - } - - /** Splices elements at the end of the list. 
- * - * This is equivalent to `list.splice(list.end(), x, first, last)` - */ - void splice_back(list_type& x, iterator first, iterator last) { - if (x.get_allocator() == this->m_value.get_allocator()) - this->m_value.splice(end(), x, first, last); - else { - insert_back(first, last); - x.erase(first, last); - } - } - - //@} - - /** Reduces the views of two strands. - * - * This function is invoked by the @ref op_list_append monoid to combine - * the views of two strands when the right strand merges with the left - * one. It appends the value contained in the right-strand view to the - * value contained in the left-strand view, and leaves the value in the - * right-strand view undefined. - * - * @param right A pointer to the right-strand view. (`this` points to - * the left-strand view.) - * - * @note Used only by the @ref op_list_append monoid to implement the - * monoid reduce operation. - */ - void reduce(op_list_append_view* right) - { - this->m_value.splice(end(), right->m_value); - } -}; - - -/** The list-prepend reducer view class. - * - * This is the view class for reducers created with - * `cilk::reducer< cilk::op_list_prepend >`. It holds the - * accumulator variable for the reduction, and allows only prepend operations - * to be performed on it. - * - * @note The reducer "dereference" operation (`reducer::operator *()`) - * yields a reference to the view. Thus, for example, the view class's - * `push_front` operation would be used in an expression like - * `r->push_front(a)`, where `r` is a list-prepend reducer variable. - * - * @tparam Type The list element type (not the list type). - * @tparam Allocator The list allocator type. 
- * - * @see ReducersList - * @see op_list_prepend - */ -template ::allocator_type> -class op_list_prepend_view : public internal::list_view_base -{ - typedef internal::list_view_base base; - typedef std::list list_type; - typedef typename list_type::iterator iterator; - - iterator begin() { return this->m_value.begin(); } - -public: - - /** @name Constructors. - * - * All op_list_prepend_view constructors simply pass their arguments on to - * the @ref internal::list_view_base base class constructor. - * - * @ref internal::list_view_base supports all the std::list constructor - * forms, as well as the reducer move_in constructor form. - * - */ - //@{ - - op_list_prepend_view() : base() {} - - template - op_list_prepend_view(const T1& x1) : base(x1) {} - - template - op_list_prepend_view(const T1& x1, const T2& x2) : base(x1, x2) {} - - template - op_list_prepend_view(const T1& x1, const T2& x2, const T3& x3) : - base(x1, x2, x3) {} - - //@} - - /** @name View modifier operations. - */ - //@{ - - /** Adds an element at the beginning of the list. - * - * This is equivalent to `list.push_front(element)` - */ - void push_front(const Type& element) - { this->m_value.push_front(element); } - - /** Inserts elements at the beginning of the list. - * - * This is equivalent to `list.insert(list.begin(), n, element)` - */ - void insert_front(typename list_type::size_type n, const Type& element) - { this->m_value.insert(begin(), n, element); } - - /** Inserts elements at the beginning of the list. - * - * This is equivalent to `list.insert(list.begin(), first, last)` - */ - template - void insert_front(Iter first, Iter last) - { this->m_value.insert(begin(), first, last); } - - /** Splices elements at the beginning of the list. 
- * - * This is equivalent to `list.splice(list.begin(), x)` - */ - void splice_front(list_type& x) { - if (x.get_allocator() == this->m_value.get_allocator()) - this->m_value.splice(begin(), x); - else { - insert_front(x.begin(), x.begin()); - x.clear(); - } - } - - /** Splices elements at the beginning of the list. - * - * This is equivalent to `list.splice(list.begin(), x, i)` - */ - void splice_front(list_type& x, iterator i) { - if (x.get_allocator() == this->m_value.get_allocator()) - this->m_value.splice(begin(), x, i); - else { - push_front(*i); - x.erase(i); - } - } - - /** Splices elements at the beginning of the list. - * - * This is equivalent to `list.splice(list.begin(), x, first, last)` - */ - void splice_front(list_type& x, iterator first, iterator last) { - if (x.get_allocator() == this->m_value.get_allocator()) - this->m_value.splice(begin(), x, first, last); - else { - insert_front(first, last); - x.erase(first, last); - } - } - - //@} - - /** Reduces the views of two strands. - * - * This function is invoked by the @ref op_list_prepend monoid to combine - * the views of two strands when the right strand merges with the left - * one. It prepends the value contained in the right-strand view to the - * value contained in the left-strand view, and leaves the value in the - * right-strand view undefined. - * - * @param right A pointer to the right-strand view. (`this` points to - * the left-strand view.) - * - * @note Used only by the @ref op_list_prepend monoid to implement the - * monoid reduce operation. - */ - /** Reduce operation. - * - * Required by @ref monoid_base. - */ - void reduce(op_list_prepend_view* right) - { - this->m_value.splice(begin(), right->m_value); - } -}; - - - -/** Monoid class for list-append reductions. Instantiate the cilk::reducer - * template class with a op_list_append monoid to create a list-append reducer - * class. 
For example, to create a list of strings: - * - * cilk::reducer< cilk::op_list_append > r; - * - * @tparam Type The list element type (not the list type). - * @tparam Alloc The list allocator type. - * @tparam Align If `false` (the default), reducers instantiated on this - * monoid will be naturally aligned (the Intel Cilk Plus library 1.0 - * behavior). If `true`, reducers instantiated on this monoid - * will be cache-aligned for binary compatibility with - * reducers in Intel Cilk Plus library version 0.9. - * - * @see ReducersList - * @see op_list_append_view - */ -template ::allocator_type, - bool Align = false> -struct op_list_append : - public internal::list_monoid_base, Align> -{ - /// Construct with default allocator. - op_list_append() {} - /// Construct with specified allocator. - op_list_append(const Allocator& alloc) : - internal::list_monoid_base, Align>(alloc) {} -}; - -/** Monoid class for list-prepend reductions. Instantiate the cilk::reducer - * template class with a op_list_prepend monoid to create a list-prepend - * reducer class. For example, to create a list of strings: - * - * cilk::reducer< cilk::op_list_prepend > r; - * - * @tparam Type The list element type (not the list type). - * @tparam Alloc The list allocator type. - * @tparam Align If `false` (the default), reducers instantiated on this - * monoid will be naturally aligned (the Intel Cilk Plus library 1.0 - * behavior). If `true`, reducers instantiated on this monoid - * will be cache-aligned for binary compatibility with - * reducers in Intel Cilk Plus library version 0.9. - * - * @see ReducersList - * @see op_list_prepend_view - */ -template ::allocator_type, - bool Align = false> -struct op_list_prepend : - public internal::list_monoid_base, Align> -{ - /// Construct with default allocator. - op_list_prepend() {} - /// Construct with specified allocator. 
- op_list_prepend(const Allocator& alloc) : - internal::list_monoid_base, Align>(alloc) {} -}; - - -/** Deprecated list-append reducer wrapper class. - * - * reducer_list_append is the same as - * @ref reducer<@ref op_list_append>, except that reducer_list_append is a - * proxy for the contained view, so that accumulator variable update - * operations can be applied directly to the reducer. For example, an element - * is appended to a `reducer<%op_list_append>` with `r->push_back(a)`, but an - * element can be appended to a `%reducer_list_append` with `r.push_back(a)`. - * - * @deprecated Users are strongly encouraged to use `reducer` - * reducers rather than the old wrappers like reducer_list_append. - * The `reducer` reducers show the reducer/monoid/view - * architecture more clearly, are more consistent in their - * implementation, and present a simpler model for new - * user-implemented reducers. - * - * @note Implicit conversions are provided between `%reducer_list_append` - * and `reducer<%op_list_append>`. This allows incremental code - * conversion: old code that used `%reducer_list_append` can pass a - * `%reducer_list_append` to a converted function that now expects a - * pointer or reference to a `reducer<%op_list_append>`, and vice - * versa. - * - * @tparam Type The value type of the list. - * @tparam Allocator The allocator type of the list. - * - * @see op_list_append - * @see reducer - * @see ReducersList - */ -template > -class reducer_list_append : - public reducer > -{ - typedef reducer > base; - using base::view; -public: - - /// The reducer's list type. - typedef typename base::value_type list_type; - - /// The list's element type. - typedef Type list_value_type; - - /// The reducer's primitive component type. - typedef Type basic_value_type; - - /// The monoid type. - typedef typename base::monoid_type Monoid; - - /** @name Constructors - */ - //@{ - - /** Constructs a reducer with an empty list. 
- */ - reducer_list_append() {} - - /** Constructs a reducer with a specified initial list value. - */ - reducer_list_append(const std::list &initial_value) : - base(initial_value) {} - - //@} - - - /** @name Forwarded functions - * @details Functions that update the contained accumulator variable are - * simply forwarded to the contained @ref op_and_view. */ - //@{ - - /// @copydoc op_list_append_view::push_back(const Type&) - void push_back(const Type& element) { view().push_back(element); } - - //@} - - /** Allows mutable access to the list within the current view. - * - * @warning If this method is called before the parallel calculation is - * complete, the list returned by this method will be a partial - * result. - * - * @returns A mutable reference to the list within the current view. - */ - list_type &get_reference() { return view().view_get_reference(); } - - /** Allows read-only access to the list within the current view. - * - * @warning If this method is called before the parallel calculation is - * complete, the list returned by this method will be a partial - * result. - * - * @returns A const reference to the list within the current view. - */ - list_type const &get_reference() const { return view().view_get_reference(); } - - /// @name Dereference - //@{ - /** Dereferencing a wrapper is a no-op. It simply returns the wrapper. - * Combined with the rule that a wrapper forwards view operations to the - * view, this means that view operations can be written the same way on - * reducers and wrappers, which is convenient for incrementally - * converting code using wrappers to code using reducers. 
That is: - * - * reducer< op_list_append > r; - * r->push_back(a); // *r returns the view - * // push_back is a view member function - * - * reducer_list_append w; - * w->push_back(a); // *w returns the wrapper - * // push_back is a wrapper member function that - * // calls the corresponding view function - */ - //@{ - reducer_list_append& operator*() { return *this; } - reducer_list_append const& operator*() const { return *this; } - - reducer_list_append* operator->() { return this; } - reducer_list_append const* operator->() const { return this; } - //@} - - /** @name Upcast - * @details In Intel Cilk Plus library 0.9, reducers were always cache-aligned. - * In library 1.0, reducer cache alignment is optional. By default, - * reducers are unaligned (i.e., just naturally aligned), but legacy - * wrappers inherit from cache-aligned reducers for binary compatibility. - * - * This means that a wrapper will automatically be upcast to its aligned - * reducer base class. The following conversion operators provide - * pseudo-upcasts to the corresponding unaligned reducer class. - */ - //@{ - operator reducer< op_list_append >& () - { - return *reinterpret_cast< - reducer< op_list_append >* - >(this); - } - operator const reducer< op_list_append >& () const - { - return *reinterpret_cast< - const reducer< op_list_append >* - >(this); - } - //@} - -}; - - -/** Deprecated list-prepend reducer wrapper class. - * - * reducer_list_prepend is the same as - * @ref reducer<@ref op_list_prepend>, except that reducer_list_prepend is a - * proxy for the contained view, so that accumulator variable update operations - * can be applied directly to the reducer. For example, an element is prepended - * to a `reducer` with `r->push_back(a)`, but an element is - * prepended to a `reducer_list_prepend` with `r.push_back(a)`. - * - * @deprecated Users are strongly encouraged to use `reducer` - * reducers rather than the old wrappers like reducer_list_prepend. 
- * The `reducer` reducers show the reducer/monoid/view - * architecture more clearly, are more consistent in their - * implementation, and present a simpler model for new - * user-implemented reducers. - * - * @note Implicit conversions are provided between `%reducer_list_prepend` - * and `reducer<%op_list_prepend>`. This allows incremental code - * conversion: old code that used `%reducer_list_prepend` can pass a - * `%reducer_list_prepend` to a converted function that now expects a - * pointer or reference to a `reducer<%op_list_prepend>`, and vice - * versa. - * - * @tparam Type The value type of the list. - * @tparam Allocator The allocator type of the list. - * - * @see op_list_prepend - * @see reducer - * @see ReducersList - */ -template > -class reducer_list_prepend : - public reducer > -{ - typedef reducer > base; - using base::view; -public: - - /** The reducer's list type. - */ - typedef typename base::value_type list_type; - - /** The list's element type. - */ - typedef Type list_value_type; - - /** The reducer's primitive component type. - */ - typedef Type basic_value_type; - - /** The monoid type. - */ - typedef typename base::monoid_type Monoid; - - /** @name Constructors - */ - //@{ - - /** Constructs a reducer with an empty list. - */ - reducer_list_prepend() {} - - /** Constructs a reducer with a specified initial list value. - */ - reducer_list_prepend(const std::list &initial_value) : - base(initial_value) {} - - //@} - - /** @name Forwarded functions - * @details Functions that update the contained accumulator variable are - * simply forwarded to the contained @ref op_and_view. - */ - //@{ - - /// @copydoc op_list_prepend_view::push_front(const Type&) - void push_front(const Type& element) { view().push_front(element); } - - //@} - - /** Allows mutable access to the list within the current view. 
- * - * @warning If this method is called before the parallel calculation is - * complete, the list returned by this method will be a partial - * result. - * - * @returns A mutable reference to the list within the current view. - */ - list_type &get_reference() { return view().view_get_reference(); } - - /** Allows read-only access to the list within the current view. - * - * @warning If this method is called before the parallel calculation is - * complete, the list returned by this method will be a partial - * result. - * - * @returns A const reference to the list within the current view. - */ - list_type const &get_reference() const { return view().view_get_reference(); } - - /// @name Dereference - /** Dereferencing a wrapper is a no-op. It simply returns the wrapper. - * Combined with the rule that a wrapper forwards view operations to the - * view, this means that view operations can be written the same way on - * reducers and wrappers, which is convenient for incrementally - * converting code using wrappers to code using reducers. That is: - * - * reducer< op_list_prepend > r; - * r->push_front(a); // *r returns the view - * // push_front is a view member function - * - * reducer_list_prepend w; - * w->push_front(a); // *w returns the wrapper - * // push_front is a wrapper member function that - * // calls the corresponding view function - */ - //@{ - reducer_list_prepend& operator*() { return *this; } - reducer_list_prepend const& operator*() const { return *this; } - - reducer_list_prepend* operator->() { return this; } - reducer_list_prepend const* operator->() const { return this; } - //@} - - /** @name Upcast - * @details In Intel Cilk Plus library 0.9, reducers were always cache-aligned. - * In library 1.0, reducer cache alignment is optional. By default, - * reducers are unaligned (i.e., just naturally aligned), but legacy - * wrappers inherit from cache-aligned reducers for binary compatibility. 
- * - * This means that a wrapper will automatically be upcast to its aligned - * reducer base class. The following conversion operators provide - * pseudo-upcasts to the corresponding unaligned reducer class. - */ - //@{ - operator reducer< op_list_prepend >& () - { - return *reinterpret_cast< - reducer< op_list_prepend >* - >(this); - } - operator const reducer< op_list_prepend >& () const - { - return *reinterpret_cast< - const reducer< op_list_prepend >* - >(this); - } - //@} - -}; - -/// @cond internal - -/** Metafunction specialization for reducer conversion. - * - * This specialization of the @ref legacy_reducer_downcast template class - * defined in reducer.h causes the `reducer< op_list_append >` - * class to have an `operator reducer_list_append& ()` - * conversion operator that statically downcasts the `reducer` - * to the corresponding `reducer_list_append` type. (The reverse conversion, - * from `reducer_list_append` to `reducer`, is just an upcast, - * which is provided for free by the language.) - */ -template -struct legacy_reducer_downcast > > -{ - typedef reducer_list_append type; -}; - -/** Metafunction specialization for reducer conversion. - * - * This specialization of the @ref legacy_reducer_downcast template class - * defined in reducer.h causes the - * `reducer< op_list_prepend >` class to have an - * `operator reducer_list_prepend& ()` conversion operator - * that statically downcasts the `reducer` to the - * corresponding `reducer_list_prepend` type. (The reverse conversion, from - * `reducer_list_prepend` to `reducer`, is just an upcast, - * which is provided for free by the language.) 
- */ -template -struct legacy_reducer_downcast > > -{ - typedef reducer_list_prepend type; -}; - -/// @endcond - -//@} - -} // Close namespace cilk - -#endif // REDUCER_LIST_H_INCLUDED diff --git a/include/cilk/reducer_max.h b/include/cilk/reducer_max.h deleted file mode 100644 index fa4d0c50..00000000 --- a/include/cilk/reducer_max.h +++ /dev/null @@ -1,57 +0,0 @@ -/* reducer_max.h -*- C++ -*- - * - * Copyright (C) 2009-2018, Intel Corporation - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY - * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- * - * ********************************************************************* - * - * PLEASE NOTE: This file is a downstream copy of a file maintained in - * a repository at cilkplus.org. Changes made to this file that are not - * submitted through the contribution process detailed at - * http://www.cilkplus.org/submit-cilk-contribution will be lost the next - * time that a new version is released. Changes only submitted to the - * GNU compiler collection or posted to the git repository at - * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime are - * not tracked. - * - * We welcome your contributions to this open source project. Thank you - * for your assistance in helping us improve Cilk Plus. - */ - -/** @file reducer_max.h - * - * @brief Defines classes for doing parallel maximum reductions. - * - * @ingroup ReducersMinMax - * - * @see ReducersMinMax - */ - -#include "reducer_min_max.h" diff --git a/include/cilk/reducer_min.h b/include/cilk/reducer_min.h deleted file mode 100644 index 521a4d32..00000000 --- a/include/cilk/reducer_min.h +++ /dev/null @@ -1,57 +0,0 @@ -/* reducer_min.h -*- C++ -*- - * - * Copyright (C) 2009-2018, Intel Corporation - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY - * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * ********************************************************************* - * - * PLEASE NOTE: This file is a downstream copy of a file maintained in - * a repository at cilkplus.org. Changes made to this file that are not - * submitted through the contribution process detailed at - * http://www.cilkplus.org/submit-cilk-contribution will be lost the next - * time that a new version is released. Changes only submitted to the - * GNU compiler collection or posted to the git repository at - * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime are - * not tracked. - * - * We welcome your contributions to this open source project. Thank you - * for your assistance in helping us improve Cilk Plus. - */ - -/** @file reducer_min.h - * - * @brief Defines classes for doing parallel minimum reductions. 
- * - * @ingroup ReducersMinMax - * - * @see ReducersMinMax - */ - -#include "reducer_min_max.h" diff --git a/include/cilk/reducer_min_max.h b/include/cilk/reducer_min_max.h deleted file mode 100644 index 947dad09..00000000 --- a/include/cilk/reducer_min_max.h +++ /dev/null @@ -1,3743 +0,0 @@ -/* reducer_min_max.h -*- C++ -*- - * - * Copyright (C) 2009-2018, Intel Corporation - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY - * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- * - * ********************************************************************* - * - * PLEASE NOTE: This file is a downstream copy of a file maintained in - * a repository at cilkplus.org. Changes made to this file that are not - * submitted through the contribution process detailed at - * http://www.cilkplus.org/submit-cilk-contribution will be lost the next - * time that a new version is released. Changes only submitted to the - * GNU compiler collection or posted to the git repository at - * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime are - * not tracked. - * - * We welcome your contributions to this open source project. Thank you - * for your assistance in helping us improve Cilk Plus. - */ - -/** @file reducer_min_max.h - * - * @brief Defines classes for doing parallel minimum and maximum reductions. - * - * @ingroup ReducersMinMax - * - * @see ReducersMinMax - */ - -#ifndef REDUCER_MIN_MAX_H_INCLUDED -#define REDUCER_MIN_MAX_H_INCLUDED - -#include - -#ifdef __cplusplus - -#include -#include - -/** @defgroup ReducersMinMax Minimum and Maximum Reducers - * - * Minimum and maximum reducers allow the computation of the minimum or - * maximum of a set of values in parallel. - * - * @ingroup Reducers - * - * You should be familiar with @ref pagereducers "Intel(R) Cilk(TM) Plus - * reducers", described in file `reducers.md`, and particularly with @ref - * reducers_using, before trying to use the information in this file. 
- * - * @section redminmax_usage Usage Examples - * - * cilk::reducer< cilk::op_max > rm; - * cilk_for (int i = 0; i < ARRAY_SIZE; ++i) - * { - * rm->calc_max(a[i]); // or *rm = cilk::max_of(*max, a[i]) - * } - * std::cout << "maximum value is " << rm.get_value() << std::endl; - * - * and - * - * cilk::reducer< cilk::op_min_index > rmi; - * cilk_for (int i = 0; i < ARRAY_SIZE; ++i) - * { - * rmi->calc_min(i, a[i]) // or *rmi = cilk::min_of(*rmi, i, a[i]); - * } - * std::cout << "minimum value a[" << rmi.get_value().first << "] = " - * << rmi.get_value().second << std::endl; - * - * @section redminmax_monoid The Monoid - * - * @subsection redminmax_monoid_values Value Set - * - * The value set of a minimum or maximum reducer is the set of values of - * `Type`, augmented with a "special identity value" which is not a value of - * `Type`, but which is defined to be greater than (less than) any value of - * `Type`. - * - * @subsection redminmax_monoid_operator Operator - * - * By default, the operator of a minimum reducer is defined as - * - * x MIN y == (x < y) ? x : y - * - * Thus, `a1 MIN a2 MIN … an` is the first `ai` which is not greater than any - * other `ai`. - * - * The operator of a maximum reducer is defined as - * - * x MAX y == (x > y) ? x : y - * - * Thus, `a1 MAX a2 MAX … an` is the first `ai` which is not less than any - * other `ai`. - * - * @subsection redminmax_monoid_comparators Comparators - * - * Min/max reducers are not limited to finding the minimum or maximum value - * determined by the `<` or `>` operator. In fact, all min/max reducers use a - * _comparator_, which is either a function or an object of a function class - * that defines a [strict weak ordering] - * (http://en.wikipedia.org/wiki/Strict_weak_ordering#Strict_weak_orderings) - * on a set of values. (This is exactly the same as the requirement for the - * comparison predicate for STL associative containers and sorting - * algorithms.) 
- * - * Just as with STL algorithms and containers, the comparator type parameter - * for min/max reducers is optional. If it is omitted, it defaults to - * `std::less`, which gives the behavior described in the previous section. - * Using non-default comparators (anything other than `std::less`) with - * min/max reducers is just like using them with STL containers and - * algorithms. - * - * Taking comparator objects into account, the reduction operation `MIN` for a - * minimum reducer is defined as - * - * x MIN y == compare(x, y) ? x : y - * - * where `compare()` is the reducer's comparator. Similarly, the reduction - * operation MAX for a maximum reducer is defined as - * - * x MAX y == compare(y, x) ? x : y - * - * (If `compare(x, y) == x < y`, then `compare(y, x) == x > y`.) - * - * @subsection redminmax_monoid_identity Identity - * - * The identity value of a min/max reducer is its monoid's - * ["special identity value"](#redminmax_monoid_values), which is not a value - * of the reducer's data type. (See @ref redminmax_initial.) - * - * @section redminmax_index Value and Index Reducers - * - * Min/max reducers come in two families. The _value_ reducers, with the - * `op_min` and `op_max` monoids, simply find the smallest or largest value - * from a set of values. The _index_ reducers, with the `op_min_index` and - * `op_max_index` monoids, also record an index value associated with the - * first occurrence of the smallest or largest value. - * - * In the `%op_min_index` usage example [above](#redminmax_usage), the values - * are taken from an array, and the index of a value is the index of the array - * element it comes from. More generally, though, an index can be any sort of - * key which identifies a particular value in a collection of values. For - * example, if the values were taken from the nodes of a tree, then the - * "index" of a value might be a pointer to the node containing that value. 
- * - * A min/max index reducer is essentially the same as a min/max value reducer - * whose value type is an (index, value) pair, and whose comparator ignores - * the index part of the pair. (index, value) pairs are represented by - * `std::pair` objects. This has the consequence that wherever - * the interface of a min/max value reducer has a `Type`, the interface of a - * min/max index reducer has a `std::pair`. (There are - * convenience variants of the `reducer(Type)` constructor and the - * `calc_min()`, `calc_max()`, `%min_of()`, and `%max_of()` functions that - * take an index argument and a value argument instead of a single index/value - * pair argument.) - * - * @section redminmax_operations Operations - * - * @subsection redminmax_constructors Constructors - * - * @subsubsection redminmax_constructors_value Min/Max Value Reducers - * - * reducer() // identity - * reducer(const Compare& compare) // identity - * reducer(const Type& value) - * reducer(move_in(Type& variable)) - * reducer(const Type& value, const Compare& compare) - * reducer(move_in(Type& variable), const Compare& compare) - * - * @subsubsection redminmax_constructors_index Min/Max Index Reducers - * - * reducer() // identity - * reducer(const Compare& compare) // identity - * reducer(const std::pair& pair) - * reducer(const Index& index, const Type& value) - * reducer(move_in(std::pair& variable)) - * reducer(const std::pair& pair, const Compare& compare) - * reducer(const Index& index, const Type& value, const Compare& compare) - * reducer(move_in(std::pair& variable), const Compare& - * compare) - * - * See the explanation of the following two constructors in - * @ref redminmax_index_vector. 
- * - * reducer(const Index& index) - * reducer(const Index& index, const Compare& compare) - * - * @subsection redminmax_get_set Set and Get - * - * r.set_value(const Type& value) - * Type = r.get_value() const - * r.move_in(Type& variable) - * r.move_out(Type& variable) - * - * Note that for an index reducer, the `Type` in these operations is actually a - * `std::pair`. (See @ref redminmax_index.) There is _not_ a - * `set_value(value, index)` operation. - * - * @subsection redminmax_initial Initial Values and is_set() - * - * The initial value of the leftmost view of a default-initialized min/max - * reducer, or of a non-leftmost view (created for a stolen parallel strand) - * is the special identity value, which is not a value of the reducer's value - * type. - * - * A view will have a real (non-identity) value if: - * - * - it is the leftmost view of a reducer that was constructed with an - * initial value, or - * - it was assigned a value with a call to `reducer.set_value()` or - * `reducer.move_in()`, or - * - it has been updated with a call to `reducer->calc_min()` or - * `reducer->calc_max()`, or - * - it has been updated with an assignment `*reducer = min_of(*reducer, x)` - * or `*reducer = max_of(*reducer, x)`. - * - * Calling `get_value()` or `move_out()` on a reducer whose view has the - * special identity value will yield an undefined result. The `is_set()` - * function can be used to test whether a view has the special identity value - * or a real value. If a reducer's current view has the special identity - * value, then `reducer()->is_set()` will return `false` (and - * `reducer.get_value()` will return an undefined value); if the view has a - * real value, them `reducer->is_set()` will return `true` and - * `reducer.get_value()` will return the value. 
- * - * @subsubsection redminmax_index_vector Special Issues with Min/Max Index - * Reducers - * - * The index portion of the computed index/value pair will be wrong in the - * following special case: - * - * - The reducer's value type is a simple numeric type. - * - The reducer uses the default comparator (`std::less`). - * - The reducer is updated at least once with a call to `calc_min()` or - * `calc_max()` or an assignment with `min_of()` or `max_of()`. - * - The value in _every_ update to the reducer is the maximum value of the - * value type (for a min_index reducer) or the minimum value of the value - * type (for a max_index reducer). - * - * In this case, `reducer.get_value().first` should be the index argument from - * the first reducer update, but it will actually be the default value of the - * `Index` type. Now, in the common case where the index type is an integer - * type and the reducer is finding the smallest or largest element in an - * array, the default value of the index type will be zero, which is the - * index of the first element in the array, so everything will work out: - * - * unsigned a[3] = {0, 0, 0}; - * reducer< op_max_index > r; - * for (int i = 0; i < 3; ++i) r->calc_max(i, a[i]); - * // r.get_value() = (0, 0) - * - * However, it doesn't always work out so well: - * - * typedef std::map my_map; - * my_map a; - * a["first"] = 0; - * a["second"] = 0; - * a["third"] = 0; - * reducer< op_max_index > r; - * for (typename my_map::iterator i = a.begin(); i != a.end(); ++i) - * r.calc_max(i->first, i->second); - * // r.get_value() = ("", 0), should be ("first", 0) - * - * If you know that no data value is associated with the default index value, - * then you can treat the default index value as a flag meaning "use the index - * of the first data value." But suppose that you don't know whether there is - * an element in the map with index `""`. Then you won't know what to do when - * `r.get_value().first == ""`. 
- * - * As a workaround for this conundrum, you can specify an alternative - * "default" index value. Either provide an index argument, _but not a - * value argument_, to the reducer constructor: - * - * reducer< op_max_index > - * r(a.empty() ? std::string() : a.begin()->first); - * - * or specify the default index with the view `set_default_index()` function: - * - * reducer< op_max_index > r; - * if (!a.empty()) r->set_default_index(a.begin()->first); - * - * Note that setting a default index, unlike setting an initial value, does - * not mark the view as having a non-identity value: - * - * reducer< op_min_index > r; - * r->set_default_index(-1); - * // r->is_set() = false - * // r.get_value() is undefined - * - * @subsection redminmax_view_ops View Operations - * - * The basic reduction operation is `x = x MIN a` for a minimum reducer, or - * `x = x MAX a` for a maximum reducer. The basic syntax for these operations - * uses the `calc_min()` and `calc_max()` member functions of the view class. - * An assignment syntax is also provided, using the `%cilk::min_of()` and - * `%cilk::max_of()` global functions: - * - * Class | Modifier | Assignment - * ---------------|---------------------|----------- - * `op_min` | `r->calc_min(x)` | `*r = min_of(*r, x)` or `*r = min_of(x, *r)` - * `op_max` | `r->calc_max(x)` | `*r = max_of(*r, x)` or `*r = max_of(x, *r)` - * `op_min_index` | `r->calc_min(i, x)` | `*r = min_of(*r, i, x)` or `*r = min_of(i, x, *r)` - * `op_max_index` | `r->calc_max(i, x)` | `*r = max_of(*r, i, x)` or `*r = max_of(i, x, *r)` - * - * Wherever an "`i`, `x`" argument pair is shown in the table above, a single - * pair argument may be passed instead. For example: - * - * Index index; - * Type value; - * std::pair ind_val(index, value); - * // The following statements are all equivalent. 
- * r->calc_min(index, value); - * r->calc_min(ind_val); - * *r = min_of(*r, index, value); - * *r = min_of(*r, ind_val); - * - * The `calc_min()` and `calc_max()` member functions return a reference to - * the view, so they can be chained: - * - * r->calc_max(x).calc_max(y).calc_max(z); - * - * In a `%min_of()` or `%max_of()` assignment, the view on the left-hand side - * of the assignment must be the same as the view argument in the call. - * Otherwise, the behavior is undefined (but an assertion error will occur if - * the code is compiled with debugging enabled). - * - * *r = max_of(*r, x); // OK - * *r1 = max_of(*r2, y); // ERROR - * - * `%min_of()` and `%max_of()` calls can be nested: - * - * *r = max_of(max_of(max_of(*r, x), y), z); - * *r = min_of(i, a[i], min_of(j, a[j], min_of(k, a[k], *r))); - * - * @section redminmax_compatibility Binary Compatibility Issues - * - * Most Intel Cilk Plus library reducers provide binary compatibility between - * `reducer_KIND` reducers compiled with Intel Cilk Plus library version 0.9 - * (distributed with Intel® C++ Composer XE version 13.0 and earlier) and the - * ame reducers compiled with Intel Cilk Plus library version 1.0 and later. - * - * Because of implementation changes that were needed to allow vectorization - * of loops containing min/max reducers, this binary compatibility is _not_ - * generally available for min/max reducers, either between Intel Cilk Plus - * library versions 0.9 and 1.0, or between versions 1.0 and 1.1. (Code compiled - * with different versions can be linked together safely, but min/max reducers - * in different library versions are in different namespaces, so reducer objects - * cannot be shared between them.) - * - * If this is an inconvenience, the simplest solution is just to recompile any - * existing code you may have that uses min/max reducers. 
If that is - * impossible, you can define the `CILK_LIBRARY_0_9_REDUCER_MINMAX` macro (on - * the compiler command line, or in your source code before including - * `reducer_min_max.h`) when compiling with the new library. This will cause - * it to generate numeric reducers that will be link-time and run-time - * compatible with the 0.9 library. - * - * @subsection redminmax_compatibility_stateful Non-empty Comparators - * - * The representation of min/max reducers with non-empty comparator objects or - * with comparator functions is so different in between the 0.9 and 1.1 - * libraries that there is no way to make them binary compatible, even when - * compiling with `CILK_LIBRARY_0_9_REDUCER_MINMAX`. Therefore, the - * `reducer_{min|max}[_index]` wrapper classes have been coded in the 1.0 and - * later library so that they will not even compile when instantiated with a - * non-empty comparator class. - * - * This is not a problem when using an empty comparator class, such as the - * default `std::less`. - * - * @section redminmax_types Type Requirements - * - * `Type` and `Index` must be `Copy Constructible`, `Default Constructible`, - * and `Assignable`. - * - * `Compare` must be `Copy Constructible` if the reducer is constructed with a - * `compare` argument, and `Default Constructible` otherwise. - * - * The `Compare` function must induce a strict weak ordering on the elements - * of `Type`. 
- * - * @section redminmax_in_c Minimum and Maximum Reducers in C - * - * These macros can be used to do minimum and maximum reductions in C: - * - * Declaration | Type | Operation - * -----------------------------|-----------------------------------|---------- - * @ref CILK_C_REDUCER_MIN |@ref CILK_C_REDUCER_MIN_TYPE |@ref CILK_C_REDUCER_MIN_CALC - * @ref CILK_C_REDUCER_MAX |@ref CILK_C_REDUCER_MAX_TYPE |@ref CILK_C_REDUCER_MAX_CALC - * @ref CILK_C_REDUCER_MIN_INDEX |@ref CILK_C_REDUCER_MIN_INDEX_TYPE |@ref CILK_C_REDUCER_MIN_INDEX_CALC - * @ref CILK_C_REDUCER_MAX_INDEX |@ref CILK_C_REDUCER_MAX_INDEX_TYPE |@ref CILK_C_REDUCER_MAX_INDEX_CALC - * - * For example: - * - * CILK_C_REDUCER_MIN(r, int, INT_MAX); - * CILK_C_REGISTER_REDUCER(r); - * cilk_for(int i = 0; i != n; ++i) { - * CILK_C_REDUCER_MIN_CALC(r, a[i]); - * } - * CILK_C_UNREGISTER_REDUCER(r); - * printf("The smallest value in a is %d\n", REDUCER_VIEW(r)); - * - * - * CILK_C_REDUCER_MAX_INDEX(r, uint, 0); - * CILK_C_REGISTER_REDUCER(r); - * cilk_for(int i = 0; i != n; ++i) { - * CILK_C_REDUCER_MAX_INDEX_CALC(r, i, a[i]); - * } - * CILK_C_UNREGISTER_REDUCER(r); - * printf("The largest value in a is %u at %d\n", - * REDUCER_VIEW (r).value, REDUCER_VIEW(r).index); - * - * See @ref reducers_c_predefined. - */ - -namespace cilk { - -/** @defgroup ReducersMinMaxBinComp Binary compatibility - * - * If the macro `CILK_LIBRARY_0_9_REDUCER_MINMAX` is defined, then we generate - * reducer code and data structures which are binary-compatible with code that - * was compiled with the old min/max wrapper definitions, so we want the - * mangled names of the legacy min/max reducer wrapper classes to be the - * same as the names produced by the old definitions. - * - * Conversely, if the macro is not defined, then we generate binary- - * incompatible code, so we want different mangled names, to make sure that - * the linker does not allow new and old compiled legacy wrappers to be passed - * to one another. 
(Global variables are a different, and probably insoluble, - * problem.) - * - * Similarly, min/max classes compiled with and without - * CILK_LIBRARY_0_9_REDUCER_MINMAX are binary-incompatible, and must get - * different mangled names. - * - * The trick is, when compiling in normal (non-compatibility) mode, wrap - * everything in an extra namespace, and then `use` it into the top-level cilk - * namespace. Then - * - * * Classes and functions compiled in normal mode will be in - * different namespaces from the same classes and functions compiled in - * compatibility mode. - * * The legacy wrapper classes and functions will be in the same namespace - * as the same classes and functions compiled with the 0.9 library if and - * only if they are compiled in compatibility mode. - * - * @ingroup ReducersMinMax - */ - -#ifndef CILK_LIBRARY_0_9_REDUCER_MINMAX -/** Namespace to wrap min/max reducer definitions when not compiling in "binary - * compatibility" mode. - * - * By default, all of the min/max reducer definitions are defined in this - * namespace and then imported into namespace ::cilk, so that they do not - * clash with the legacy definitions with the same names. However, if the - * macro `CILK_LIBRARY_0_9_REDUCER_MINMAX` is defined, then the min/max - * definitions go directly into namespace ::cilk, so that, for example, - * cilk::reducer_max defined with the 1.0 library is equivalent (to the - * linker) to cilk::reducer_max defined with the 0.9 library. - * - * @ingroup ReducersMinMaxBinComp - * @ingroup ReducersMinMax - */ -namespace cilk_lib_1_1 { -#endif - -/** Namespace containing internal implementation classes and functions for - * min/max reducers. 
- * - * @ingroup ReducersMinMax - */ -namespace min_max_internal { - -using ::cilk::internal::binary_functor; -using ::cilk::internal::class_is_empty; -using ::cilk::internal::typed_indirect_binary_function; - -/** @defgroup ReducersMinMaxIsSet The "is_set optimization" - * - * The obvious definition of the identity value for a max or min reducer is as - * the smallest (or largest) value of the value type. However, for an - * arbitrary comparator and/or an arbitrary value type, the largest / smallest - * value may not be known. It may not even be defined - what is the largest - * string? - * - * Therefore, min/max reducers represent their value internally as a pair - * `(value, is_set)`. When `is_set` is true, the pair represents the known - * value `value`; when `is_set` is false, the pair represents the identity - * value. - * - * This is an effective solution, but the most common use of min/max reducers - * is probably with numeric types and the default definition of minimum or - * maximum (using `std::less`), in which case there are well-defined, knowable - * smallest and largest values. Testing `is_set` for every comparison is then - * unnecessary and wasteful. - * - * The "is_set optimization" just means generating code that doesn't use - * `is_set` when it isn't needed. It is implemented using two metaprogramming - * classes: - * - * - do_is_set_optimization tests whether the optimization is applicable. - * - identity_value gets the appropriate identity value for a type. - * - * The is_set optimization is the reason that min/max reducers compiled with - * Intel Cilk Plus library 1.0 are binary-incompatible with the same reducers - * compiled with library 0.9, and therefore the optimization is suppressed when - * compiling in - * ReducersMinMaxBinComp "binary compatibility mode". - * - * @ingroup ReducersMinMax - */ - -/** Tests whether the ReducersMinMaxIsSet "is_set optimization" is - * applicable. 
- * - * The @ref do_is_set_optimization class is used to test whether the is_set - * optimization should be applied for a particular reducer. It is instantiated - * with a value type and a comparator, and defines a boolean constant, - * `value`. Then `%do_is_set_optimization::%value` can be used as - * a boolean template parameter to control the specialization of another - * class. - * - * In ReducersMinMaxBinComp "binary compatibility mode" (i.e., when the - * `CILK_LIBRARY_0_9_REDUCER_MINMAX` macro is defined), `value` will always - * be false. - * - * @tparam Type The value type for the reducer. - * @tparam Compare The comparator type for the reducer. - * - * @result The `value` data member will be `true` if @a Type is a numeric - * type, @a Compare is `std::less`, and - * `CILK_LIBRARY_0_9_REDUCER_MINMAX` is not defined. - * - * @see ReducersMinMaxIsSet - * @see @ref view_content - * - * @ingroup ReducersMinMaxIsSet - */ -template struct do_is_set_optimization { - /// `True` if the is_set optimization should be applied to min/max reducers - /// with this value type and comparator; `false` otherwise. - static const bool value = false; -}; - -#ifndef CILK_LIBRARY_0_9_REDUCER_MINMAX -/// @cond -template struct do_is_set_optimization> { - /// True in the special case where optimization is possible. - static const bool value = std::numeric_limits::is_specialized; -}; -/// @endcond -#endif - -/** Gets the identity value when using the ReducersMinMaxIsSet - * "is_set optimization". - * - * This class defines a function which assigns the appropriate identity value - * to a variable when the is_set optimization is applicable. - * - * @tparam Type The value type for the reducer. - * @tparam Compare The comparator type for the reducer. - * @tparam ForMax `true` to get the identity value for a max reducer (i.e., - * the smallest value of @a Type), `false` to get the identity - * value for a min reducer (i.e., the largest value of - * @a Type). 
- * - * @result If @a Type and @a Compare qualify for the is_set optimization, the - * `set_identity()' function will set its argument variable to the - * smallest or largest value of @a Type, depending on @a ForMax. - * Otherwise, `set_identity()` will be a no-op. - * - * @see ReducersMinMaxIsSet - * - * @ingroup ReducersMinMaxIsSet - * @see @ref view_content - */ -template ::is_specialized, - bool = std::numeric_limits::has_infinity> -struct identity_value { - /// Assign the identity value to the reference parameter. - static void set_identity(Type &) {} -}; - -/// @cond -template -struct identity_value, true, true, true> { - /// Floating max identity is negative infinity. - static void set_identity(Type &id) { - id = -std::numeric_limits::infinity(); - } -}; - -template -struct identity_value, true, true, false> { - /// Integer max identity is minimum value of type. - static void set_identity(Type &id) { - id = std::numeric_limits::min(); - } -}; - -template -struct identity_value, false, true, true> { - /// Floating min identity is positive infinity. - static void set_identity(Type &id) { - id = std::numeric_limits::infinity(); - } -}; - -template -struct identity_value, false, true, false> { - /// Integer min identity is maximum value of type. - static void set_identity(Type &id) { - id = std::numeric_limits::max(); - } -}; - -/// @endcond - -/** Adapter class to reverse the arguments of a predicate. - * - * Observe that: - * - * (x < y) == (y > x) - * max(x, y) == (x < y) ? y : x - * min(x, y) == (y < x) ? y : x == (x > y) ? y : x - * - * More generally, if `c` is a predicate defining a `Strict Weak Ordering`, - * and `c*(x, y) == c(y, x)`, then - * - * max(x, y, c) == c(x, y) ? y : x - * min(x, y, c) == c(y, x) ? y : x == c*(x, y) ? y : x == max(x, y, c*) - * - * For any predicate `C` with argument type `T`, the template class - * `%reverse_predicate` defines a predicate which is identical to `C`, - * except that its arguments are reversed. 
Thus, for example, we could - * implement `%op_min_view` as - * `%op_max_view >`. - * (Actually, op_min_view and op_max_view are both implemented as subclasses - * of a common base class, view_base.) - * - * @note If `C` is an empty functor class, then `reverse_predicate(C)` will - * also be an empty functor class. - * - * @tparam Predicate The predicate whose arguments are to be reversed. - * @tparam Argument @a Predicate's argument type. - * - * @ingroup ReducersMinMax - */ -template -class reverse_predicate : private binary_functor::type { - typedef typename binary_functor::type base; - - public: - /// Default constructor - reverse_predicate() : base() {} - /// Constructor with predicate object - reverse_predicate(const Predicate &p) : base(p) {} - /// The reversed predicate operation - bool operator()(const Argument &x, const Argument &y) const { - return base::operator()(y, x); - } -}; - -/** Class to represent the comparator for a min/max view class. - * - * This class is intended to accomplish two objectives in the implementation - * of min/max views. - * - * 1. To minimize data bloat, when we have a reducer with a non-stateless - * comparator, we want to keep a single instance of the comparator object - * in the monoid, and just call it from the views. - * 2. In ReducersMinMaxBinComp "binary compatibility mode", views for - * reducers with a stateless comparator must have the same content as in - * Intel Cilk Plus library 0.9 - that is, they must contain only `value` - * and `is_set` data members. - * - * To achieve the first objective, we use the - * @ref internal::typed_indirect_binary_function class defined in - * metaprogramming.h to wrap a pointer to the actual comparator. If no - * pointer is needed because the actual comparator is stateless, the - * `typed_indirect_binary_function` class will be empty, too. 
- * - * To achieve the second objective, we make the - * `typed_indirect_binary_function` class a base class of the view rather than - * a data member, so the "empty base class" rule will ensure no that no - * additional space is allocated in the view unless it is needed. - * - * We could simply use typed_indirect_binary_function as the base class of the - * view, but this would mean writing comparisons as `(*this)(x, y)`, which is - * just weird. So, instead, we comparator_base as a subclass of - * typed_indirect_binary_function which provides function `compare()` - * as a synonym for `operator()`. - * - * @tparam Type The value type of the comparator class. - * @tparam Compare A predicate class. - * - * @see internal::typed_indirect_binary_function - * - * @ingroup ReducersMinMax - */ -template -class comparator_base - : private typed_indirect_binary_function { - typedef typed_indirect_binary_function base; - - protected: - comparator_base(const Compare *f) : base(f) {} ///< Constructor. - - /// Comparison function. - bool compare(const Type &a, const Type &b) const { - return base::operator()(a, b); - } - - /// Get the comparator pointer. - const Compare *compare_pointer() const { return base::pointer(); } -}; - -/** @defgroup ReducersMinMaxViewContent Content classes for min/max views - * - * @ingroup ReducersMinMax - * - * Minimum and maximum reducer view classes inherit from a "view content" - * class. The content class defines the actual data members for the view, - * and provides typedefs and member functions for accessing the data members - * as needed to support the view functionality. 
- * - * There are two content classes, which encapsulate the differences between - * simple min/max reducers and min/max with index reducers: - * - * - view_content - * - index_view_content - * - * @note An obvious, and arguably simpler, encapsulation strategy would be - * to just let the `Type` of a min/max view be an (index, value) pair - * structure for min_index and max_index reducers. Then all views - * would just have a `Type` data member and an `is_set` data member, - * and the comparator for min_index and max_index views could be - * customized to consider only the value component of the (index, - * value) `Type` pair. Unfortunately, this would break binary - * compatibility with reducer_max_index and reducer_min_index in - * Intel Cilk Plus library 0.9, because the memory layout of an - * (index, value) pair followed by a `bool` is different from the - * memory layout of an index data member followed by a value data - * member followed by a `bool` data member. The content class is - * designed to exactly replicate the layout of the views in library 0.9 - * reducers. - * - * A content class `C`, and its objects `c`, must define the following: - * - * Definition | Meaning - * ------------------------------------|-------- - * `C::value_type` | A typedef for `Type` of the view. (A `std::pair` for min_index and max_index views). - * `C::comp_value_type` | A typedef for the type of value compared by the view's `compare()` function. - * `C()` | Constructs the content with the identity value. - * `C(const value_type&)` | Constructs the content with a specified value. - * `c.is_set()` | Returns true if the content has a known value. - * `c.value()` | Returns the content's value. - * `c.set_value(const value_type&)` | Sets the content's value. (The value becomes known.) - * `c.comp_value()` | Returns a const reference to the value or component of the value that is to be compared by the view's comparator. 
- * `C::comp_value(const value_type&)` | Returns a const reference to a value or component of a value that is to be compared by the view's comparator. - * - * @see view_base - */ - -/** Content class for op_min_view and op_max_view. - * - * @tparam Type The value type of the op_min_view or op_max_view. - * @tparam Compare The comparator class specified for the op_min_view or - * op_max_view. (_Not_ the derived comparator class actually - * used by the view_base. For example, the view_content of an - * `op_min_view` will have `Compare = std::less`, - * but its comparator_base will have - * `Compare = reverse_predicate< std::less >`.) - * @tparam ForMax `true` if this is the content class for an op_max_view, - * `false` if it is for an op_min_view. - * - * @note The general implementation of view_content uses an `is_set` data - * member. There is also a specialization which implements the - * ReducersMinMaxIsSet "is_set optimization". View classes that - * inherit from view_content do not need to know anything about the - * difference, though; the details are abstracted away in the - * view_content interface. - * - * @see ReducersMinMaxViewContent - * - * @ingroup ReducersMinMaxViewContent - * @ingroup ReducersMinMax - */ -template ::value> -class view_content { - protected: - /// @cond - Type m_value; - bool m_is_set; - /// @endcond - public: - /// The value type of the view. - typedef Type value_type; - - /// The type compared by the view's `compare()` function (which is the same - /// as the value type for view_content). - typedef Type comp_value_type; - - /// Construct with the identity value. - view_content() : m_value(), m_is_set(false) {} - - /// Construct with a defined value. - view_content(const value_type &value) : m_value(value), m_is_set(true) {} - - /// Gets the value. - value_type value() const { return m_value; } - - /// Sets the value. - void set_value(const value_type &value) { m_value = value; } - - /// Sets the is_set flag. 
- void set_is_set() { m_is_set = true; } - - /// Sets the index part of the value (which is meaningless for non-index - /// reducers, but required for view_base). - void set_default_index(const value_type &) {} - - /// Gets the comparison value (which, for view_content, is the same as the - /// value). - const comp_value_type &comp_value() const { return m_value; } - - /// Given an arbitrary value, gets the corresponding comparison value - /// (which, for view_content, is the same as the value). - static const comp_value_type &comp_value(const value_type &value) { - return value; - } - - /// Gets a const reference to value part of the value (which is the same as - /// the value for view_content). - const Type &get_reference() const { return m_value; } - - /// Gets a const reference to the index part of the value (which is - /// meaningless for non-index reducers, but required for view_base. - const Type &get_index_reference() const { return m_value; } - - /// Tests if the value is defined. - bool is_set() const { return m_is_set; } - - /// Tests if the view has a comparable value. - bool has_value() const { return is_set(); } -}; - -/// @cond - -/* This is the specialization of the view_content class for cases where - * the is_set optimization is applicable). - */ -template -class view_content - : public view_content { - typedef view_content base; - typedef identity_value Identity; - - public: - typedef typename base::value_type value_type; - ; - typedef typename base::comp_value_type comp_value_type; - ; - - view_content() : base() { Identity::set_identity(this->m_value); } - - view_content(const value_type &value) : base(value) {} - - bool has_value() const { return true; } -}; - -/// @endcond - -/** Content class for op_min_index_view and op_max_index_view. - * - * @tparam Index The index type of the op_min_index_view or - op_max_index_view. - * @tparam Type The value type of the op_min_view or op_max_view. 
(_Not_ - * the value type of the view, which will be - * `std::pair`.) - * @tparam Compare The comparator class specified for the op_min_index_view or - * op_max_index_view. (_Not_ the derived comparator class - * actually used by the view_base. For example, the - * index_view_content of an `op_min_index_view` will have - * `Compare = std::less`, but its comparator_base will - * have `Compare = reverse_predicate< std::less >`.) - * @tparam ForMax `true` if this is the content class for an - * op_max_index_view, `false` if it is for an - * op_min_index_view. - * - * @see ReducersMinMaxViewContent - * - * @ingroup ReducersMinMaxViewContent - * @ingroup ReducersMinMax - */ -template ::value> -class index_view_content { - protected: - /// @cond - Index m_index; - Type m_value; - bool m_is_set; - /// @endcond - public: - /// The value type of the view (which is an pair for - /// index_view_content). - typedef std::pair value_type; - - /// The type compared by the view's `compare()` function (which is the data - /// value type for index_view_content). - typedef Type comp_value_type; - - /// Construct with the identity value. - index_view_content() : m_index(), m_value(), m_is_set(false) {} - - /// Construct with an index/value pair. - index_view_content(const value_type &value) - : m_index(value.first), m_value(value.second), m_is_set(true) {} - - /// Construct with an index and a value. - index_view_content(const Index &index, const Type &value) - : m_index(index), m_value(value), m_is_set(true) {} - - /// Construct with just an index. - index_view_content(const Index &index) - : m_index(index), m_value(), m_is_set(false) {} - - /// Gets the value. - value_type value() const { return value_type(m_index, m_value); } - - /// Sets the value. - void set_value(const value_type &value) { - m_index = value.first; - m_value = value.second; - } - - /// Sets the is_set flag. 
- void set_is_set() { m_is_set = true; } - - /// Sets the (initial) index, without marking the view as set. - void set_default_index(const Index &index) { m_index = index; } - - /// Gets the comparison value (which, for index_view_content, is the value - /// component of the index/value pair). - const comp_value_type &comp_value() const { return m_value; } - - /// Given an arbitrary value (i.e., index/value pair), gets the - /// corresponding comparison value (which, for index_view_content, is the - /// value component of the index/value pair). - static const comp_value_type &comp_value(const value_type &value) { - return value.second; - } - - /// Gets a const reference to the value part of the value. - const Type &get_reference() const { return m_value; } - - /// Gets a const reference to the index part of the value. - const Index &get_index_reference() const { return m_index; } - - /// Tests if the value is defined. - bool is_set() const { return m_is_set; } - - /// Tests if the view has a comparable value. - bool has_value() const { return is_set(); } -}; - -/// @cond - -/* This is the specialization of the index_view_content class for cases where - * the is_set optimization is applicable). - */ -template -class index_view_content - : public index_view_content { - typedef index_view_content base; - typedef identity_value Identity; - - public: - typedef typename base::value_type value_type; - ; - typedef typename base::comp_value_type comp_value_type; - ; - - index_view_content() : base() { Identity::set_identity(this->m_value); } - - index_view_content(const value_type &value) : base(value) {} - - index_view_content(const Index &index, const Type &value) - : base(index, value) {} - - index_view_content(const Index &index) : base() { - Identity::set_identity(this->m_value); - this->m_index = index; - } - - /// Test if the view has a comparable value. 
- bool has_value() const { return true; } -}; - -/// @endcond - -template class rhs_proxy; - -/** Creates an rhs_proxy. - */ -template -inline rhs_proxy make_proxy(const typename View::value_type &value, - const View &view); - -template class view_base; - -/** Class to represent the right-hand side of - * `*reducer = {min|max}_of(*reducer, value)`. - * - * The only assignment operator for a min/max view class takes a rhs_proxy as - * its operand. This results in the syntactic restriction that the only - * expressions that can be assigned to a min/max view are ones which generate - * an rhs_proxy - that is, expressions of the form `max_of(view, value)` and - * `min_of(view, value)`. - * - * @warning - * The lhs and rhs views in such an assignment must be the same; otherwise, - * the behavior will be undefined. (I.e., `*r1 = min_of(*r1, x)` is legal; - * `*r1 = min_of(*r2, x)` is illegal.) This condition will be checked with a - * runtime assertion when compiled in debug mode. - * - * @tparam View The view class (op_{min|max}[_index]_view) that this proxy - * was created from. - * - * @see view_base - * - * @ingroup ReducersMinMax - */ -template class rhs_proxy { - typedef typename View::less_type less_type; - typedef typename View::compare_type compare_type; - typedef typename View::value_type value_type; - typedef typename View::content_type content_type; - typedef typename content_type::comp_value_type comp_value_type; - - friend class view_base; - friend rhs_proxy make_proxy(const typename View::value_type &value, - const View &view); - - typed_indirect_binary_function - m_comp; - const View *m_view; - value_type m_value; - - rhs_proxy &operator=(const rhs_proxy &); // Disable assignment operator - rhs_proxy(); // Disable default constructor - - // Constructor (called from view_base::make_proxy). 
- rhs_proxy(const View *view, const value_type &value, - const compare_type *compare) - : m_view(view), m_value(value), m_comp(compare) {} - - // Checks matching view, then return value (called from view_base::assign). - value_type value(const typename View::base *view) const { - return m_value; - } - - public: - /** Supports max_of(max_of(view, value), value) and the like. - */ - rhs_proxy calc(const value_type &x) const { - return rhs_proxy(m_view, - m_comp(content_type::comp_value(m_value), - content_type::comp_value(x)) - ? x - : m_value, - m_comp.pointer()); - } -}; - -template -inline rhs_proxy make_proxy(const typename View::value_type &value, - const View &view) { - return rhs_proxy(&view, value, view.compare_pointer()); -} - -//@} - -/** Base class for min and max view classes. - * - * This class accumulates the minimum or maximum of a set of values which have - * occurred as arguments to the `calc()` function, as determined by a - * comparator. The accumulated value will be the first `calc()` argument value - * `x` such that `compare(x, y)` is false for every `calc()` argument value - * `y`. - * - * If the comparator is `std::less`, then the accumulated value is the first - * argument value which is not less than any other argument value, i.e., the - * maximum. Similarly, if the comparator is `reverse_predicate`, - * which is equivalent to `std::greater`, then the accumulated value is the - * first argument value which is not greater than any other argument value, - * i.e., the minimum. - * - * @note This class provides the definitions that are required for a class - * that will be used as the parameter of a - * min_max_internal::monoid_base specialization. - * - * @tparam Content A content class that provides the value types and data - * members for the view. - * @tparam Less A "less than" binary predicate that defines the min or - * max function. - * @tparam Compare A binary predicate to be used to compare the values. 
- * (The same as @a Less for max reducers; its reversal for - * min reducers.) - * - * @see ReducersMinMaxViewContent - * @see op_max_view - * @see op_min_view - * @see op_max_index_view - * @see op_min_index_view - * @see monoid_base - * - * @ingroup ReducersMinMax - */ -template -class view_base : - // comparator_base comes first to ensure that it will get empty base class - // treatment - private comparator_base, - private Content { - typedef comparator_base base; - using base::compare; - using Content::comp_value; - using Content::has_value; - using Content::set_is_set; - using Content::set_value; - using Content::value; - typedef Content content_type; - - template friend class rhs_proxy; - template - friend rhs_proxy make_proxy(const typename View::value_type &value, - const View &view); - - public: - /** @name Monoid support. - */ - //@{ - - /** Value type. Required by @ref monoid_with_view. - */ - typedef typename Content::value_type value_type; - - /** The type of the comparator specified by the user, that defines the - * ordering on @a Type. Required by min_max::monoid_base. - */ - typedef Less less_type; - - /** The type of the comparator actually used by the view. Required by - * min_max::monoid_base. (This is the same as the @ref less_type for a - * max reducer, or `reverse_predicate` for a min reducer.) - */ - typedef Compare compare_type; - - /** Reduces two views. Required by @ref monoid_with_view. - */ - void reduce(view_base *other) { - if (other->is_set() && - (!this->is_set() || - compare(this->comp_value(), other->comp_value()))) { - this->set_value(other->value()); - this->set_is_set(); - } - } - - //@} - - /** Default constructor. Initializes to identity value. - */ - explicit view_base(const compare_type *compare) - : base(compare), Content() {} - - /** Value constructor. - */ - template - view_base(const T1 &x1, const compare_type *compare) - : base(compare), Content(x1) {} - - /** Value constructor. 
- */ - template - view_base(const T1 &x1, const T2 &x2, const compare_type *compare) - : base(compare), Content(x1, x2) {} - - /** Move-in constructor. - */ - explicit view_base(move_in_wrapper w, - const compare_type *compare) - : base(compare), Content(w.value()) {} - - /** @name Reducer support. - */ - //@{ - - void view_move_in(value_type &v) { - set_value(v); - set_is_set(); - } - void view_move_out(value_type &v) { v = value(); } - void view_set_value(const value_type &v) { - set_value(v); - set_is_set(); - } - value_type view_get_value() const { return value(); } - // view_get_reference() NOT SUPPORTED - - //@} - - /** Sets the contained index data member, without marking the view as set. - * (Meaningless for non-index reducers.) - */ - using Content::set_default_index; - - /** Is the value defined? - */ - using Content::is_set; - - /** Reference to contained value data member. - * @deprecated For legacy reducers only. - */ - using Content::get_reference; - - /** Reference to contained index data member. - * (Meaningless for non-index reducers.) - * @deprecated For legacy reducers only. - */ - using Content::get_index_reference; - - protected: - /** Updates the min/max value. - */ - void calc(const value_type &x) { - if (!has_value() || compare(comp_value(), comp_value(x))) - set_value(x); - set_is_set(); - } - - /** Assigns the result of a `{min|max}_of(view, value)` expression to the - * view. - * - * @see rhs_proxy - */ - template void assign(const rhs_proxy &rhs) { - calc(rhs.value(this)); - } -}; - -/** Base class for min and max monoid classes. - * - * The unique characteristic of minimum and maximum reducers is that they - * incorporate a comparator functor that defines what "minimum" or "maximum" - * means. The monoid for a reducer contains the comparator that will be used - * for the reduction. If the comparator is a function or a class with state, - * then each view will have a pointer to the comparator. 
- * - * This means that the `construct()` functions first construct the monoid - * (possibly with an explicit comparator argument), and then construct the - * view with a pointer to the monoid's comparator. - * - * @tparam View The view class. - * @tparam Align If true, reducers instantiated on this monoid will be - * aligned. By default, library reducers (unlike legacy - * library reducer _wrappers_) are unaligned. - * - * @see view_base - * - * @ingroup ReducersMinMax - */ -template -class monoid_base : public monoid_with_view { - typedef typename View::compare_type compare_type; - typedef typename View::less_type less_type; - - const compare_type m_compare; - - const compare_type *compare_pointer() const { return &m_compare; } - - public: - /** Default constructor uses default comparator. - */ - monoid_base() : m_compare() {} - - /** Constructor. - * - * @param compare The comparator to use. - */ - monoid_base(const compare_type &compare) : m_compare(compare) {} - - /** Creates an identity view. - * - * List view identity constructors take the list allocator as an argument. - * - * @param v The address of the uninitialized memory in which the view - * will be constructed. - */ - void identity(View *v) const { ::new ((void *)v) View(compare_pointer()); } - - /** @name construct functions - * - * Min/max monoid `construct()` functions optionally take one or two value - * arguments, a @ref move_in argument, and/or a comparator argument. 
- */ - //@{ - - template - static void construct(Monoid *monoid, View *view) { - provisional_guard mg(new ((void *)monoid) Monoid); - mg.confirm_if(new ((void *)view) View(monoid->compare_pointer())); - } - - template - static void construct(Monoid *monoid, View *view, const T1 &x1) { - provisional_guard mg(new ((void *)monoid) Monoid); - mg.confirm_if(new ((void *)view) View(x1, monoid->compare_pointer())); - } - - template - static void construct(Monoid *monoid, View *view, const T1 &x1, - const T2 &x2) { - provisional_guard mg(new ((void *)monoid) Monoid); - mg.confirm_if(new ((void *)view) - View(x1, x2, monoid->compare_pointer())); - } - - template - static void construct(Monoid *monoid, View *view, - const less_type &compare) { - provisional_guard mg(new ((void *)monoid) Monoid(compare)); - mg.confirm_if(new ((void *)view) View(monoid->compare_pointer())); - } - - template - static void construct(Monoid *monoid, View *view, const T1 &x1, - const less_type &compare) { - provisional_guard mg(new ((void *)monoid) Monoid(compare)); - mg.confirm_if(new ((void *)view) View(x1, monoid->compare_pointer())); - } - - template - static void construct(Monoid *monoid, View *view, const T1 &x1, - const T2 &x2, const less_type &compare) { - provisional_guard mg(new ((void *)monoid) Monoid(compare)); - mg.confirm_if(new ((void *)view) - View(x1, x2, monoid->compare_pointer())); - } - - //@} -}; - -} // namespace min_max_internal - -/** @defgroup ReducersMinMaxMaxValue Maximum reducers (value only) - * - * These reducers will find the largest value from a set of values. - * - * @ingroup ReducersMinMax - */ -//@{ - -/** The maximum reducer view class. - * - * This is the view class for reducers created with - * `cilk::reducer< cilk::op_max >`. It accumulates the maximum, - * as determined by a comparator, of a set of values which have occurred as - * arguments to the `calc_max()` function. 
The accumulated value will be the - * first argument `x` such that `compare(x, y)` is false for every argument - * `y`. - * - * If the comparator is `std::less`, then the accumulated value is the first - * argument value which is not less than any other argument value, i.e., the - * maximum. - * - * @note The reducer "dereference" operation (`reducer::operator *()`) - * yields a reference to the view. Thus, for example, the view class's - * `calc_max()` function would be used in an expression like - * `r->calc_max(a)` where `r` is an op_max reducer variable. - * - * @tparam Type The type of the values compared by the reducer. This will - * be the value type of a monoid_with_view that is - * instantiated with this view. - * @tparam Compare A `Strict Weak Ordering` whose argument type is @a Type. It - * defines the "less than" relation used to compute the - * maximum. - * - * @see ReducersMinMax - * @see op_max - */ -template -class op_max_view : public min_max_internal::view_base< - min_max_internal::view_content, - Compare, Compare> { - typedef min_max_internal::view_base< - min_max_internal::view_content, Compare, Compare> - base; - using base::assign; - using base::calc; - friend class min_max_internal::rhs_proxy; - - public: - /** @name Constructors. - * - * All op_max_view constructors simply pass their arguments on to the - * @ref view_base base class. - */ - //@{ - - template op_max_view(const T1 &x1) : base(x1) {} - - template - op_max_view(const T1 &x1, const T2 &x2) : base(x1, x2) {} - - //@} - - /** @name View modifier operations. - */ - //@{ - - /** Maximizes with a value. - * - * If @a x is greater than the current value of the view (as defined by - * the reducer's comparator), or if the view was created without an - * initial value and its value has never been updated (with `calc_max()` - * or `= max_of()`), then the value of the view is set to @a x. - * - * @param x The value to maximize the view's value with. - * - * @return A reference to the view. 
(Allows chaining - * `view.comp_max(a).comp_max(b)…`.) - */ - op_max_view &calc_max(const Type &x) { - calc(x); - return *this; - } - - /** Assigns the result of a `max_of(view, value)` expression to the view. - * - * @param rhs An rhs_proxy value created by a `max_of(view, value)` - * expression. - * - * @return A reference to the view. - * - * @see min_max_internal::view_base::rhs_proxy - */ - op_max_view & - operator=(const min_max_internal::rhs_proxy &rhs) { - assign(rhs); - return *this; - } - - //@} -}; - -/** Computes the maximum of the value in an op_max_view and another value. - * - * The result of this computation can only be assigned back to the original - * view or used in another max_of() call. For example, - * - * *reducer = max_of(*reducer, x); - * *reducer = max_of(x, *reducer); - * - * @see min_max_internal::rhs_proxy - */ -template -inline min_max_internal::rhs_proxy> -max_of(const op_max_view &view, const Type &value) { - return min_max_internal::make_proxy(value, view); -} - -/// @copydoc max_of(const op_max_view&, const Type&) -template -inline min_max_internal::rhs_proxy> -max_of(const Type &value, const op_max_view &view) { - return min_max_internal::make_proxy(value, view); -} - -/** Computes nested maximum. - * - * Compute the maximum of the result of a max_of() call and another value. - * - * The result of this computation can only be assigned back to the original - * view or wrapper, or used in another max_of() call. 
For example, - * - * *reducer = max_of(x, max_of(y, *reducer)); - * wrapper = max_of(max_of(wrapper, x), y); - * - * @see min_max_internal::rhs_proxy - */ -template -inline min_max_internal::rhs_proxy> -max_of(const min_max_internal::rhs_proxy> &proxy, - const Type &value) { - return proxy.calc(value); -} - -/// @copydoc max_of(const min_max_internal::rhs_proxy< op_max_view >&, const Type&) -template -inline min_max_internal::rhs_proxy> -max_of(const Type &value, - const min_max_internal::rhs_proxy> &proxy) { - return proxy.calc(value); -} - -/** Monoid class for maximum reductions. Instantiate the cilk::reducer template - * class with an op_max monoid to create a maximum reducer class. For example, - * to compute the maximum of a set of `int` values: - * - * cilk::reducer< cilk::op_max > r; - * - * @see ReducersMinMax - * @see op_max_view - */ -template , bool Align = false> -class op_max - : public min_max_internal::monoid_base, Align> { - typedef min_max_internal::monoid_base, Align> - base; - - public: - /// Construct with default comparator. - op_max() {} - /// Construct with specified comparator. - op_max(const Compare &compare) : base(compare) {} -}; - -//@} - -/** @defgroup ReducersMinMaxMinValue Minimum reducers (value only) - * - * These reducers will find the smallest value from a set of values. - * - * @ingroup ReducersMinMax - */ -//@{ - -/** The minimum reducer view class. - * - * This is the view class for reducers created with - * `cilk::reducer< cilk::op_min >`. It accumulates the minimum, - * as determined by a comparator, of a set of values which have occurred as - * arguments to the `calc_min()` function. The accumulated value will be the - * first argument `x` such that `compare(y, x)` is false for every argument - * `y`. - * - * If the comparator is `std::less`, then the accumulated value is the first - * argument value which no other argument value is less than, i.e., the - * minimum. 
- * - * @note The reducer "dereference" operation (`reducer::operator *()`) - * yields a reference to the view. Thus, for example, the view class's - * `calc_min()` function would be used in an expression like - * `r->calc_min(a)` where `r` is an op_min reducer variable. - * - * @tparam Type The type of the values compared by the reducer. This will - * be the value type of a monoid_with_view that is - * instantiated with this view. - * @tparam Compare A `Strict Weak Ordering` whose argument type is @a Type. It - * defines the "less than" relation used to compute the - * minimum. - * - * @see ReducersMinMax - * @see op_min - */ -template -class op_min_view - : public min_max_internal::view_base< - min_max_internal::view_content, Compare, - min_max_internal::reverse_predicate> { - typedef min_max_internal::view_base< - min_max_internal::view_content, Compare, - min_max_internal::reverse_predicate> - base; - using base::assign; - using base::calc; - friend class min_max_internal::rhs_proxy; - - public: - /** @name Constructors. - * - * All op_min_view constructors simply pass their arguments on to the - * @ref view_base base class. - */ - //@{ - - template op_min_view(const T1 &x1) : base(x1) {} - - template - op_min_view(const T1 &x1, const T2 &x2) : base(x1, x2) {} - - //@} - - /** @name View modifier operations. - */ - //@{ - - /** Minimizes with a value. - * - * If @a x is less than the current value of the view (as defined by the - * reducer's comparator), or if the view was created without an initial - * value and its value has never been updated (with `calc_min()` or - * `= min_of()`), then the value of the view is set to @a x. - * - * @param x The value to minimize the view's value with. - * - * @return A reference to the view. (Allows chaining - * `view.comp_min(a).comp_min(b)…`.) - */ - op_min_view &calc_min(const Type &x) { - calc(x); - return *this; - } - - /** Assigns the result of a `min_of(view, value)` expression to the view. 
- * - * @param rhs An rhs_proxy value created by a `min_of(view, value)` - * expression. - * - * @return A reference to the view. - * - * @see min_max_internal::view_base::rhs_proxy - */ - op_min_view & - operator=(const min_max_internal::rhs_proxy &rhs) { - assign(rhs); - return *this; - } -}; - -/** Computes the minimum of the value in a view and another value. - * - * The result of this computation can only be assigned back to the original - * view or used in another min_of() call. For example, - * - * *reducer = min_of(*reducer, x); - * *reducer = min_of(x, *reducer); - * - * @see min_max_internal::view_base::rhs_proxy - */ -template -inline min_max_internal::rhs_proxy> -min_of(const op_min_view &view, const Type &value) { - return min_max_internal::make_proxy(value, view); -} - -/// @copydoc min_of(const op_min_view&, const Type&) -template -inline min_max_internal::rhs_proxy> -min_of(const Type &value, const op_min_view &view) { - return min_max_internal::make_proxy(value, view); -} - -/** Computes nested minimum. - * - * Compute the minimum of the result of a min_of() call and another value. - * - * The result of this computation can only be assigned back to the original - * view or wrapper, or used in another min_of() call. For example, - * - * *reducer = min_of(x, min_of(y, *reducer)); - * wrapper = min_of(min_of(wrapper, x), y); - * - * @see min_max_internal::rhs_proxy - */ -template -inline min_max_internal::rhs_proxy> -min_of(const min_max_internal::rhs_proxy> &proxy, - const Type &value) { - return proxy.calc(value); -} - -/// @copydoc min_of(const min_max_internal::rhs_proxy< op_min_view >&, const Type&) -template -inline min_max_internal::rhs_proxy> -min_of(const Type &value, - const min_max_internal::rhs_proxy> &proxy) { - return proxy.calc(value); -} - -/** Monoid class for minimum reductions. Instantiate the cilk::reducer template - * class with an op_min monoid to create a minimum reducer class. 
For example, - * to compute the minimum of a set of `int` values: - * - * cilk::reducer< cilk::op_min > r; - * - * @see ReducersMinMax - * @see op_min_view - */ -template , bool Align = false> -class op_min - : public min_max_internal::monoid_base, Align> { - typedef min_max_internal::monoid_base, Align> - base; - - public: - /// Construct with default comparator. - op_min() {} - /// Construct with specified comparator. - op_min(const Compare &compare) : base(compare) {} -}; - -//@} - -/** @defgroup ReducersMinMaxMaxIndex Maximum reducers (value and index) - * - * These reducers will find the largest value from a set of values, and its - * index in the set. - * - * @ingroup ReducersMinMax - */ -//@{ - -/** The maximum index reducer view class. - * - * This is the view class for reducers created with - * `cilk::reducer< cilk::op_max_index >`. It accumulates - * the maximum, as determined by a comparator, of a set of values which have - * occurred as arguments to the `calc_max()` function, and records the index - * of the maximum value. The accumulated value will be the first argument `x` - * such that `compare(x, y)` is false for every argument `y`. - * - * If the comparator is `std::less`, then the accumulated value is the first - * argument value which is not less than any other argument value, i.e., the - * maximum. - * - * @note The reducer "dereference" operation (`reducer::operator *()`) - * yields a reference to the view. Thus, for example, the view class's - * `calc_max()` function would be used in an expression like - * `r->calc_max(i, a)`where `r` is an op_max_index reducer - * variable. - * - * @note The word "index" suggests an integer index into an array, but there - * is no restriction on the index type or how it should be used. In - * general, it may be convenient to use it for any kind of key that - * can be used to locate the maximum value in the collection that it - * came from - for example: - * - An index into an array. 
- * - A key into an STL map. - * - An iterator into any STL container. - * - * @note A max_index reducer is essentially a max reducer whose value type - * is a `std::pair`. This fact is camouflaged in the view - * `calc_max` function, the global `max_of` functions, and the reducer - * value constructor, which can all take an index argument and a value - * argument as an alternative to a single `std::pair` argument. - * However, the reducer `set_value()`, `get_value()`, `move_in()`, and - * `move_out()` functions work only with pairs, not with individual - * value and/or index arguments. - * - * @tparam Index The type of the indices associated with the values. - * @tparam Type The type of the values compared by the reducer. This will - * be the value type of a monoid_with_view that is - * instantiated with this view. - * @tparam Compare Used to compare the values. It must be a binary predicate. - * If it is omitted, then the view computes the conventional - * arithmetic maximum. - * - * @see ReducersMinMax - * @see op_max_index - */ -template -class op_max_index_view - : public min_max_internal::view_base< - min_max_internal::index_view_content, - Compare, Compare> { - typedef min_max_internal::view_base< - min_max_internal::index_view_content, - Compare, Compare> - base; - using base::assign; - using base::calc; - typedef std::pair pair_type; - friend class min_max_internal::rhs_proxy; - - public: - /** @name Constructors. - * - * All op_max_index_view constructors simply pass their arguments on to the - * @ref view_base base class, except for the `(index, value [, compare])` - * constructors, which create a `std::pair` containing the index and value. 
- */ - //@{ - - op_max_index_view() : base() {} - - template op_max_index_view(const T1 &x1) : base(x1) {} - - template - op_max_index_view(const T1 &x1, const T2 &x2) : base(x1, x2) {} - - template - op_max_index_view(const T1 &x1, const T2 &x2, const T3 &x3) - : base(x1, x2, x3) {} - - op_max_index_view(const Index &i, const Type &v) : base(pair_type(i, v)) {} - - op_max_index_view(const Index &i, const Type &v, - const typename base::compare_type *c) - : base(pair_type(i, v), c) {} - - //@} - - /** Maximizes with a value and index. - * - * If @a x is greater than the current value of the view (as defined by - * the reducer's comparator), or if the view was created without an - * initial value and its value has never been updated (with `calc_max()` - * or `= max_of()`), then the value of the view is set to @a x, and the - * index is set to @a i.. - * - * @param i The index of the value @a x. - * @param x The value to maximize the view's value with. - * - * @return A reference to the view. (Allows - * `view.comp_max(i, a).comp_max(j, b)…`.) - */ - op_max_index_view &calc_max(const Index &i, const Type &x) { - calc(pair_type(i, x)); - return *this; - } - - /** Maximizes with an index/value pair. - * - * If @a pair.second is greater than the current value of the view (as - * defined by the reducer's comparator), or if the view was created - * without an initial value and its value has never been updated (with - * `calc_max()` or `= max_of()`), then the value of the view is set to - * @a pair.second, and the index is set to @a pair.first. - * - * @param pair A pair containing a value to maximize the view's value - * with and its associated index. - * - * @return A reference to the view. (Allows - * `view.comp_max(p1).comp_max(p2)…`.) - */ - op_max_index_view &calc_max(const pair_type &pair) { - calc(pair); - return *this; - } - - /** Assigns the result of a `max_of(view, index, value)` expression to the - * view. 
- * - * @param rhs An rhs_proxy value created by a `max_of(view, index, value)` - * expression. - * - * @return A reference to the view. - * - * @see min_max_internal::view_base::rhs_proxy - */ - op_max_index_view & - operator=(const min_max_internal::rhs_proxy &rhs) { - assign(rhs); - return *this; - } -}; - -/** Computes the maximum of the value in a view and another value. - * - * The result of this computation can only be assigned back to the original - * view or used in another max_of() call. For example, - * - * *reducer = max_of(*reducer, i, x); - * *reducer = max_of(i, x, *reducer); - * - * @see min_max_internal::rhs_proxy - */ -template -inline min_max_internal::rhs_proxy> -max_of(const op_max_index_view &view, const Index &index, - const Type &value) { - return min_max_internal::make_proxy(std::pair(index, value), - view); -} - -/// @copydoc max_of(const op_max_index_view&, const -/// Index&, const Type&) -template -inline min_max_internal::rhs_proxy> -max_of(const Index &index, const Type &value, - const op_max_index_view &view) { - return min_max_internal::make_proxy(std::pair(index, value), - view); -} - -/// @copydoc max_of(const op_max_index_view&, const -/// Index&, const Type&) -template -inline min_max_internal::rhs_proxy> -max_of(const op_max_index_view &view, - const std::pair &pair) { - return min_max_internal::make_proxy(pair, view); -} - -/// @copydoc max_of(const op_max_index_view&, const -/// Index&, const Type&) -template -inline min_max_internal::rhs_proxy> -max_of(const std::pair &pair, - const op_max_index_view &view) { - return min_max_internal::make_proxy(pair, view); -} - -/** Computes the nested maximum between the value in a view and other values. - * - * Compute the maximum of the result of a max_of() call and another value. - * - * The result of this computation can only be assigned back to the original - * view or used in another max_of() call. 
For example, - * - * *reducer = max_of(x, max_of(y, *reducer)); - * *reducer = max_of(max_of(*reducer, x), y); - * - * @see min_max_internal::rhs_proxy - */ -template -inline min_max_internal::rhs_proxy> -max_of( - const min_max_internal::rhs_proxy> - &proxy, - const Index &index, const Type &value) { - return proxy.calc(std::pair(index, value)); -} - -/// @copydoc max_of(const min_max_internal::rhs_proxy< op_max_index_view >&, const Index&, const Type&) -template -inline min_max_internal::rhs_proxy> -max_of( - const Index &index, const Type &value, - const min_max_internal::rhs_proxy> - &proxy) { - return proxy.calc(std::pair(index, value)); -} - -/// @copydoc max_of(const min_max_internal::rhs_proxy< op_max_index_view >&, const Index&, const Type&) -template -inline min_max_internal::rhs_proxy> -max_of( - const min_max_internal::rhs_proxy> - &proxy, - const std::pair &pair) { - return proxy.calc(pair); -} - -/// @copydoc max_of(const min_max_internal::rhs_proxy< op_max_index_view >&, const Index&, const Type&) -template -inline min_max_internal::rhs_proxy> -max_of( - const std::pair &pair, - const min_max_internal::rhs_proxy> - &proxy) { - return proxy.calc(pair); -} - -/** Monoid class for maximum reductions with index. Instantiate the - * cilk::reducer template class with an op_max_index monoid to create a - * max_index reducer class. For example, to compute the maximum of an array of - * `double` values and the array index of the max value: - * - * cilk::reducer< cilk::op_max_index > r; - * - * @see ReducersMinMax - * @see op_max_index_view - */ -template , - bool Align = false> -class op_max_index : public min_max_internal::monoid_base< - op_max_index_view, Align> { - typedef min_max_internal::monoid_base< - op_max_index_view, Align> - base; - - public: - /// Construct with default comparator. - op_max_index() {} - /// Construct with specified comparator. 
- op_max_index(const Compare &compare) : base(compare) {} -}; - -//@} - -/** @defgroup ReducersMinMaxMinIndex Minimum reducers (value and index) - * - * These reducers will find the smallest value from a set of values, and its - * index in the set. - * - * @ingroup ReducersMinMax - */ -//@{ - -/** The minimum index reducer view class. - * - * This is the view class for reducers created with - * `cilk::reducer >`. It accumulates - * the minimum, as determined by a comparator, of a set of values which have - * occurred as arguments to the `calc_min()` function, and records the index - * of the minimum value. The accumulated value will be the first argument `x` - * such that `compare(y, x)` is false for every argument `y`. - * - * If the comparator is `std::less`, then the accumulated value is the first - * argument value which no other argument value is less than, i.e., the - * minimum. - * - * @note The reducer "dereference" operation (`reducer::operator *()`) - * yields a reference to the view. Thus, for example, the view class's - * `calc_min()` function would be - * used in an expression like `r->calc_min(i, a)`where `r` is an - * op_min_index reducer variable. - * - * @note The word "index" suggests an integer index into an array, but there - * is no restriction on the index type or how it should be used. In - * general, it may be convenient to use it for any kind of key that - * can be used to locate the minimum value in the collection that it - * came from - for example: - * - An index into an array. - * - A key into an STL map. - * - An iterator into any STL container. - * - * @note A min_index reducer is essentially a min reducer whose value type - * is a `std::pair`. This fact is camouflaged in the view - * `calc_min` function, the global `min_of` functions, and the reducer - * value constructor, which can all take an index argument and a value - * argument as an alternative to a single `std::pair` argument. 
- * However, the reducer `set_value()`, `get_value()`, `move_in()`, and - * `move_out()` functions work only with pairs, not with individual - * value and/or index arguments. - * - * @tparam Index The type of the indices associated with the values. - * @tparam Type The type of the values compared by the reducer. This will - * be the value type of a monoid_with_view that is - * instantiated with this view. - * @tparam Compare Used to compare the values. It must be a binary predicate. - * If it is omitted, then the view computes the conventional - * arithmetic minimum. - * - * @see ReducersMinMax - * @see op_min_index - */ -template -class op_min_index_view - : public min_max_internal::view_base< - min_max_internal::index_view_content, - Compare, min_max_internal::reverse_predicate> { - typedef min_max_internal::view_base< - min_max_internal::index_view_content, - Compare, min_max_internal::reverse_predicate> - base; - using base::assign; - using base::calc; - typedef std::pair pair_type; - friend class min_max_internal::rhs_proxy; - - public: - /** @name Constructors. - * - * All op_min_index_view constructors simply pass their arguments on to the - * @ref view_base base class, except for the `(index, value [, compare])` - * constructors, which create a `std::pair` containing the index and value. - */ - //@{ - - op_min_index_view() : base() {} - - template op_min_index_view(const T1 &x1) : base(x1) {} - - template - op_min_index_view(const T1 &x1, const T2 &x2) : base(x1, x2) {} - - template - op_min_index_view(const T1 &x1, const T2 &x2, const T3 &x3) - : base(x1, x2, x3) {} - - op_min_index_view(const Index &i, const Type &v) : base(pair_type(i, v)) {} - - op_min_index_view(const Index &i, const Type &v, - const typename base::compare_type *c) - : base(pair_type(i, v), c) {} - - //@} - - /** Minimizes with a value and index. 
- * - * If @a x is greater than the current value of the view (as defined by - * the reducer's comparator), or if the view was created without an - * initial value and its value has never been updated (with `calc_min()` - * or `= min_of()`), then the value of the view is set to @a x, and the - * index is set to @a i.. - * - * @param i The index of the value @a x. - * @param x The value to minimize the view's value with. - * - * @return A reference to the view. (Allows - * `view.comp_min(i, a).comp_min(j, b)…`.) - */ - op_min_index_view &calc_min(const Index &i, const Type &x) { - calc(pair_type(i, x)); - return *this; - } - - /** Maximizes with an index/value pair. - * - * If @a pair.second is less than the current value of the view (as - * defined by the reducer's comparator), or if the view was created - * without an initial value and its value has never been updated (with - * `calc_min()` or `= min_of()`), then the value of the view is set to - * @a pair.second, and the index is set to @a pair.first. - * - * @param pair A pair containing a value to minimize the view's value - * with and its associated index. - * - * @return A reference to the view. (Allows - * `view.comp_min(p1).comp_min(p2)…`.) - */ - op_min_index_view &calc_min(const pair_type &pair) { - calc(pair); - return *this; - } - - /** Assigns the result of a `min_of(view, index, value)` expression to the - * view. - * - * @param rhs An rhs_proxy value created by a `min_of(view, index, value)` - * expression. - * - * @return A reference to the view. - * - * @see min_max_internal::view_base::rhs_proxy - */ - op_min_index_view & - operator=(const min_max_internal::rhs_proxy &rhs) { - assign(rhs); - return *this; - } -}; - -/** Computes the minimum of the value in a view and another value. - * - * The result of this computation can only be assigned back to the original - * view or used in another min_of() call. 
For example, - * - * *reducer = min_of(*reducer, i, x); - * *reducer = min_of(i, x, *reducer); - * - * @see min_max_internal::min_min_view_base::rhs_proxy - */ -template -inline min_max_internal::rhs_proxy> -min_of(const op_min_index_view &view, const Index &index, - const Type &value) { - return min_max_internal::make_proxy(std::pair(index, value), - view); -} - -/// @copydoc min_of(const op_min_index_view&, const -/// Index&, const Type&) -template -inline min_max_internal::rhs_proxy> -min_of(const Index &index, const Type &value, - const op_min_index_view &view) { - return min_max_internal::make_proxy(std::pair(index, value), - view); -} - -/// @copydoc min_of(const op_min_index_view&, const -/// Index&, const Type&) -template -inline min_max_internal::rhs_proxy> -min_of(const op_min_index_view &view, - const std::pair &pair) { - return min_max_internal::make_proxy(pair, view); -} - -/// @copydoc min_of(const op_min_index_view&, const -/// Index&, const Type&) -template -inline min_max_internal::rhs_proxy> -min_of(const std::pair &pair, - const op_min_index_view &view) { - return min_max_internal::make_proxy(pair, view); -} - -/** Computes nested minimum between the value in a view and other values. - * - * Compute the minimum of the result of a min_of() call and another value. - * - * The result of this computation can only be assigned back to the original - * view or used in another min_of() call. 
For example, - * - * *reducer = min_of(x, min_of(y, *reducer)); - * *reducer = min_of(min_of(*reducer, x), y); - * - * @see min_max_internal::min_min_view_base::rhs_proxy - */ -template -inline min_max_internal::rhs_proxy> -min_of( - const min_max_internal::rhs_proxy> - &proxy, - const Index &index, const Type &value) { - return proxy.calc(std::pair(index, value)); -} - -/// @copydoc min_of(const min_max_internal::rhs_proxy< op_min_index_view >&, const Index&, const Type&) -template -inline min_max_internal::rhs_proxy> -min_of( - const Index &index, const Type &value, - const min_max_internal::rhs_proxy> - &proxy) { - return proxy.calc(std::pair(index, value)); -} - -/// @copydoc min_of(const min_max_internal::rhs_proxy< op_min_index_view >&, const Index&, const Type&) -template -inline min_max_internal::rhs_proxy> -min_of( - const min_max_internal::rhs_proxy> - &proxy, - const std::pair &pair) { - return proxy.calc(pair); -} - -/// @copydoc min_of(const min_max_internal::rhs_proxy< op_min_index_view >&, const Index&, const Type&) -template -inline min_max_internal::rhs_proxy> -min_of( - const std::pair &pair, - const min_max_internal::rhs_proxy> - &proxy) { - return proxy.calc(pair); -} - -/** Monoid class for minimum reductions with index. Instantiate the - * cilk::reducer template class with an op_min_index monoid to create a - * min_index reducer class. For example, to compute the minimum of an array of - * `double` values and the array index of the min value: - * - * cilk::reducer< cilk::op_min_index > r; - * - * @see ReducersMinMax - * @see op_min_index_view - */ -template , - bool Align = false> -class op_min_index : public min_max_internal::monoid_base< - op_min_index_view, Align> { - typedef min_max_internal::monoid_base< - op_min_index_view, Align> - base; - - public: - /// Construct with default comparator. - op_min_index() {} - /// Construct with specified comparator. 
- op_min_index(const Compare &compare) : base(compare) {} -}; - -//@} - -/** Deprecated maximum reducer wrapper class. - * - * reducer_max is the same as @ref reducer<@ref op_max>, except that - * reducer_max is a proxy for the contained view, so that accumulator - * variable update operations can be applied directly to the reducer. For - * example, a value is maximized with a `reducer<%op_max>` with - * `r->calc_max(a)`, but a value can be maximized with a `%reducer_max` with - * `r.calc_max(a)`. - * - * - * @deprecated Users are strongly encouraged to use `reducer` - * reducers rather than the old wrappers like reducer_max. - * The `reducer` reducers show the reducer/monoid/view - * architecture more clearly, are more consistent in their - * implementation, and present a simpler model for new - * user-implemented reducers. - * - * @note Implicit conversions are provided between `%reducer_max` - * and `reducer<%op_max>`. This allows incremental code - * conversion: old code that used `%reducer_max` can pass a - * `%reducer_max` to a converted function that now expects a - * pointer or reference to a `reducer<%op_max>`, and vice - * versa. **But see @ref redminmax_compatibility.** - * - * @tparam Type The value type of the reducer. - * @tparam Compare The "less than" comparator type for the reducer. - * - * @see op_max - * @see op_max_view - * @see reducer - * @see ReducersMinMax - * @ingroup ReducersMinMaxMaxValue - */ -template > -class reducer_max : public reducer> { - __CILKRTS_STATIC_ASSERT( - ::cilk::internal::class_is_empty< - typename ::cilk::internal::binary_functor::type>::value, - "cilk::reducer_max only works with " - "an empty Compare class"); - typedef reducer> base; - - public: - /// Type of data in a reducer_max. - typedef Type basic_value_type; - - /// The view type for the reducer. - typedef typename base::view_type view_type; - - /// The view type for the reducer. - typedef typename base::view_type View; - - /// The monoid type for the reducer. 
- typedef typename base::monoid_type monoid_type; - - /// The monoid type for the reducer. - typedef typename base::monoid_type Monoid; - - /// The view's rhs proxy type. - typedef min_max_internal::rhs_proxy rhs_proxy; - - using base::view; - - /** @name Constructors - */ - //@{ - - /// Constructs the wrapper in its identity state (either `!is_set()`, or - /// `value() == identity value`). - reducer_max() : base() {} - - /// Constructs the wrapper with a specified initial value. - explicit reducer_max(const Type &initial_value) : base(initial_value) {} - - /// Constructs the wrapper in its identity state with a specified - /// comparator. - explicit reducer_max(const Compare &comp) : base(comp) {} - - /// Constructs the wrapper with a specified initial value and a specified - /// comparator. - reducer_max(const Type &initial_value, const Compare &comp) - : base(initial_value, comp) {} - - //@} - - /** @name Forwarded functions - * @details Functions that update the contained accumulator variable are - * simply forwarded to the contained @ref op_max_view. */ - //@{ - - /// @copydoc cilk_lib_1_1::min_max_internal::view_content::is_set() const - bool is_set() const { return view().is_set(); } - - /// @copydoc op_max_view::calc_max(const Type&) - reducer_max &calc_max(const Type &x) { - view().calc_max(x); - return *this; - } - - /// @copydoc op_max_view::operator=(const - /// min_max_internal::rhs_proxy&) - reducer_max &operator=(const rhs_proxy &rhs) { - view() = rhs; - return *this; - } - - //@} - - /** Allows read-only access to the value within the current view. - * - * @returns A const reference to the value within the current view. - */ - const Type &get_reference() const { return view().get_reference(); } - - /// @name Dereference - /** Dereferencing a wrapper is a no-op. It simply returns the wrapper. 
- * Combined with the rule that a wrapper forwards view operations to the - * view, this means that view operations can be written the same way on - * reducers and wrappers, which is convenient for incrementally - * converting code using wrappers to code using reducers. That is: - * - * reducer< op_max > r; - * r->calc_max(a); // *r returns the view - * // calc_max is a view member function - * - * reducer_max w; - * w->calc_max(a); // *w returns the wrapper - * // calc_max is a wrapper member function that - * // calls the corresponding view function - */ - //@{ - reducer_max &operator*() { return *this; } - reducer_max const &operator*() const { return *this; } - - reducer_max *operator->() { return this; } - reducer_max const *operator->() const { return this; } - //@} - - /** @name Upcast - * @details In Intel Cilk Plus library 0.9, reducers were always - * cache-aligned. In library 1.0, reducer cache alignment is optional. By - * default, reducers are unaligned (i.e., just naturally aligned), but - * legacy wrappers inherit from cache-aligned reducers for binary - * compatibility. - * - * This means that a wrapper will automatically be upcast to its aligned - * reducer base class. The following conversion operators provide - * pseudo-upcasts to the corresponding unaligned reducer class. - */ - //@{ - operator reducer> &() { - return *reinterpret_cast> *>(this); - } - - operator const reducer> &() const { - return *reinterpret_cast> *>( - this); - } - //@} -}; - -/// @cond internal -// The legacy definition of max_of(reducer_max, value) has different -// behavior and a different return type than this definition. We add an -// unused third argument to this version of the function to give it a different -// signature, so that they won't end up sharing a single object file entry. -struct max_of_1_0_t {}; -const max_of_1_0_t max_of_1_0 = {}; -/// @endcond - -/** Computes the maximum of the value in a reducer_max and another value. 
- * - * @deprecated Because reducer_max is deprecated. - * - * The result of this computation can only be assigned back to the original - * reducer or used in another max_of() call. For example, - * - * reducer = max_of(reducer, x); - * reducer = max_of(x, reducer); - * - * @see min_max_internal::rhs_proxy - * - * @ingroup ReducersMinMaxMaxValue - */ -template -inline min_max_internal::rhs_proxy> -max_of(const reducer_max &r, const Type &value, - const max_of_1_0_t & = max_of_1_0) { - return min_max_internal::make_proxy(value, r.view()); -} - -/// @copydoc max_of(const reducer_max&, const Type&, const -/// max_of_1_0_t&) -/// @ingroup ReducersMinMaxMaxValue -template -inline min_max_internal::rhs_proxy> -max_of(const Type &value, const reducer_max &r, - const max_of_1_0_t & = max_of_1_0) { - return min_max_internal::make_proxy(value, r.view()); -} - -/** Deprecated minimum reducer wrapper class. - * - * reducer_min is the same as @ref reducer<@ref op_min>, except that - * reducer_min is a proxy for the contained view, so that accumulator - * variable update operations can be applied directly to the reducer. For - * example, a value is minimized with a `reducer<%op_min>` with - * `r->calc_min(a)`, but a value can be minimized with a `%reducer_min` with - * `r.calc_min(a)`. - * - * - * @deprecated Users are strongly encouraged to use `reducer` - * reducers rather than the old wrappers like reducer_min. - * The `reducer` reducers show the reducer/monoid/view - * architecture more clearly, are more consistent in their - * implementation, and present a simpler model for new - * user-implemented reducers. - * - * @note Implicit conversions are provided between `%reducer_min` - * and `reducer<%op_min>`. This allows incremental code - * conversion: old code that used `%reducer_min` can pass a - * `%reducer_min` to a converted function that now expects a - * pointer or reference to a `reducer<%op_min>`, and vice - * versa. 
**But see @ref redminmax_compatibility.** - * - * @tparam Type The value type of the reducer. - * @tparam Compare The "less than" comparator type for the reducer. - * - * @see op_min - * @see op_min_view - * @see reducer - * @see ReducersMinMax - * @ingroup ReducersMinMaxMinValue - */ -template > -class reducer_min : public reducer> { - __CILKRTS_STATIC_ASSERT( - ::cilk::internal::class_is_empty< - typename ::cilk::internal::binary_functor::type>::value, - "cilk::reducer_min only works with " - "an empty Compare class"); - typedef reducer> base; - - public: - /// Type of data in a reducer_min. - typedef Type basic_value_type; - - /// The view type for the reducer. - typedef typename base::view_type view_type; - - /// The view type for the reducer. - typedef typename base::view_type View; - - /// The monoid type for the reducer. - typedef typename base::monoid_type monoid_type; - - /// The monoid type for the reducer. - typedef typename base::monoid_type Monoid; - - /// The view's rhs proxy type. - typedef min_max_internal::rhs_proxy rhs_proxy; - - using base::view; - - /** @name Constructors - */ - //@{ - - /// Constructs the wrapper in its identity state (either `!is_set()`, or - /// `value() == identity value`). - reducer_min() : base() {} - - /// Constructs the wrapper with a specified initial value. - explicit reducer_min(const Type &initial_value) : base(initial_value) {} - - /// Constructs the wrapper in its identity state with a specified - /// comparator. - explicit reducer_min(const Compare &comp) : base(comp) {} - - /// Constructs the wrapper with a specified initial value and a specified - /// comparator. - reducer_min(const Type &initial_value, const Compare &comp) - : base(initial_value, comp) {} - - //@} - - /** @name Forwarded functions - * @details Functions that update the contained accumulator variable are - * simply forwarded to the contained @ref op_min_view. 
*/ - //@{ - - /// @copydoc cilk_lib_1_1::min_max_internal::view_content::is_set() const - bool is_set() const { return view().is_set(); } - - /// @copydoc op_min_view::calc_min(const Type&) - reducer_min &calc_min(const Type &x) { - view().calc_min(x); - return *this; - } - - /// @copydoc op_min_view::operator=(const - /// min_max_internal::rhs_proxy&) - reducer_min &operator=(const rhs_proxy &rhs) { - view() = rhs; - return *this; - } - - //@} - - /** Allows read-only access to the value within the current view. - * - * @returns A const reference to the value within the current view. - */ - const Type &get_reference() const { return view().get_reference(); } - - /// @name Dereference - /** Dereferencing a wrapper is a no-op. It simply returns the wrapper. - * Combined with the rule that a wrapper forwards view operations to the - * view, this means that view operations can be written the same way on - * reducers and wrappers, which is convenient for incrementally - * converting code using wrappers to code using reducers. That is: - * - * reducer< op_min > r; - * r->calc_min(a); // *r returns the view - * // calc_min is a view member function - * - * reducer_min w; - * w->calc_min(a); // *w returns the wrapper - * // calc_min is a wrapper member function that - * // calls the corresponding view function - */ - //@{ - reducer_min &operator*() { return *this; } - reducer_min const &operator*() const { return *this; } - - reducer_min *operator->() { return this; } - reducer_min const *operator->() const { return this; } - //@} - - /** @name Upcast - * @details In Intel Cilk Plus library 0.9, reducers were always - * cache-aligned. In library 1.0, reducer cache alignment is optional. By - * default, reducers are unaligned (i.e., just naturally aligned), but - * legacy wrappers inherit from cache-aligned reducers for binary - * compatibility. - * - * This means that a wrapper will automatically be upcast to its aligned - * reducer base class. 
The following conversion operators provide - * pseudo-upcasts to the corresponding unaligned reducer class. - */ - //@{ - operator reducer> &() { - return *reinterpret_cast> *>(this); - } - - operator const reducer> &() const { - return *reinterpret_cast> *>( - this); - } - //@} -}; - -/** Computes the minimum of a reducer and a value. - * - * @deprecated Because reducer_min is deprecated. - */ -//@{ -// The legacy definition of min_of(reducer_min, value) has different -// behavior and a different return type than this definition. We add an -// unused third argument to this version of the function to give it a different -// signature, so that they won't end up sharing a single object file entry. -struct min_of_1_0_t {}; -const min_of_1_0_t min_of_1_0 = {}; - -template -inline min_max_internal::rhs_proxy> -min_of(const reducer_min &r, const Type &value, - const min_of_1_0_t & = min_of_1_0) { - return min_max_internal::make_proxy(value, r.view()); -} - -template -inline min_max_internal::rhs_proxy> -min_of(const Type &value, const reducer_min &r, - const min_of_1_0_t & = min_of_1_0) { - return min_max_internal::make_proxy(value, r.view()); -} -//@} - -/** Deprecated maximum with index reducer wrapper class. - * - * reducer_max_index is the same as @ref reducer<@ref op_max_index>, except - * that reducer_max_index is a proxy for the contained view, so that - * accumulator variable update operations can be applied directly to the - * reducer. For example, a value is maximized with a `reducer<%op_max_index>` - * with `r->calc_max(i, a)`, but a value can be maximized with a - * `%reducer_max` with `r.calc_max(i, aa)`. - * - * - * @deprecated Users are strongly encouraged to use `reducer` - * reducers rather than the old wrappers like reducer_max. - * The `reducer` reducers show the reducer/monoid/view - * architecture more clearly, are more consistent in their - * implementation, and present a simpler model for new - * user-implemented reducers. 
- * - * @note Implicit conversions are provided between `%reducer_max_index` - * and `reducer<%op_max_index>`. This allows incremental code - * conversion: old code that used `%reducer_max_index` can pass a - * `%reducer_max_index` to a converted function that now expects a - * pointer or reference to a `reducer<%op_max_index>`, and vice - * versa. **But see @ref redminmax_compatibility.** - * - * @tparam Index The index type of the reducer. - * @tparam Type The value type of the reducer. - * @tparam Compare The "less than" comparator type for the reducer. - * - * @see op_max_index - * @see op_max_index_view - * @see reducer - * @see ReducersMinMax - * @ingroup ReducersMinMaxMaxIndex - */ -template > -class reducer_max_index - : public reducer> { - __CILKRTS_STATIC_ASSERT( - ::cilk::internal::class_is_empty< - typename ::cilk::internal::binary_functor::type>::value, - "cilk::reducer_max_index only works with " - "an empty Compare class"); - typedef reducer> base; - - public: - /// Type of data in a reducer_max_index. - typedef Type basic_value_type; - - /// The view type for the reducer. - typedef typename base::view_type view_type; - - /// The view type for the reducer. - typedef typename base::view_type View; - - /// The monoid type for the reducer. - typedef typename base::monoid_type monoid_type; - - /// The monoid type for the reducer. - typedef typename base::monoid_type Monoid; - - /// The view's rhs proxy type. - typedef min_max_internal::rhs_proxy rhs_proxy; - - using base::view; - - /** @name Constructors - */ - //@{ - - /// Constructs the wrapper in its identity state (`!is_set()`). - reducer_max_index() : base() {} - - /// Construct with a specified initial index and value. - reducer_max_index(const Index &initial_index, const Type &initial_value) - : base(initial_index, initial_value) {} - - /// Constructs the wrapper with a specified comparator. 
- explicit reducer_max_index(const Compare &comp) : base(comp) {} - - /// Constructs the wrapper with a specified initial index, value, - /// and comparator. - reducer_max_index(const Index &initial_index, const Type &initial_value, - const Compare &comp) - : base(initial_index, initial_value, comp) {} - - //@} - - /** @name Set / Get - */ - //@{ - - /// Sets the index and value of this object. - void set_value(const Index &index, const Type &value) { - base::set_value(std::make_pair(index, value)); - } - - /// Returns the maximum value. - const Type &get_value() const { return view().get_reference(); } - - /// Returns the maximum index. - const Index &get_index() const { return view().get_index_reference(); } - - /// Returns a const reference to value data member in the view. - const Type &get_reference() const { return view().get_reference(); } - - /// Returns a const reference to index data member in the view. - const Index &get_index_reference() const { - return view().get_index_reference(); - } - - //@} - - /** @name Forwarded functions - * @details Functions that update the contained accumulator variable are - * simply forwarded to the contained @ref op_max_view. */ - //@{ - - /// @copydoc cilk_lib_1_1::min_max_internal::view_content::is_set() const - bool is_set() const { return view().is_set(); } - - /// @copydoc op_max_index_view::calc_max(const Index&, const Type&) - reducer_max_index &calc_max(const Index &i, const Type &x) { - view().calc_max(i, x); - return *this; - } - - /// @copydoc op_max_view::operator=(const - /// min_max_internal::rhs_proxy&) - reducer_max_index &operator=(const rhs_proxy &rhs) { - view() = rhs; - return *this; - } - - //@} - - /// @name Dereference - /** Dereferencing a wrapper is a no-op. It simply returns the wrapper. 
- * Combined with the rule that a wrapper forwards view operations to the - * view, this means that view operations can be written the same way on - * reducers and wrappers, which is convenient for incrementally - * converting code using wrappers to code using reducers. That is: - * - * reducer< op_max_index > r; - * r->calc_max(i, a); // *r returns the view - * // calc_max is a view member function - * - * reducer_max_index w; - * w->calc_max(i, a); // *w returns the wrapper - * // calc_max is a wrapper member function that - * // calls the corresponding view function - */ - //@{ - reducer_max_index &operator*() { return *this; } - reducer_max_index const &operator*() const { return *this; } - - reducer_max_index *operator->() { return this; } - reducer_max_index const *operator->() const { return this; } - //@} - - /** @name Upcast - * @details In Intel Cilk Plus library 0.9, reducers were always - * cache-aligned. In library 1.0, reducer cache alignment is optional. By - * default, reducers are unaligned (i.e., just naturally aligned), but - * legacy wrappers inherit from cache-aligned reducers for binary - * compatibility. - * - * This means that a wrapper will automatically be upcast to its aligned - * reducer base class. The following conversion operators provide - * pseudo-upcasts to the corresponding unaligned reducer class. - */ - //@{ - operator reducer> &() { - return *reinterpret_cast< - reducer> *>(this); - } - - operator const reducer> &() - const { - return *reinterpret_cast< - const reducer> *>(this); - } - //@} -}; - -/** Deprecated minimum with index reducer wrapper class. - * - * reducer_min_index is the same as @ref reducer<@ref op_min_index>, except - * that reducer_min_index is a proxy for the contained view, so that - * accumulator variable update operations can be applied directly to the - * reducer. 
For example, a value is minimized with a `reducer<%op_min_index>` - * with `r->calc_min(i, a)`, but a value can be minimized with a - * `%reducer_min` with `r.calc_min(i, aa)`. - * - * - * @deprecated Users are strongly encouraged to use `reducer` - * reducers rather than the old wrappers like reducer_min. - * The `reducer` reducers show the reducer/monoid/view - * architecture more clearly, are more consistent in their - * implementation, and present a simpler model for new - * user-implemented reducers. - * - * @note Implicit conversions are provided between `%reducer_min_index` - * and `reducer<%op_min_index>`. This allows incremental code - * conversion: old code that used `%reducer_min_index` can pass a - * `%reducer_min_index` to a converted function that now expects a - * pointer or reference to a `reducer<%op_min_index>`, and vice - * versa. **But see @ref redminmax_compatibility.** - * - * @tparam Index The index type of the reducer. - * @tparam Type The value type of the reducer. - * @tparam Compare The "less than" comparator type for the reducer. - * - * @see op_min_index - * @see op_min_index_view - * @see reducer - * @see ReducersMinMax - * @ingroup ReducersMinMaxMinIndex - */ -template > -class reducer_min_index - : public reducer> { - __CILKRTS_STATIC_ASSERT( - ::cilk::internal::class_is_empty< - typename ::cilk::internal::binary_functor::type>::value, - "cilk::reducer_min_index only works with " - "an empty Compare class"); - typedef reducer> base; - - public: - /// Type of data in a reducer_min_index. - typedef Type basic_value_type; - - /// The view type for the reducer. - typedef typename base::view_type view_type; - - /// The view type for the reducer. - typedef typename base::view_type View; - - /// The monoid type for the reducer. - typedef typename base::monoid_type monoid_type; - - /// The monoid type for the reducer. - typedef typename base::monoid_type Monoid; - - /// The view's rhs proxy type. 
- typedef min_max_internal::rhs_proxy rhs_proxy; - - using base::view; - - /** @name Constructors - */ - //@{ - - /// Constructs the wrapper in its identity state (`!is_set()`). - reducer_min_index() : base() {} - - /// Construct with a specified initial index and value. - reducer_min_index(const Index &initial_index, const Type &initial_value) - : base(initial_index, initial_value) {} - - /// Constructs the wrapper with a specified comparator. - explicit reducer_min_index(const Compare &comp) : base(comp) {} - - /// Constructs the wrapper with a specified initial index, value, - /// and comparator. - reducer_min_index(const Index &initial_index, const Type &initial_value, - const Compare &comp) - : base(initial_index, initial_value, comp) {} - - //@} - - /** @name Set / Get - */ - //@{ - - /// Sets the index and value of this object. - void set_value(const Index &index, const Type &value) { - base::set_value(std::make_pair(index, value)); - } - - /// Returns the minimum value. - const Type &get_value() const { return view().get_reference(); } - - /// Returns the minimum index. - const Index &get_index() const { return view().get_index_reference(); } - - /// Returns a const reference to value data member in the view. - const Type &get_reference() const { return view().get_reference(); } - - /// Returns a const reference to index data member in the view. - const Index &get_index_reference() const { - return view().get_index_reference(); - } - - //@} - - /** @name Forwarded functions - * @details Functions that update the contained accumulator variable are - * simply forwarded to the contained @ref op_min_view. 
*/ - //@{ - - /// @copydoc cilk_lib_1_1::min_max_internal::view_content::is_set() const - bool is_set() const { return view().is_set(); } - - /// @copydoc op_min_index_view::calc_min(const Index&, const Type&) - reducer_min_index &calc_min(const Index &i, const Type &x) { - view().calc_min(i, x); - return *this; - } - - /// @copydoc op_min_view::operator=(const - /// min_max_internal::rhs_proxy&) - reducer_min_index &operator=(const rhs_proxy &rhs) { - view() = rhs; - return *this; - } - - //@} - - /// @name Dereference - /** Dereferencing a wrapper is a no-op. It simply returns the wrapper. - * Combined with the rule that a wrapper forwards view operations to the - * view, this means that view operations can be written the same way on - * reducers and wrappers, which is convenient for incrementally - * converting code using wrappers to code using reducers. That is: - * - * reducer< op_min_index > r; - * r->calc_min(i, a); // *r returns the view - * // calc_min is a view member function - * - * reducer_min_index w; - * w->calc_min(i, a); // *w returns the wrapper - * // calc_min is a wrapper member function that - * // calls the corresponding view function - */ - //@{ - reducer_min_index &operator*() { return *this; } - reducer_min_index const &operator*() const { return *this; } - - reducer_min_index *operator->() { return this; } - reducer_min_index const *operator->() const { return this; } - //@} - - /** @name Upcast - * @details In Intel Cilk Plus library 0.9, reducers were always - * cache-aligned. In library 1.0, reducer cache alignment is optional. By - * default, reducers are unaligned (i.e., just naturally aligned), but - * legacy wrappers inherit from cache-aligned reducers for binary - * compatibility. - * - * This means that a wrapper will automatically be upcast to its aligned - * reducer base class. The following conversion operators provide - * pseudo-upcasts to the corresponding unaligned reducer class. 
- */ - //@{ - operator reducer> &() { - return *reinterpret_cast< - reducer> *>(this); - } - - operator const reducer> &() - const { - return *reinterpret_cast< - const reducer> *>(this); - } - //@} -}; - -#ifndef CILK_LIBRARY_0_9_REDUCER_MINMAX -} // namespace cilk_lib_1_1 -using namespace cilk_lib_1_1; -#endif - -/// @cond internal -/** Metafunction specialization for reducer conversion. - * - * These specializations of the @ref legacy_reducer_downcast template class - * defined in reducer.h causes each `reducer< op_xxxx >` classes to have - * an `operator reducer_xxxx& ()` conversion operator that statically - * downcasts the `reducer` to the corresponding `reducer_xxxx` type. - * (The reverse conversion, from `reducer_xxxx` to `reducer`, is just - * an upcast, which is provided for free by the language.) - */ -template -struct legacy_reducer_downcast>> { - typedef reducer_max type; -}; - -template -struct legacy_reducer_downcast>> { - typedef reducer_min type; -}; - -template -struct legacy_reducer_downcast< - reducer>> { - typedef reducer_max_index type; -}; - -template -struct legacy_reducer_downcast< - reducer>> { - typedef reducer_min_index type; -}; -/// @endcond - -} // namespace cilk - -#endif // __cplusplus - -/** @name C language reducer macros - * - * These macros are used to declare and work with numeric minimum and maximum - * reducers in C code. 
- * - * @see @ref page_reducers_in_c - */ -//@{ - -#ifdef CILK_C_DEFINE_REDUCERS - -/* Integer min/max constants */ -#include - -/* Wchar_t min/max constants */ -#if defined(_MSC_VER) || defined(__ANDROID__) -#include -#else -#include -#endif - -/* Floating-point min/max constants */ -#include -#ifndef HUGE_VALF -static const unsigned int __huge_valf[] = {0x7f800000}; -#define HUGE_VALF (*((const float *)__huge_valf)) -#endif - -#ifndef HUGE_VALL -static const unsigned int __huge_vall[] = {0, 0, 0x00007f80, 0}; -#define HUGE_VALL (*((const long double *)__huge_vall)) -#endif - -#endif - -/** Declares max reducer type name. - * - * This macro expands into the identifier which is the name of the max reducer - * type for a specified numeric type. - * - * @param tn The @ref reducers_c_type_names "numeric type name" specifying - * the type of the reducer. - * - * @see @ref reducers_c_predefined - */ -#define CILK_C_REDUCER_MAX_TYPE(tn) __CILKRTS_MKIDENT(cilk_c_reducer_max_, tn) - -/** Declares a max reducer object. - * - * This macro expands into a declaration of a max reducer object for a - * specified numeric type. For example: - * - * CILK_C_REDUCER_MAX(my_reducer, double, -DBL_MAX); - * - * @param obj The variable name to be used for the declared reducer object. - * @param tn The @ref reducers_c_type_names "numeric type name" specifying - * the type of the reducer. - * @param v The initial value for the reducer. (A value which can be - * assigned to the numeric type represented by @a tn.) - * - * @see @ref reducers_c_predefined - */ -#define CILK_C_REDUCER_MAX(obj, tn, v) \ - CILK_C_REDUCER_MAX_TYPE(tn) \ - obj = CILK_C_INIT_REDUCER( \ - _Typeof(obj.value), __CILKRTS_MKIDENT(cilk_c_reducer_max_reduce_, tn), \ - __CILKRTS_MKIDENT(cilk_c_reducer_max_identity_, tn), \ - 0, v) - -/** Maximizes with a value. - * - * `CILK_C_REDUCER_MAX_CALC(reducer, v)` sets the current view of the - * reducer to the max of its previous value and a specified new value. 
- * This is equivalent to - * - * REDUCER_VIEW(reducer) = max(REDUCER_VIEW(reducer), v) - * - * @param reducer The reducer whose contained value is to be updated. - * @param v The value that it is to be maximized with. - */ -#define CILK_C_REDUCER_MAX_CALC(reducer, v) \ - do { \ - _Typeof((reducer).value) *view = &(REDUCER_VIEW(reducer)); \ - _Typeof(v) __value = (v); \ - if (*view < __value) { \ - *view = __value; \ - } \ - } while (0) - -/// @cond internal - -/** Declares the max reducer functions for a numeric type. - * - * This macro expands into external function declarations for functions which - * implement the reducer functionality for the max reducer type for a specified - * numeric type. - * - * @param t The value type of the reducer. - * @param tn The value "type name" identifier, used to construct the reducer - * type name, function names, etc. - */ -#define CILK_C_REDUCER_MAX_DECLARATION(t, tn, id) \ - typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_MAX_TYPE(tn); \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_max, tn, l, r); \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_max, tn); - -/** Defines the max reducer functions for a numeric type. - * - * This macro expands into function definitions for functions which implement - * the reducer functionality for the max reducer type for a specified numeric - * type. - * - * @param t The value type of the reducer. - * @param tn The value "type name" identifier, used to construct the reducer - * type name, function names, etc. - */ -#define CILK_C_REDUCER_MAX_DEFINITION(t, tn, id) \ - typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_MAX_TYPE(tn); \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_max, tn, l, r) { \ - if (*(t *)l < *(t *)r) \ - *(t *)l = *(t *)r; \ - } \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_max, tn) { *(t *)v = id; } - -//@{ -/** @def CILK_C_REDUCER_MAX_INSTANCE - * @brief Declare or define implementation functions for a reducer type. 
- * - * In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS` - * will be defined, and this macro will generate reducer implementation - * functions. Everywhere else, `CILK_C_DEFINE_REDUCERS` will be undefined, and - * this macro will expand into external declarations for the functions. - */ -#ifdef CILK_C_DEFINE_REDUCERS -#define CILK_C_REDUCER_MAX_INSTANCE(t, tn, id) \ - CILK_C_REDUCER_MAX_DEFINITION(t, tn, id) -#else -#define CILK_C_REDUCER_MAX_INSTANCE(t, tn, id) \ - CILK_C_REDUCER_MAX_DECLARATION(t, tn, id) -#endif -//@} - -/* Declare or define an instance of the reducer type and its functions for each - * numeric type. - */ -#ifdef __cplusplus -extern "C" { -#endif -CILK_C_REDUCER_MAX_INSTANCE(char, char, CHAR_MIN) -CILK_C_REDUCER_MAX_INSTANCE(unsigned char, uchar, 0) -CILK_C_REDUCER_MAX_INSTANCE(signed char, schar, SCHAR_MIN) -CILK_C_REDUCER_MAX_INSTANCE(wchar_t, wchar_t, WCHAR_MIN) -CILK_C_REDUCER_MAX_INSTANCE(short, short, SHRT_MIN) -CILK_C_REDUCER_MAX_INSTANCE(unsigned short, ushort, 0) -CILK_C_REDUCER_MAX_INSTANCE(int, int, INT_MIN) -CILK_C_REDUCER_MAX_INSTANCE(unsigned int, uint, 0) -CILK_C_REDUCER_MAX_INSTANCE(unsigned int, unsigned, 0) // alternate name -CILK_C_REDUCER_MAX_INSTANCE(long, long, LONG_MIN) -CILK_C_REDUCER_MAX_INSTANCE(unsigned long, ulong, 0) -CILK_C_REDUCER_MAX_INSTANCE(long long, longlong, LLONG_MIN) -CILK_C_REDUCER_MAX_INSTANCE(unsigned long long, ulonglong, 0) -CILK_C_REDUCER_MAX_INSTANCE(float, float, -HUGE_VALF) -CILK_C_REDUCER_MAX_INSTANCE(double, double, -HUGE_VAL) -CILK_C_REDUCER_MAX_INSTANCE(long double, longdouble, -HUGE_VALL) -#ifdef __cplusplus -} /* end extern "C" */ -#endif - -/// @endcond - -/** Max_index reducer type name. - * - * This macro expands into the identifier which is the name of the max_index - * reducer type for a specified numeric type. - * - * @param tn The @ref reducers_c_type_names "numeric type name" specifying - * the type of the reducer. 
- * - * @see @ref reducers_c_predefined - */ -#define CILK_C_REDUCER_MAX_INDEX_TYPE(tn) \ - __CILKRTS_MKIDENT(cilk_c_reducer_max_index_, tn) - -/** Declares an op_max_index reducer object. - * - * This macro expands into a declaration of a max_index reducer object for a - * specified numeric type. For example: - * - * CILK_C_REDUCER_MAX_INDEX(my_reducer, double, -DBL_MAX_INDEX); - * - * @param obj The variable name to be used for the declared reducer object. - * @param tn The @ref reducers_c_type_names "numeric type name" specifying - * the type of the reducer. - * @param v The initial value for the reducer. (A value which can be - * assigned to the numeric type represented by @a tn.) - * - * @see @ref reducers_c_predefined - */ -#define CILK_C_REDUCER_MAX_INDEX(obj, tn, v) \ - CILK_C_REDUCER_MAX_INDEX_TYPE(tn) \ - obj = CILK_C_INIT_REDUCER( \ - _Typeof(obj.value), \ - __CILKRTS_MKIDENT(cilk_c_reducer_max_index_reduce_, tn), \ - __CILKRTS_MKIDENT(cilk_c_reducer_max_index_identity_, tn), \ - 0, {0, v}) - -/** Maximizes with a value. - * - * `CILK_C_REDUCER_MAX_INDEX_CALC(reducer, i, v)` sets the current view of the - * reducer to the max of its previous value and a specified new value. - * This is equivalent to - * - * REDUCER_VIEW(reducer) = max_index(REDUCER_VIEW(reducer), v) - * - * If the value of the reducer is changed to @a v, then the index of the - * reducer is changed to @a i. - * - * @param reducer The reducer whose contained value and index are to be - * updated. - * @param i The index associated with the new value. - * @param v The value that it is to be maximized with. - */ -#define CILK_C_REDUCER_MAX_INDEX_CALC(reducer, i, v) \ - do { \ - _Typeof((reducer).value) *view = &(REDUCER_VIEW(reducer)); \ - _Typeof(v) __value = (v); \ - if (view->value < __value) { \ - view->index = (i); \ - view->value = __value; \ - } \ - } while (0) - -/// @cond internal - -/** Declares the max_index view type. 
- * - * The view of a max_index reducer is a structure containing both the - * maximum value for the reducer and the index that was associated with - * that value in the sequence of input values. - */ -#define CILK_C_REDUCER_MAX_INDEX_VIEW(t, tn) \ - typedef struct { \ - ptrdiff_t index; \ - t value; \ - } __CILKRTS_MKIDENT(cilk_c_reducer_max_index_view_, tn) - -/** Declares the max_index reducer functions for a numeric type. - * - * This macro expands into external function declarations for functions which - * implement the reducer functionality for the max_index reducer type for a - * specified numeric type. - * - * @param t The value type of the reducer. - * @param tn The value "type name" identifier, used to construct the reducer - * type name, function names, etc. - */ -#define CILK_C_REDUCER_MAX_INDEX_DECLARATION(t, tn, id) \ - CILK_C_REDUCER_MAX_INDEX_VIEW(t, tn); \ - typedef CILK_C_DECLARE_REDUCER( \ - __CILKRTS_MKIDENT(cilk_c_reducer_max_index_view_, tn)) \ - CILK_C_REDUCER_MAX_INDEX_TYPE(tn); \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_max_index, tn, l, r); \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_max_index, tn); - -/** Defines the max_index reducer functions for a numeric type. - * - * This macro expands into function definitions for functions which implement - * the reducer functionality for the max_index reducer type for a specified - * numeric type. - * - * @param t The value type of the reducer. - * @param tn The value "type name" identifier, used to construct the reducer - * type name, function names, etc. 
- */ -#define CILK_C_REDUCER_MAX_INDEX_DEFINITION(t, tn, id) \ - CILK_C_REDUCER_MAX_INDEX_VIEW(t, tn); \ - typedef CILK_C_DECLARE_REDUCER( \ - __CILKRTS_MKIDENT(cilk_c_reducer_max_index_view_, tn)) \ - CILK_C_REDUCER_MAX_INDEX_TYPE(tn); \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_max_index, tn, l, r) { \ - typedef __CILKRTS_MKIDENT(cilk_c_reducer_max_index_view_, tn) view_t; \ - if (((view_t *)l)->value < ((view_t *)r)->value) \ - *(view_t *)l = *(view_t *)r; \ - } \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_max_index, tn) { \ - typedef __CILKRTS_MKIDENT(cilk_c_reducer_max_index_view_, tn) view_t; \ - ((view_t *)v)->index = 0; \ - ((view_t *)v)->value = id; \ - } - -//@{ -/** @def CILK_C_REDUCER_MAX_INDEX_INSTANCE - * @brief Declare or define implementation functions for a reducer type. - * - * In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS` - * will be defined, and this macro will generate reducer implementation - * functions. Everywhere else, `CILK_C_DEFINE_REDUCERS` will be undefined, and - * this macro will expand into external declarations for the functions. - */ -#ifdef CILK_C_DEFINE_REDUCERS -#define CILK_C_REDUCER_MAX_INDEX_INSTANCE(t, tn, id) \ - CILK_C_REDUCER_MAX_INDEX_DEFINITION(t, tn, id) -#else -#define CILK_C_REDUCER_MAX_INDEX_INSTANCE(t, tn, id) \ - CILK_C_REDUCER_MAX_INDEX_DECLARATION(t, tn, id) -#endif -//@} - -/* Declares or defines an instance of the reducer type and its functions for - * each numeric type. 
- */ -#ifdef __cplusplus -extern "C" { -#endif -CILK_C_REDUCER_MAX_INDEX_INSTANCE(char, char, CHAR_MIN) -CILK_C_REDUCER_MAX_INDEX_INSTANCE(unsigned char, uchar, 0) -CILK_C_REDUCER_MAX_INDEX_INSTANCE(signed char, schar, SCHAR_MIN) -CILK_C_REDUCER_MAX_INDEX_INSTANCE(wchar_t, wchar_t, WCHAR_MIN) -CILK_C_REDUCER_MAX_INDEX_INSTANCE(short, short, SHRT_MIN) -CILK_C_REDUCER_MAX_INDEX_INSTANCE(unsigned short, ushort, 0) -CILK_C_REDUCER_MAX_INDEX_INSTANCE(int, int, INT_MIN) -CILK_C_REDUCER_MAX_INDEX_INSTANCE(unsigned int, uint, 0) -CILK_C_REDUCER_MAX_INDEX_INSTANCE(unsigned int, unsigned, 0) // alternate name -CILK_C_REDUCER_MAX_INDEX_INSTANCE(long, long, LONG_MIN) -CILK_C_REDUCER_MAX_INDEX_INSTANCE(unsigned long, ulong, 0) -CILK_C_REDUCER_MAX_INDEX_INSTANCE(long long, longlong, LLONG_MIN) -CILK_C_REDUCER_MAX_INDEX_INSTANCE(unsigned long long, ulonglong, 0) -CILK_C_REDUCER_MAX_INDEX_INSTANCE(float, float, -HUGE_VALF) -CILK_C_REDUCER_MAX_INDEX_INSTANCE(double, double, -HUGE_VAL) -CILK_C_REDUCER_MAX_INDEX_INSTANCE(long double, longdouble, -HUGE_VALL) -#ifdef __cplusplus -} /* end extern "C" */ -#endif - -/// @endcond - -/** Declares min reducer type name. - * - * This macro expands into the identifier which is the name of the min reducer - * type for a specified numeric type. - * - * @param tn The @ref reducers_c_type_names "numeric type name" specifying - * the type of the reducer. - * - * @see @ref reducers_c_predefined - */ -#define CILK_C_REDUCER_MIN_TYPE(tn) __CILKRTS_MKIDENT(cilk_c_reducer_min_, tn) - -/** Declares a min reducer object. - * - * This macro expands into a declaration of a min reducer object for a - * specified numeric type. For example: - * - * CILK_C_REDUCER_MIN(my_reducer, double, DBL_MAX); - * - * @param obj The variable name to be used for the declared reducer object. - * @param tn The @ref reducers_c_type_names "numeric type name" specifying - * the type of the reducer. - * @param v The initial value for the reducer. 
(A value which can be - * assigned to the numeric type represented by @a tn.) - * - * @see @ref reducers_c_predefined - */ -#define CILK_C_REDUCER_MIN(obj, tn, v) \ - CILK_C_REDUCER_MIN_TYPE(tn) \ - obj = CILK_C_INIT_REDUCER( \ - _Typeof(obj.value), __CILKRTS_MKIDENT(cilk_c_reducer_min_reduce_, tn), \ - __CILKRTS_MKIDENT(cilk_c_reducer_min_identity_, tn), \ - 0, v) - -/** Minimizes with a value. - * - * `CILK_C_REDUCER_MIN_CALC(reducer, v)` sets the current view of the - * reducer to the min of its previous value and a specified new value. - * This is equivalent to - * - * REDUCER_VIEW(reducer) = min(REDUCER_VIEW(reducer), v) - * - * @param reducer The reducer whose contained value is to be updated. - * @param v The value that it is to be minimized with. - */ -#define CILK_C_REDUCER_MIN_CALC(reducer, v) \ - do { \ - _Typeof((reducer).value) *view = &(REDUCER_VIEW(reducer)); \ - _Typeof(v) __value = (v); \ - if (*view > __value) { \ - *view = __value; \ - } \ - } while (0) - -/// @cond internal - -/** Declares the min reducer functions for a numeric type. - * - * This macro expands into external function declarations for functions which - * implement the reducer functionality for the min reducer type for a specified - * numeric type. - * - * @param t The value type of the reducer. - * @param tn The value "type name" identifier, used to construct the reducer - * type name, function names, etc. - */ -#define CILK_C_REDUCER_MIN_DECLARATION(t, tn, id) \ - typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_MIN_TYPE(tn); \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_min, tn, l, r); \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_min, tn); - -/** Defines the min reducer functions for a numeric type. - * - * This macro expands into function definitions for functions which implement - * the reducer functionality for the min reducer type for a specified numeric - * type. - * - * @param t The value type of the reducer. 
- * @param tn The value "type name" identifier, used to construct the reducer - * type name, function names, etc. - */ -#define CILK_C_REDUCER_MIN_DEFINITION(t, tn, id) \ - typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_MIN_TYPE(tn); \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_min, tn, l, r) { \ - if (*(t *)l > *(t *)r) \ - *(t *)l = *(t *)r; \ - } \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_min, tn) { *(t *)v = id; } - -//@{ -/** @def CILK_C_REDUCER_MIN_INSTANCE - * @brief Declare or define implementation functions for a reducer type. - * - * In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS` - * will be defined, and this macro will generate reducer implementation - * functions. Everywhere else, `CILK_C_DEFINE_REDUCERS` will be undefined, and - * this macro will expand into external declarations for the functions. - */ -#ifdef CILK_C_DEFINE_REDUCERS -#define CILK_C_REDUCER_MIN_INSTANCE(t, tn, id) \ - CILK_C_REDUCER_MIN_DEFINITION(t, tn, id) -#else -#define CILK_C_REDUCER_MIN_INSTANCE(t, tn, id) \ - CILK_C_REDUCER_MIN_DECLARATION(t, tn, id) -#endif -//@} - -/* Declares or defines an instance of the reducer type and its functions for - * each numeric type. 
- */ -#ifdef __cplusplus -extern "C" { -#endif -CILK_C_REDUCER_MIN_INSTANCE(char, char, CHAR_MAX) -CILK_C_REDUCER_MIN_INSTANCE(unsigned char, uchar, CHAR_MAX) -CILK_C_REDUCER_MIN_INSTANCE(signed char, schar, SCHAR_MAX) -CILK_C_REDUCER_MIN_INSTANCE(wchar_t, wchar_t, WCHAR_MAX) -CILK_C_REDUCER_MIN_INSTANCE(short, short, SHRT_MAX) -CILK_C_REDUCER_MIN_INSTANCE(unsigned short, ushort, USHRT_MAX) -CILK_C_REDUCER_MIN_INSTANCE(int, int, INT_MAX) -CILK_C_REDUCER_MIN_INSTANCE(unsigned int, uint, UINT_MAX) -CILK_C_REDUCER_MIN_INSTANCE(unsigned int, unsigned, UINT_MAX) // alternate name -CILK_C_REDUCER_MIN_INSTANCE(long, long, LONG_MAX) -CILK_C_REDUCER_MIN_INSTANCE(unsigned long, ulong, ULONG_MAX) -CILK_C_REDUCER_MIN_INSTANCE(long long, longlong, LLONG_MAX) -CILK_C_REDUCER_MIN_INSTANCE(unsigned long long, ulonglong, ULLONG_MAX) -CILK_C_REDUCER_MIN_INSTANCE(float, float, HUGE_VALF) -CILK_C_REDUCER_MIN_INSTANCE(double, double, HUGE_VAL) -CILK_C_REDUCER_MIN_INSTANCE(long double, longdouble, HUGE_VALL) -#ifdef __cplusplus -} /* end extern "C" */ -#endif - -/// @endcond - -/** Declares `min_index` reducer type name. - * - * This macro expands into the identifier which is the name of the min_index - * reducer type for a specified numeric type. - * - * @param tn The @ref reducers_c_type_names "numeric type name" specifying - * the type of the reducer. - * - * @see @ref reducers_c_predefined - */ -#define CILK_C_REDUCER_MIN_INDEX_TYPE(tn) \ - __CILKRTS_MKIDENT(cilk_c_reducer_min_index_, tn) - -/** Declares an op_min_index reducer object. - * - * This macro expands into a declaration of a min_index reducer object for a - * specified numeric type. For example: - * - * CILK_C_REDUCER_MIN_INDEX(my_reducer, double, -DBL_MIN_INDEX); - * - * @param obj The variable name to be used for the declared reducer object. - * @param tn The @ref reducers_c_type_names "numeric type name" specifying - * the type of the reducer. - * @param v The initial value for the reducer. 
(A value which can be - * assigned to the numeric type represented by @a tn.) - * - * @see @ref reducers_c_predefined - */ -#define CILK_C_REDUCER_MIN_INDEX(obj, tn, v) \ - CILK_C_REDUCER_MIN_INDEX_TYPE(tn) \ - obj = CILK_C_INIT_REDUCER( \ - _Typeof(obj.value), \ - __CILKRTS_MKIDENT(cilk_c_reducer_min_index_reduce_, tn), \ - __CILKRTS_MKIDENT(cilk_c_reducer_min_index_identity_, tn), \ - 0, {0, v}) - -/** Minimizes with a value. - * - * `CILK_C_REDUCER_MIN_INDEX_CALC(reducer, i, v)` sets the current view of the - * reducer to the min of its previous value and a specified new value. - * This is equivalent to - * - * REDUCER_VIEW(reducer) = min_index(REDUCER_VIEW(reducer), v) - * - * If the value of the reducer is changed to @a v, then the index of the - * reducer is changed to @a i. - * - * @param reducer The reducer whose contained value and index are to be - * updated. - * @param i The index associated with the new value. - * @param v The value that it is to be minimized with. - */ -#define CILK_C_REDUCER_MIN_INDEX_CALC(reducer, i, v) \ - do { \ - _Typeof((reducer).value) *view = &(REDUCER_VIEW(reducer)); \ - _Typeof(v) __value = (v); \ - if (view->value > __value) { \ - view->index = (i); \ - view->value = __value; \ - } \ - } while (0) - -/// @cond internal - -/** Declares the min_index view type. - * - * The view of a min_index reducer is a structure containing both the - * minimum value for the reducer and the index that was associated with - * that value in the sequence of input values. - */ -#define CILK_C_REDUCER_MIN_INDEX_VIEW(t, tn) \ - typedef struct { \ - ptrdiff_t index; \ - t value; \ - } __CILKRTS_MKIDENT(cilk_c_reducer_min_index_view_, tn) - -/** Declares the min_index reducer functions for a numeric type. - * - * This macro expands into external function declarations for functions which - * implement the reducer functionality for the min_index reducer type for a - * specified numeric type. - * - * @param t The value type of the reducer. 
- * @param tn The value "type name" identifier, used to construct the reducer - * type name, function names, etc. - */ -#define CILK_C_REDUCER_MIN_INDEX_DECLARATION(t, tn, id) \ - CILK_C_REDUCER_MIN_INDEX_VIEW(t, tn); \ - typedef CILK_C_DECLARE_REDUCER( \ - __CILKRTS_MKIDENT(cilk_c_reducer_min_index_view_, tn)) \ - CILK_C_REDUCER_MIN_INDEX_TYPE(tn); \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_min_index, tn, l, r); \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_min_index, tn); - -/** Defines the min_index reducer functions for a numeric type. - * - * This macro expands into function definitions for functions which implement - * the reducer functionality for the min_index reducer type for a specified - * numeric type. - * - * @param t The value type of the reducer. - * @param tn The value "type name" identifier, used to construct the reducer - * type name, function names, etc. - */ -#define CILK_C_REDUCER_MIN_INDEX_DEFINITION(t, tn, id) \ - CILK_C_REDUCER_MIN_INDEX_VIEW(t, tn); \ - typedef CILK_C_DECLARE_REDUCER( \ - __CILKRTS_MKIDENT(cilk_c_reducer_min_index_view_, tn)) \ - CILK_C_REDUCER_MIN_INDEX_TYPE(tn); \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_min_index, tn, l, r) { \ - typedef __CILKRTS_MKIDENT(cilk_c_reducer_min_index_view_, tn) view_t; \ - if (((view_t *)l)->value > ((view_t *)r)->value) \ - *(view_t *)l = *(view_t *)r; \ - } \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_min_index, tn) { \ - typedef __CILKRTS_MKIDENT(cilk_c_reducer_min_index_view_, tn) view_t; \ - ((view_t *)v)->index = 0; \ - ((view_t *)v)->value = id; \ - } - -//@{ -/** @def CILK_C_REDUCER_MIN_INDEX_INSTANCE - * @brief Declares or defines implementation functions for a reducer type. - * - * In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS` - * will be defined, and this macro will generate reducer implementation - * functions. 
Everywhere else, `CILK_C_DEFINE_REDUCERS` will be undefined, and - * this macro will expand into external declarations for the functions. - */ -#ifdef CILK_C_DEFINE_REDUCERS -#define CILK_C_REDUCER_MIN_INDEX_INSTANCE(t, tn, id) \ - CILK_C_REDUCER_MIN_INDEX_DEFINITION(t, tn, id) -#else -#define CILK_C_REDUCER_MIN_INDEX_INSTANCE(t, tn, id) \ - CILK_C_REDUCER_MIN_INDEX_DECLARATION(t, tn, id) -#endif -//@} - -/* Declares or defines an instance of the reducer type and its functions for - * each numeric type. - */ -#ifdef __cplusplus -extern "C" { -#endif -CILK_C_REDUCER_MIN_INDEX_INSTANCE(char, char, CHAR_MAX) -CILK_C_REDUCER_MIN_INDEX_INSTANCE(unsigned char, uchar, CHAR_MAX) -CILK_C_REDUCER_MIN_INDEX_INSTANCE(signed char, schar, SCHAR_MAX) -CILK_C_REDUCER_MIN_INDEX_INSTANCE(wchar_t, wchar_t, WCHAR_MAX) -CILK_C_REDUCER_MIN_INDEX_INSTANCE(short, short, SHRT_MAX) -CILK_C_REDUCER_MIN_INDEX_INSTANCE(unsigned short, ushort, USHRT_MAX) -CILK_C_REDUCER_MIN_INDEX_INSTANCE(int, int, INT_MAX) -CILK_C_REDUCER_MIN_INDEX_INSTANCE(unsigned int, uint, UINT_MAX) -CILK_C_REDUCER_MIN_INDEX_INSTANCE(unsigned int, unsigned, - UINT_MAX) // alternate name -CILK_C_REDUCER_MIN_INDEX_INSTANCE(long, long, LONG_MAX) -CILK_C_REDUCER_MIN_INDEX_INSTANCE(unsigned long, ulong, ULONG_MAX) -CILK_C_REDUCER_MIN_INDEX_INSTANCE(long long, longlong, LLONG_MAX) -CILK_C_REDUCER_MIN_INDEX_INSTANCE(unsigned long long, ulonglong, ULLONG_MAX) -CILK_C_REDUCER_MIN_INDEX_INSTANCE(float, float, HUGE_VALF) -CILK_C_REDUCER_MIN_INDEX_INSTANCE(double, double, HUGE_VAL) -CILK_C_REDUCER_MIN_INDEX_INSTANCE(long double, longdouble, HUGE_VALL) -#ifdef __cplusplus -} /* end extern "C" */ -#endif - -/// @endcond - -//@} - -#endif // defined REDUCER_MIN_MAX_H_INCLUDED diff --git a/include/cilk/reducer_opadd.h b/include/cilk/reducer_opadd.h deleted file mode 100644 index 832ae356..00000000 --- a/include/cilk/reducer_opadd.h +++ /dev/null @@ -1,702 +0,0 @@ -/* reducer_opadd.h -*- C++ -*- - * - * Copyright (C) 2009-2018, Intel 
Corporation - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY - * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * ********************************************************************* - * - * PLEASE NOTE: This file is a downstream copy of a file maintained in - * a repository at cilkplus.org. Changes made to this file that are not - * submitted through the contribution process detailed at - * http://www.cilkplus.org/submit-cilk-contribution will be lost the next - * time that a new version is released. 
Changes only submitted to the - * GNU compiler collection or posted to the git repository at - * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime are - * not tracked. - * - * We welcome your contributions to this open source project. Thank you - * for your assistance in helping us improve Cilk Plus. - */ - -/** @file reducer_opadd.h - * - * @brief Defines classes for doing parallel addition reductions. - * - * @ingroup ReducersAdd - * - * @see ReducersAdd - */ - -#ifndef REDUCER_OPADD_H_INCLUDED -#define REDUCER_OPADD_H_INCLUDED - -#include - -/** @defgroup ReducersAdd Addition Reducers - * - * Addition reducers allow the computation of the sum of a set of values in - * parallel. - * - * @ingroup Reducers - * - * You should be familiar with @ref pagereducers "Intel(R) Cilk(TM) Plus reducers", - * described in file `reducers.md`, and particularly with @ref reducers_using, - * before trying to use the information in this file. - * - * @section redopadd_usage Usage Example - * - * cilk::reducer< cilk::op_add > r; - * cilk_for (int i = 0; i != N; ++i) { - * *r += a[i]; - * } - * return r.get_value(); - * - * @section redopadd_monoid The Monoid - * - * @subsection redopadd_monoid_values Value Set - * - * The value set of an addition reducer is the set of values of `Type`, which - * is expected to be a builtin numeric type (or something like it, such as - * `std::complex`). - * - * @subsection redopadd_monoid_operator Operator - * - * The operator of an addition reducer is the addition operator, defined by - * the "`+`" binary operator on `Type`. - * - * @subsection redopadd_monoid_identity Identity - * - * The identity value of the reducer is the numeric value "`0`". This is - * expected to be the value of the default constructor `Type()`. 
- * - * @section redopadd_operations Operations - * - * @subsection redopadd_constructors Constructors - * - * reducer() // identity - * reducer(const Type& value) - * reducer(move_in(Type& variable)) - * - * @subsection redopadd_get_set Set and Get - * - * r.set_value(const Type& value) - * const Type& = r.get_value() const - * r.move_in(Type& variable) - * r.move_out(Type& variable) - * - * @subsection redopadd_initial Initial Values - * - * If an addition reducer is constructed without an explicit initial value, - * then its initial value will be its identity value, as long as `Type` - * satisfies the requirements of @ref redopadd_types. - * - * @subsection redopadd_view_ops View Operations - * - * *r += a - * *r -= a - * ++*r - * --*r - * (*r)++ - * (*r)-- - * *r = *r + a - * *r = *r - a - * *r = *r ± a1 ± a2 … ± an - * - * The post-increment and post-decrement operations do not return a value. (If - * they did, they would expose the value contained in the view, which is - * non-deterministic in the middle of a reduction.) - * - * Note that subtraction operations are allowed on an addition reducer because - * subtraction is equivalent to addition with a negated operand. It is true - * that `(x - y) - z` is not equivalent to `x - (y - z)`, but - * `(x + (-y)) + (-z)` _is_ equivalent to `x + ((-y) + (-z))`. - * - * @section redopadd_floating_point Issues with Floating-Point Types - * - * Because of precision and round-off issues, floating-point addition is not - * really associative. For example, `(1e30 + -1e30) + 1 == 1`, but - * `1e30 + (-1e30 + 1) == 0`. - * - * In many cases, this won't matter, but computations which have been - * carefully ordered to control round-off errors may not deal well with - * being reassociated. In general, you should be sure to understand the - * floating-point behavior of your program before doing any transformation - * that will reassociate its computations. 
- * - * @section redopadd_types Type and Operator Requirements - * - * `Type` must be `Copy Constructible`, `Default Constructible`, and - * `Assignable`. - * - * The operator "`+=`" must be defined on `Type`, with `x += a` having the - * same meaning as `x = x + a`. In addition, if the code uses the "`-=`", - * pre-increment, post-increment, pre-decrement, or post-decrement operators, - * then the corresponding operators must be defined on `Type`. - * - * The expression `Type()` must be a valid expression which yields the - * identity value (the value of `Type` whose numeric value is zero). - * - * @section redopadd_in_c Addition Reducers in C - * - * The @ref CILK_C_REDUCER_OPADD and @ref CILK_C_REDUCER_OPADD_TYPE macros can - * be used to do addition reductions in C. For example: - * - * CILK_C_REDUCER_OPADD(r, double, 0); - * CILK_C_REGISTER_REDUCER(r); - * cilk_for(int i = 0; i != n; ++i) { - * REDUCER_VIEW(r) += a[i]; - * } - * CILK_C_UNREGISTER_REDUCER(r); - * printf("The sum of the elements of a is %f\n", REDUCER_VIEW(r)); - * - * See @ref reducers_c_predefined. - */ - -#ifdef __cplusplus - -namespace cilk { - -/** The addition reducer view class. - * - * This is the view class for reducers created with - * `cilk::reducer< cilk::op_add >`. It holds the accumulator variable - * for the reduction, and allows only addition and subtraction operations to - * be performed on it. - * - * @note The reducer "dereference" operation (`reducer::operator *()`) - * yields a reference to the view. Thus, for example, the view class's - * `+=` operation would be used in an expression like `*r += a`, where - * `r` is an op_add reducer variable. - * - * @tparam Type The type of the contained accumulator variable. This will - * be the value type of a monoid_with_view that is - * instantiated with this view. 
- * - * @see ReducersAdd - * @see op_add - * - * @ingroup ReducersAdd - */ -template -class op_add_view : public scalar_view -{ - typedef scalar_view base; - -public: - /** Class to represent the right-hand side of - * `*reducer = *reducer ± value`. - * - * The only assignment operator for the op_add_view class takes an - * rhs_proxy as its operand. This results in the syntactic restriction - * that the only expressions that can be assigned to an op_add_view are - * ones which generate an rhs_proxy - that is, expressions of the form - * `op_add_view ± value ... ± value`. - * - * @warning - * The lhs and rhs views in such an assignment must be the same; - * otherwise, the behavior will be undefined. (I.e., `v1 = v1 + x` is - * legal; `v1 = v2 + x` is illegal.) This condition will be checked with a - * runtime assertion when compiled in debug mode. - * - * @see op_add_view - */ - class rhs_proxy { - friend class op_add_view; - - const op_add_view* m_view; - Type m_value; - - // Constructor is invoked only from op_add_view::operator+() and - // op_add_view::operator-(). - // - rhs_proxy(const op_add_view* view, const Type& value) : - m_view(view), m_value(value) {} - - rhs_proxy& operator=(const rhs_proxy&); // Disable assignment operator - rhs_proxy(); // Disable default constructor - - public: - ///@{ - /** Adds or subtracts an additional rhs value. If `v` is an op_add_view - * and `a1` is a value, then the expression `v + a1` invokes the view's - * `operator+()` to create an rhs_proxy for `(v, a1)`; then - * `v + a1 + a2` invokes the rhs_proxy's `operator+()` to create a new - * rhs_proxy for `(v, a1+a2)`. This allows the right-hand side of an - * assignment to be not just `view ± value`, but - * `view ± value ± value ... ± value`. The effect is that - * - * v = v ± a1 ± a2 ... ± an; - * - * is evaluated as - * - * v = v ± (±a1 ± a2 ... 
± an); - */ - rhs_proxy& operator+(const Type& x) { m_value += x; return *this; } - rhs_proxy& operator-(const Type& x) { m_value -= x; return *this; } - ///@} - }; - - - /** Default/identity constructor. This constructor initializes the - * contained value to `Type()`, which is expected to be the identity value - * for addition on `Type`. - */ - op_add_view() : base() {} - - /** Construct with a specified initial value. - */ - explicit op_add_view(const Type& v) : base(v) {} - - /** Reduces the views of two strands. - * - * This function is invoked by the @ref op_add monoid to combine the views - * of two strands when the right strand merges with the left one. It adds - * the value contained in the right-strand view to the value contained in - * the left-strand view, and leaves the value in the right-strand view - * undefined. - * - * @param right A pointer to the right-strand view. (`this` points to - * the left-strand view.) - * - * @note Used only by the @ref op_add monoid to implement the monoid - * reduce operation. - */ - void reduce(op_add_view* right) { this->m_value += right->m_value; } - - /** @name Accumulator variable updates. - * - * These functions support the various syntaxes for incrementing or - * decrementing the accumulator variable contained in the view. - */ - ///@{ - - /** Increments the accumulator variable by @a x. - */ - op_add_view& operator+=(const Type& x) { this->m_value += x; return *this; } - - /** Decrements the accumulator variable by @a x. - */ - op_add_view& operator-=(const Type& x) { this->m_value -= x; return *this; } - - /** Pre-increment. - */ - op_add_view& operator++() { ++this->m_value; return *this; } - - /** Post-increments. - * - * @note Conventionally, post-increment operators return the old value - * of the incremented variable. However, reducer views do not - * expose their contained values, so `view++` does not have a - * return value. - */ - void operator++(int) { this->m_value++; } - - /** Pre-decrements. 
- */ - op_add_view& operator--() { --this->m_value; return *this; } - - /** Post-decrements. - * - * @note Conventionally, post-decrement operators return the old value - * of the decremented variable. However, reducer views do not - * expose their contained values, so `view--` does not have a - * return value. - */ - void operator--(int) { this->m_value--; } - - /** Creates an object representing `*this + x`. - * - * @see rhs_proxy - */ - rhs_proxy operator+(const Type& x) const { return rhs_proxy(this, x); } - - /** Creates an object representing `*this - x`. - * - * @see rhs_proxy - */ - rhs_proxy operator-(const Type& x) const { return rhs_proxy(this, -x); } - - /** Assigns the result of a `view ± value` expression to the view. Note that - * this is the only assignment operator for this class. - * - * @see rhs_proxy - */ - op_add_view& operator=(const rhs_proxy& rhs) { - this->m_value += rhs.m_value; - return *this; - } - - ///@} -}; - - -/** Monoid class for addition reductions. Instantiate the cilk::reducer - * template class with an op_add monoid to create an addition reducer class. - * For example, to compute - * the sum of a set of `int` values: - * - * cilk::reducer< cilk::op_add > r; - * - * @tparam Type The reducer value type. - * @tparam Align If `false` (the default), reducers instantiated on this - * monoid will be naturally aligned (the Intel Cilk Plus library 1.0 - * behavior). If `true`, reducers instantiated on this monoid - * will be cache-aligned for binary compatibility with - * reducers in Intel Cilk Plus library version 0.9. - * - * @see ReducersAdd - * @see op_add_view - * - * @ingroup ReducersAdd - */ -template -struct op_add : public monoid_with_view, Align> {}; - -/** **Deprecated** addition reducer wrapper class. - * - * reducer_opadd is the same as @ref reducer<@ref op_add>, except that - * reducer_opadd is a proxy for the contained view, so that accumulator - * variable update operations can be applied directly to the reducer. 
For - * example, a value is added to a `reducer<%op_add>` with `*r += a`, but a - * value can be added to a `%reducer_opadd` with `r += a`. - * - * @deprecated Users are strongly encouraged to use `reducer` - * reducers rather than the old wrappers like reducer_opadd. - * The `reducer` reducers show the reducer/monoid/view - * architecture more clearly, are more consistent in their - * implementation, and present a simpler model for new - * user-implemented reducers. - * - * @note Implicit conversions are provided between `%reducer_opadd` - * and `reducer<%op_add>`. This allows incremental code - * conversion: old code that used `%reducer_opadd` can pass a - * `%reducer_opadd` to a converted function that now expects a - * pointer or reference to a `reducer<%op_add>`, and vice - * versa. - * - * @tparam Type The value type of the reducer. - * - * @see op_add - * @see reducer - * @see ReducersAdd - * - * @ingroup ReducersAdd - */ -template -class reducer_opadd : public reducer< op_add > -{ - typedef reducer< op_add > base; - using base::view; - - public: - /// The view type for the reducer. - typedef typename base::view_type view_type; - - /// The view's rhs proxy type. - typedef typename view_type::rhs_proxy rhs_proxy; - - /// The view type for the reducer. - typedef view_type View; - - /// The monoid type for the reducer. - typedef typename base::monoid_type Monoid; - - /** @name Constructors - */ - ///@{ - - /** Default (identity) constructor. - * - * Constructs the wrapper with the default initial value of `Type()`. - */ - reducer_opadd() {} - - /** Value constructor. - * - * Constructs the wrapper with a specified initial value. - */ - explicit reducer_opadd(const Type& initial_value) : base(initial_value) {} - - ///@} - - /** @name Forwarded functions - * @details Functions that update the contained accumulator variable are - * simply forwarded to the contained @ref op_add_view. 
*/ - ///@{ - - /// @copydoc op_add_view::operator+=(const Type&) - reducer_opadd& operator+=(const Type& x) { view() += x; return *this; } - - /// @copydoc op_add_view::operator-=(const Type&) - reducer_opadd& operator-=(const Type& x) { view() -= x; return *this; } - - /// @copydoc op_add_view::operator++() - reducer_opadd& operator++() { ++view(); return *this; } - - /// @copydoc op_add_view::operator++(int) - void operator++(int) { view()++; } - - /// @copydoc op_add_view::operator-\-() - reducer_opadd& operator--() { --view(); return *this; } - - /// @copydoc op_add_view::operator-\-(int) - void operator--(int) { view()--; } - - // The legacy definitions of reducer_opadd::operator+() and - // reducer_opadd::operator-() have different behavior and a different - // return type than this definition. The legacy version is defined as a - // member function, so this new version is defined as a free function to - // give it a different signature, so that they won't end up sharing a - // single object file entry. - - /// @copydoc op_add_view::operator+(const Type&) const - friend rhs_proxy operator+(const reducer_opadd& r, const Type& x) - { - return r.view() + x; - } - /// @copydoc op_add_view::operator-(const Type&) const - friend rhs_proxy operator-(const reducer_opadd& r, const Type& x) - { - return r.view() - x; - } - /// @copydoc op_add_view::operator=(const rhs_proxy&) - reducer_opadd& operator=(const rhs_proxy& temp) - { - view() = temp; - return *this; - } - ///@} - - /** @name Dereference - * @details Dereferencing a wrapper is a no-op. It simply returns the - * wrapper. Combined with the rule that the wrapper forwards view - * operations to its contained view, this means that view operations can - * be written the same way on reducers and wrappers, which is convenient - * for incrementally converting old code using wrappers to use reducers - * instead. 
That is: - * - * reducer< op_add > r; - * *r += a; // *r returns the view - * // operator += is a view member function - * - * reducer_opadd w; - * *w += a; // *w returns the wrapper - * // operator += is a wrapper member function that - * // calls the corresponding view function - */ - ///@{ - reducer_opadd& operator*() { return *this; } - reducer_opadd const& operator*() const { return *this; } - - reducer_opadd* operator->() { return this; } - reducer_opadd const* operator->() const { return this; } - ///@} - - /** @name Upcast - * @details In Intel Cilk Plus library 0.9, reducers were always cache-aligned. - * In library 1.0, reducer cache alignment is optional. By default, - * reducers are unaligned (i.e., just naturally aligned), but legacy - * wrappers inherit from cache-aligned reducers for binary compatibility. - * - * This means that a wrapper will automatically be upcast to its aligned - * reducer base class. The following conversion operators provide - * pseudo-upcasts to the corresponding unaligned reducer class. - */ - ///@{ - operator reducer< op_add >& () - { - return *reinterpret_cast< reducer< op_add >* >(this); - } - operator const reducer< op_add >& () const - { - return *reinterpret_cast< const reducer< op_add >* >(this); - } - ///@} -}; - -/// @cond internal -/** Metafunction specialization for reducer conversion. - * - * This specialization of the @ref legacy_reducer_downcast template class - * defined in reducer.h causes the `reducer< op_add >` class to have an - * `operator reducer_opadd& ()` conversion operator that statically - * downcasts the `reducer` to the corresponding `reducer_opadd` type. - * (The reverse conversion, from `reducer_opadd` to `reducer`, is just - * an upcast, which is provided for free by the language.) 
- * - * @ingroup ReducersAdd - */ -template -struct legacy_reducer_downcast > > -{ - typedef reducer_opadd type; -}; -/// @endcond - -} // namespace cilk - -extern "C" { - -#endif // __cplusplus - - -/** @ingroup ReducersAdd - */ -///@{ - -/** @name C Language Reducer Macros - * - * These macros are used to declare and work with numeric op_add reducers in - * C code. - * - * @see @ref page_reducers_in_c - */ - ///@{ - -/** Declares opadd reducer type name. - * - * This macro expands into the identifier which is the name of the op_add - * reducer type for a specified numeric type. - * - * @param tn The @ref reducers_c_type_names "numeric type name" specifying - * the type of the reducer. - * - * @see @ref reducers_c_predefined - * @see ReducersAdd - */ -#define CILK_C_REDUCER_OPADD_TYPE(tn) \ - __CILKRTS_MKIDENT(cilk_c_reducer_opadd_,tn) - -/** Declares an op_add reducer object. - * - * This macro expands into a declaration of an op_add reducer object for a - * specified numeric type. For example: - * - * CILK_C_REDUCER_OPADD(my_reducer, double, 0.0); - * - * @param obj The variable name to be used for the declared reducer object. - * @param tn The @ref reducers_c_type_names "numeric type name" specifying - * the type of the reducer. - * @param v The initial value for the reducer. (A value which can be - * assigned to the numeric type represented by @a tn.) - * - * @see @ref reducers_c_predefined - * @see ReducersAdd - */ -#define CILK_C_REDUCER_OPADD(obj,tn,v) \ - CILK_C_REDUCER_OPADD_TYPE(tn) obj = \ - CILK_C_INIT_REDUCER(_Typeof(obj.value), \ - __CILKRTS_MKIDENT(cilk_c_reducer_opadd_reduce_,tn), \ - __CILKRTS_MKIDENT(cilk_c_reducer_opadd_identity_,tn), \ - 0, v) - -/// @cond internal - -/** Declares the op_add reducer functions for a numeric type. - * - * This macro expands into external function declarations for functions which - * implement the reducer functionality for the op_add reducer type for a - * specified numeric type. 
- * - * @param t The value type of the reducer. - * @param tn The value "type name" identifier, used to construct the reducer - * type name, function names, etc. - */ -#define CILK_C_REDUCER_OPADD_DECLARATION(t,tn) \ - typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPADD_TYPE(tn); \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opadd,tn,l,r); \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opadd,tn); - -/** Defines the op_add reducer functions for a numeric type. - * - * This macro expands into function definitions for functions which implement - * the reducer functionality for the op_add reducer type for a specified - * numeric type. - * - * @param t The value type of the reducer. - * @param tn The value "type name" identifier, used to construct the reducer - * type name, function names, etc. - */ -#define CILK_C_REDUCER_OPADD_DEFINITION(t,tn) \ - typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPADD_TYPE(tn); \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opadd,tn,l,r) \ - { *(t*)l += *(t*)r; } \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opadd,tn) \ - { *(t*)v = 0; } - -///@{ -/** @def CILK_C_REDUCER_OPADD_INSTANCE - * @brief Declares or defines implementation functions for a reducer type. - * - * In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS` - * will be defined, and this macro will generate reducer implementation - * functions. Everywhere else, `CILK_C_DEFINE_REDUCERS` will be undefined, - * and this macro will expand into external declarations for the functions. - */ -#ifdef CILK_C_DEFINE_REDUCERS -# define CILK_C_REDUCER_OPADD_INSTANCE(t,tn) \ - CILK_C_REDUCER_OPADD_DEFINITION(t,tn) -#else -# define CILK_C_REDUCER_OPADD_INSTANCE(t,tn) \ - CILK_C_REDUCER_OPADD_DECLARATION(t,tn) -#endif -///@} - -/* Declares or defines an instance of the reducer type and its functions for each - * numeric type. 
- */ -CILK_C_REDUCER_OPADD_INSTANCE(char, char) -CILK_C_REDUCER_OPADD_INSTANCE(unsigned char, uchar) -CILK_C_REDUCER_OPADD_INSTANCE(signed char, schar) -CILK_C_REDUCER_OPADD_INSTANCE(wchar_t, wchar_t) -CILK_C_REDUCER_OPADD_INSTANCE(short, short) -CILK_C_REDUCER_OPADD_INSTANCE(unsigned short, ushort) -CILK_C_REDUCER_OPADD_INSTANCE(int, int) -CILK_C_REDUCER_OPADD_INSTANCE(unsigned int, uint) -CILK_C_REDUCER_OPADD_INSTANCE(unsigned int, unsigned) /* alternate name */ -CILK_C_REDUCER_OPADD_INSTANCE(long, long) -CILK_C_REDUCER_OPADD_INSTANCE(unsigned long, ulong) -CILK_C_REDUCER_OPADD_INSTANCE(long long, longlong) -CILK_C_REDUCER_OPADD_INSTANCE(unsigned long long, ulonglong) -CILK_C_REDUCER_OPADD_INSTANCE(float, float) -CILK_C_REDUCER_OPADD_INSTANCE(double, double) -CILK_C_REDUCER_OPADD_INSTANCE(long double, longdouble) - -//@endcond - -#ifdef __cplusplus -} /* end extern "C" */ -#endif - -///@} - -///@} - -#endif /* REDUCER_OPADD_H_INCLUDED */ diff --git a/include/cilk/reducer_opand.h b/include/cilk/reducer_opand.h deleted file mode 100644 index 46c39fcd..00000000 --- a/include/cilk/reducer_opand.h +++ /dev/null @@ -1,617 +0,0 @@ -/* reducer_opand.h -*- C++ -*- - * - * Copyright (C) 2009-2018, Intel Corporation - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY - * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * ********************************************************************* - * - * PLEASE NOTE: This file is a downstream copy of a file maintained in - * a repository at cilkplus.org. Changes made to this file that are not - * submitted through the contribution process detailed at - * http://www.cilkplus.org/submit-cilk-contribution will be lost the next - * time that a new version is released. Changes only submitted to the - * GNU compiler collection or posted to the git repository at - * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime are - * not tracked. - * - * We welcome your contributions to this open source project. Thank you - * for your assistance in helping us improve Cilk Plus. - */ - -/** @file reducer_opand.h - * - * @brief Defines classes for doing parallel bitwise AND reductions. - * - * @ingroup ReducersAnd - * - * @see ReducersAnd - */ - -#ifndef REDUCER_OPAND_H_INCLUDED -#define REDUCER_OPAND_H_INCLUDED - -#include - -/** @defgroup ReducersAnd Bitwise AND Reducers - * - * Bitwise AND reducers allow the computation of the bitwise AND of a set of - * values in parallel. 
- * - * @ingroup Reducers - * - * You should be familiar with @ref pagereducers "Intel(R) Cilk(TM) Plus reducers", - * described in file `reducers.md`, and particularly with @ref reducers_using, - * before trying to use the information in this file. - * - * @section redopand_usage Usage Example - * - * cilk::reducer< cilk::op_and > r; - * cilk_for (int i = 0; i != N; ++i) { - * *r &= a[i]; - * } - * unsigned result; - * r.move_out(result); - * - * @section redopand_monoid The Monoid - * - * @subsection redopand_monoid_values Value Set - * - * The value set of a bitwise AND reducer is the set of values of `Type`, - * which is expected to be a builtin integer type which has a representation - * as a sequence of bits (or something like it, such as `bool` or - * `std::bitset`). - * - * @subsection redopand_monoid_operator Operator - * - * The bitwise AND operator is defined by the "`&`" binary operator on `Type`. - * - * @subsection redopand_monoid_identity Identity - * - * The identity value of the reducer is the value whose representation - * contains all 1-bits. This is expected to be the value of the expression - * `~Type()` (i.e., the bitwise negation operator applied to the default value - * of the value type). - * - * @section redopand_operations Operations - * - * @subsection redopand_constructors Constructors - * - * reducer() // identity - * reducer(const Type& value) - * reducer(move_in(Type& variable)) - * - * @subsection redopand_get_set Set and Get - * - * r.set_value(const Type& value) - * const Type& = r.get_value() const - * r.move_in(Type& variable) - * r.move_out(Type& variable) - * - * @subsection redopand_initial Initial Values - * - * If a bitwise AND reducer is constructed without an explicit initial value, - * then its initial value will be its identity value, as long as `Type` - * satisfies the requirements of @ref redopand_types. 
- * - * @subsection redopand_view_ops View Operations - * - * *r &= a - * *r = *r & a - * *r = *r & a1 & a2 … & an - * - * @section redopand_types Type and Operator Requirements - * - * `Type` must be `Copy Constructible`, `Default Constructible`, and - * `Assignable`. - * - * The operator "`&=`" must be defined on `Type`, with `x &= a` having the - * same meaning as `x = x & a`. - * - * The expression `~ Type()` must be a valid expression which yields the - * identity value (the value of `Type` whose representation consists of all - * 1-bits). - * - * @section redopand_in_c Bitwise AND Reducers in C - * - * The @ref CILK_C_REDUCER_OPAND and @ref CILK_C_REDUCER_OPAND_TYPE macros can - * be used to do bitwise AND reductions in C. For example: - * - * CILK_C_REDUCER_OPAND(r, uint, ~0); - * CILK_C_REGISTER_REDUCER(r); - * cilk_for(int i = 0; i != n; ++i) { - * REDUCER_VIEW(r) &= a[i]; - * } - * CILK_C_UNREGISTER_REDUCER(r); - * printf("The bitwise AND of the elements of a is %x\n", REDUCER_VIEW(r)); - * - * See @ref reducers_c_predefined. - */ - -#ifdef __cplusplus - -namespace cilk { - -/** The bitwise AND reducer view class. - * - * This is the view class for reducers created with - * `cilk::reducer< cilk::op_and >`. It holds the accumulator variable - * for the reduction, and allows only AND operations to be performed on it. - * - * @note The reducer "dereference" operation (`reducer::operator *()`) - * yields a reference to the view. Thus, for example, the view class's - * `&=` operation would be used in an expression like `*r &= a`, where - * `r` is an opmod reducer variable. - * - * @tparam Type The type of the contained accumulator variable. This will - * be the value type of a monoid_with_view that is - * instantiated with this view. 
- * - * @see ReducersAnd - * @see op_and - * - * @ingroup ReducersAnd - */ -template -class op_and_view : public scalar_view -{ - typedef scalar_view base; - -public: - /** Class to represent the right-hand side of `*reducer = *reducer & value`. - * - * The only assignment operator for the op_and_view class takes an - * rhs_proxy as its operand. This results in the syntactic restriction - * that the only expressions that can be assigned to an op_and_view are - * ones which generate an rhs_proxy - that is, expressions of the form - * `op_and_view & value ... & value`. - * - * @warning - * The lhs and rhs views in such an assignment must be the same; - * otherwise, the behavior will be undefined. (I.e., `v1 = v1 & x` is - * legal; `v1 = v2 & x` is illegal.) This condition will be checked with - * a runtime assertion when compiled in debug mode. - * - * @see op_and_view - */ - class rhs_proxy { - private: - friend class op_and_view; - - const op_and_view* m_view; - Type m_value; - - // Constructor is invoked only from op_and_view::operator&(). - // - rhs_proxy(const op_and_view* view, const Type& value) : m_view(view), m_value(value) {} - - rhs_proxy& operator=(const rhs_proxy&); // Disable assignment operator - rhs_proxy(); // Disable default constructor - - public: - /** Bitwise AND with an additional `rhs` value. If `v` is an op_and_view - * and `a1` is a value, then the expression `v & a1` invokes the - * view's `operator&()` to create an rhs_proxy for `(v, a1)`; then - * `v & a1 & a2` invokes the rhs_proxy's `operator&()` to create a new - * rhs_proxy for `(v, a1&a2)`. This allows the right-hand side of an - * assignment to be not just `view & value`, but - * `view & value & value ... & value`. The effect is that - * - * v = v & a1 & a2 ... & an; - * - * is evaluated as - * - * v = v & (a1 & a2 ... & an); - */ - rhs_proxy& operator&(const Type& x) { m_value &= x; return *this; } - }; - - - /** Default/identity constructor. 
This constructor initializes the - * contained value to `~ Type()`. - */ - op_and_view() : base(~Type()) {} - - /** Construct with a specified initial value. - */ - explicit op_and_view(const Type& v) : base(v) {} - - - /** Reduces the views of two strands. - * - * This function is invoked by the @ref op_and monoid to combine the views - * of two strands when the right strand merges with the left one. It - * "ANDs" the value contained in the left-strand view with the value - * contained in the right-strand view, and leaves the value in the - * right-strand view undefined. - * - * @param right A pointer to the right-strand view. (`this` points to - * the left-strand view.) - * - * @note Used only by the @ref op_and monoid to implement the monoid - * reduce operation. - */ - void reduce(op_and_view* right) { this->m_value &= right->m_value; } - - /** @name Accumulator variable updates. - * - * These functions support the various syntaxes for "ANDing" the - * accumulator variable contained in the view with some value. - */ - ///@{ - - /** Performs AND between the accumulator variable and @a x. - */ - op_and_view& operator&=(const Type& x) { this->m_value &= x; return *this; } - - /** Creates an object representing `*this & x`. - * - * @see rhs_proxy - */ - rhs_proxy operator&(const Type& x) const { return rhs_proxy(this, x); } - - /** Assigns the result of a `view & value` expression to the view. Note that - * this is the only assignment operator for this class. - * - * @see rhs_proxy - */ - op_and_view& operator=(const rhs_proxy& rhs) { - this->m_value &= rhs.m_value; - return *this; - } - - ///@} -}; - -/** Monoid class for bitwise AND reductions. Instantiate the cilk::reducer - * template class with an op_and monoid to create a bitwise AND reducer - * class. For example, to compute the bitwise AND of a set of `unsigned long` - * values: - * - * cilk::reducer< cilk::op_and > r; - * - * @tparam Type The reducer value type. 
- * @tparam Align If `false` (the default), reducers instantiated on this - * monoid will be naturally aligned (the Intel Cilk Plus library 1.0 - * behavior). If `true`, reducers instantiated on this monoid - * will be cache-aligned for binary compatibility with - * reducers in Intel Cilk Plus library version 0.9. - * - * @see ReducersAnd - * @see op_and_view - * - * @ingroup ReducersAnd - */ -template -struct op_and : public monoid_with_view, Align> {}; - -/** Deprecated bitwise AND reducer class. - * - * reducer_opand is the same as @ref reducer<@ref op_and>, except that - * reducer_opand is a proxy for the contained view, so that accumulator - * variable update operations can be applied directly to the reducer. For - * example, a value is "ANDed" with a `reducer<%op_and>` with `*r &= a`, but a - * value can be "ANDed" with a `%reducer_opand` with `r &= a`. - * - * @deprecated Users are strongly encouraged to use `reducer` - * reducers rather than the old wrappers like reducer_opand. - * The `reducer` reducers show the reducer/monoid/view - * architecture more clearly, are more consistent in their - * implementation, and present a simpler model for new - * user-implemented reducers. - * - * @note Implicit conversions are provided between `%reducer_opand` - * and `reducer<%op_and>`. This allows incremental code - * conversion: old code that used `%reducer_opand` can pass a - * `%reducer_opand` to a converted function that now expects a - * pointer or reference to a `reducer<%op_and>`, and vice - * versa. - * - * @tparam Type The value type of the reducer. - * - * @see op_and - * @see reducer - * @see ReducersAnd - * - * @ingroup ReducersAnd - */ -template -class reducer_opand : public reducer< op_and > -{ - typedef reducer< op_and > base; - using base::view; - -public: - /// The view type for the reducer. - typedef typename base::view_type view_type; - - /// The view's rhs proxy type. 
- typedef typename view_type::rhs_proxy rhs_proxy; - - /// The view type for the reducer. - typedef view_type View; - - /// The monoid type for the reducer. - typedef typename base::monoid_type Monoid; - - /** @name Constructors - */ - ///@{ - - /** Default constructor. - * - * Constructs the wrapper with the default initial value of `Type()` - * (not the identity value). - */ - reducer_opand() : base(Type()) {} - - /** Value constructor. - * - * Constructs the wrapper with a specified initial value. - */ - explicit reducer_opand(const Type& initial_value) : base(initial_value) {} - - ///@} - - /** @name Forwarded functions - * @details Functions that update the contained accumulator variable are - * simply forwarded to the contained @ref op_and_view. */ - ///@{ - - /// @copydoc op_and_view::operator&=(const Type&) - reducer_opand& operator&=(const Type& x) - { - view() &= x; - return *this; - } - - // The legacy definition of reducer_opand::operator&() has different - // behavior and a different return type than this definition. The legacy - // version is defined as a member function, so this new version is defined - // as a free function to give it a different signature, so that they won't - // end up sharing a single object file entry. - - /// @copydoc op_and_view::operator&(const Type&) const - friend rhs_proxy operator&(const reducer_opand& r, const Type& x) - { - return r.view() & x; - } - - /// @copydoc op_and_view::operator=(const rhs_proxy&) - reducer_opand& operator=(const rhs_proxy& temp) - { - view() = temp; - return *this; - } - ///@} - - /** @name Dereference - * @details Dereferencing a wrapper is a no-op. It simply returns the - * wrapper. Combined with the rule that the wrapper forwards view - * operations to its contained view, this means that view operations can - * be written the same way on reducers and wrappers, which is convenient - * for incrementally converting old code using wrappers to use reducers - * instead. 
That is: - * - * reducer< op_and > r; - * *r &= a; // *r returns the view - * // operator &= is a view member function - * - * reducer_opand w; - * *w &= a; // *w returns the wrapper - * // operator &= is a wrapper member function that - * // calls the corresponding view function - */ - ///@{ - reducer_opand& operator*() { return *this; } - reducer_opand const& operator*() const { return *this; } - - reducer_opand* operator->() { return this; } - reducer_opand const* operator->() const { return this; } - ///@} - - /** @name Upcast - * @details In Intel Cilk Plus library 0.9, reducers were always cache-aligned. - * In library 1.0, reducer cache alignment is optional. By default, - * reducers are unaligned (i.e., just naturally aligned), but legacy - * wrappers inherit from cache-aligned reducers for binary compatibility. - * - * This means that a wrapper will automatically be upcast to its aligned - * reducer base class. The following conversion operators provide - * pseudo-upcasts to the corresponding unaligned reducer class. - */ - ///@{ - operator reducer< op_and >& () - { - return *reinterpret_cast< reducer< op_and >* >(this); - } - operator const reducer< op_and >& () const - { - return *reinterpret_cast< const reducer< op_and >* >(this); - } - ///@} -}; - -/// @cond internal -/** Metafunction specialization for reducer conversion. - * - * This specialization of the @ref legacy_reducer_downcast template class - * defined in reducer.h causes the `reducer< op_and >` class to have an - * `operator reducer_opand& ()` conversion operator that statically - * downcasts the `reducer` to the corresponding `reducer_opand` type. - * (The reverse conversion, from `reducer_opand` to `reducer`, is just - * an upcast, which is provided for free by the language.) 
- * - * @ingroup ReducersAnd - */ -template -struct legacy_reducer_downcast > > -{ - typedef reducer_opand type; -}; -/// @endcond - -} // namespace cilk - -#endif // __cplusplus - - -/** @ingroup ReducersAdd - */ -///@{ - -/** @name C language reducer macros - * - * These macros are used to declare and work with op_and reducers in C code. - * - * @see @ref page_reducers_in_c - */ - ///@{ - -#ifdef __cplusplus -extern "C" { -#endif - -/** Declares `opand` reducer type name. - * - * This macro expands into the identifier which is the name of the op_and - * reducer type for a specified numeric type. - * - * @param tn The @ref reducers_c_type_names "numeric type name" specifying - * the type of the reducer. - * - * @see @ref reducers_c_predefined - * @see ReducersAnd - */ -#define CILK_C_REDUCER_OPAND_TYPE(tn) \ - __CILKRTS_MKIDENT(cilk_c_reducer_opand_,tn) - -/** Declares an op_and reducer object. - * - * This macro expands into a declaration of an op_and reducer object for a - * specified numeric type. For example: - * - * CILK_C_REDUCER_OPAND(my_reducer, ulong, ~0UL); - * - * @param obj The variable name to be used for the declared reducer object. - * @param tn The @ref reducers_c_type_names "numeric type name" specifying - * the type of the reducer. - * @param v The initial value for the reducer. (A value which can be - * assigned to the numeric type represented by @a tn.) - * - * @see @ref reducers_c_predefined - * @see ReducersAnd - */ -#define CILK_C_REDUCER_OPAND(obj,tn,v) \ - CILK_C_REDUCER_OPAND_TYPE(tn) obj = \ - CILK_C_INIT_REDUCER(_Typeof(obj.value), \ - __CILKRTS_MKIDENT(cilk_c_reducer_opand_reduce_,tn), \ - __CILKRTS_MKIDENT(cilk_c_reducer_opand_identity_,tn), \ - 0, v) - -/// @cond internal - -/** Declares the op_and reducer functions for a numeric type. - * - * This macro expands into external function declarations for functions which - * implement the reducer functionality for the op_and reducer type for a - * specified numeric type. 
- * - * @param t The value type of the reducer. - * @param tn The value "type name" identifier, used to construct the reducer - * type name, function names, etc. - */ -#define CILK_C_REDUCER_OPAND_DECLARATION(t,tn) \ - typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPAND_TYPE(tn); \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opand,tn,l,r); \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opand,tn); - -/** Defines the op_and reducer functions for a numeric type. - * - * This macro expands into function definitions for functions which implement - * the reducer functionality for the op_and reducer type for a specified - * numeric type. - * - * @param t The value type of the reducer. - * @param tn The value "type name" identifier, used to construct the reducer - * type name, function names, etc. - */ -#define CILK_C_REDUCER_OPAND_DEFINITION(t,tn) \ - typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPAND_TYPE(tn); \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opand,tn,l,r) \ - { *(t*)l &= *(t*)r; } \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opand,tn) \ - { *(t*)v = ~((t)0); } - -///@{ -/** @def CILK_C_REDUCER_OPAND_INSTANCE - * @brief Declares or defines implementation functions for a reducer type. - * - * In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS` - * will be defined, and this macro will generate reducer implementation - * functions. Everywhere else, `CILK_C_DEFINE_REDUCERS` will be undefined, and - * this macro will expand into external declarations for the functions. - */ -#ifdef CILK_C_DEFINE_REDUCERS -# define CILK_C_REDUCER_OPAND_INSTANCE(t,tn) \ - CILK_C_REDUCER_OPAND_DEFINITION(t,tn) -#else -# define CILK_C_REDUCER_OPAND_INSTANCE(t,tn) \ - CILK_C_REDUCER_OPAND_DECLARATION(t,tn) -#endif -///@} - -/* Declares or defines an instance of the reducer type and its functions for - * each numeric type. 
- */ -CILK_C_REDUCER_OPAND_INSTANCE(char, char) -CILK_C_REDUCER_OPAND_INSTANCE(unsigned char, uchar) -CILK_C_REDUCER_OPAND_INSTANCE(signed char, schar) -CILK_C_REDUCER_OPAND_INSTANCE(wchar_t, wchar_t) -CILK_C_REDUCER_OPAND_INSTANCE(short, short) -CILK_C_REDUCER_OPAND_INSTANCE(unsigned short, ushort) -CILK_C_REDUCER_OPAND_INSTANCE(int, int) -CILK_C_REDUCER_OPAND_INSTANCE(unsigned int, uint) -CILK_C_REDUCER_OPAND_INSTANCE(unsigned int, unsigned) /* alternate name */ -CILK_C_REDUCER_OPAND_INSTANCE(long, long) -CILK_C_REDUCER_OPAND_INSTANCE(unsigned long, ulong) -CILK_C_REDUCER_OPAND_INSTANCE(long long, longlong) -CILK_C_REDUCER_OPAND_INSTANCE(unsigned long long, ulonglong) - -//@endcond - -#ifdef __cplusplus -} /* end extern "C" */ -#endif - -///@} - -///@} - -#endif /* REDUCER_OPAND_H_INCLUDED */ diff --git a/include/cilk/reducer_opmul.h b/include/cilk/reducer_opmul.h deleted file mode 100644 index d2139b24..00000000 --- a/include/cilk/reducer_opmul.h +++ /dev/null @@ -1,456 +0,0 @@ -/* reducer_opmul.h -*- C++ -*- - * - * Copyright (C) 2012-2018, Intel Corporation - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY - * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * ********************************************************************* - * - * PLEASE NOTE: This file is a downstream copy of a file maintained in - * a repository at cilkplus.org. Changes made to this file that are not - * submitted through the contribution process detailed at - * http://www.cilkplus.org/submit-cilk-contribution will be lost the next - * time that a new version is released. Changes only submitted to the - * GNU compiler collection or posted to the git repository at - * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime are - * not tracked. - * - * We welcome your contributions to this open source project. Thank you - * for your assistance in helping us improve Cilk Plus. - */ - -/** @file reducer_opmul.h - * - * @brief Defines classes for doing parallel multiplication reductions. - * - * @ingroup ReducersMul - * - * @see ReducersMul - */ - -#ifndef REDUCER_OPMUL_H_INCLUDED -#define REDUCER_OPMUL_H_INCLUDED - -#include - -/** @defgroup ReducersMul Multiplication Reducers - * - * Multiplication reducers allow the computation of the product of a set of - * values in parallel. 
- * - * @ingroup Reducers - * - * You should be familiar with @ref pagereducers "Intel(R) Cilk(TM) Plus reducers", - * described in file `reducers.md`, and particularly with @ref reducers_using, - * before trying to use the information in this file. - * - * @section redopmul_usage Usage Example - * - * cilk::reducer< cilk::op_mul > r; - * cilk_for (int i = 0; i != N; ++i) { - * *r *= a[i]; - * } - * double product; - * r.move_out(product); - * - * @section redopmul_monoid The Monoid - * - * @subsection redopmul_monoid_values Value Set - * - * The value set of a multiplication reducer is the set of values of `Type`, - * which is expected to be a builtin numeric type (or something like it, such - * as `std::complex`). - * - * @subsection redopmul_monoid_operator Operator - * - * The operator of a multiplication reducer is the multiplication operation, - * defined by the "`*`" binary operator on `Type`. - * - * @subsection redopmul_monoid_identity Identity - * - * The identity value of the reducer is the numeric value "`1`". This is - * expected to be the value of the expression `Type(1)`. - * - * @section redopmul_operations Operations - * - * @subsection redopmul_constructors Constructors - * - * reducer() // identity - * reducer(const Type& value) - * reducer(move_in(Type& variable)) - * - * @subsection redopmul_get_set Set and Get - * - * r.set_value(const Type& value) - * const Type& = r.get_value() const - * r.move_in(Type& variable) - * r.move_out(Type& variable) - * - * @subsection redopmul_initial Initial Values - * - * If a multiplication reducer is constructed without an explicit initial - * value, then its initial value will be its identity value, as long as `Type` - * satisfies the requirements of @ref redopmul_types. 
- * - * @subsection redopmul_view_ops View Operations - * - * *r *= a - * *r = *r * a - * *r = *r * a1 * a2 … * an - * - * @section redopmul_floating_point Issues with Floating-Point Types - * - * Because of overflow and underflow issues, floating-point multiplication is - * not really associative. For example, `(1e200 * 1e-200) * 1e-200 == 1e-200`, - * but `1e200 * (1e-200 * 1e-200 == 0. - * - * In many cases, this won't matter, but computations which have been - * carefully ordered to control overflow and underflow may not deal well with - * being reassociated. In general, you should be sure to understand the - * floating-point behavior of your program before doing any transformation - * that will reassociate its computations. - * - * @section redopmul_types Type and Operator Requirements - * - * `Type` must be `Copy Constructible`, `Default Constructible`, and - * `Assignable`. - * - * The operator "`*=`" must be defined on `Type`, with `x *= a` having the same - * meaning as `x = x * a`. - * - * The expression `Type(1)` must be a valid expression which yields the - * identity value (the value of `Type` whose numeric value is `1`). - * - * @section redopmul_in_c Multiplication Reducers in C - * - * The @ref CILK_C_REDUCER_OPMUL and @ref CILK_C_REDUCER_OPMUL_TYPE macros can - * be used to do multiplication reductions in C. For example: - * - * CILK_C_REDUCER_OPMUL(r, double, 1); - * CILK_C_REGISTER_REDUCER(r); - * cilk_for(int i = 0; i != n; ++i) { - * REDUCER_VIEW(r) *= a[i]; - * } - * CILK_C_UNREGISTER_REDUCER(r); - * printf("The product of the elements of a is %f\n", REDUCER_VIEW(r)); - * - * See @ref reducers_c_predefined. - */ - -#ifdef __cplusplus - -namespace cilk { - -/** The multiplication reducer view class. - * - * This is the view class for reducers created with - * `cilk::reducer< cilk::op_mul >`. It holds the accumulator variable - * for the reduction, and allows only multiplication operations to be - * performed on it. 
- * - * @note The reducer "dereference" operation (`reducer::operator *()`) - * yields a reference to the view. Thus, for example, the view class's - * `*=` operation would be used in an expression like `*r *= a`, where - * `r` is an op_mul reducer variable. - * - * @tparam Type The type of the contained accumulator variable. This will - * be the value type of a monoid_with_view that is - * instantiated with this view. - * - * @see ReducersMul - * @see op_mul - * - * @ingroup ReducersMul - */ -template -class op_mul_view : public scalar_view -{ - typedef scalar_view base; - -public: - /** Class to represent the right-hand side of `*reducer = *reducer * value`. - * - * The only assignment operator for the op_mul_view class takes an - * rhs_proxy as its operand. This results in the syntactic restriction - * that the only expressions that can be assigned to an op_mul_view are - * ones which generate an rhs_proxy - that is, expressions of the form - * `op_mul_view * value ... * value`. - * - * @warning - * The lhs and rhs views in such an assignment must be the same; - * otherwise, the behavior will be undefined. (I.e., `v1 = v1 * x` is - * legal; `v1 = v2 * x` is illegal.) This condition will be checked with a - * runtime assertion when compiled in debug mode. - * - * @see op_mul_view - */ - class rhs_proxy { - friend class op_mul_view; - - const op_mul_view* m_view; - Type m_value; - - // Constructor is invoked only from op_mul_view::operator*(). - // - rhs_proxy(const op_mul_view* view, const Type& value) : m_view(view), m_value(value) {} - - rhs_proxy& operator=(const rhs_proxy&); // Disable assignment operator - rhs_proxy(); // Disable default constructor - - public: - /** Multiplies by an additional `rhs` value. 
If `v` is an op_mul_view and - * `a1` is a value, then the expression `v * a1` invokes the view's - * `operator*()` to create an rhs_proxy for `(v, a1)`; then - * `v * a1 * a2` invokes the rhs_proxy's `operator*()` to create a - * new rhs_proxy for `(v, a1*a2)`. This allows the right-hand side of - * an assignment to be not just `view * value`, but - * `view * value * value ... * value`. The effect is that - * - * v = v * a1 * a2 ... * an; - * - * is evaluated as - * - * v = v * (a1 * a2 ... * an); - */ - rhs_proxy& operator*(const Type& x) { m_value *= x; return *this; } - }; - - - /** Default/identity constructor. This constructor initializes the - * contained value to `Type(1)`, which is expected to be the identity - * value for multiplication on `Type`. - */ - op_mul_view() : base(Type(1)) {} - - /** Construct with a specified initial value. - */ - explicit op_mul_view(const Type& v) : base(v) {} - - /** Reduces two strand views. - * - * This function is invoked by the @ref op_mul monoid to combine the views - * of two strands when the right strand merges with the left one. It - * multiplies the value contained in the left-strand view by the value - * contained in the right-strand view, and leaves the value in the - * right-strand view undefined. - * - * @param right A pointer to the right-strand view. (`this` points to - * the left-strand view.) - * - * @note Used only by the @ref op_mul monoid to implement the monoid - * reduce operation. - */ - void reduce(op_mul_view* right) { this->m_value *= right->m_value; } - - /** @name Accumulator variable updates. - * - * These functions support the various syntaxes for multiplying the - * accumulator variable contained in the view by some value. - */ - ///@{ - - /** Multiplies the accumulator variable by @a x. - */ - op_mul_view& operator*=(const Type& x) { this->m_value *= x; return *this; } - - /** Creates an object representing `*this * x`. 
- * - * @see rhs_proxy - */ - rhs_proxy operator*(const Type& x) const { return rhs_proxy(this, x); } - - /** Assigns the result of a `view * value` expression to the view. Note that - * this is the only assignment operator for this class. - * - * @see rhs_proxy - */ - op_mul_view& operator=(const rhs_proxy& rhs) { - this->m_value *= rhs.m_value; - return *this; - } - - ///@} -}; - -/** Monoid class for multiplication reductions. Instantiate the cilk::reducer - * template class with an op_mul monoid to create a multiplication reducer - * class. For example, to compute the product of a set of `double` values: - * - * cilk::reducer< cilk::op_mul > r; - * - * @see ReducersMul - * @see op_mul_view - * - * @ingroup ReducersMul - */ -template -struct op_mul : public monoid_with_view< op_mul_view > {}; - -} // namespace cilk - -#endif // __cplusplus - - -/** @ingroup ReducersAdd - */ -///@{ - -/** @name C language reducer macros - * - * These macros are used to declare and work with numeric op_mul reducers in - * C code. - * - * @see @ref page_reducers_in_c - */ - ///@{ - -#ifdef __cplusplus -extern "C" { -#endif - -/** Declares `opmul` reducer type name. - * - * This macro expands into the identifier which is the name of the op_mul - * reducer type for a specified numeric type. - * - * @param tn The @ref reducers_c_type_names "numeric type name" specifying - * the type of the reducer. - * - * @see @ref reducers_c_predefined - * @see ReducersMul - */ -#define CILK_C_REDUCER_OPMUL_TYPE(tn) \ - __CILKRTS_MKIDENT(cilk_c_reducer_opmul_,tn) - -/** Declares an op_mul reducer object. - * - * This macro expands into a declaration of an op_mul reducer object for a - * specified numeric type. For example: - * - * CILK_C_REDUCER_OPMUL(my_reducer, double, 1.0); - * - * @param obj The variable name to be used for the declared reducer object. - * @param tn The @ref reducers_c_type_names "numeric type name" specifying - * the type of the reducer. 
- * @param v The initial value for the reducer. (A value which can be - * assigned to the numeric type represented by @a tn.) - * - * @see @ref reducers_c_predefined - * @see ReducersMul - */ -#define CILK_C_REDUCER_OPMUL(obj,tn,v) \ - CILK_C_REDUCER_OPMUL_TYPE(tn) obj = \ - CILK_C_INIT_REDUCER(_Typeof(obj.value), \ - __CILKRTS_MKIDENT(cilk_c_reducer_opmul_reduce_,tn), \ - __CILKRTS_MKIDENT(cilk_c_reducer_opmul_identity_,tn), \ - 0, v) - -/// @cond internal - -/** Declares the op_mul reducer functions for a numeric type. - * - * This macro expands into external function declarations for functions which - * implement the reducer functionality for the op_mul reducer type for a - * specified numeric type. - * - * @param t The value type of the reducer. - * @param tn The value "type name" identifier, used to construct the reducer - * type name, function names, etc. - */ -#define CILK_C_REDUCER_OPMUL_DECLARATION(t,tn) \ - typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPMUL_TYPE(tn); \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opmul,tn,l,r); \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opmul,tn); - -/** Defines the op_mul reducer functions for a numeric type. - * - * This macro expands into function definitions for functions which implement - * the reducer functionality for the op_mul reducer type for a specified - * numeric type. - * - * @param t The value type of the reducer. - * @param tn The value "type name" identifier, used to construct the reducer - * type name, function names, etc. - */ -#define CILK_C_REDUCER_OPMUL_DEFINITION(t,tn) \ - typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPMUL_TYPE(tn); \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opmul,tn,l,r) \ - { *(t*)l *= *(t*)r; } \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opmul,tn) \ - { *(t*)v = 1; } - -///@{ -/** @def CILK_C_REDUCER_OPMUL_INSTANCE - * @brief Declares or defines implementation functions for a reducer type. 
- * - * In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS` - * will be defined, and this macro will generate reducer implementation - * functions. Everywhere else, `CILK_C_DEFINE_REDUCERS` will be undefined, and - * this macro will expand into external declarations for the functions. - */ -#ifdef CILK_C_DEFINE_REDUCERS -# define CILK_C_REDUCER_OPMUL_INSTANCE(t,tn) \ - CILK_C_REDUCER_OPMUL_DEFINITION(t,tn) -#else -# define CILK_C_REDUCER_OPMUL_INSTANCE(t,tn) \ - CILK_C_REDUCER_OPMUL_DECLARATION(t,tn) -#endif -///@} - -/* Declares or defines an instance of the reducer type and its functions for each - * numeric type. - */ -CILK_C_REDUCER_OPMUL_INSTANCE(char, char) -CILK_C_REDUCER_OPMUL_INSTANCE(unsigned char, uchar) -CILK_C_REDUCER_OPMUL_INSTANCE(signed char, schar) -CILK_C_REDUCER_OPMUL_INSTANCE(wchar_t, wchar_t) -CILK_C_REDUCER_OPMUL_INSTANCE(short, short) -CILK_C_REDUCER_OPMUL_INSTANCE(unsigned short, ushort) -CILK_C_REDUCER_OPMUL_INSTANCE(int, int) -CILK_C_REDUCER_OPMUL_INSTANCE(unsigned int, uint) -CILK_C_REDUCER_OPMUL_INSTANCE(unsigned int, unsigned) /* alternate name */ -CILK_C_REDUCER_OPMUL_INSTANCE(long, long) -CILK_C_REDUCER_OPMUL_INSTANCE(unsigned long, ulong) -CILK_C_REDUCER_OPMUL_INSTANCE(long long, longlong) -CILK_C_REDUCER_OPMUL_INSTANCE(unsigned long long, ulonglong) -CILK_C_REDUCER_OPMUL_INSTANCE(float, float) -CILK_C_REDUCER_OPMUL_INSTANCE(double, double) -CILK_C_REDUCER_OPMUL_INSTANCE(long double, longdouble) - -//@endcond - -#ifdef __cplusplus -} /* end extern "C" */ -#endif - -///@} - -///@} - -#endif /* REDUCER_OPMUL_H_INCLUDED */ diff --git a/include/cilk/reducer_opor.h b/include/cilk/reducer_opor.h deleted file mode 100644 index 20ae2d1e..00000000 --- a/include/cilk/reducer_opor.h +++ /dev/null @@ -1,612 +0,0 @@ -/* reducer_opor.h -*- C++ -*- - * - * Copyright (C) 2009-2018, Intel Corporation - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY - * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * ********************************************************************* - * - * PLEASE NOTE: This file is a downstream copy of a file maintained in - * a repository at cilkplus.org. Changes made to this file that are not - * submitted through the contribution process detailed at - * http://www.cilkplus.org/submit-cilk-contribution will be lost the next - * time that a new version is released. 
Changes only submitted to the - * GNU compiler collection or posted to the git repository at - * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime are - * not tracked. - * - * We welcome your contributions to this open source project. Thank you - * for your assistance in helping us improve Cilk Plus. - */ - -/** @file reducer_opor.h - * - * @brief Defines classes for doing parallel bitwise OR reductions. - * - * @ingroup ReducersOr - * - * @see ReducersOr - */ - -#ifndef REDUCER_OPOR_H_INCLUDED -#define REDUCER_OPOR_H_INCLUDED - -#include - -/** @defgroup ReducersOr Bitwise `OR` Reducers - * - * Bitwise `OR` reducers allow the computation of the bitwise `OR` of a set of - * values in parallel. - * - * @ingroup Reducers - * - * You should be familiar with @ref pagereducers "Intel(R) Cilk(TM) Plus reducers", - * described in file `reducers.md`, and particularly with @ref reducers_using, - * before trying to use the information in this file. - * - * @section redopor_usage Usage Example - * - * cilk::reducer< cilk::op_or > r; - * cilk_for (int i = 0; i != N; ++i) { - * *r |= a[i]; - * } - * unsigned result; - * r.move_out(result); - * - * @section redopor_monoid The Monoid - * - * @subsection redopor_monoid_values Value Set - * - * The value set of a bitwise `OR` reducer is the set of values of `Type`, which - * is expected to be a builtin integer type which has a representation as a - * sequence of bits (or something like it, such as `bool` or `std::bitset`). - * - * @subsection redopor_monoid_operator Operator - * - * The operator of a bitwise `OR` reducer is the bitwise OR operator, defined by - * the "`|`" binary operator on `Type`. - * - * @subsection redopor_monoid_identity Identity - * - * The identity value of the reducer is the value whose representation - * contains all 0-bits. This is expected to be the value of the default - * constructor `Type()`. 
- * - * @section redopor_operations Operations - * - * @subsection redopor_constructors Constructors - * - * reducer() // identity - * reducer(const Type& value) - * reducer(move_in(Type& variable)) - * - * @subsection redopor_get_set Set and Get - * - * r.set_value(const Type& value) - * const Type& = r.get_value() const - * r.move_in(Type& variable) - * r.move_out(Type& variable) - * - * @subsection redopor_initial Initial Values - * - * If a bitwise OR reducer is constructed without an explicit initial value, - * then its initial value will be its identity value, as long as `Type` - * satisfies the requirements of @ref redopor_types. - * - * @subsection redopor_view_ops View Operations - * - * *r |= a - * *r = *r | a - * *r = *r | a1 | a2 … | an - * - * @section redopor_types Type and Operator Requirements - * - * `Type` must be `Copy Constructible`, `Default Constructible`, and - * `Assignable`. - * - * The operator "`|=`" must be defined on `Type`, with `x |= a` having the - * same meaning as `x = x | a`. - * - * The expression `Type()` must be a valid expression which yields the - * identity value (the value of `Type` whose representation consists of all - * 0-bits). - * - * @section redopor_in_c Bitwise OR Reducers in C - * - * The @ref CILK_C_REDUCER_OPOR and @ref CILK_C_REDUCER_OPOR_TYPE macros can - * be used to do bitwise OR reductions in C. For example: - * - * CILK_C_REDUCER_OPOR(r, uint, 0); - * CILK_C_REGISTER_REDUCER(r); - * cilk_for(int i = 0; i != n; ++i) { - * REDUCER_VIEW(r) |= a[i]; - * } - * CILK_C_UNREGISTER_REDUCER(r); - * printf("The bitwise OR of the elements of a is %x\n", REDUCER_VIEW(r)); - * - * See @ref reducers_c_predefined. - */ - -#ifdef __cplusplus - -namespace cilk { - -/** The bitwise OR reducer view class. - * - * This is the view class for reducers created with - * `cilk::reducer< cilk::op_or >`. It holds the accumulator variable for - * the reduction, and allows only `or` operations to be performed on it. 
- * - * @note The reducer "dereference" operation (`reducer::operator *()`) - * yields a reference to the view. Thus, for example, the view class's - * `|=` operation would be used in an expression like `*r |= a`, where - * `r` is an opmod reducer variable. - * - * @tparam Type The type of the contained accumulator variable. This will - * be the value type of a monoid_with_view that is - * instantiated with this view. - * - * @see ReducersOr - * @see op_or - * - * @ingroup ReducersOr - */ -template -class op_or_view : public scalar_view -{ - typedef scalar_view base; - -public: - /** Class to represent the right-hand side of `*reducer = *reducer | value`. - * - * The only assignment operator for the op_or_view class takes an - * rhs_proxy as its operand. This results in the syntactic restriction - * that the only expressions that can be assigned to an op_or_view are - * ones which generate an rhs_proxy - that is, expressions of the form - * `op_or_view | value ... | value`. - * - * @warning - * The lhs and rhs views in such an assignment must be the same; - * otherwise, the behavior will be undefined. (I.e., `v1 = v1 | x` is - * legal; `v1 = v2 | x` is illegal.) This condition will be checked with - * a runtime assertion when compiled in debug mode. - * - * @see op_or_view - */ - class rhs_proxy { - friend class op_or_view; - - const op_or_view* m_view; - Type m_value; - - // Constructor is invoked only from op_or_view::operator|(). - // - rhs_proxy(const op_or_view* view, const Type& value) : m_view(view), m_value(value) {} - - rhs_proxy& operator=(const rhs_proxy&); // Disable assignment operator - rhs_proxy(); // Disable default constructor - - public: - /** bitwise OR with an additional rhs value. If `v` is an op_or_view - * and `a1` is a value, then the expression `v | a1` invokes the - * view's `operator|()` to create an rhs_proxy for `(v, a1)`; then - * `v | a1 | a2` invokes the rhs_proxy's `operator|()` to create a new - * rhs_proxy for `(v, a1|a2)`. 
This allows the right-hand side of an - * assignment to be not just `view | value`, but - ( `view | value | value ... | value`. The effect is that - * - * v = v | a1 | a2 ... | an; - * - * is evaluated as - * - * v = v | (a1 | a2 ... | an); - */ - rhs_proxy& operator|(const Type& x) { m_value |= x; return *this; } - }; - - - /** Default/identity constructor. This constructor initializes the - * contained value to `Type()`. - */ - op_or_view() : base() {} - - /** Construct with a specified initial value. - */ - explicit op_or_view(const Type& v) : base(v) {} - - /** Reduces the views of two strands. - * - * This function is invoked by the @ref op_or monoid to combine the views - * of two strands when the right strand merges with the left one. It - * "ORs" the value contained in the left-strand view by the value - * contained in the right-strand view, and leaves the value in the - * right-strand view undefined. - * - * @param right A pointer to the right-strand view. (`this` points to - * the left-strand view.) - * - * @note Used only by the @ref op_or monoid to implement the monoid - * reduce operation. - */ - void reduce(op_or_view* right) { this->m_value |= right->m_value; } - - /** @name Accumulator variable updates. - * - * These functions support the various syntaxes for "ORing" the - * accumulator variable contained in the view with some value. - */ - ///@{ - - /** Perfoms an OR operation between the accumulator variable and @a x. - */ - op_or_view& operator|=(const Type& x) { this->m_value |= x; return *this; } - - /** Creates an object representing `*this | x`. - * - * @see rhs_proxy - */ - rhs_proxy operator|(const Type& x) const { return rhs_proxy(this, x); } - - /** Assigns the result of a `view | value` expression to the view. Note that - * this is the only assignment operator for this class. 
- * - * @see rhs_proxy - */ - op_or_view& operator=(const rhs_proxy& rhs) { - this->m_value |= rhs.m_value; - return *this; - } - - ///@} -}; - -/** Monoid class for bitwise OR reductions. Instantiate the cilk::reducer - * template class with an op_or monoid to create a bitwise OR reducer - * class. For example, to compute the bitwise OR of a set of `unsigned long` - * values: - * - * cilk::reducer< cilk::op_or > r; - * - * @tparam Type The reducer value type. - * @tparam Align If `false` (the default), reducers instantiated on this - * monoid will be naturally aligned (the Intel Cilk Plus library 1.0 - * behavior). If `true`, reducers instantiated on this monoid - * will be cache-aligned for binary compatibility with - * reducers in Intel Cilk Plus library version 0.9. - * - * @see ReducersOr - * @see op_or_view - * - * @ingroup ReducersOr - */ -template -struct op_or : public monoid_with_view, Align> {}; - -/** Deprecated bitwise OR reducer class. - * - * reducer_opor is the same as @ref reducer<@ref op_or>, except that - * reducer_opor is a proxy for the contained view, so that accumulator - * variable update operations can be applied directly to the reducer. For - * example, a value is "ORed" with a `reducer<%op_or>` with `*r |= a`, but a - * value can be "ORed" with a `%reducer_opor` with `r |= a`. - * - * @deprecated Users are strongly encouraged to use `reducer` - * reducers rather than the old wrappers like reducer_opor. - * The `reducer` reducers show the reducer/monoid/view - * architecture more clearly, are more consistent in their - * implementation, and present a simpler model for new - * user-implemented reducers. - * - * @note Implicit conversions are provided between `%reducer_opor` - * and `reducer<%op_or>`. This allows incremental code - * conversion: old code that used `%reducer_opor` can pass a - * `%reducer_opor` to a converted function that now expects a - * pointer or reference to a `reducer<%op_or>`, and vice - * versa. 
- * - * @tparam Type The value type of the reducer. - * - * @see op_or - * @see reducer - * @see ReducersOr - * - * @ingroup ReducersOr - */ -template -class reducer_opor : public reducer< op_or > -{ - typedef reducer< op_or > base; - using base::view; - - public: - /// The view type for the reducer. - typedef typename base::view_type view_type; - - /// The view's rhs proxy type. - typedef typename view_type::rhs_proxy rhs_proxy; - - /// The view type for the reducer. - typedef view_type View; - - /// The monoid type for the reducer. - typedef typename base::monoid_type Monoid; - - /** @name Constructors - */ - ///@{ - - /** Default (identity) constructor. - * - * Constructs the wrapper with the default initial value of `Type()`. - */ - reducer_opor() {} - - /** Value constructor. - * - * Constructs the wrapper with a specified initial value. - */ - explicit reducer_opor(const Type& initial_value) : base(initial_value) {} - - ///@} - - /** @name Forwarded functions - * @details Functions that update the contained accumulator variable are - * simply forwarded to the contained @ref op_and_view. */ - ///@{ - - /// @copydoc op_or_view::operator|=(const Type&) - reducer_opor& operator|=(const Type& x) - { - view() |= x; return *this; - } - - // The legacy definition of reducer_opor::operator|() has different - // behavior and a different return type than this definition. The legacy - // version is defined as a member function, so this new version is defined - // as a free function to give it a different signature, so that they won't - // end up sharing a single object file entry. - - /// @copydoc op_or_view::operator|(const Type&) const - friend rhs_proxy operator|(const reducer_opor& r, const Type& x) - { - return r.view() | x; - } - - /// @copydoc op_and_view::operator=(const rhs_proxy&) - reducer_opor& operator=(const rhs_proxy& temp) - { - view() = temp; return *this; - } - ///@} - - /** @name Dereference - * @details Dereferencing a wrapper is a no-op. 
It simply returns the - * wrapper. Combined with the rule that the wrapper forwards view - * operations to its contained view, this means that view operations can - * be written the same way on reducers and wrappers, which is convenient - * for incrementally converting old code using wrappers to use reducers - * instead. That is: - * - * reducer< op_and > r; - * *r &= a; // *r returns the view - * // operator &= is a view member function - * - * reducer_opand w; - * *w &= a; // *w returns the wrapper - * // operator &= is a wrapper member function that - * // calls the corresponding view function - */ - ///@{ - reducer_opor& operator*() { return *this; } - reducer_opor const& operator*() const { return *this; } - - reducer_opor* operator->() { return this; } - reducer_opor const* operator->() const { return this; } - ///@} - - /** @name Upcast - * @details In Intel Cilk Plus library 0.9, reducers were always cache-aligned. - * In library 1.0, reducer cache alignment is optional. By default, - * reducers are unaligned (i.e., just naturally aligned), but legacy - * wrappers inherit from cache-aligned reducers for binary compatibility. - * - * This means that a wrapper will automatically be upcast to its aligned - * reducer base class. The following conversion operators provide - * pseudo-upcasts to the corresponding unaligned reducer class. - */ - ///@{ - operator reducer< op_or >& () - { - return *reinterpret_cast< reducer< op_or >* >(this); - } - operator const reducer< op_or >& () const - { - return *reinterpret_cast< const reducer< op_or >* >(this); - } - ///@} - -}; - -/// @cond internal -/** Metafunction specialization for reducer conversion. - * - * This specialization of the @ref legacy_reducer_downcast template class - * defined in reducer.h causes the `reducer< op_or >` class to have an - * `operator reducer_opor& ()` conversion operator that statically - * downcasts the `reducer` to the corresponding `reducer_opor` type. 
- * (The reverse conversion, from `reducer_opor` to `reducer`, is just - * an upcast, which is provided for free by the language.) - * - * @ingroup ReducersOr - */ -template -struct legacy_reducer_downcast > > -{ - typedef reducer_opor type; -}; -/// @endcond - -} // namespace cilk - -#endif /* __cplusplus */ - - -/** @ingroup ReducersOr - */ -///@{ - -/** @name C language reducer macros - * - * These macros are used to declare and work with op_or reducers in C code. - * - * @see @ref page_reducers_in_c - */ - ///@{ - -#ifdef __cplusplus -extern "C" { -#endif - -/** Declares OPOR reducer type name. - * - * This macro expands into the identifier which is the name of the op_or - * reducer type for a specified numeric type. - * - * @param tn The @ref reducers_c_type_names "numeric type name" specifying - * the type of the reducer. - * - * @see @ref reducers_c_predefined - * @see ReducersOr - */ -#define CILK_C_REDUCER_OPOR_TYPE(tn) \ - __CILKRTS_MKIDENT(cilk_c_reducer_opor_,tn) - -/** Declares an op_or reducer object. - * - * This macro expands into a declaration of an op_or reducer object for a - * specified numeric type. For example: - * - * CILK_C_REDUCER_OPOR(my_reducer, ulong, 0); - * - * @param obj The variable name to be used for the declared reducer object. - * @param tn The @ref reducers_c_type_names "numeric type name" specifying - * the type of the reducer. - * @param v The initial value for the reducer. (A value which can be - * assigned to the numeric type represented by @a tn.) - * - * @see @ref reducers_c_predefined - * @see ReducersOr - */ -#define CILK_C_REDUCER_OPOR(obj,tn,v) \ - CILK_C_REDUCER_OPOR_TYPE(tn) obj = \ - CILK_C_INIT_REDUCER(_Typeof(obj.value), \ - __CILKRTS_MKIDENT(cilk_c_reducer_opor_reduce_,tn), \ - __CILKRTS_MKIDENT(cilk_c_reducer_opor_identity_,tn), \ - 0, v) - -/// @cond internal - -/** Declares the op_or reducer functions for a numeric type. 
- * - * This macro expands into external function declarations for functions which - * implement the reducer functionality for the op_or reducer type for a - * specified numeric type. - * - * @param t The value type of the reducer. - * @param tn The value "type name" identifier, used to construct the reducer - * type name, function names, etc. - */ -#define CILK_C_REDUCER_OPOR_DECLARATION(t,tn) \ - typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPOR_TYPE(tn); \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opor,tn,l,r); \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opor,tn); - -/** Defines the op_or reducer functions for a numeric type. - * - * This macro expands into function definitions for functions which implement - * the reducer functionality for the op_or reducer type for a specified - * numeric type. - * - * @param t The value type of the reducer. - * @param tn The value "type name" identifier, used to construct the reducer - * type name, function names, etc. - */ -#define CILK_C_REDUCER_OPOR_DEFINITION(t,tn) \ - typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPOR_TYPE(tn); \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opor,tn,l,r) \ - { *(t*)l |= *(t*)r; } \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opor,tn) \ - { *(t*)v = 0; } - -///@{ -/** @def CILK_C_REDUCER_OPOR_INSTANCE - * @brief Declares or defines implementation functions for a reducer type. - * - * In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS` - * will be defined, and this macro will generate reducer implementation - * functions. Everywhere else, `CILK_C_DEFINE_REDUCERS` will be undefined, and - * this macro will expand into external declarations for the functions. 
- */ -#ifdef CILK_C_DEFINE_REDUCERS -# define CILK_C_REDUCER_OPOR_INSTANCE(t,tn) \ - CILK_C_REDUCER_OPOR_DEFINITION(t,tn) -#else -# define CILK_C_REDUCER_OPOR_INSTANCE(t,tn) \ - CILK_C_REDUCER_OPOR_DECLARATION(t,tn) -#endif -///@} - -/* Declare or define an instance of the reducer type and its functions for each - * numeric type. - */ -CILK_C_REDUCER_OPOR_INSTANCE(char, char) -CILK_C_REDUCER_OPOR_INSTANCE(unsigned char, uchar) -CILK_C_REDUCER_OPOR_INSTANCE(signed char, schar) -CILK_C_REDUCER_OPOR_INSTANCE(wchar_t, wchar_t) -CILK_C_REDUCER_OPOR_INSTANCE(short, short) -CILK_C_REDUCER_OPOR_INSTANCE(unsigned short, ushort) -CILK_C_REDUCER_OPOR_INSTANCE(int, int) -CILK_C_REDUCER_OPOR_INSTANCE(unsigned int, uint) -CILK_C_REDUCER_OPOR_INSTANCE(unsigned int, unsigned) /* alternate name */ -CILK_C_REDUCER_OPOR_INSTANCE(long, long) -CILK_C_REDUCER_OPOR_INSTANCE(unsigned long, ulong) -CILK_C_REDUCER_OPOR_INSTANCE(long long, longlong) -CILK_C_REDUCER_OPOR_INSTANCE(unsigned long long, ulonglong) - -//@endcond - -#ifdef __cplusplus -} /* end extern "C" */ -#endif - -///@} - -///@} - -#endif /* REDUCER_OPOR_H_INCLUDED */ diff --git a/include/cilk/reducer_opxor.h b/include/cilk/reducer_opxor.h deleted file mode 100644 index 2e724a5c..00000000 --- a/include/cilk/reducer_opxor.h +++ /dev/null @@ -1,611 +0,0 @@ -/* reducer_opxor.h -*- C++ -*- - * - * Copyright (C) 2009-2018, Intel Corporation - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. 
- * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY - * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * ********************************************************************* - * - * PLEASE NOTE: This file is a downstream copy of a file maintained in - * a repository at cilkplus.org. Changes made to this file that are not - * submitted through the contribution process detailed at - * http://www.cilkplus.org/submit-cilk-contribution will be lost the next - * time that a new version is released. Changes only submitted to the - * GNU compiler collection or posted to the git repository at - * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime are - * not tracked. - * - * We welcome your contributions to this open source project. Thank you - * for your assistance in helping us improve Cilk Plus. - */ - -/** @file reducer_opxor.h - * - * @brief Defines classes for doing parallel bitwise or reductions. 
- * - * @ingroup ReducersXor - * - * @see ReducersXor - */ - -#ifndef REDUCER_OPXOR_H_INCLUDED -#define REDUCER_OPXOR_H_INCLUDED - -#include - -/** @defgroup ReducersXor Bitwise XOR Reducers - * - * Bitwise XOR reducers allow the computation of the bitwise XOR of a set of - * values in parallel. - * - * @ingroup Reducers - * - * You should be familiar with @ref pagereducers "Intel(R) Cilk(TM) Plus reducers", - * described in file `reducers.md`, and particularly with @ref reducers_using, - * before trying to use the information in this file. - * - * @section redopxor_usage Usage Example - * - * cilk::reducer< cilk::op_xor > r; - * cilk_for (int i = 0; i != N; ++i) { - * *r ^= a[i]; - * } - * unsigned result; - * r.move_out(result); - * - * @section redopxor_monoid The Monoid - * - * @subsection redopxor_monoid_values Value Set - * - * The value set of a bitwise XOR reducer is the set of values of `Type`, which - * is expected to be a builtin integer type which has a representation as a - * sequence of bits (or something like it, such as `bool` or `std::bitset`). - * - * @subsection redopxor_monoid_operator Operator - * - * The bitwise XOR operator is defined by the "`^`" binary operator on `Type`. - * - * @subsection redopxor_monoid_identity Identity - * - * The identity value of the reducer is the value whose representation - * contains all 0-bits. This is expected to be the value of the default - * constructor `Type()`. 
- * - * @section redopxor_operations Operations - * - * @subsection redopxor_constructors Constructors - * - * reducer() // identity - * reducer(const Type& value) - * reducer(move_in(Type& variable)) - * - * @subsection redopxor_get_set Set and Get - * - * r.set_value(const Type& value) - * const Type& = r.get_value() const - * r.move_in(Type& variable) - * r.move_out(Type& variable) - * - * @subsection redopxor_initial Initial Values - * - * If a bitwise XOR reducer is constructed without an explicit initial value, - * then its initial value will be its identity value, as long as `Type` - * satisfies the requirements of @ref redopxor_types. - * - * @subsection redopxor_view_ops View Operations - * - * *r ^= a - * *r = *r ^ a - * *r = *r ^ a1 ^ a2 … ^ an - * - * @section redopxor_types Type and Operator Requirements - * - * `Type` must be `Copy Constructible`, `Default Constructible`, and - * `Assignable`. - * - * The operator "`^=`" must be defined on `Type`, with `x ^= a` having the - * same meaning as `x = x ^ a`. - * - * The expression `Type()` must be a valid expression which yields the - * identity value (the value of `Type` whose representation consists of all - * 0-bits). - * - * @section redopxor_in_c Bitwise XOR Reducers in C - * - * The @ref CILK_C_REDUCER_OPXOR and @ref CILK_C_REDUCER_OPXOR_TYPE macros can - * be used to do bitwise XOR reductions in C. For example: - * - * CILK_C_REDUCER_OPXOR(r, uint, 0); - * CILK_C_REGISTER_REDUCER(r); - * cilk_for(int i = 0; i != n; ++i) { - * REDUCER_VIEW(r) ^= a[i]; - * } - * CILK_C_UNREGISTER_REDUCER(r); - * printf("The bitwise XOR of the elements of a is %x\n", REDUCER_VIEW(r)); - * - * See @ref reducers_c_predefined. - */ - -#ifdef __cplusplus - -namespace cilk { - -/** The bitwise XOR reducer view class. - * - * This is the view class for reducers created with - * `cilk::reducer< cilk::op_xor >`. It holds the accumulator variable - * for the reduction, and allows only `xor` operations to be performed on it. 
- * - * @note The reducer "dereference" operation (`reducer::operator *()`) - * yields a reference to the view. Thus, for example, the view class's - * `^=` operation would be used in an expression like `*r ^= a`, where - * `r` is an opmod reducer variable. - * - * @tparam Type The type of the contained accumulator variable. This will - * be the value type of a monoid_with_view that is - * instantiated with this view. - * - * @see ReducersXor - * @see op_xor - * - * @ingroup ReducersXor - */ -template -class op_xor_view : public scalar_view -{ - typedef scalar_view base; - -public: - /** Class to represent the right-hand side of `*reducer = *reducer ^ value`. - * - * The only assignment operator for the op_xor_view class takes an - * rhs_proxy as its operand. This results in the syntactic restriction - * that the only expressions that can be assigned to an op_xor_view are - * ones which generate an rhs_proxy - that is, expressions of the form - * `op_xor_view ^ value ... ^ value`. - * - * @warning - * The lhs and rhs views in such an assignment must be the same; - * otherwise, the behavior will be undefined. (I.e., `v1 = v1 ^ x` is - * legal; `v1 = v2 ^ x` is illegal.) This condition will be checked with - * a runtime assertion when compiled in debug mode. - * - * @see op_xor_view - */ - class rhs_proxy { - friend class op_xor_view; - - const op_xor_view* m_view; - Type m_value; - - // Constructor is invoked only from op_xor_view::operator^(). - // - rhs_proxy(const op_xor_view* view, const Type& value) : m_view(view), m_value(value) {} - - rhs_proxy& operator=(const rhs_proxy&); // Disable assignment operator - rhs_proxy(); // Disable default constructor - - public: - /** bitwise XOR with an additional rhs value. 
If `v` is an op_xor_view - * and `a1` is a value, then the expression `v ^ a1` invokes the - * view's `operator^()` to create an rhs_proxy for `(v, a1)`; then - * `v ^ a1 ^ a2` invokes the rhs_proxy's `operator^()` to create a new - * rhs_proxy for `(v, a1^a2)`. This allows the right-hand side of an - * assignment to be not just `view ^ value`, but - ( `view ^ value ^ value ... ^ value`. The effect is that - * - * v = v ^ a1 ^ a2 ... ^ an; - * - * is evaluated as - * - * v = v ^ (a1 ^ a2 ... ^ an); - */ - rhs_proxy& operator^(const Type& x) { m_value ^= x; return *this; } - }; - - - /** Default/identity constructor. This constructor initializes the - * contained value to `Type()`. - */ - op_xor_view() : base() {} - - /** Construct with a specified initial value. - */ - explicit op_xor_view(const Type& v) : base(v) {} - - /** Reduces the views of two strands. - * - * This function is invoked by the @ref op_xor monoid to combine the views - * of two strands when the right strand merges with the left one. It - * "XORs" the value contained in the left-strand view by the value - * contained in the right-strand view, and leaves the value in the - * right-strand view undefined. - * - * @param right A pointer to the right-strand view. (`this` points to - * the left-strand view.) - * - * @note Used only by the @ref op_xor monoid to implement the monoid - * reduce operation. - */ - void reduce(op_xor_view* right) { this->m_value ^= right->m_value; } - - /** @name Accumulator variable updates. - * - * These functions support the various syntaxes for "XORing" the - * accumulator variable contained in the view with some value. - */ - ///@{ - - /** Performs XOR operation between the accumulator variable and @a x. - */ - op_xor_view& operator^=(const Type& x) { this->m_value ^= x; return *this; } - - /** Creates an object representing `*this ^ x`. 
- * - * @see rhs_proxy - */ - rhs_proxy operator^(const Type& x) const { return rhs_proxy(this, x); } - - /** Assigns the result of a `view ^ value` expression to the view. Note that - * this is the only assignment operator for this class. - * - * @see rhs_proxy - */ - op_xor_view& operator=(const rhs_proxy& rhs) { - this->m_value ^= rhs.m_value; - return *this; - } - - ///@} -}; - -/** Monoid class for bitwise XOR reductions. Instantiate the cilk::reducer - * template class with an op_xor monoid to create a bitwise XOR reducer - * class. For example, to compute the bitwise XOR of a set of `unsigned long` - * values: - * - * cilk::reducer< cilk::op_xor > r; - * - * @tparam Type The reducer value type. - * @tparam Align If `false` (the default), reducers instantiated on this - * monoid will be naturally aligned (the Intel Cilk Plus library 1.0 - * behavior). If `true`, reducers instantiated on this monoid - * will be cache-aligned for binary compatibility with - * reducers in Intel Cilk Plus library version 0.9. - * - * @see ReducersXor - * @see op_xor_view - * - * @ingroup ReducersXor - */ -template -struct op_xor : public monoid_with_view, Align> {}; - -/** Deprecated bitwise XOR reducer class. - * - * reducer_opxor is the same as @ref reducer<@ref op_xor>, except that - * reducer_opxor is a proxy for the contained view, so that accumulator - * variable update operations can be applied directly to the reducer. For - * example, a value is "XORed" with a `reducer<%op_xor>` with `*r ^= a`, but a - * value can be "XORed" with a `%reducer_opxor` with `r ^= a`. - * - * @deprecated Users are strongly encouraged to use `reducer` - * reducers rather than the old wrappers like reducer_opand. - * The `reducer` reducers show the reducer/monoid/view - * architecture more clearly, are more consistent in their - * implementation, and present a simpler model for new - * user-implemented reducers. 
- * - * @note Implicit conversions are provided between `%reducer_opxor` - * and `reducer<%op_xor>`. This allows incremental code - * conversion: old code that used `%reducer_opxor` can pass a - * `%reducer_opxor` to a converted function that now expects a - * pointer or reference to a `reducer<%op_xor>`, and vice - * versa. - * - * @tparam Type The value type of the reducer. - * - * @see op_xor - * @see reducer - * @see ReducersXor - * - * @ingroup ReducersXor - */ -template -class reducer_opxor : public reducer< op_xor > -{ - typedef reducer< op_xor > base; - using base::view; - - public: - /// The view type for the reducer. - typedef typename base::view_type view_type; - - /// The view's rhs proxy type. - typedef typename view_type::rhs_proxy rhs_proxy; - - /// The view type for the reducer. - typedef view_type View; - - /// The monoid type for the reducer. - typedef typename base::monoid_type Monoid; - - /** @name Constructors - */ - ///@{ - - /** Default (identity) constructor. - * - * Constructs the wrapper with the default initial value of `Type()`. - */ - reducer_opxor() {} - - /** Value constructor. - * - * Constructs the wrapper with a specified initial value. - */ - explicit reducer_opxor(const Type& initial_value) : base(initial_value) {} - - ///@} - - /** @name Forwarded functions - * @details Functions that update the contained accumulator variable are - * simply forwarded to the contained @ref op_and_view. */ - ///@{ - - /// @copydoc op_xor_view::operator^=(const Type&) - reducer_opxor& operator^=(const Type& x) - { - view() ^= x; return *this; - } - - // The legacy definition of reducer_opxor::operator^() has different - // behavior and a different return type than this definition. The legacy - // version is defined as a member function, so this new version is defined - // as a free function to give it a different signature, so that they won't - // end up sharing a single object file entry. 
- - /// @copydoc op_xor_view::operator^(const Type&) const - friend rhs_proxy operator^(const reducer_opxor& r, const Type& x) - { - return r.view() ^ x; - } - - /// @copydoc op_and_view::operator=(const rhs_proxy&) - reducer_opxor& operator=(const rhs_proxy& temp) - { - view() = temp; return *this; - } - ///@} - - /** @name Dereference - * @details Dereferencing a wrapper is a no-op. It simply returns the - * wrapper. Combined with the rule that the wrapper forwards view - * operations to its contained view, this means that view operations can - * be written the same way on reducers and wrappers, which is convenient - * for incrementally converting old code using wrappers to use reducers - * instead. That is: - * - * reducer< op_and > r; - * *r &= a; // *r returns the view - * // operator &= is a view member function - * - * reducer_opand w; - * *w &= a; // *w returns the wrapper - * // operator &= is a wrapper member function that - * // calls the corresponding view function - */ - ///@{ - reducer_opxor& operator*() { return *this; } - reducer_opxor const& operator*() const { return *this; } - - reducer_opxor* operator->() { return this; } - reducer_opxor const* operator->() const { return this; } - ///@} - - /** @name Upcast - * @details In Intel Cilk Plus library 0.9, reducers were always cache-aligned. - * In library 1.0, reducer cache alignment is optional. By default, - * reducers are unaligned (i.e., just naturally aligned), but legacy - * wrappers inherit from cache-aligned reducers for binary compatibility. - * - * This means that a wrapper will automatically be upcast to its aligned - * reducer base class. The following conversion operators provide - * pseudo-upcasts to the corresponding unaligned reducer class. 
- */ - ///@{ - operator reducer< op_xor >& () - { - return *reinterpret_cast< reducer< op_xor >* >(this); - } - operator const reducer< op_xor >& () const - { - return *reinterpret_cast< const reducer< op_xor >* >(this); - } - ///@} - -}; - -/// @cond internal -/** Metafunction specialization for reducer conversion. - * - * This specialization of the @ref legacy_reducer_downcast template class - * defined in reducer.h causes the `reducer< op_xor >` class to have an - * `operator reducer_opxor& ()` conversion operator that statically - * downcasts the `reducer` to the corresponding `reducer_opxor` type. - * (The reverse conversion, from `reducer_opxor` to `reducer`, is just - * an upcast, which is provided for free by the language.) - * - * @ingroup ReducersXor - */ -template -struct legacy_reducer_downcast > > -{ - typedef reducer_opxor type; -}; -/// @endcond - -} // namespace cilk - -#endif /* __cplusplus */ - - -/** @ingroup ReducersXor - */ -///@{ - -/** @name C language reducer macros - * - * These macros are used to declare and work with op_xor reducers in C code. - * - * @see @ref page_reducers_in_c - */ - ///@{ - -#ifdef __cplusplus -extern "C" { -#endif - -/** Declares OPXOR reducer type name. - * - * This macro expands into the identifier which is the name of the op_xor - * reducer type for a specified numeric type. - * - * @param tn The @ref reducers_c_type_names "numeric type name" specifying - * the type of the reducer. - * - * @see @ref reducers_c_predefined - * @see ReducersXor - */ -#define CILK_C_REDUCER_OPXOR_TYPE(tn) \ - __CILKRTS_MKIDENT(cilk_c_reducer_opxor_,tn) - -/** Declares an op_xor reducer object. - * - * This macro expands into a declaration of an op_xor reducer object for a - * specified numeric type. For example: - * - * CILK_C_REDUCER_OPXOR(my_reducer, ulong, 0); - * - * @param obj The variable name to be used for the declared reducer object. 
- * @param tn The @ref reducers_c_type_names "numeric type name" specifying - * the type of the reducer. - * @param v The initial value for the reducer. (A value which can be - * assigned to the numeric type represented by @a tn.) - * - * @see @ref reducers_c_predefined - * @see ReducersXor - */ -#define CILK_C_REDUCER_OPXOR(obj,tn,v) \ - CILK_C_REDUCER_OPXOR_TYPE(tn) obj = \ - CILK_C_INIT_REDUCER(_Typeof(obj.value), \ - __CILKRTS_MKIDENT(cilk_c_reducer_opxor_reduce_,tn), \ - __CILKRTS_MKIDENT(cilk_c_reducer_opxor_identity_,tn), \ - 0, v) - -/// @cond internal - -/** Declares the op_xor reducer functions for a numeric type. - * - * This macro expands into external function declarations for functions which - * implement the reducer functionality for the op_xor reducer type for a - * specified numeric type. - * - * @param t The value type of the reducer. - * @param tn The value "type name" identifier, used to construct the reducer - * type name, function names, etc. - */ -#define CILK_C_REDUCER_OPXOR_DECLARATION(t,tn) \ - typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPXOR_TYPE(tn); \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opxor,tn,l,r); \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opxor,tn); - -/** Defines the op_xor reducer functions for a numeric type. - * - * This macro expands into function definitions for functions which implement - * the reducer functionality for the op_xor reducer type for a specified - * numeric type. - * - * @param t The value type of the reducer. - * @param tn The value "type name" identifier, used to construct the reducer - * type name, function names, etc. 
- */ -#define CILK_C_REDUCER_OPXOR_DEFINITION(t,tn) \ - typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPXOR_TYPE(tn); \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opxor,tn,l,r) \ - { *(t*)l ^= *(t*)r; } \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opxor,tn) \ - { *(t*)v = 0; } - -///@{ -/** @def CILK_C_REDUCER_OPXOR_INSTANCE - * @brief Declares or defines implementation functions for a reducer type. - * - * In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS` - * will be defined, and this macro will generate reducer implementation - * functions. Everywhere else, `CILK_C_DEFINE_REDUCERS` will be undefined, and - * this macro will expand into external declarations for the functions. - */ -#ifdef CILK_C_DEFINE_REDUCERS -# define CILK_C_REDUCER_OPXOR_INSTANCE(t,tn) \ - CILK_C_REDUCER_OPXOR_DEFINITION(t,tn) -#else -# define CILK_C_REDUCER_OPXOR_INSTANCE(t,tn) \ - CILK_C_REDUCER_OPXOR_DECLARATION(t,tn) -#endif -///@} - -/* Declares or defines an instance of the reducer type and its functions for each - * numeric type. 
- */ -CILK_C_REDUCER_OPXOR_INSTANCE(char, char) -CILK_C_REDUCER_OPXOR_INSTANCE(unsigned char, uchar) -CILK_C_REDUCER_OPXOR_INSTANCE(signed char, schar) -CILK_C_REDUCER_OPXOR_INSTANCE(wchar_t, wchar_t) -CILK_C_REDUCER_OPXOR_INSTANCE(short, short) -CILK_C_REDUCER_OPXOR_INSTANCE(unsigned short, ushort) -CILK_C_REDUCER_OPXOR_INSTANCE(int, int) -CILK_C_REDUCER_OPXOR_INSTANCE(unsigned int, uint) -CILK_C_REDUCER_OPXOR_INSTANCE(unsigned int, unsigned) /* alternate name */ -CILK_C_REDUCER_OPXOR_INSTANCE(long, long) -CILK_C_REDUCER_OPXOR_INSTANCE(unsigned long, ulong) -CILK_C_REDUCER_OPXOR_INSTANCE(long long, longlong) -CILK_C_REDUCER_OPXOR_INSTANCE(unsigned long long, ulonglong) - -//@endcond - -#ifdef __cplusplus -} /* end extern "C" */ -#endif - -///@} - -///@} - -#endif /* REDUCER_OPXOR_H_INCLUDED */ diff --git a/include/cilk/reducer_ostream.h b/include/cilk/reducer_ostream.h deleted file mode 100644 index b839ea45..00000000 --- a/include/cilk/reducer_ostream.h +++ /dev/null @@ -1,496 +0,0 @@ -/* reducer_ostream.h -*- C++ -*- - * - * Copyright (C) 2009-2018, Intel Corporation - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY - * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * ********************************************************************* - * - * PLEASE NOTE: This file is a downstream copy of a file maintained in - * a repository at cilkplus.org. Changes made to this file that are not - * submitted through the contribution process detailed at - * http://www.cilkplus.org/submit-cilk-contribution will be lost the next - * time that a new version is released. Changes only submitted to the - * GNU compiler collection or posted to the git repository at - * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime are - * not tracked. - * - * We welcome your contributions to this open source project. Thank you - * for your assistance in helping us improve Cilk Plus. - */ - -/** @file reducer_ostream.h - * - * @brief Defines a class for writing to an ostream in parallel. - * - * @ingroup ReducersOstream - * - * @see @ref ReducersOstream - */ - -#ifndef REDUCER_OSTREAM_H_INCLUDED -#define REDUCER_OSTREAM_H_INCLUDED - -#include -#include -#include - -/** @defgroup ReducersOstream Ostream Reducers - * - * Ostream reducers allow multiple strands to write to an ostream in parallel. 
- * - * @ingroup Reducers - * - * You should be familiar with @ref pagereducers "Intel(R) Cilk(TM) Plus reducers", - * described in file reducers.md, and particularly with @ref reducers_using, - * before trying to use the information in this file. - * - * @section redostream_usage Usage Example - * - * One of the most common debugging techniques is adding `print` statements - * to the code being debugged. When the code is parallelized, the results can - * be less than satisfactory, as output from multiple strands is mingled in an - * unpredictable way. Like other reducers, an ostream reducer requires minimal - * recoding to guarantee that the output from parallelized computation will be - * ordered the same as though the computation were executed serially. - * - * cilk::reducer r(std::cerr); - * cilk_for (int i = 0; i != data.size(); ++i) { - * *r << "Iteration " << i << ":\n"; - * ... some computation ... - * *r << " Step 1:" << some information; - * ... some more computation ... - * *r << " Step 2:" << some more information; - * ... still more computation ... - * *r << " Step 3:" << still more information; - * } - * - * Output on standard error: - * - * Iteration 1: - * Step 1: ... - * Step 2: ... - * Step 3: ... - * Iteration 2: - * Step 1: ... - * Step 2: ... - * Step 3: ... - * Iteration 3: - * Step 1: ... - * Step 2: ... - * Step 3: ... - * ... - * - * @section redostream_overview Overview - * - * An "ostream reducer" is not really a reducer. It uses the reducer - * technology to coordinate operations on parallel strands to achieve - * the same behavior in a parallel computation that would be seen in a - * serial computation, but it does not have a monoid. It has a "monoid - * class," because that is part of the implementation framework, but it - * does not represent a mathematical monoid: there is no value type, no - * associative operation, and no identity value. The reducer is used for - * its side effect rather than to construct a value. 
- * - * You might think of an ostream reducer as a relative of a - * @ref ReducersString "string reducer" which uses stream output - * syntax (`stream << value`) instead of string append syntax - * (`string += value`), and which writes its result string to an - * ostream instead of making it available as the reducer value. - * - * Another difference is that "real" reducers protect their contained - * value quite strongly from improper access by the user. Ostream reducers, - * on the other hand, pretty much have to expose the ostream, since normal - * use of an ostream involves accessing its internal state. Furthermore, - * the ostream reducer just coordinates output to an existing ostream - - * there is nothing to keep the user from writing directly to the attached - * stream, with unpredictable results. - * - * @section redostream_operations Operations - * - * In the operation descriptions below, the type name `Ostream` refers to the - * reducer's ostream type, `std::basic_ostream`. - * - * @subsection redostream_constructors Constructors - * - * The only constructor is - * - * reducer(const Ostream& os) - * - * This creates a reducer that is associated with the existing ostream `os`. - * Anything "written to" the reducer will (eventually) be written to `os`. - * - * @subsection redostream_get_set Set and Get - * - * Just as a stream does not have a "value," neither does an ostream - * reducer. Therefore, none of the usual `set_value`, `get_value`, - * `move_in`, or `move_out` functions are available for ostream reducers. - * - * @subsection redostream_initial Initial Values - * - * Ostream reducers do not have default constructors. - * - * @subsection redostream_view_ops View Operations - * - * An ostream reducer view is actually a kind of `std::ostream`. Therefore, - * any operation that can be used on an ostream can be used on an ostream - * reducer view. 
For example: - * - * reducer r(cout); - * *r << setw(5) << (x=1) << endl; - * - * - * @section redostream_performance Performance Considerations - * - * Ostream reducers work by creating a string stream for each non-leftmost - * view. When two strands are merged, the contents of the string buffer of the - * right view are written to the left view. Since all non-leftmost strands are - * eventually merged, all output is eventually written to the associated - * ostream. - * - * This implementation has two consequences. - * - * First, all output written to an ostream reducer on a stolen strand is kept - * in memory (in a string buffer) until the strand is merged with the leftmost - * strand. This means that some portion of the output written to an ostream - * reducer during a parallel computation - half of the total output, on - * average - will temporarily be held in memory during the computation. - * Obviously, ostream reducers will work better for small and moderate amounts - * of output. - * - * Second, buffered ostream reducer content must be copied at every merge. - * The total amount of copying is potentially proportional to the total amount - * of output multiplied by the number of strands stolen during the computation. - * - * In short, writing to an ostream in a parallel computation with an ostream - * reducer will always be less efficient than writing the same output directly - * to the ostream in a serial computation. The value of the ostream - * reducer is not in the writing of the ostream itself, but in removing the - * race and serialization obstacles that the ostream output would cause in an - * otherwise parallelizable computation. - * - * - * @section redostream_state Stream State - * - * The reducer implementation can correctly order the output that is written - * to an ostream. However, an ostream has additional state that controls its - * behavior, such as its formatting attributes, error state, extensible arrays, * and registered callbacks. 
If these are modified during the computation, the * reducer implementation cannot guarantee that they will be the same in a - * parallel computation as in a serial computation. In particular: - * - * - In the serial execution, the ostream state in the continuation of a - * spawn will be the same as the state at the end of the spawned function. - * In the parallel execution, if the continuation is stolen, its view will - * contain a newly created ostream with the default initial state. - * - In the serial execution, the ostream state following a sync is the same - * as the state before the sync. In the parallel execution, if the - * continuation is stolen, then the state following the sync will be the - * same as the state at the end of some spawned function. - * - * In short, you must not make any assumptions about the stream state of an - * ostream reducer: - * - * - Following a `cilk_spawn`. - * - Following a `cilk_sync`. - * - At the start of an iteration of a `cilk_for` loop. - * - Following the completion of a `cilk_for` loop. - * - * @section redostream_types Type and Operator Requirements - * - * `std::basic_ostream` must be a valid type. -*/ - -namespace cilk { - -/** @ingroup ReducersOstream */ -//@{ - -/** The ostream reducer view class. - * - * This is the view class for reducers created with - * `cilk::reducer< cilk::op_basic_ostream >`. It holds the - * actual ostream for a parallel strand, and allows only stream output - * operations to be performed on it. - * - * @note The reducer "dereference" operation (`reducer::operator *()`) - * yields a reference to the view. Thus, for example, the view - * class's `<<` operation would be used in an expression like - * `*r << "x = " << x`, where `r` is an ostream reducer. - * - * @tparam Char The ostream element type (not the ostream type). - * @tparam Traits The character traits type. 
- * - * @see ReducersOstream - * @see op_basic_ostream - */ -template -class op_basic_ostream_view : public std::basic_ostream -{ - typedef std::basic_ostream base; - typedef std::basic_ostream ostream_type; - - // A non-leftmost view is associated with a private string buffer. (The - // leftmost view is associated with the buffer of the reducer's associated - // ostream, so its private buffer is unused.) - // - std::basic_stringbuf m_buffer; - -public: - - /** Value type. Required by @ref monoid_with_view. - */ - typedef ostream_type value_type; - - /** Reduce operation. Required by @ref monoid_with_view. - */ - void reduce(op_basic_ostream_view* other) - { - // Writing an empty buffer results in failure. Testing `sgetc()` is the - // easiest way of checking for an empty buffer. - if (other->m_buffer.sgetc() != Traits::eof()) { - *this << (&other->m_buffer); - } - } - - /** Non-leftmost (identity) view constructor. The view is associated with - * its internal buffer. Required by @ref monoid_base. - */ - op_basic_ostream_view() : base(&m_buffer) {} - - /** Leftmost view constructor. The view is associated with an existing - * ostream. - */ - op_basic_ostream_view(const ostream_type& os) : base(0) - { - base::rdbuf(os.rdbuf()); // Copy stream buffer - base::flags(os.flags()); // Copy formatting flags - base::setstate(os.rdstate()); // Copy error state - } - - /** Sets/gets. - * - * These are all no-ops. - */ - //@{ - - void view_set_value(const value_type&) - { assert("set_value() is not allowed on ostream reducers" && 0); } - const value_type& view_get_value() const - { assert("get_value() is not allowed on ostream reducers" && 0); - return *this; } - typedef value_type const& return_type_for_get_value; - void view_move_in(const value_type&) - { assert("move_in() is not allowed on ostream reducers" && 0); } - void view_move_out(const value_type&) - { assert("move_out() is not allowed on ostream reducers" && 0); } - - //@} -}; - -/** Ostream monoid class. 
Instantiate the cilk::reducer template class with an - * op_basic_ostream monoid to create an ostream reducer class: - * - * cilk::reducer< cilk::op_basic_string > r; - * - * @tparam Char The stream element type (not the stream type). - * @tparam Traits The character traits type. - * - * @see ReducersOstream - * @see op_basic_ostream_view - * @see reducer_ostream - * @see op_ostream - * @see op_wostream - */ -template, - bool Align = false> -class op_basic_ostream : - public monoid_with_view< op_basic_ostream_view, Align > -{ - typedef monoid_with_view< op_basic_ostream_view, Align > - base; - typedef std::basic_ostream ostream_type; - typedef provisional_guard view_guard; - -public: - - /** View type of the monoid. - */ - typedef typename base::view_type view_type; - - /** @name Construct function. - * - * The only supported ostream reducer constructor takes a reference to - * an existing ostream. - * - * @param os The ostream destination for receive all data written to the - * reducer. - */ - static void construct( - op_basic_ostream* monoid, - view_type* view, - const ostream_type& os) - { - view_guard vg( new((void*) view) view_type(os) ); - vg.confirm_if( new((void*) monoid) op_basic_ostream ); - } -}; - - -/** - * Convenience typedef for narrow ostreams. - */ -typedef op_basic_ostream op_ostream; - -/** - * Convenience typedef for wide ostreams. - */ -typedef op_basic_ostream op_wostream; - -/// @cond internal - -class reducer_ostream; - -/** Metafunction specialization for reducer conversion. - * - * This specialization of the @ref legacy_reducer_downcast template class - * defined in reducer.h causes the `reducer >` class - * to have an `operator reducer_ostream& ()` conversion operator that - * statically downcasts the `reducer >` to - * `reducer_ostream`. (The reverse conversion, from `reducer_ostream` to - * `reducer >`, is just an upcast, which is provided - * for free by the language.) 
- */ -template -struct legacy_reducer_downcast< - reducer, Align> > > -{ - typedef reducer_ostream type; -}; - -/// @endcond - -/** Deprecated ostream reducer class. - * - * reducer_ostream is the same as @ref cilk::reducer<@ref op_ostream>, except - * that reducer_ostream is a proxy for the contained view, so that ostream - * operations can be applied directly to the reducer. For example, a number is - * written to a `reducer` with `*r << x`, but a number can be - * written to a `reducer_ostream` with `r << x`. - * - * @deprecated Users are strongly encouraged to use `reducer` - * reducers rather than the old wrappers like reducer_ostream. The - * `reducer` reducers show the reducer/monoid/view - * architecture more clearly, are more consistent in their - * implementation, and present a simpler model for new - * user-implemented reducers. - * - * @note Implicit conversions are provided between `%reducer_ostream` - * and `reducer<%op_ostream>`. This allows incremental code - * conversion: old code that used `%reducer_ostream` can pass a - * `%reducer_ostream` to a converted function that now expects a - * pointer or reference to a `reducer<%op_ostream>`, and vice versa. - * - * @tparam Char The stream element type (not the stream type). - * @tparam Traits The character traits type. - * - * @see op_ostream - * @see reducer - * @see ReducersOstream - */ -class reducer_ostream : - public reducer, true> > -{ - typedef reducer, true> > base; - using base::view; -public: - - /// The view type for the reducer. - typedef base::view_type View; - - /// The monoid type for the reducer. - typedef base::monoid_type Monoid; - - /** Constructs an initial `reducer_ostream` from a `std::ostream`. The - * specified stream is used as the eventual destination for all text - * streamed to this hyperobject. - */ - explicit reducer_ostream(const std::ostream &os) : base(os) {} - - /** Returns a modifiable reference to the underlying 'ostream' object. 
- */ - std::ostream& get_reference() { return view(); } - - /** Writes to the ostream. - */ - template - std::ostream& operator<< (const T &v) - { - return view() << v; - } - - /** - * Calls a manipulator. - * - * @param _Pfn Pointer to the manipulator function. - */ - reducer_ostream& operator<< (std::ostream &(*_Pfn)(std::ostream &)) - { - (*_Pfn)(view()); - return *this; - } - - /** @name Dereference - * @details Dereferencing a wrapper is a no-op. It simply returns the - * wrapper. Combined with the rule that the wrapper forwards view - * operations to its contained view, this means that view operations can - * be written the same way on reducers and wrappers, which is convenient - * for incrementally converting old code using wrappers to use reducers - * instead. That is: - * - * reducer r; - * *r << "a"; // *r returns the view - * // operator<<() is a view member function - * - * reducer_ostream w; - * *w << "a"; // *w returns the wrapper - * // operator<<() is a wrapper member function - * // that calls the corresponding view function - */ - //@{ - reducer_ostream& operator*() { return *this; } - reducer_ostream const& operator*() const { return *this; } - - reducer_ostream* operator->() { return this; } - reducer_ostream const* operator->() const { return this; } - //@} -}; - -} // namespace cilk - -#endif // REDUCER_OSTREAM_H_INCLUDED diff --git a/include/cilk/reducer_string.h b/include/cilk/reducer_string.h deleted file mode 100644 index 376b0bc5..00000000 --- a/include/cilk/reducer_string.h +++ /dev/null @@ -1,763 +0,0 @@ -/* reducer_string.h -*- C++ -*- - * - * Copyright (C) 2009-2018, Intel Corporation - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY - * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * ********************************************************************* - * - * PLEASE NOTE: This file is a downstream copy of a file maintained in - * a repository at cilkplus.org. Changes made to this file that are not - * submitted through the contribution process detailed at - * http://www.cilkplus.org/submit-cilk-contribution will be lost the next - * time that a new version is released. Changes only submitted to the - * GNU compiler collection or posted to the git repository at - * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime are - * not tracked. - * - * We welcome your contributions to this open source project. Thank you - * for your assistance in helping us improve Cilk Plus. 
- */ - -/** @file reducer_string.h - * - * @brief Defines classes for doing parallel string creation by appending. - * - * @ingroup ReducersString - * - * @see ReducersString - */ - -#ifndef REDUCER_STRING_H_INCLUDED -#define REDUCER_STRING_H_INCLUDED - -#include -#include -#include - -/** @defgroup ReducersString String Reducers - * - * String reducers allow the creation of a string by concatenating a set of - * strings or characters in parallel. - * - * @ingroup Reducers - * - * You should be familiar with @ref pagereducers "Intel(R) Cilk(TM) Plus reducers", - * described in file reducers.md, and particularly with @ref reducers_using, - * before trying to use the information in this file. - * - * @section redstring_usage Usage Example - * - * vector data; - * void expensive_string_computation(const Data& x, string& s); - * cilk::reducer r; - * cilk_for (int i = 0; i != data.size(); ++i) { - * string temp; - * expensive_string_computation(data[i], temp); - * *r += temp; - * } - * string result; - * r.move_out(result); - * - * @section redstring_monoid The Monoid - * - * @subsection redstring_monoid_values Value Set - * - * The value set of a string reducer is the set of values of the class - * `std::basic_string`, which we refer to as "the - * reducer's string type". - * - * @subsection redstring_monoid_operator Operator - * - * The operator of a string reducer is the string concatenation operator, - * defined by the "`+`" binary operator on the reducer's string type. - * - * @subsection redstring_monoid_identity Identity - * - * The identity value of a string reducer is the empty string, which is the - * value of the expression - * `std::basic_string([allocator])`. - * - * @section redstring_operations Operations - * - * In the operation descriptions below, the type name `String` refers to the - * reducer's string type, `std::basic_string`. 
- * - * @subsection redstring_constructors Constructors - * - * Any argument list which is valid for a `std::basic_string` constructor is - * valid for a string reducer constructor. The usual move-in constructor is - * also provided: - * - * reducer(move_in(String& variable)) - * - * @subsection redstring_get_set Set and Get - * - * r.set_value(const String& value) - * const String& = r.get_value() const - * r.move_in(String& variable) - * r.move_out(String& variable) - * - * @subsection redstring_initial Initial Values - * - * A string reducer with no constructor arguments, or with only an allocator - * argument, will initially contain the identity value, an empty string. - * - * @subsection redstring_view_ops View Operations - * - * *r += a - * r->append(a) - * r->append(a, b) - * r->push_back(a) - * - * These operations on string reducer views are the same as the corresponding - * operations on strings. - * - * @section redstring_performance Performance Considerations - * - * String reducers work by creating a string for each view, collecting those - * strings in a list, and then concatenating them into a single result string - * at the end of the computation. This last step takes place in serial code, - * and necessarily takes time proportional to the length of the result string. - * Thus, a parallel string reducer cannot actually speed up the time spent - * directly creating the string. This trivial example would probably be slower - * (because of reducer overhead) than the corresponding serial code: - * - * vector a; - * reducer r; - * cilk_for (int i = 0; i != a.length(); ++i) { - * *r += a[i]; - * } - * string result; - * r.move_out(result); - * - * What a string reducer _can_ do is to allow the _remainder_ of the - * computation to be done in parallel, without having to worry about managing - * the string computation. 
- * - * The strings for new views are created (by the view identity constructor) - * using the same allocator as the string that was created when the reducer - * was constructed. Note that this allocator is determined when the reducer is - * constructed. The following two examples may have very different behavior: - * - * string a_string; - * - * reducer< op_string reducer1(move_in(a_string)); - * ... parallel computation ... - * reducer1.move_out(a_string); - * - * reducer< op_string reducer2; - * reducer2.move_in(a_string); - * ... parallel computation ... - * reducer2.move_out(a_string); - * - * * `reducer1` will be constructed with the same allocator as `a_string`, - * because the string was specified in the constructor. The `move_in` - * and `move_out` can therefore be done with a `swap` in constant time. - * * `reducer2` will be constructed with a _default_ allocator of type - * `Allocator`, which may not be the same as the allocator of `a_string`. - * Therefore, the `move_in` and `move_out` may have to be done with a copy - * in _O(N)_ time. - * - * (All instances of an allocator type with no internal state (like - * `std::allocator`) are "the same". You only need to worry about the "same - * allocator" issue when you create string reducers with custom allocator - * types.) - * - * @section redstring_types Type and Operator Requirements - * - * `std::basic_string` must be a valid type. -*/ - -namespace cilk { - -/** @ingroup ReducersString */ -//@{ - -/** The string append reducer view class. - * - * This is the view class for reducers created with - * `cilk::reducer< cilk::op_basic_string >`. It holds - * the accumulator variable for the reduction, and allows only append - * operations to be performed on it. - * - * @note The reducer "dereference" operation (`reducer::operator *()`) - * yields a reference to the view. 
Thus, for example, the view class's - * `append` operation would be used in an expression like - * `r->append(a)`, where `r` is a string append reducer variable. - * - * @tparam Char The string element type (not the string type). - * @tparam Traits The character traits type. - * @tparam Alloc The string allocator type. - * - * @see ReducersString - * @see op_basic_string - */ -template -class op_basic_string_view -{ - typedef std::basic_string string_type; - typedef std::list list_type; - typedef typename string_type::size_type size_type; - - // The view's value is represented by a list of strings and a single - // string. The value is the concatenation of the strings in the list with - // the single string at the end. All string operations apply to the single - // string; reduce operations cause lists of partial strings from multiple - // strands to be combined. - // - mutable string_type m_string; - mutable list_type m_list; - - // Before returning the value of the reducer, concatenate all the strings - // in the list with the single string. - // - void flatten() const - { - if (m_list.empty()) return; - - typename list_type::iterator i; - - size_type len = m_string.size(); - for (i = m_list.begin(); i != m_list.end(); ++i) - len += i->size(); - - string_type result(get_allocator()); - result.reserve(len); - - for (i = m_list.begin(); i != m_list.end(); ++i) - result += *i; - m_list.clear(); - - result += m_string; - result.swap(m_string); - } - -public: - - /** @name Monoid support. - */ - //@{ - - /// Required by @ref monoid_with_view - typedef string_type value_type; - - /// Required by @ref op_string - Alloc get_allocator() const - { - return m_string.get_allocator(); - } - - /** Reduces the views of two strands. - * - * This function is invoked by the @ref op_basic_string monoid to combine - * the views of two strands when the right strand merges with the left - * one. 
It appends the value contained in the right-strand view to the - * value contained in the left-strand view, and leaves the value in the - * right-strand view undefined. - * - * @param right A pointer to the right-strand view. (`this` points to - * the left-strand view.) - * - * @note Used only by the @ref op_basic_string monoid to implement the - * monoid reduce operation. - */ - void reduce(op_basic_string_view* right) - { - if (!right->m_string.empty() || !right->m_list.empty()) { - // (list, string) + (right_list, right_string) => - // (list + {string} + right_list, right_string) - if (!m_string.empty()) { - // simulate m_list.push_back(std::move(m_string)) - m_list.push_back(string_type(get_allocator())); - m_list.back().swap(m_string); - } - m_list.splice(m_list.end(), right->m_list); - m_string.swap(right->m_string); - } - } - - //@} - - /** @name Passes constructor arguments to the string constructor. - */ - //@{ - - op_basic_string_view() : m_string() {} - - template - op_basic_string_view(const T1& x1) : m_string(x1) {} - - template - op_basic_string_view(const T1& x1, const T2& x2) : m_string(x1, x2) {} - - template - op_basic_string_view(const T1& x1, const T2& x2, const T3& x3) : m_string(x1, x2, x3) {} - - template - op_basic_string_view(const T1& x1, const T2& x2, const T3& x3, const T4& x4) : - m_string(x1, x2, x3, x4) {} - - //@} - - /** Move-in constructor. - */ - explicit op_basic_string_view(move_in_wrapper w) - : m_string(w.value().get_allocator()) - { - m_string.swap(w.value()); - } - - /** @name @ref reducer support. - */ - //@{ - - void view_move_in(string_type& s) - { - m_list.clear(); - if (m_string.get_allocator() == s.get_allocator()) - // Equal allocators. Do a (fast) swap. - m_string.swap(s); - else - // Unequal allocators. Do a (slow) copy. - m_string = s; - s.clear(); - } - - void view_move_out(string_type& s) - { - flatten(); - if (m_string.get_allocator() == s.get_allocator()) - // Equal allocators. Do a (fast) swap. 
- m_string.swap(s); - else - // Unequal allocators. Do a (slow) copy. - s = m_string; - m_string.clear(); - } - - void view_set_value(const string_type& s) - { m_list.clear(); m_string = s; } - - string_type const& view_get_value() const - { flatten(); return m_string; } - - typedef string_type const& return_type_for_get_value; - - string_type & view_get_reference() - { flatten(); return m_string; } - - string_type const& view_get_reference() const - { flatten(); return m_string; } - - //@} - - /** @name View modifier operations. - * - * @details These simply wrap the corresponding operations on the underlying string. - */ - //@{ - - template - op_basic_string_view& operator +=(const T& x) - { m_string += x; return *this; } - - template - op_basic_string_view& append(const T1& x1) - { m_string.append(x1); return *this; } - - template - op_basic_string_view& append(const T1& x1, const T2& x2) - { m_string.append(x1, x2); return *this; } - - template - op_basic_string_view& append(const T1& x1, const T2& x2, const T3& x3) - { m_string.append(x1, x2, x3); return *this; } - - void push_back(const Char x) { m_string.push_back(x); } - - //@} -}; - - -/** String append monoid class. Instantiate the cilk::reducer template class - * with an op_basic_string monoid to create a string append reducer class. For - * example, to concatenate a collection of standard strings: - * - * cilk::reducer< cilk::op_basic_string > r; - * - * @tparam Char The string element type (not the string type). - * @tparam Traits The character traits type. - * @tparam Alloc The string allocator type. - * @tparam Align If `false` (the default), reducers instantiated on this - * monoid will be naturally aligned (the Intel Cilk Plus library 1.0 - * behavior). If `true`, reducers instantiated on this monoid - * will be cache-aligned for binary compatibility with - * reducers in Intel Cilk Plus library version 0.9. 
- * - * @see ReducersString - * @see op_basic_string_view - * @see reducer_basic_string - * @see op_string - * @see op_wstring - */ -template, - typename Alloc = std::allocator, - bool Align = false> -class op_basic_string : - public monoid_with_view< op_basic_string_view, Align > -{ - typedef monoid_with_view< op_basic_string_view, Align > - base; - typedef provisional_guard view_guard; - - Alloc m_allocator; - -public: - - /** View type of the monoid. - */ - typedef typename base::view_type view_type; - - /** Constructor. - * - * There is no default constructor for string monoids, because the - * allocator must always be specified. - * - * @param allocator The list allocator to be used when - * identity-constructing new views. - */ - op_basic_string(const Alloc& allocator = Alloc()) : m_allocator(allocator) - {} - - /** Creates an identity view. - * - * String view identity constructors take the string allocator as an - * argument. - * - * @param v The address of the uninitialized memory in which the view - * will be constructed. - */ - void identity(view_type *v) const - { ::new((void*) v) view_type(m_allocator); } - - /** @name Construct functions - * - * A string append reduction monoid must have a copy of the allocator of - * the leftmost view's string, so that it can use it in the `identity` - * operation. This, in turn, requires that string reduction monoids have a - * specialized `construct()` function. - * - * All string reducer monoid `construct()` functions first construct the - * leftmost view, using the arguments that were passed in from the reducer - * constructor. They then call the view's `get_allocator()` function to - * get the string allocator from the string in the leftmost view, and pass - * that to the monoid constructor. 
- */ - //@{ - - static void construct(op_basic_string* monoid, view_type* view) - { - view_guard vg( new((void*) view) view_type() ); - vg.confirm_if( - new((void*) monoid) op_basic_string(view->get_allocator()) ); - } - - template - static void construct(op_basic_string* monoid, view_type* view, - const T1& x1) - { - view_guard vg( new((void*) view) view_type(x1) ); - vg.confirm_if( - new((void*) monoid) op_basic_string(view->get_allocator()) ); - } - - template - static void construct(op_basic_string* monoid, view_type* view, - const T1& x1, const T2& x2) - { - view_guard vg( new((void*) view) view_type(x1, x2) ); - vg.confirm_if( - new((void*) monoid) op_basic_string(view->get_allocator()) ); - } - - template - static void construct(op_basic_string* monoid, view_type* view, - const T1& x1, const T2& x2, const T3& x3) - { - view_guard vg( new((void*) view) view_type(x1, x2, x3) ); - vg.confirm_if( - new((void*) monoid) op_basic_string(view->get_allocator()) ); - } - - template - static void construct(op_basic_string* monoid, view_type* view, - const T1& x1, const T2& x2, const T3& x3, - const T4& x4) - { - view_guard vg( new((void*) view) view_type(x1, x2, x3, x4) ); - vg.confirm_if( - new((void*) monoid) op_basic_string(view->get_allocator()) ); - } - - //@} -}; - - -/** Convenience typedef for 8-bit strings - */ -typedef op_basic_string op_string; - -/** Convenience typedef for 16-bit strings - */ -typedef op_basic_string op_wstring; - - -/** Deprecated string append reducer class. - * - * reducer_basic_string is the same as @ref reducer<@ref op_basic_string>, - * except that reducer_basic_string is a proxy for the contained view, so that - * accumulator variable update operations can be applied directly to the - * reducer. For example, a value is appended to a `reducer<%op_basic_string>` - * with `r->push_back(a)`, but a value can be appended to a `%reducer_opand` - * with `r.push_back(a)`. 
- * - * @deprecated Users are strongly encouraged to use `reducer` - * reducers rather than the old wrappers like reducer_basic_string. - * The `reducer` reducers show the reducer/monoid/view - * architecture more clearly, are more consistent in their - * implementation, and present a simpler model for new - * user-implemented reducers. - * - * @note Implicit conversions are provided between `%reducer_basic_string` - * and `reducer<%op_basic_string>`. This allows incremental code - * conversion: old code that used `%reducer_basic_string` can pass a - * `%reducer_basic_string` to a converted function that now expects a - * pointer or reference to a `reducer<%op_basic_string>`, and vice - * versa. - * - * @tparam Char The string element type (not the string type). - * @tparam Traits The character traits type. - * @tparam Alloc The string allocator type. - * - * @see op_basic_string - * @see reducer - * @see ReducersString - */ -template, - typename Alloc = std::allocator > -class reducer_basic_string : - public reducer< op_basic_string > -{ - typedef reducer< op_basic_string > base; - using base::view; -public: - - /// The reducer's string type. - typedef typename base::value_type string_type; - - /// The reducer's primitive component type. - typedef Char basic_value_type; - - /// The string size type. - typedef typename string_type::size_type size_type; - - /// The view type for the reducer. - typedef typename base::view_type View; - - /// The monoid type for the reducer. - typedef typename base::monoid_type Monoid; - - - /** @name Constructors - */ - //@{ - - /** @name Forward constructor calls to the base class. - * - * All basic_string constructor forms are supported. 
- */ - //@{ - reducer_basic_string() {} - - template - reducer_basic_string(const T1& x1) : - base(x1) {} - - template - reducer_basic_string(const T1& x1, const T2& x2) : - base(x1, x2) {} - - template - reducer_basic_string(const T1& x1, const T2& x2, const T3& x3) : - base(x1, x2, x3) {} - - template - reducer_basic_string(const T1& x1, const T2& x2, const T3& x3, const T4& x4) : - base(x1, x2, x3, x4) {} - //@} - - /** Allows mutable access to the string within the current view. - * - * @warning If this method is called before the parallel calculation is - * complete, the string returned by this method will be a - * partial result. - * - * @returns A mutable reference to the string within the current view. - */ - string_type &get_reference() - { return view().view_get_reference(); } - - /** Allows read-only access to the string within the current view. - * - * @warning If this method is called before the parallel calculation is - * complete, the string returned by this method will be a - * partial result. - * - * @returns A const reference to the string within the current view. - */ - string_type const &get_reference() const - { return view().view_get_reference(); } - - /** @name Appends to the string. - * - * These operations are simply forwarded to the view. 
- */ - //@{ - void append(const Char *ptr) - { view().append(ptr); } - void append(const Char *ptr, size_type count) - { view().append(ptr, count); } - void append(const string_type &str, size_type offset, size_type count) - { view().append(str, offset, count); } - void append(const string_type &str) - { view().append(str); } - void append(size_type count, Char ch) - { view().append(count, ch); } - - // Appends to the string - reducer_basic_string &operator+=(Char ch) - { view() += ch; return *this; } - reducer_basic_string &operator+=(const Char *ptr) - { view() += ptr; return *this; } - reducer_basic_string &operator+=(const string_type &right) - { view() += right; return *this; } - //@} - - /** @name Dereference - * @details Dereferencing a wrapper is a no-op. It simply returns the - * wrapper. Combined with the rule that the wrapper forwards view - * operations to its contained view, this means that view operations can - * be written the same way on reducers and wrappers, which is convenient - * for incrementally converting old code using wrappers to use reducers - * instead. That is: - * - * reducer r; - * r->push_back(a); // r-> returns the view - * // push_back() is a view member function - * - * reducer_string w; - * w->push_back(a); // *w returns the wrapper - * // push_back() is a wrapper member function - * // that calls the corresponding view function - */ - //@{ - reducer_basic_string& operator*() { return *this; } - reducer_basic_string const& operator*() const { return *this; } - - reducer_basic_string* operator->() { return this; } - reducer_basic_string const* operator->() const { return this; } - //@} - - /** @name Upcast - * @details In Intel Cilk Plus library 0.9, reducers were always cache-aligned. - * In library 1.0, reducer cache alignment is optional. By default, - * reducers are unaligned (i.e., just naturally aligned), but legacy - * wrappers inherit from cache-aligned reducers for binary compatibility. 
- * - * This means that a wrapper will automatically be upcast to its aligned - * reducer base class. The following conversion operators provide - * pseudo-upcasts to the corresponding unaligned reducer class. - */ - //@{ - operator reducer< op_basic_string >& () - { - return *reinterpret_cast< reducer< - op_basic_string >* - >(this); - } - operator const reducer< op_basic_string >& () const - { - return *reinterpret_cast< const reducer< - op_basic_string >* - >(this); - } - //@} -}; - - -/** Convenience typedef for 8-bit strings - */ -typedef reducer_basic_string reducer_string; - -/** Convenience typedef for 16-bit strings - */ -typedef reducer_basic_string reducer_wstring; - -/// @cond internal - -/// @cond internal -/** Metafunction specialization for reducer conversion. - * - * This specialization of the @ref legacy_reducer_downcast template class - * defined in reducer.h causes the `reducer< op_basic_string >` class to - * have an `operator reducer_basic_string& ()` conversion operator that - * statically downcasts the `reducer` to the corresponding - * `reducer_basic_string` type. (The reverse conversion, from - * `reducer_basic_string` to `reducer`, is just an upcast, - * which is provided for free by the language.) - * - * @ingroup ReducersString - */ -template -struct legacy_reducer_downcast< - reducer > > -{ - typedef reducer_basic_string type; -}; - -/// @endcond - -//@} - -} // namespace cilk - -#endif // REDUCER_STRING_H_INCLUDED diff --git a/include/cilk/reducer_vector.h b/include/cilk/reducer_vector.h deleted file mode 100644 index a5f00419..00000000 --- a/include/cilk/reducer_vector.h +++ /dev/null @@ -1,533 +0,0 @@ -/* reducer_vector.h -*- C++ -*- - * - * Copyright (C) 2009-2018, Intel Corporation - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY - * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * ********************************************************************* - * - * PLEASE NOTE: This file is a downstream copy of a file maintained in - * a repository at cilkplus.org. Changes made to this file that are not - * submitted through the contribution process detailed at - * http://www.cilkplus.org/submit-cilk-contribution will be lost the next - * time that a new version is released. 
Changes only submitted to the - * GNU compiler collection or posted to the git repository at - * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime are - * not tracked. - * - * We welcome your contributions to this open source project. Thank you - * for your assistance in helping us improve Cilk Plus. - */ - -/** @file reducer_vector.h - * - * @brief Defines classes for doing parallel vector creation by appending. - * - * @ingroup ReducersVector - * - * @see ReducersVector - */ - -#ifndef REDUCER_VECTOR_H_INCLUDED -#define REDUCER_VECTOR_H_INCLUDED - -#include -#include -#include - -/** @defgroup ReducersVector Vector Reducers - * - * Vector reducers allow the creation of a standard vector by - * appending a set of elements in parallel. - * - * @ingroup Reducers - * - * You should be familiar with @ref pagereducers "Intel(R) Cilk(TM) Plus reducers", - * described in file `reducers.md`, and particularly with @ref reducers_using, - * before trying to use the information in this file. - * - * @section redvector_usage Usage Example - * - * typedef ... SourceData; - * typedef ... ResultData; - * vector input; - * ResultData expensive_computation(const SourceData& x); - * cilk::reducer< cilk::op_vector > r; - * cilk_for (int i = 0; i != input.size(); ++i) { - * r->push_back(expensive_computation(input[i])); - * } - * vector result; - * r.move_out(result); - * - * @section redvector_monoid The Monoid - * - * @subsection redvector_monoid_values Value Set - * - * The value set of a vector reducer is the set of values of the class - * `std::vector`, which we refer to as "the reducer's vector - * type". - * - * @subsection redvector_monoid_operator Operator - * - * The operator of a vector reducer is vector concatenation. - * - * @subsection redvector_monoid_identity Identity - * - * The identity value of a vector reducer is the empty vector, which is the - * value of the expression `std::vector([allocator])`. 
- * - * @section redvector_operations Operations - * - * In the operation descriptions below, the type name `Vector` refers to - * the reducer's vector type, `std::vector`. - * - * @subsection redvector_constructors Constructors - * - * Any argument list which is valid for a `std::vector` constructor is valid - * for a vector reducer constructor. The usual move-in constructor is also - * provided: - * - * reducer(move_in(Vector& variable)) - * - * @subsection redvector_get_set Set and Get - * - * void r.set_value(const Vector& value) - * const Vector& = r.get_value() const - * void r.move_in(Vector& variable) - * void r.move_out(Vector& variable) - * - * @subsection redvector_initial Initial Values - * - * A vector reducer with no constructor arguments, or with only an allocator - * argument, will initially contain the identity value, an empty vector. - * - * @subsection redvector_view_ops View Operations - * - * The view of a vector reducer provides the following member functions: - * - * void push_back(const Type& element) - * void insert_back(const Type& element) - * void insert_back(Vector::size_type n, const Type& element) - * template void insert_back(Iter first, Iter last) - * - * The `push_back` functions is the same as the corresponding `std::vector` - * function. The `insert_back` function is the same as the `std::vector` - * `insert` function, with the first parameter fixed to the end of the vector. - * - * @section redvector_performance Performance Considerations - * - * Vector reducers work by creating a vector for each view, collecting those - * vectors in a list, and then concatenating them into a single result vector - * at the end of the computation. This last step takes place in serial code, - * and necessarily takes time proportional to the length of the result vector. - * Thus, a parallel vector reducer cannot actually speed up the time spent - * directly creating the vector. 
This trivial example would probably be slower - * (because of reducer overhead) than the corresponding serial code: - * - * vector a; - * reducer > r; - * cilk_for (int i = 0; i != a.length(); ++i) { - * r->push_back(a[i]); - * } - * vector result; - * r.move_out(result); - * - * What a vector reducer _can_ do is to allow the _remainder_ of the - * computation to be done in parallel, without having to worry about - * managing the vector computation. - * - * The vectors for new views are created (by the view identity constructor) - * using the same allocator as the vector that was created when the reducer - * was constructed. Note that this allocator is determined when the reducer - * is constructed. The following two examples may have very different - * behavior: - * - * vector a_vector; - * - * reducer< op_vector reducer1(move_in(a_vector)); - * ... parallel computation ... - * reducer1.move_out(a_vector); - * - * reducer< op_vector reducer2; - * reducer2.move_in(a_vector); - * ... parallel computation ... - * reducer2.move_out(a_vector); - * - * * `reducer1` will be constructed with the same allocator as `a_vector`, - * because the vector was specified in the constructor. The `move_in` - * and`move_out` can therefore be done with a `swap` in constant time. - * * `reducer2` will be constructed with a _default_ allocator of type - * `Allocator`, which may not be the same as the allocator of `a_vector`. - * Therefore, the `move_in` and `move_out` may have to be done with a - * copy in _O(N)_ time. - * - * (All instances of an allocator class with no internal state (like - * `std::allocator`) are "the same". You only need to worry about the "same - * allocator" issue when you create vector reducers with a custom allocator - * class that has data members.) - * - * @section redvector_types Type and Operator Requirements - * - * `std::vector` must be a valid type. 
-*/ - -namespace cilk { - -/** @ingroup ReducersVector */ -//@{ - -/** @brief The vector reducer view class. - * - * This is the view class for reducers created with - * `cilk::reducer< cilk::op_vector >`. It holds the - * accumulator variable for the reduction, and allows only append operations - * to be performed on it. - * - * @note The reducer "dereference" operation (`reducer::operator *()`) - * yields a reference to the view. Thus, for example, the view - * class's `push_back` operation would be used in an expression like - * `r->push_back(a)`, where `r` is a vector reducer variable. - * - * @tparam Type The vector element type (not the vector type). - * @tparam Alloc The vector allocator type. - * - * @see @ref ReducersVector - * @see op_vector - */ -template -class op_vector_view -{ - typedef std::vector vector_type; - typedef std::list::other> - list_type; - typedef typename vector_type::size_type size_type; - - // The view's value is represented by a list of vectors and a single - // vector. The value is the concatenation of the vectors in the list with - // the single vector at the end. All vector operations apply to the single - // vector; reduce operations cause lists of partial vectors from multiple - // strands to be combined. - // - mutable vector_type m_vector; - mutable list_type m_list; - - // Before returning the value of the reducer, concatenate all the vectors - // in the list with the single vector. - // - void flatten() const - { - if (m_list.empty()) return; - - typename list_type::iterator i; - - size_type len = m_vector.size(); - for (i = m_list.begin(); i != m_list.end(); ++i) - len += i->size(); - - vector_type result(get_allocator()); - result.reserve(len); - - for (i = m_list.begin(); i != m_list.end(); ++i) - result.insert(result.end(), i->begin(), i->end()); - m_list.clear(); - - result.insert(result.end(), m_vector.begin(), m_vector.end()); - result.swap(m_vector); - } - -public: - - /** @name Monoid support. 
- */ - //@{ - - /// Required by cilk::monoid_with_view - typedef vector_type value_type; - - /// Required by @ref op_vector - Alloc get_allocator() const - { - return m_vector.get_allocator(); - } - - /** Reduces the views of two strands. - * - * This function is invoked by the @ref op_vector monoid to combine - * the views of two strands when the right strand merges with the left - * one. It appends the value contained in the right-strand view to the - * value contained in the left-strand view, and leaves the value in the - * right-strand view undefined. - * - * @param other A pointer to the right-strand view. (`this` points to - * the left-strand view.) - * - * @note Used only by the @ref op_vector monoid to implement the - * monoid reduce operation. - */ - void reduce(op_vector_view* other) - { - if (!other->m_vector.empty() || !other->m_list.empty()) { - // (list, string) + (other_list, other_string) => - // (list + {string} + other_list, other_string) - if (!m_vector.empty()) { - // simulate m_list.push_back(std::move(m_vector)) - m_list.push_back(vector_type(get_allocator())); - m_list.back().swap(m_vector); - } - m_list.splice(m_list.end(), other->m_list); - m_vector.swap(other->m_vector); - } - } - - //@} - - /** @name Passes constructor arguments to the vector constructor. - */ - //@{ - - op_vector_view() : - m_vector(), m_list(get_allocator()) {} - - template - op_vector_view(const T1& x1) : - m_vector(x1), m_list(get_allocator()) {} - - template - op_vector_view(const T1& x1, const T2& x2) : - m_vector(x1, x2), m_list(get_allocator()) {} - - template - op_vector_view(const T1& x1, const T2& x2, const T3& x3) : - m_vector(x1, x2, x3), m_list(get_allocator()) {} - - template - op_vector_view(const T1& x1, const T2& x2, const T3& x3, const T4& x4) : - m_vector(x1, x2, x3, x4), m_list(get_allocator()) {} - - //@} - - /** Move-in constructor. 
- */ - explicit op_vector_view(cilk::move_in_wrapper w) : - m_vector(w.value().get_allocator()), - m_list(w.value().get_allocator()) - { - m_vector.swap(w.value()); - } - - /** @name Reducer support. - */ - //@{ - - void view_move_in(vector_type& v) - { - m_list.clear(); - if (get_allocator() == v.get_allocator()) { - // Equal allocators. Do a (fast) swap. - m_vector.swap(v); - } - else { - // Unequal allocators. Do a (slow) copy. - m_vector = v; - } - v.clear(); - } - - void view_move_out(vector_type& v) - { - flatten(); - if (get_allocator() == v.get_allocator()) { - // Equal allocators. Do a (fast) swap. - m_vector.swap(v); - } - else { - // Unequal allocators. Do a (slow) copy. - v = m_vector; - m_vector.clear(); - } - } - - void view_set_value(const vector_type& v) - { - m_list.clear(); - m_vector = v; - } - - vector_type const& view_get_value() const - { - flatten(); - return m_vector; - } - - typedef vector_type const& return_type_for_get_value; - - //@} - - /** @name View modifier operations. - * - * @details These simply wrap the corresponding operations on the - * underlying vector. - */ - //@{ - - /** Adds an element at the end of the list. - * - * Equivalent to `vector.push_back(…)` - */ - void push_back(const Type x) - { - m_vector.push_back(x); - } - - /** @name Insert elements at the end of the vector. - * - * Equivalent to `vector.insert(vector.end(), …)` - */ - //@{ - - void insert_back(const Type& element) - { m_vector.insert(m_vector.end(), element); } - - void insert_back(typename vector_type::size_type n, const Type& element) - { m_vector.insert(m_vector.end(), n, element); } - - template - void insert_back(Iter first, Iter last) - { m_vector.insert(m_vector.end(), first, last); } - - //@} - - //@} -}; - - -/** @brief The vector append monoid class. - * - * Instantiate the cilk::reducer template class with an op_vector monoid to - * create a vector reducer class. 
For example, to concatenate a - * collection of integers: - * - * cilk::reducer< cilk::op_vector > r; - * - * @tparam Type The vector element type (not the vector type). - * @tparam Alloc The vector allocator type. - * - * @see ReducersVector - * @see op_vector_view - * @ingroup ReducersVector - */ -template > -class op_vector : - public cilk::monoid_with_view< op_vector_view, false > -{ - typedef cilk::monoid_with_view< op_vector_view, false > base; - typedef provisional_guard view_guard; - - // The allocator to be used when constructing new views. - Alloc m_allocator; - -public: - - /// View type. - typedef typename base::view_type view_type; - - /** Constructor. - * - * There is no default constructor for vector monoids, because the - * allocator must always be specified. - * - * @param allocator The list allocator to be used when - * identity-constructing new views. - */ - op_vector(const Alloc& allocator = Alloc()) : m_allocator(allocator) {} - - /** Creates an identity view. - * - * Vector view identity constructors take the vector allocator as an - * argument. - * - * @param v The address of the uninitialized memory in which the view - * will be constructed. - */ - void identity(view_type *v) const - { - ::new((void*) v) view_type(m_allocator); - } - - /** @name construct functions - * - * A vector append monoid must have a copy of the allocator of - * the leftmost view's vector, so that it can use it in the `identity` - * operation. This, in turn, requires that vector append monoids have a - * specialized `construct()` function. - * - * All vector append monoid `construct()` functions first construct the - * leftmost view, using the arguments that were passed in from the reducer - * constructor. They then call the view's `get_allocator()` function to - * get the vector allocator from the vector in the leftmost view, and pass - * that to the monoid constructor. 
- */ - //@{ - - static void construct(op_vector* monoid, view_type* view) - { - view_guard vg( new((void*) view) view_type() ); - vg.confirm_if( new((void*) monoid) op_vector(view->get_allocator()) ); - } - - template - static void construct(op_vector* monoid, view_type* view, const T1& x1) - { - view_guard vg( new((void*) view) view_type(x1) ); - vg.confirm_if( new((void*) monoid) op_vector(view->get_allocator()) ); - } - - template - static void construct(op_vector* monoid, view_type* view, - const T1& x1, const T2& x2) - { - view_guard vg( new((void*) view) view_type(x1, x2) ); - vg.confirm_if( new((void*) monoid) op_vector(view->get_allocator()) ); - } - - template - static void construct(op_vector* monoid, view_type* view, - const T1& x1, const T2& x2, const T3& x3) - { - view_guard vg( new((void*) view) view_type(x1, x2, x3) ); - vg.confirm_if( new((void*) monoid) op_vector(view->get_allocator()) ); - } - - //@} -}; - - -} // namespace cilk - -#endif // REDUCER_VECTOR_H_INCLUDED diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt index 42235995..19b47769 100644 --- a/runtime/CMakeLists.txt +++ b/runtime/CMakeLists.txt @@ -2,7 +2,6 @@ set(CHEETAH_LIB_CMAKEFILES_DIR "${CMAKE_CURRENT_BINARY_DIR}${CMAKE_FILES_DIRECTO # Get sources set(CHEETAH_SOURCES - c_reducers.c cilk2c.c cilk2c_inlined.c cilkred_map.c @@ -10,9 +9,12 @@ set(CHEETAH_SOURCES fiber.c fiber-pool.c global.c + hypertable.c init.c internal-malloc.c + pedigree_globals.c personality.c + reducer_api.c reducer_impl.c sched_stats.c scheduler.c @@ -24,8 +26,8 @@ set(CHEETAH_ABI_SOURCE cilk2c_inlined.c ) -set(CHEETAH_PEDIGREE_GLOBALS_SOURCES - pedigree_globals.c +set(CHEETAH_PEDIGREE_LIB_SOURCES + pedigree_lib.c ) set(CHEETAH_PERSONALITY_C_SOURCES @@ -52,6 +54,8 @@ set(CHEETAH_DYNAMIC_LIBS ${CHEETAH_COMMON_LIBS}) add_flags_if_supported(-g3) add_flags_if_supported(-Wno-covered-switch-default) +add_flags_if_supported(-fdebug-default-version=4) +add_flags_if_supported(-Werror=int-conversion) if 
(CHEETAH_HAS_FOMIT_FRAME_POINTER_FLAG) set_source_files_properties(invoke-main.c PROPERTIES COMPILE_FLAGS -fno-omit-frame-pointer) endif() @@ -70,8 +74,6 @@ set(CHEETAH_BITCODE_ABI_COMPILE_DEFS ${CHEETAH_COMPILE_DEFS} "CHEETAH_INTERNAL=" "CHEETAH_INTERNAL_NORETURN=__attribute__((noreturn))" "CILK_DEBUG=0") -set(CHEETAH_BITCODE_PEDIGREE_ABI_COMPILE_DEFS ${CHEETAH_BITCODE_ABI_COMPILE_DEFS} - "ENABLE_CILKRTS_PEDIGREE=1") # Set compile flags, compile defs, and link flags for ASan build set(CHEETAH_ASAN_COMPILE_FLAGS ${CHEETAH_COMPILE_FLAGS}) @@ -80,9 +82,6 @@ set(CHEETAH_ASAN_LINK_FLAGS ${CHEETAH_LINK_FLAGS} -fsanitize=address) set(CHEETAH_BITCODE_ABI_ASAN_COMPILE_FLAGS ${CHEETAH_BITCODE_ABI_COMPILE_FLAGS}) set(CHEETAH_BITCODE_ABI_ASAN_COMPILE_DEFS ${CHEETAH_BITCODE_ABI_COMPILE_DEFS} "CILK_ENABLE_ASAN_HOOKS=1") -set(CHEETAH_BITCODE_PEDIGREE_ABI_ASAN_COMPILE_DEFS - ${CHEETAH_BITCODE_PEDIGREE_ABI_COMPILE_DEFS} - "CILK_ENABLE_ASAN_HOOKS=1") set(CHEETAH_BUILD_ASAN_VER OFF) if (CHEETAH_ENABLE_ASAN AND (CHEETAH_HAS_ASAN OR TARGET asan)) @@ -104,14 +103,6 @@ if (APPLE) DEFS ${CHEETAH_BITCODE_ABI_COMPILE_DEFS} PARENT_TARGET cheetah) - add_cheetah_bitcode(opencilk-pedigrees-abi - OS ${CHEETAH_SUPPORTED_OS} - ARCHS ${CHEETAH_SUPPORTED_ARCH} - SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/${CHEETAH_ABI_SOURCE} - CFLAGS ${CHEETAH_BITCODE_ABI_COMPILE_FLAGS} - DEFS ${CHEETAH_BITCODE_PEDIGREE_ABI_COMPILE_DEFS} - PARENT_TARGET cheetah) - if (CHEETAH_BUILD_ASAN_VER) add_cheetah_bitcode(opencilk-asan-abi OS ${CHEETAH_SUPPORTED_OS} @@ -120,14 +111,6 @@ if (APPLE) CFLAGS ${CHEETAH_BITCODE_ABI_ASAN_COMPILE_FLAGS} DEFS ${CHEETAH_BITCODE_ABI_ASAN_COMPILE_DEFS} PARENT_TARGET cheetah) - - add_cheetah_bitcode(opencilk-pedigrees-asan-abi - OS ${CHEETAH_SUPPORTED_OS} - ARCHS ${CHEETAH_SUPPORTED_ARCH} - SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/${CHEETAH_ABI_SOURCE} - CFLAGS ${CHEETAH_BITCODE_ABI_ASAN_COMPILE_FLAGS} - DEFS ${CHEETAH_BITCODE_PEDIGREE_ABI_ASAN_COMPILE_DEFS} - PARENT_TARGET cheetah) endif() if 
(CHEETAH_ENABLE_SHARED) @@ -174,7 +157,7 @@ if (APPLE) SHARED OS ${CHEETAH_SUPPORTED_OS} ARCHS ${CHEETAH_SUPPORTED_ARCH} - SOURCES ${CHEETAH_PEDIGREE_GLOBALS_SOURCES} + SOURCES ${CHEETAH_PEDIGREE_LIB_SOURCES} CFLAGS ${CHEETAH_COMPILE_FLAGS} LINK_FLAGS ${CHEETAH_LINK_FLAGS} LINK_LIBS ${CHEETAH_DYNAMIC_LIBS} @@ -226,7 +209,7 @@ if (APPLE) SHARED OS ${CHEETAH_SUPPORTED_OS} ARCHS ${CHEETAH_SUPPORTED_ARCH} - SOURCES ${CHEETAH_PEDIGREE_GLOBALS_SOURCES} + SOURCES ${CHEETAH_PEDIGREE_LIB_SOURCES} CFLAGS ${CHEETAH_ASAN_COMPILE_FLAGS} LINK_FLAGS ${CHEETAH_ASAN_LINK_FLAGS} LINK_LIBS ${CHEETAH_DYNAMIC_LIBS} @@ -275,7 +258,7 @@ if (APPLE) STATIC OS ${CHEETAH_SUPPORTED_OS} ARCHS ${CHEETAH_SUPPORTED_ARCH} - SOURCES ${CHEETAH_PEDIGREE_GLOBALS_SOURCES} + SOURCES ${CHEETAH_PEDIGREE_LIB_SOURCES} CFLAGS ${CHEETAH_COMPILE_FLAGS} LINK_FLAGS ${CHEETAH_LINK_FLAGS} LINK_LIBS ${CHEETAH_COMMON_LIBS} @@ -320,7 +303,7 @@ if (APPLE) STATIC OS ${CHEETAH_SUPPORTED_OS} ARCHS ${CHEETAH_SUPPORTED_ARCH} - SOURCES ${CHEETAH_PEDIGREE_GLOBALS_SOURCES} + SOURCES ${CHEETAH_PEDIGREE_LIB_SOURCES} CFLAGS ${CHEETAH_ASAN_COMPILE_FLAGS} LINK_FLAGS ${CHEETAH_ASAN_LINK_FLAGS} LINK_LIBS ${CHEETAH_COMMON_LIBS} @@ -337,13 +320,6 @@ else() # Not APPLE DEFS ${CHEETAH_BITCODE_ABI_COMPILE_DEFS} PARENT_TARGET cheetah) - add_cheetah_bitcode(opencilk-pedigrees-abi - ARCHS ${arch} - SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/${CHEETAH_ABI_SOURCE} - CFLAGS ${CHEETAH_BITCODE_ABI_COMPILE_FLAGS} - DEFS ${CHEETAH_BITCODE_PEDIGREE_ABI_COMPILE_DEFS} - PARENT_TARGET cheetah) - if (CHEETAH_BUILD_ASAN_VER) add_cheetah_bitcode(opencilk-asan-abi ARCHS ${arch} @@ -351,13 +327,6 @@ else() # Not APPLE CFLAGS ${CHEETAH_BITCODE_ABI_ASAN_COMPILE_FLAGS} DEFS ${CHEETAH_BITCODE_ABI_ASAN_COMPILE_DEFS} PARENT_TARGET cheetah) - - add_cheetah_bitcode(opencilk-pedigrees-asan-abi - ARCHS ${arch} - SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/${CHEETAH_ABI_SOURCE} - CFLAGS ${CHEETAH_BITCODE_ABI_ASAN_COMPILE_FLAGS} - DEFS 
${CHEETAH_BITCODE_PEDIGREE_ABI_ASAN_COMPILE_DEFS} - PARENT_TARGET cheetah) endif() if (CHEETAH_ENABLE_SHARED) @@ -400,7 +369,7 @@ else() # Not APPLE add_cheetah_runtime(opencilk-pedigrees SHARED ARCHS ${arch} - SOURCES ${CHEETAH_PEDIGREE_GLOBALS_SOURCES} + SOURCES ${CHEETAH_PEDIGREE_LIB_SOURCES} CFLAGS ${CHEETAH_COMPILE_FLAGS} LINK_FLAGS ${CHEETAH_LINK_FLAGS} LINK_LIBS ${CHEETAH_DYNAMIC_LIBS} @@ -449,7 +418,7 @@ else() # Not APPLE add_cheetah_runtime(opencilk-pedigrees-asan SHARED ARCHS ${arch} - SOURCES ${CHEETAH_PEDIGREE_GLOBALS_SOURCES} + SOURCES ${CHEETAH_PEDIGREE_LIB_SOURCES} CFLAGS ${CHEETAH_ASAN_COMPILE_FLAGS} LINK_FLAGS ${CHEETAH_ASAN_LINK_FLAGS} LINK_LIBS ${CHEETAH_DYNAMIC_LIBS} @@ -493,7 +462,7 @@ else() # Not APPLE add_cheetah_runtime(opencilk-pedigrees STATIC ARCHS ${arch} - SOURCES ${CHEETAH_PEDIGREE_GLOBALS_SOURCES} + SOURCES ${CHEETAH_PEDIGREE_LIB_SOURCES} CFLAGS ${CHEETAH_COMPILE_FLAGS} LINK_FLAGS ${CHEETAH_LINK_FLAGS} LINK_LIBS ${CHEETAH_COMMON_LIBS} @@ -534,7 +503,7 @@ else() # Not APPLE add_cheetah_runtime(opencilk-pedigrees-asan STATIC ARCHS ${arch} - SOURCES ${CHEETAH_PEDIGREE_GLOBALS_SOURCES} + SOURCES ${CHEETAH_PEDIGREE_LIB_SOURCES} CFLAGS ${CHEETAH_ASAN_COMPILE_FLAGS} LINK_FLAGS ${CHEETAH_ASAN_LINK_FLAGS} LINK_LIBS ${CHEETAH_COMMON_LIBS} diff --git a/runtime/Makefile b/runtime/Makefile index 7c8f7947..3b9163c4 100644 --- a/runtime/Makefile +++ b/runtime/Makefile @@ -8,17 +8,19 @@ REDUCER_DEF = -DREDUCER_MODULE RESOURCE_DIR=$(realpath ..) 
MAIN = $(RTS_LIBDIR)/$(RTS_LIB) BITCODE_ABI = $(MAIN)-abi.bc -SRCS = $(filter-out $(PERSON_C_SRC).c, $(filter-out $(PERSON_CPP_SRC).c, $(wildcard *.c))) +SRCS = $(filter-out pedigree_ext.c, $(filter-out $(PEDIGREE_LIB_SRC).c, $(filter-out $(PERSON_C_SRC).c, $(filter-out $(PERSON_CPP_SRC).c, $(wildcard *.c))))) HDRS = $(wildcard *.h) OBJS = $(patsubst %.c,./build/%.o,$(SRCS)) INCLUDES = -I../include/ DEFINES = $(REDUCER_DEF) $(ABI_DEF) $(ALERT_DEF) -OPTIONS = $(OPT) $(DBG) $(ARCH) -Werror -Wall -fpic $(DEFINES) $(INCLUDES) +OPTIONS = $(OPT) $(DBG) $(ARCH) -Werror -Wall -fpic $(DEFINES) $(INCLUDES) -gdwarf-4 PERSON_C = $(RTS_LIBDIR)/$(RTS_C_PERSONALITY_LIB) PERSON_CPP = $(RTS_LIBDIR)/$(RTS_CXX_PERSONALITY_LIB) PERSON_C_SRC = personality-c PERSON_CPP_SRC = personality-cpp +PEDIGREE_LIB = $(RTS_LIBDIR)/$(RTS_PEDIGREE_LIB) +PEDIGREE_LIB_SRC = pedigree_lib .PHONY: all clean build @@ -42,6 +44,12 @@ $(PERSON_CPP).a: build/$(PERSON_CPP_SRC).o $(PERSON_CPP).so: ./build/$(PERSON_CPP_SRC).o $(CC) -shared -o $@ $^ +$(PEDIGREE_LIB).a: build/$(PEDIGREE_LIB_SRC).o + ar rcs $@ $^ + +$(PEDIGREE_LIB).so: ./build/$(PEDIGREE_LIB_SRC).o + $(CC) -shared -o $@ $^ + build: mkdir -p $@ @@ -51,15 +59,8 @@ $(RTS_LIBDIR): build/%.o: %.c $(HDRS) $(CC) -c $(OPTIONS) -o $@ $< - -build/pedigree_globals.a: build/pedigree_globals.o - ar rcs $@ $^ - -build/libpedigree_globals.so: build/pedigree_globals.o - $(CC) -shared -o $@ $^ - build/cilk2c_inlined.bc: cilk2c_inlined.c $(HDRS) - $(CC) -O3 -DCHEETAH_API="" -DCHEETAH_INTERNAL_NORETURN='__attribute__((noreturn))' -DCHEETAH_INTERNAL="" -DCILK_DEBUG=0 -DENABLE_CILKRTS_PEDIGREE=1 -c -emit-llvm $(INCLUDES) -o $@ $< + $(CC) -O3 -DCHEETAH_API="" -DCHEETAH_INTERNAL_NORETURN='__attribute__((noreturn))' -DCHEETAH_INTERNAL="" -DCILK_DEBUG=0 -c -emit-llvm $(INCLUDES) -gdwarf-4 -g -o $@ $< $(BITCODE_ABI) : build/cilk2c_inlined.bc cp $< $@ diff --git a/runtime/c_reducers.c b/runtime/c_reducers.c deleted file mode 100644 index f13597cd..00000000 --- 
a/runtime/c_reducers.c +++ /dev/null @@ -1,7 +0,0 @@ -#define CILK_C_DEFINE_REDUCERS -#include -#include -#include -#include -#include -#include diff --git a/runtime/cilk-internal.h b/runtime/cilk-internal.h index e4371713..a3e0f67c 100644 --- a/runtime/cilk-internal.h +++ b/runtime/cilk-internal.h @@ -12,11 +12,13 @@ extern "C" { #include "debug.h" #include "fiber.h" +#include "frame.h" #include "internal-malloc.h" #include "jmpbuf.h" #include "rts-config.h" #include "sched_stats.h" #include "types.h" +#include "worker.h" #if defined __i386__ || defined __x86_64__ #ifdef __SSE__ @@ -28,186 +30,6 @@ struct global_state; typedef struct global_state global_state; typedef struct local_state local_state; -//=============================================== -// Cilk stack frame related defs -//=============================================== - - - -/** - * Every spawning function has a frame descriptor. A spawning function - * is a function that spawns or detaches. Only spawning functions - * are visible to the Cilk runtime. - */ -struct __cilkrts_stack_frame { - // Flags is a bitfield with values defined below. Client code - // initializes flags to 0 before the first Cilk operation. - uint32_t flags; - // The magic number includes the ABI version and a hash of the - // layout of this structure. - uint32_t magic; - - // call_parent points to the __cilkrts_stack_frame of the closest - // ancestor spawning function, including spawn helpers, of this frame. - // It forms a linked list ending at the first stolen frame. - __cilkrts_stack_frame *call_parent; - - // The client copies the worker from TLS here when initializing - // the structure. The runtime ensures that the field always points - // to the __cilkrts_worker which currently "owns" the frame. - // - // TODO: Remove this pointer? This pointer only seems to be needed for - // debugging purposes. 
When the worker structure is genuinely needed, it - // seems to be accessible by calling __cilkrts_get_tls_worker(), which will - // be inlined and optimized to a simple move from TLS. - _Atomic(__cilkrts_worker *) worker; - - // Before every spawn and nontrivial sync the client function - // saves its continuation here. - jmpbuf ctx; - -#ifdef ENABLE_CILKRTS_PEDIGREE - __cilkrts_pedigree pedigree; // Fields for pedigrees. - int64_t rank; - uint64_t dprng_dotproduct; - int64_t dprng_depth; -#endif -}; - -//=========================================================== -// Value defines for the flags field in cilkrts_stack_frame -//=========================================================== - -/* CILK_FRAME_STOLEN is set if the frame has ever been stolen. */ -#define CILK_FRAME_STOLEN 0x001 - -/* CILK_FRAME_UNSYNCHED is set if the frame has been stolen and - is has not yet executed _Cilk_sync. It is technically a misnomer in that a - frame can have this flag set even if all children have returned. */ -#define CILK_FRAME_UNSYNCHED 0x002 - -/* Is this frame detached (spawned)? If so the runtime needs - to undo-detach in the slow path epilogue. */ -#define CILK_FRAME_DETACHED 0x004 - -/* CILK_FRAME_EXCEPTION_PENDING is set if the frame has an exception - to handle after syncing. */ -#define CILK_FRAME_EXCEPTION_PENDING 0x008 - -/* Is this frame excepting, meaning that a stolen continuation threw? */ -#define CILK_FRAME_EXCEPTING 0x010 - -/* Is this the last (oldest) Cilk frame? */ -#define CILK_FRAME_LAST 0x080 - -/* Is this frame in the epilogue, or more generally after the last - sync when it can no longer do any Cilk operations? */ -#define CILK_FRAME_EXITING 0x100 - -/* Is this frame handling an exception? */ -// TODO: currently only used when throwing an exception from the continuation -// (i.e. from the personality function). Used in scheduler.c to disable -// asserts that fail if trying to longjmp back to the personality -// function. 
-#define CILK_FRAME_SYNC_READY 0x200 - -static const uint32_t frame_magic = - ((((((((((((__CILKRTS_ABI_VERSION * 13) + - offsetof(struct __cilkrts_stack_frame, worker)) * - 13) + - offsetof(struct __cilkrts_stack_frame, ctx)) * - 13) + - offsetof(struct __cilkrts_stack_frame, magic)) * - 13) + - offsetof(struct __cilkrts_stack_frame, flags)) * - 13) + - offsetof(struct __cilkrts_stack_frame, call_parent)) - )) - ; - -#define CHECK_CILK_FRAME_MAGIC(G, F) (frame_magic == (F)->magic) - -//=========================================================== -// Helper functions for the flags field in cilkrts_stack_frame -//=========================================================== - -/* A frame is set to be stolen as long as it has a corresponding Closure */ -static inline void __cilkrts_set_stolen(__cilkrts_stack_frame *sf) { - sf->flags |= CILK_FRAME_STOLEN; -} - -/* A frame is set to be unsynced only if it has parallel subcomputation - * underneathe, i.e., only if it has spawned children executing on a different - * worker - */ -static inline void __cilkrts_set_unsynced(__cilkrts_stack_frame *sf) { - sf->flags |= CILK_FRAME_UNSYNCHED; -} - -static inline void __cilkrts_set_synced(__cilkrts_stack_frame *sf) { - sf->flags &= ~CILK_FRAME_UNSYNCHED; -} - -/* Returns nonzero if the frame is not synched. */ -static inline int __cilkrts_unsynced(__cilkrts_stack_frame *sf) { - return (sf->flags & CILK_FRAME_UNSYNCHED); -} - -/* Returns nonzero if the frame has been stolen. */ -static inline int __cilkrts_stolen(__cilkrts_stack_frame *sf) { - return (sf->flags & CILK_FRAME_STOLEN); -} - -/* Returns nonzero if the frame is synched. */ -static inline int __cilkrts_synced(__cilkrts_stack_frame *sf) { - return ((sf->flags & CILK_FRAME_UNSYNCHED) == 0); -} - -/* Returns nonzero if the frame has never been stolen. 
*/ -static inline int __cilkrts_not_stolen(__cilkrts_stack_frame *sf) { - return ((sf->flags & CILK_FRAME_STOLEN) == 0); -} - -//=============================================== -// Worker related definition -//=============================================== - -// Actual declaration - -enum __cilkrts_worker_state { - WORKER_IDLE = 10, - WORKER_SCHED, - WORKER_STEAL, - WORKER_RUN -}; - -struct __cilkrts_worker { - // T and H pointers in the THE protocol - _Atomic(__cilkrts_stack_frame **) tail; - _Atomic(__cilkrts_stack_frame **) head; - _Atomic(__cilkrts_stack_frame **) exc; - - // Limit of the Lazy Task Queue, to detect queue overflow - __cilkrts_stack_frame **ltq_limit; - - // Worker id, a small integer - worker_id self; - - // Global state of the runtime system, opaque to the client. - global_state *g; - - // Additional per-worker state hidden from the client. - local_state *l; - - // A slot that points to the currently executing Cilk frame. - __cilkrts_stack_frame *current_stack_frame; - - // Map from reducer names to reducer values - cilkred_map *reducer_map; -} __attribute__((aligned(256))); // This alignment reduces false sharing induced - // by hardware prefetchers on some systems, - // such as Intel CPUs. 
- struct cilkrts_callbacks { unsigned last_init; unsigned last_exit; @@ -218,11 +40,39 @@ struct cilkrts_callbacks { extern CHEETAH_INTERNAL struct cilkrts_callbacks cilkrts_callbacks; -extern __thread __cilkrts_worker *tls_worker; +extern bool __cilkrts_use_extension; +#if ENABLE_EXTENSION +#define USE_EXTENSION __cilkrts_use_extension +#else +#define USE_EXTENSION false +#endif +extern __thread __cilkrts_worker *__cilkrts_tls_worker; +CHEETAH_INTERNAL extern __thread bool is_boss_thread; static inline __attribute__((always_inline)) __cilkrts_worker * __cilkrts_get_tls_worker(void) { - return tls_worker; + return __cilkrts_tls_worker; +} + +void __cilkrts_register_extension(void *extension); +void *__cilkrts_get_extension(void); +void __cilkrts_extend_spawn(__cilkrts_worker *w, void **parent_extension, + void **child_extension); +void __cilkrts_extend_return_from_spawn(__cilkrts_worker *w, void **extension); +void __cilkrts_extend_sync(void **extension); + +static inline __attribute__((always_inline)) void * +__cilkrts_push_ext_stack(__cilkrts_worker *w, size_t size) { + uint8_t *ext_stack_ptr = ((uint8_t *)w->ext_stack) - size; + w->ext_stack = (void *)ext_stack_ptr; + return ext_stack_ptr; +} + +static inline __attribute__((always_inline)) void * +__cilkrts_pop_ext_stack(__cilkrts_worker *w, size_t size) { + uint8_t *ext_stack_ptr = ((uint8_t *)w->ext_stack) + size; + w->ext_stack = (void *)ext_stack_ptr; + return ext_stack_ptr; } #ifdef __cplusplus diff --git a/runtime/cilk2c.c b/runtime/cilk2c.c index 39a16ada..d9353f96 100644 --- a/runtime/cilk2c.c +++ b/runtime/cilk2c.c @@ -73,9 +73,10 @@ void __cilkrts_check_exception_raise(__cilkrts_stack_frame *sf) { __cilkrts_worker *w = sf->worker; CILK_ASSERT(w, sf->worker == __cilkrts_get_tls_worker()); + ReadyDeque *deques = w->g->deques; - deque_lock_self(w); - Closure *t = deque_peek_bottom(w, w->self); + deque_lock_self(deques, w); + Closure *t = deque_peek_bottom(deques, w, w->self); Closure_lock(w, t); char *exn 
= t->user_exn.exn; @@ -85,7 +86,7 @@ void __cilkrts_check_exception_raise(__cilkrts_stack_frame *sf) { sf->flags &= ~CILK_FRAME_EXCEPTION_PENDING; Closure_unlock(w, t); - deque_unlock_self(w); + deque_unlock_self(deques, w); if (exn != NULL) { _Unwind_RaiseException((struct _Unwind_Exception *)exn); // noreturn } @@ -99,9 +100,10 @@ void __cilkrts_check_exception_resume(__cilkrts_stack_frame *sf) { __cilkrts_worker *w = sf->worker; CILK_ASSERT(w, sf->worker == __cilkrts_get_tls_worker()); + ReadyDeque *deques = w->g->deques; - deque_lock_self(w); - Closure *t = deque_peek_bottom(w, w->self); + deque_lock_self(deques, w); + Closure *t = deque_peek_bottom(deques, w, w->self); Closure_lock(w, t); char *exn = t->user_exn.exn; @@ -111,7 +113,7 @@ void __cilkrts_check_exception_resume(__cilkrts_stack_frame *sf) { sf->flags &= ~CILK_FRAME_EXCEPTION_PENDING; Closure_unlock(w, t); - deque_unlock_self(w); + deque_unlock_self(deques, w); if (exn != NULL) { _Unwind_Resume((struct _Unwind_Exception *)exn); // noreturn } @@ -127,9 +129,10 @@ void __cilkrts_cleanup_fiber(__cilkrts_stack_frame *sf, int32_t sel) { __cilkrts_worker *w = sf->worker; CILK_ASSERT(w, sf->worker == __cilkrts_get_tls_worker()); + ReadyDeque *deques = w->g->deques; - deque_lock_self(w); - Closure *t = deque_peek_bottom(w, w->self); + deque_lock_self(deques, w); + Closure *t = deque_peek_bottom(deques, w, w->self); // If t->parent_rsp is non-null, then the Cilk personality function executed // __cilkrts_sync(sf), which implies that sf is at the top of the deque. @@ -140,7 +143,7 @@ void __cilkrts_cleanup_fiber(__cilkrts_stack_frame *sf, int32_t sel) { // non-null. 
if (NULL == t->parent_rsp) { - deque_unlock_self(w); + deque_unlock_self(deques, w); return; } @@ -152,7 +155,7 @@ void __cilkrts_cleanup_fiber(__cilkrts_stack_frame *sf, int32_t sel) { t->saved_throwing_fiber = NULL; } - deque_unlock_self(w); + deque_unlock_self(deques, w); __builtin_longjmp(sf->ctx, 1); // Does not return return; } @@ -173,3 +176,24 @@ void __cilkrts_sync(__cilkrts_stack_frame *sf) { longjmp_to_runtime(w); } } + +/////////////////////////////////////////////////////////////////////////// +/// Methods for handling extensions + +static inline __cilkrts_worker *get_worker_or_default(void) { + __cilkrts_worker *w = __cilkrts_get_tls_worker(); + if (NULL == w) + w = default_cilkrts->workers[0]; + return w; +} + +void __cilkrts_register_extension(void *extension) { + __cilkrts_use_extension = true; + __cilkrts_worker *w = get_worker_or_default(); + w->extension = extension; +} + +void *__cilkrts_get_extension(void) { + __cilkrts_worker *w = get_worker_or_default(); + return w->extension; +} diff --git a/runtime/cilk2c.h b/runtime/cilk2c.h index 18b1c9ca..2821f5f6 100644 --- a/runtime/cilk2c.h +++ b/runtime/cilk2c.h @@ -98,6 +98,5 @@ CHEETAH_INTERNAL uint64_t __cilkrts_cilk_for_grainsize_64(uint64_t n); // Not marked as CHEETAH_API as it may be deprecated soon unsigned __cilkrts_get_nworkers(void); -//CHEETAH_API int64_t* __cilkrts_get_pedigree(void); -//void __cilkrts_pedigree_bump_rank(void); + #endif diff --git a/runtime/cilk2c_inlined.c b/runtime/cilk2c_inlined.c index bc6ece4d..9abdd842 100644 --- a/runtime/cilk2c_inlined.c +++ b/runtime/cilk2c_inlined.c @@ -17,40 +17,25 @@ #include "readydeque.h" #include "scheduler.h" -#ifdef ENABLE_CILKRTS_PEDIGREE -extern __cilkrts_pedigree cilkrts_root_pedigree_node; -extern uint64_t DPRNG_PRIME; -extern uint64_t* dprng_m_array; -extern uint64_t dprng_m_X; - -uint64_t __cilkrts_dprng_swap_halves(uint64_t x); -uint64_t __cilkrts_dprng_mix(uint64_t x); -uint64_t __cilkrts_dprng_mix_mod_p(uint64_t x); -uint64_t 
__cilkrts_dprng_sum_mod_p(uint64_t a, uint64_t b); -void __cilkrts_init_dprng(void); - -uint64_t __cilkrts_get_dprand(void) { - __cilkrts_worker *w = __cilkrts_get_tls_worker(); - __cilkrts_bump_worker_rank(); - return __cilkrts_dprng_mix_mod_p(w->current_stack_frame->dprng_dotproduct); -} +#include "pedigree_ext.c" -#endif +// This variable encodes the alignment of a __cilkrts_stack_frame, both in its +// value and in its own alignment. Because LLVM IR does not associate +// alignments with types, this variable communicates the desired alignment to +// the compiler instead. +_Alignas(__cilkrts_stack_frame) +size_t __cilkrts_stack_frame_align = __alignof__(__cilkrts_stack_frame); // Begin a Cilkified region. The routine runs on a Cilkifying thread to // transfer the execution of this function to the workers in global_state g. // This routine must be inlined for correctness. static inline __attribute__((always_inline)) void -cilkify(global_state *g, __cilkrts_stack_frame *sf) { -#ifdef ENABLE_CILKRTS_PEDIGREE - __cilkrts_init_dprng(); -#endif - +cilkify(__cilkrts_stack_frame *sf) { // After inlining, the setjmp saves the processor state, including the frame // pointer, of the Cilk function. 
if (__builtin_setjmp(sf->ctx) == 0) { sysdep_save_fp_ctrl_state(sf); - __cilkrts_internal_invoke_cilkified_root(g, sf); + __cilkrts_internal_invoke_cilkified_root(sf); } else { sanitizer_finish_switch_fiber(); } @@ -74,32 +59,6 @@ uncilkify(global_state *g, __cilkrts_stack_frame *sf) { } } -#ifdef ENABLE_CILKRTS_PEDIGREE -__attribute__((always_inline)) __cilkrts_pedigree __cilkrts_get_pedigree(void) { - __cilkrts_worker *w = __cilkrts_get_tls_worker(); - if (w == NULL) { - return cilkrts_root_pedigree_node; - } else { - __cilkrts_pedigree ret_ped; - ret_ped.parent = &(w->current_stack_frame->pedigree); - ret_ped.rank = w->current_stack_frame->rank; - return ret_ped; - } -} - -__attribute__((always_inline)) void __cilkrts_bump_worker_rank(void) { - __cilkrts_worker *w = __cilkrts_get_tls_worker(); - if (w == NULL) { - cilkrts_root_pedigree_node.rank++; - } else { - w->current_stack_frame->rank++; - } - w->current_stack_frame->dprng_dotproduct = __cilkrts_dprng_sum_mod_p( - w->current_stack_frame->dprng_dotproduct, - dprng_m_array[w->current_stack_frame->dprng_depth]); -} -#endif - // Enter a new Cilk function, i.e., a function that contains a cilk_spawn. This // function must be inlined for correctness. __attribute__((always_inline)) void @@ -107,7 +66,7 @@ __cilkrts_enter_frame(__cilkrts_stack_frame *sf) { __cilkrts_worker *w = __cilkrts_get_tls_worker(); sf->flags = 0; if (NULL == w) { - cilkify(default_cilkrts, sf); + cilkify(sf); w = __cilkrts_get_tls_worker(); } cilkrts_alert(CFRAME, w, "__cilkrts_enter_frame %p", (void *)sf); @@ -117,25 +76,6 @@ __cilkrts_enter_frame(__cilkrts_stack_frame *sf) { atomic_store_explicit(&sf->worker, w, memory_order_relaxed); w->current_stack_frame = sf; // WHEN_CILK_DEBUG(sf->magic = CILK_STACKFRAME_MAGIC); - -#ifdef ENABLE_CILKRTS_PEDIGREE - // Pedigree maintenance. 
- if (sf->call_parent != NULL && !(sf->flags & CILK_FRAME_LAST)) { - sf->pedigree.rank = sf->call_parent->rank++; - sf->pedigree.parent = &(sf->call_parent->pedigree); - sf->dprng_depth = sf->call_parent->dprng_depth + 1; - sf->call_parent->dprng_dotproduct = __cilkrts_dprng_sum_mod_p( - sf->call_parent->dprng_dotproduct, - dprng_m_array[sf->call_parent->dprng_depth]); - sf->dprng_dotproduct = sf->call_parent->dprng_dotproduct; - } else { - sf->pedigree.rank = 0; - sf->pedigree.parent = NULL; - sf->dprng_depth = 0; - sf->dprng_dotproduct = dprng_m_X; - } - sf->rank = 0; -#endif } // Enter a spawn helper, i.e., a fucntion containing code that was cilk_spawn'd. @@ -152,25 +92,6 @@ __cilkrts_enter_frame_helper(__cilkrts_stack_frame *sf) { sf->call_parent = w->current_stack_frame; atomic_store_explicit(&sf->worker, w, memory_order_relaxed); w->current_stack_frame = sf; - -#ifdef ENABLE_CILKRTS_PEDIGREE - // Pedigree maintenance. - if (sf->call_parent != NULL && !(sf->flags & CILK_FRAME_LAST)) { - sf->pedigree.rank = sf->call_parent->rank++; - sf->pedigree.parent = &(sf->call_parent->pedigree); - sf->dprng_depth = sf->call_parent->dprng_depth + 1; - sf->call_parent->dprng_dotproduct = __cilkrts_dprng_sum_mod_p( - sf->call_parent->dprng_dotproduct, - dprng_m_array[sf->call_parent->dprng_depth]); - sf->dprng_dotproduct = sf->call_parent->dprng_dotproduct; - } else { - sf->pedigree.rank = 0; - sf->pedigree.parent = NULL; - sf->dprng_depth = 0; - sf->dprng_dotproduct = dprng_m_X; - } - sf->rank = 0; -#endif } __attribute__((always_inline)) int @@ -183,7 +104,8 @@ __cilk_prepare_spawn(__cilkrts_stack_frame *sf) { return res; } -static inline __cilkrts_worker *get_tls_worker(__cilkrts_stack_frame *sf) { +static inline +__cilkrts_worker *get_worker_from_stack(__cilkrts_stack_frame *sf) { // In principle, we should be able to get the worker efficiently by calling // __cilkrts_get_tls_worker(). 
But code-generation on many systems assumes // that the thread on which a function runs never changes. As a result, it @@ -200,7 +122,7 @@ static inline __cilkrts_worker *get_tls_worker(__cilkrts_stack_frame *sf) { // parent frame. __attribute__((always_inline)) void __cilkrts_detach(__cilkrts_stack_frame *sf) { - __cilkrts_worker *w = get_tls_worker(sf); + __cilkrts_worker *w = get_worker_from_stack(sf); cilkrts_alert(CFRAME, w, "__cilkrts_detach %p", (void *)sf); CILK_ASSERT(w, CHECK_CILK_FRAME_MAGIC(w->g, sf)); @@ -208,6 +130,11 @@ __cilkrts_detach(__cilkrts_stack_frame *sf) { CILK_ASSERT(w, w->current_stack_frame == sf); struct __cilkrts_stack_frame *parent = sf->call_parent; + + if (USE_EXTENSION) { + __cilkrts_extend_spawn(w, &parent->extension, &w->extension); + } + sf->flags |= CILK_FRAME_DETACHED; struct __cilkrts_stack_frame **tail = atomic_load_explicit(&w->tail, memory_order_relaxed); @@ -220,44 +147,58 @@ __cilkrts_detach(__cilkrts_stack_frame *sf) { } __attribute__((always_inline)) void __cilk_sync(__cilkrts_stack_frame *sf) { - if (sf->flags & CILK_FRAME_UNSYNCHED) { - if (__builtin_setjmp(sf->ctx) == 0) { - sysdep_save_fp_ctrl_state(sf); - __cilkrts_sync(sf); - } else { - sanitizer_finish_switch_fiber(); - if (sf->flags & CILK_FRAME_EXCEPTION_PENDING) { - __cilkrts_check_exception_raise(sf); + if (sf->flags & CILK_FRAME_UNSYNCHED || USE_EXTENSION) { + if (sf->flags & CILK_FRAME_UNSYNCHED) { + if (__builtin_setjmp(sf->ctx) == 0) { + sysdep_save_fp_ctrl_state(sf); + __cilkrts_sync(sf); + } else { + sanitizer_finish_switch_fiber(); + if (sf->flags & CILK_FRAME_EXCEPTION_PENDING) { + __cilkrts_check_exception_raise(sf); + } } } + if (USE_EXTENSION) { + __cilkrts_worker *w = get_worker_from_stack(sf); + __cilkrts_extend_sync(&w->extension); + } } } __attribute__((always_inline)) void __cilk_sync_nothrow(__cilkrts_stack_frame *sf) { - if (sf->flags & CILK_FRAME_UNSYNCHED) { - if (__builtin_setjmp(sf->ctx) == 0) { - sysdep_save_fp_ctrl_state(sf); - 
__cilkrts_sync(sf); - } else { - sanitizer_finish_switch_fiber(); + if (sf->flags & CILK_FRAME_UNSYNCHED || USE_EXTENSION) { + if (sf->flags & CILK_FRAME_UNSYNCHED) { + if (__builtin_setjmp(sf->ctx) == 0) { + sysdep_save_fp_ctrl_state(sf); + __cilkrts_sync(sf); + } else { + sanitizer_finish_switch_fiber(); + } + } + if (USE_EXTENSION) { + __cilkrts_worker *w = get_worker_from_stack(sf); + __cilkrts_extend_sync(&w->extension); } } } __attribute__((always_inline)) void __cilkrts_leave_frame(__cilkrts_stack_frame *sf) { - __cilkrts_worker *w = get_tls_worker(sf); + __cilkrts_worker *w = get_worker_from_stack(sf); cilkrts_alert(CFRAME, w, "__cilkrts_leave_frame %p", (void *)sf); CILK_ASSERT(w, CHECK_CILK_FRAME_MAGIC(w->g, sf)); CILK_ASSERT(w, sf->worker == __cilkrts_get_tls_worker()); // WHEN_CILK_DEBUG(sf->magic = ~CILK_STACKFRAME_MAGIC); + __cilkrts_stack_frame *parent = sf->call_parent; + // Pop this frame off the cactus stack. This logic used to be in // __cilkrts_pop_frame, but has been manually inlined to avoid reloading the // worker unnecessarily. - w->current_stack_frame = sf->call_parent; + w->current_stack_frame = parent; sf->call_parent = NULL; // Check if sf is the final stack frame, and if so, terminate the Cilkified @@ -290,7 +231,7 @@ __cilkrts_leave_frame(__cilkrts_stack_frame *sf) { __attribute__((always_inline)) void __cilkrts_leave_frame_helper(__cilkrts_stack_frame *sf) { - __cilkrts_worker *w = get_tls_worker(sf); + __cilkrts_worker *w = get_worker_from_stack(sf); cilkrts_alert(CFRAME, w, "__cilkrts_leave_frame_helper %p", (void *)sf); CILK_ASSERT(w, CHECK_CILK_FRAME_MAGIC(w->g, sf)); @@ -300,7 +241,12 @@ __cilkrts_leave_frame_helper(__cilkrts_stack_frame *sf) { // Pop this frame off the cactus stack. This logic used to be in // __cilkrts_pop_frame, but has been manually inlined to avoid reloading the // worker unnecessarily. 
- w->current_stack_frame = sf->call_parent; + __cilkrts_stack_frame *parent = sf->call_parent; + w->current_stack_frame = parent; + if (USE_EXTENSION) { + __cilkrts_extend_return_from_spawn(w, &w->extension); + w->extension = parent->extension; + } sf->call_parent = NULL; CILK_ASSERT(w, sf->flags & CILK_FRAME_DETACHED); @@ -347,21 +293,27 @@ void __cilkrts_enter_landingpad(__cilkrts_stack_frame *sf, int32_t sel) { __attribute__((always_inline)) void __cilkrts_pause_frame(__cilkrts_stack_frame *sf, char *exn) { - __cilkrts_worker *w = get_tls_worker(sf); + __cilkrts_worker *w = get_worker_from_stack(sf); cilkrts_alert(CFRAME, w, "__cilkrts_pause_frame %p", (void *)sf); CILK_ASSERT(w, CHECK_CILK_FRAME_MAGIC(w->g, sf)); CILK_ASSERT(w, sf->worker == __cilkrts_get_tls_worker()); + __cilkrts_stack_frame *parent = sf->call_parent; + // Pop this frame off the cactus stack. This logic used to be in // __cilkrts_pop_frame, but has been manually inlined to avoid reloading the // worker unnecessarily. - w->current_stack_frame = sf->call_parent; + w->current_stack_frame = parent; sf->call_parent = NULL; // A __cilkrts_pause_frame may be reached before the spawn-helper frame has // detached. In that case, THE is not required. 
if (sf->flags & CILK_FRAME_DETACHED) { + if (USE_EXTENSION) { + __cilkrts_extend_return_from_spawn(w, &w->extension); + w->extension = parent->extension; + } __cilkrts_stack_frame **tail = atomic_load_explicit(&w->tail, memory_order_relaxed); --tail; diff --git a/runtime/cilkred_map.c b/runtime/cilkred_map.c index 902df357..8432246a 100644 --- a/runtime/cilkred_map.c +++ b/runtime/cilkred_map.c @@ -13,23 +13,19 @@ static inline void swap_views(ViewInfo *v1, ViewInfo *v2) { } static inline void swap_vals(ViewInfo *v1, ViewInfo *v2) { - void *val = v1->val; - v1->val = v2->val; - v2->val = val; + void *val = v1->view; + v1->view = v2->view; + v2->view = val; } -static inline void clear_view(ViewInfo *view) { - __cilkrts_hyperobject_base *key = view->key; +static void clear_view(ViewInfo *view) { + hyperobject_base *hyper = view->hyper; - if (key != NULL) { - cilk_destroy_fn_t destroy = key->__c_monoid.destroy_fn; - if (destroy) { - key->__c_monoid.destroy_fn(key, view->val); // calls destructor - } - key->__c_monoid.deallocate_fn(key, view->val); // free the memory + if (hyper != NULL) { + __cilkrts_hyper_dealloc(view->view, hyper->view_size); } - view->key = NULL; - view->val = NULL; + view->view = NULL; + view->hyper = NULL; } // ================================================================= @@ -61,8 +57,8 @@ void cilkred_map_unlog_id(__cilkrts_worker *const w, cilkred_map *this_map, CILK_ASSERT(w, this_map->num_of_vinfo <= this_map->spa_cap); CILK_ASSERT(w, id < this_map->spa_cap); - this_map->vinfo[id].key = NULL; - this_map->vinfo[id].val = NULL; + this_map->vinfo[id].hyper = NULL; + this_map->vinfo[id].view = NULL; this_map->num_of_vinfo--; if (this_map->num_of_vinfo == 0) { @@ -71,18 +67,16 @@ void cilkred_map_unlog_id(__cilkrts_worker *const w, cilkred_map *this_map, } /** @brief Return element mapped to 'key' or null if not found. 
*/ -ViewInfo *cilkred_map_lookup(cilkred_map *this_map, - __cilkrts_hyperobject_base *key) { - hyper_id_t id = key->__id_num; - if (__builtin_expect(!(id & HYPER_ID_VALID), 0)) { +ViewInfo *cilkred_map_lookup(cilkred_map *this_map, hyperobject_base *hyper) { + hyper_id_t id = hyper->id_num; + if (__builtin_expect(!hyper->valid, 0)) { return NULL; } - id &= ~HYPER_ID_VALID; if (id >= this_map->spa_cap) { return NULL; /* TODO: grow map */ } ViewInfo *ret = this_map->vinfo + id; - if (ret->key == NULL && ret->val == NULL) { + if (ret->hyper == NULL && ret->view == NULL) { return NULL; } @@ -132,7 +126,7 @@ void cilkred_map_destroy_map(__cilkrts_worker *w, cilkred_map *h) { } if (DEBUG_ENABLED(REDUCER)) { for (hyper_id_t i = 0; i < h->spa_cap; ++i) - CILK_ASSERT(w, !h->vinfo[i].val); + CILK_ASSERT(w, !h->vinfo[i].view); } free(h->vinfo); h->vinfo = NULL; @@ -160,6 +154,7 @@ void cilkred_map_merge(cilkred_map *this_map, __cilkrts_worker *w, if (other_map->num_of_vinfo == 0) { cilkred_map_destroy_map(w, other_map); + this_map->merging = false; return; } @@ -168,20 +163,26 @@ void cilkred_map_merge(cilkred_map *this_map, __cilkrts_worker *w, for (i = 0; i < other_map->num_of_logs; i++) { hyper_id_t vindex = other_map->log[i]; - __cilkrts_hyperobject_base *key = other_map->vinfo[vindex].key; + hyperobject_base *hyper = other_map->vinfo[vindex].hyper; - if (this_map->vinfo[vindex].key != NULL) { - CILK_ASSERT(w, key == this_map->vinfo[vindex].key); + if (hyper == NULL) { + /* The other map's hyperobject was deleted. + The corresponding index in this map may + belong to a different hyperobject. 
*/ + continue; + } + if (this_map->vinfo[vindex].hyper != NULL) { + CILK_ASSERT(w, hyper == this_map->vinfo[vindex].hyper); if (kind == MERGE_INTO_RIGHT) { // other_map is the left val swap_vals(&other_map->vinfo[vindex], &this_map->vinfo[vindex]); } // updated val is stored back into the left - key->__c_monoid.reduce_fn(key, this_map->vinfo[vindex].val, - other_map->vinfo[vindex].val); + hyper->reduce_fn(this_map->vinfo[vindex].view, + other_map->vinfo[vindex].view); clear_view(&other_map->vinfo[vindex]); } else { - CILK_ASSERT(w, this_map->vinfo[vindex].val == NULL); + CILK_ASSERT(w, this_map->vinfo[vindex].view == NULL); swap_views(&other_map->vinfo[vindex], &this_map->vinfo[vindex]); cilkred_map_log_id(w, this_map, vindex); } @@ -190,20 +191,20 @@ void cilkred_map_merge(cilkred_map *this_map, __cilkrts_worker *w, } else { hyper_id_t i; for (i = 0; i < other_map->spa_cap; i++) { - if (other_map->vinfo[i].key != NULL) { - __cilkrts_hyperobject_base *key = other_map->vinfo[i].key; + if (other_map->vinfo[i].hyper != NULL) { + hyperobject_base *hyper = other_map->vinfo[i].hyper; - if (this_map->vinfo[i].key != NULL) { - CILK_ASSERT(w, key == this_map->vinfo[i].key); + if (this_map->vinfo[i].hyper != NULL) { + CILK_ASSERT(w, hyper == this_map->vinfo[i].hyper); if (kind == MERGE_INTO_RIGHT) { // other_map is the left val swap_vals(&other_map->vinfo[i], &this_map->vinfo[i]); } // updated val is stored back into the left - key->__c_monoid.reduce_fn(key, this_map->vinfo[i].val, - other_map->vinfo[i].val); + hyper->reduce_fn(this_map->vinfo[i].view, + other_map->vinfo[i].view); clear_view(&other_map->vinfo[i]); } else { // the 'this_map' page does not contain view - CILK_ASSERT(w, this_map->vinfo[i].val == NULL); + CILK_ASSERT(w, this_map->vinfo[i].view == NULL); // transfer the key / val over swap_views(&other_map->vinfo[i], &this_map->vinfo[i]); cilkred_map_log_id(w, this_map, i); diff --git a/runtime/cilkred_map.h b/runtime/cilkred_map.h index 2d0d5717..7bafae54 100644 
--- a/runtime/cilkred_map.h +++ b/runtime/cilkred_map.h @@ -3,14 +3,11 @@ #include "cilk-internal.h" #include "debug.h" -#include +#include "hyperobject_base.h" #include #include #include -typedef uint32_t hyper_id_t; /* must match cilk/hyperobject_base.h */ -#define HYPER_ID_VALID 0x80000000 - enum merge_kind { MERGE_UNORDERED, ///< Assertion fails MERGE_INTO_LEFT, ///< Merges the argument from the right into the left @@ -19,9 +16,8 @@ enum merge_kind { typedef enum merge_kind merge_kind; typedef struct view_info { - void *val; // pointer to the actual view for the reducer - // pointer to the hyperbase object for a given reducer - __cilkrts_hyperobject_base *key; + void *view; + hyperobject_base *hyper; } ViewInfo; /** @@ -45,14 +41,12 @@ void cilkred_map_log_id(__cilkrts_worker *const w, cilkred_map *this_map, hyper_id_t id); CHEETAH_INTERNAL void cilkred_map_unlog_id(__cilkrts_worker *const w, cilkred_map *this_map, - hyper_id_t id); /* Calling this function potentially invalidates any older ViewInfo pointers from the same map. */ CHEETAH_INTERNAL -ViewInfo *cilkred_map_lookup(cilkred_map *this_map, - __cilkrts_hyperobject_base *key); +ViewInfo *cilkred_map_lookup(cilkred_map *this_map, hyperobject_base *hyper); /** * Construct an empty reducer map from the memory pool associated with the * given worker. 
This reducer map must be destroyed before the worker's diff --git a/runtime/closure-type.h b/runtime/closure-type.h index d636aea7..55137e51 100644 --- a/runtime/closure-type.h +++ b/runtime/closure-type.h @@ -39,6 +39,9 @@ struct Closure { struct cilk_fiber *fiber; struct cilk_fiber *fiber_child; + struct cilk_fiber *ext_fiber; + struct cilk_fiber *ext_fiber_child; + worker_id owner_ready_deque; /* debug only */ worker_id mutex_owner; /* debug only */ diff --git a/runtime/closure.h b/runtime/closure.h index c0f7b1ce..dbdd7fcf 100644 --- a/runtime/closure.h +++ b/runtime/closure.h @@ -156,6 +156,8 @@ static inline void Closure_init(Closure *t) { t->frame = NULL; t->fiber = NULL; t->fiber_child = NULL; + t->ext_fiber = NULL; + t->ext_fiber_child = NULL; t->orig_rsp = NULL; @@ -340,32 +342,35 @@ void Closure_remove_callee(__cilkrts_worker *const w, Closure *caller) { /* This function is used for steal, the next function for sync. The invariants are slightly different. */ -static inline -void Closure_suspend_victim(__cilkrts_worker *thief, __cilkrts_worker *victim, - Closure *cl) { +static inline void Closure_suspend_victim(struct ReadyDeque *deques, + __cilkrts_worker *thief, + __cilkrts_worker *victim, + Closure *cl) { Closure *cl1; + worker_id victim_id = victim->self; CILK_ASSERT(thief, !cl->user_rmap); Closure_checkmagic(thief, cl); Closure_assert_ownership(thief, cl); - deque_assert_ownership(thief, victim->self); + deque_assert_ownership(deques, thief, victim_id); CILK_ASSERT(thief, cl == thief->g->root_closure || cl->spawn_parent || cl->call_parent); Closure_change_status(thief, cl, CLOSURE_RUNNING, CLOSURE_SUSPENDED); - cl1 = deque_xtract_bottom(thief, victim->self); + cl1 = deque_xtract_bottom(deques, thief, victim_id); CILK_ASSERT(thief, cl == cl1); USE_UNUSED(cl1); } -static inline -void Closure_suspend(__cilkrts_worker *const w, Closure *cl) { +static inline void Closure_suspend(struct ReadyDeque *deques, + __cilkrts_worker *const w, Closure *cl) { 
Closure *cl1; + worker_id self = w->self; CILK_ASSERT(w, !cl->user_rmap); @@ -373,18 +378,18 @@ void Closure_suspend(__cilkrts_worker *const w, Closure *cl) { Closure_checkmagic(w, cl); Closure_assert_ownership(w, cl); - deque_assert_ownership(w, w->self); + deque_assert_ownership(deques, w, self); CILK_ASSERT(w, cl == w->g->root_closure || cl->spawn_parent || cl->call_parent); CILK_ASSERT(w, cl->frame != NULL); CILK_ASSERT(w, __cilkrts_stolen(cl->frame)); - CILK_ASSERT(w, cl->frame->worker->self == w->self); + CILK_ASSERT(w, cl->frame->worker->self == self); Closure_change_status(w, cl, CLOSURE_RUNNING, CLOSURE_SUSPENDED); atomic_store_explicit(&cl->frame->worker, INVALID, memory_order_relaxed); - cl1 = deque_xtract_bottom(w, w->self); + cl1 = deque_xtract_bottom(deques, w, self); CILK_ASSERT(w, cl == cl1); USE_UNUSED(cl1); diff --git a/runtime/fiber-pool.c b/runtime/fiber-pool.c index 8d3ae800..b56e0171 100644 --- a/runtime/fiber-pool.c +++ b/runtime/fiber-pool.c @@ -9,7 +9,7 @@ #include "local.h" #include "mutex.h" -// Whent the pool becomes full (empty), free (allocate) this fraction +// When the pool becomes full (empty), free (allocate) this fraction // of the pool back to (from) parent / the OS. #define BATCH_FRACTION 2 #define GLOBAL_POOL_RATIO 10 // make global pool this much larger @@ -90,6 +90,10 @@ static void fiber_pool_init(struct cilk_fiber_pool *pool, size_t stacksize, static void fiber_pool_destroy(struct cilk_fiber_pool *pool) { CILK_ASSERT_G(pool->size == 0); cilk_mutex_destroy(&pool->lock); + // pool->fibers might be NULL if the fiber pool was never actually + // initialized, e.g., because no Cilk code was run. + if (pool->fibers == NULL) + return; free(pool->fibers); pool->parent = NULL; pool->fibers = NULL; @@ -282,6 +286,20 @@ void cilk_fiber_pool_global_destroy(global_state *g) { fiber_pool_destroy(&g->fiber_pool); // worker 0 should have freed everything } +/** + * Per-worker fiber pool zero initialization. 
Initializes the fiber pool to a + * safe zero state, in case that worker is created but + * cilk_fiber_pool_per_worker_init() never gets called on that worker. Should + * initialize the fiber pool sufficiently for calls to + * cilk_fiber_pool_per_worker_terminate() and + * cilk_fiber_pool_per_worker_destroy() to succeed. + */ +void cilk_fiber_pool_per_worker_zero_init(__cilkrts_worker *w) { + struct cilk_fiber_pool *pool = &(w->l->fiber_pool); + pool->size = 0; + pool->fibers = NULL; +} + /** * Per-worker fiber pool initialization: should be called per worker so * so that fiber comes from the core on which the worker is running on. @@ -295,8 +313,8 @@ void cilk_fiber_pool_per_worker_init(__cilkrts_worker *w) { CILK_ASSERT(w, NULL != pool->fibers); CILK_ASSERT(w, w->g->fiber_pool.stack_size == pool->stack_size); - fiber_pool_allocate_batch(w, pool, bufsize / BATCH_FRACTION); fiber_pool_stat_init(pool); + fiber_pool_allocate_batch(w, pool, bufsize / BATCH_FRACTION); } /* This does not yet destroy the fiber pool; merely collects diff --git a/runtime/fiber.c b/runtime/fiber.c index acc067ea..ad132d40 100644 --- a/runtime/fiber.c +++ b/runtime/fiber.c @@ -257,6 +257,14 @@ static void fiber_init(struct cilk_fiber *fiber) { // Supported public functions //=============================================================== +char *sysdep_get_stack_start(struct cilk_fiber *fiber) { + size_t align = 64; + char *sp = fiber->stack_high - align; + /* Debugging: make sure stack is accessible.
*/ + ((volatile char *)sp)[-1]; + return sp; +} + char *sysdep_reset_stack_for_resume(struct cilk_fiber *fiber, __cilkrts_stack_frame *sf) { CILK_ASSERT_G(fiber); diff --git a/runtime/fiber.h b/runtime/fiber.h index f4096d3e..6767d656 100644 --- a/runtime/fiber.h +++ b/runtime/fiber.h @@ -77,6 +77,8 @@ void sysdep_restore_fp_state(__cilkrts_stack_frame *sf) { #endif } +CHEETAH_INTERNAL +char *sysdep_get_stack_start(struct cilk_fiber *fiber); CHEETAH_INTERNAL char *sysdep_reset_stack_for_resume(struct cilk_fiber *fiber, __cilkrts_stack_frame *sf); @@ -86,6 +88,7 @@ void sysdep_longjmp_to_sf(__cilkrts_stack_frame *sf); CHEETAH_INTERNAL void cilk_fiber_pool_global_init(global_state *g); CHEETAH_INTERNAL void cilk_fiber_pool_global_terminate(global_state *g); CHEETAH_INTERNAL void cilk_fiber_pool_global_destroy(global_state *g); +CHEETAH_INTERNAL void cilk_fiber_pool_per_worker_zero_init(__cilkrts_worker *w); CHEETAH_INTERNAL void cilk_fiber_pool_per_worker_init(__cilkrts_worker *w); CHEETAH_INTERNAL void cilk_fiber_pool_per_worker_terminate(__cilkrts_worker *w); CHEETAH_INTERNAL void cilk_fiber_pool_per_worker_destroy(__cilkrts_worker *w); diff --git a/runtime/frame.h b/runtime/frame.h new file mode 100644 index 00000000..3af5877f --- /dev/null +++ b/runtime/frame.h @@ -0,0 +1,135 @@ +#ifndef _CILK_FRAME_H +#define _CILK_FRAME_H + +#include "rts-config.h" + +#include +#include "jmpbuf.h" + +struct __cilkrts_worker; +struct __cilkrts_stack_frame; + +/** + * Every spawning function has a frame descriptor. A spawning function + * is a function that spawns or detaches. Only spawning functions + * are visible to the Cilk runtime. + */ +struct __cilkrts_stack_frame { + // Flags is a bitfield with values defined below. Client code + // initializes flags to 0 before the first Cilk operation. + uint32_t flags; + // The magic number includes the ABI version and a hash of the + // layout of this structure. 
+ uint32_t magic; + + // call_parent points to the __cilkrts_stack_frame of the closest + // ancestor spawning function, including spawn helpers, of this frame. + // It forms a linked list ending at the first stolen frame. + struct __cilkrts_stack_frame *call_parent; + + // The client copies the worker from TLS here when initializing + // the structure. The runtime ensures that the field always points + // to the __cilkrts_worker which currently "owns" the frame. + // + // TODO: Remove this pointer? This pointer only seems to be needed for + // debugging purposes. When the worker structure is genuinely needed, it + // seems to be accessible by calling __cilkrts_get_tls_worker(), which will + // be inlined and optimized to a simple move from TLS. + _Atomic(struct __cilkrts_worker *) worker; + + // Before every spawn and nontrivial sync the client function + // saves its continuation here. + jmpbuf ctx; + + // Optional state for an extension, only maintained if + // __cilkrts_use_extension == true. + void *extension; +}; + +//=========================================================== +// Value defines for the flags field in cilkrts_stack_frame +//=========================================================== + +/* CILK_FRAME_STOLEN is set if the frame has ever been stolen. */ +#define CILK_FRAME_STOLEN 0x001 + +/* CILK_FRAME_UNSYNCHED is set if the frame has been stolen and + has not yet executed _Cilk_sync. It is technically a misnomer in that a + frame can have this flag set even if all children have returned. */ +#define CILK_FRAME_UNSYNCHED 0x002 + +/* Is this frame detached (spawned)? If so the runtime needs + to undo-detach in the slow path epilogue. */ +#define CILK_FRAME_DETACHED 0x004 + +/* CILK_FRAME_EXCEPTION_PENDING is set if the frame has an exception + to handle after syncing. */ +#define CILK_FRAME_EXCEPTION_PENDING 0x008 + +/* Is this frame excepting, meaning that a stolen continuation threw?
*/ +#define CILK_FRAME_EXCEPTING 0x010 + +/* Is this the last (oldest) Cilk frame? */ +#define CILK_FRAME_LAST 0x080 + +/* Is this frame handling an exception? */ +// TODO: currently only used when throwing an exception from the continuation +// (i.e. from the personality function). Used in scheduler.c to disable +// asserts that fail if trying to longjmp back to the personality +// function. +#define CILK_FRAME_SYNC_READY 0x200 + +static const uint32_t frame_magic = + (((((((((((((__CILKRTS_ABI_VERSION * 13) + + offsetof(struct __cilkrts_stack_frame, worker)) * + 13) + + offsetof(struct __cilkrts_stack_frame, ctx)) * + 13) + + offsetof(struct __cilkrts_stack_frame, magic)) * + 13) + + offsetof(struct __cilkrts_stack_frame, flags)) * + 13) + + offsetof(struct __cilkrts_stack_frame, call_parent)) * + 13) + + offsetof(struct __cilkrts_stack_frame, extension))); + +#define CHECK_CILK_FRAME_MAGIC(G, F) (frame_magic == (F)->magic) + +//=========================================================== +// Helper functions for the flags field in cilkrts_stack_frame +//=========================================================== + +/* A frame is set to be stolen as long as it has a corresponding Closure */ +static inline void __cilkrts_set_stolen(struct __cilkrts_stack_frame *sf) { + sf->flags |= CILK_FRAME_STOLEN; +} + +/* A frame is set to be unsynced only if it has parallel subcomputation + * underneath, i.e., only if it has spawned children executing on a different + * worker + */ +static inline void __cilkrts_set_unsynced(struct __cilkrts_stack_frame *sf) { + sf->flags |= CILK_FRAME_UNSYNCHED; +} + +static inline void __cilkrts_set_synced(struct __cilkrts_stack_frame *sf) { + sf->flags &= ~CILK_FRAME_UNSYNCHED; +} + +/* Returns nonzero if the frame has been stolen. + Only used in assertions. */ +static inline int __cilkrts_stolen(struct __cilkrts_stack_frame *sf) { + return (sf->flags & CILK_FRAME_STOLEN); +} + +/* Returns nonzero if the frame is synched.
Only used in assertions. */ +static inline int __cilkrts_synced(struct __cilkrts_stack_frame *sf) { + return ((sf->flags & CILK_FRAME_UNSYNCHED) == 0); +} + +/* Returns nonzero if the frame has never been stolen. */ +static inline int __cilkrts_not_stolen(struct __cilkrts_stack_frame *sf) { + return ((sf->flags & CILK_FRAME_STOLEN) == 0); +} + +#endif /* _CILK_FRAME_H */ diff --git a/runtime/global.c b/runtime/global.c index e30222d3..394d18c4 100644 --- a/runtime/global.c +++ b/runtime/global.c @@ -3,6 +3,9 @@ #endif #include +#ifdef __FreeBSD__ +#include +#endif #include #include #include @@ -10,10 +13,15 @@ #include "debug.h" #include "global.h" +#include "hypertable.h" #include "init.h" #include "readydeque.h" #include "reducer_impl.h" +#if defined __FreeBSD__ && __FreeBSD__ < 13 +typedef cpuset_t cpu_set_t; +#endif + global_state *default_cilkrts; unsigned cilkg_nproc = 0; @@ -175,11 +183,13 @@ global_state *global_state_init(int argc, char *argv[]) { g->root_closure_initialized = false; atomic_store_explicit(&g->done, 0, memory_order_relaxed); atomic_store_explicit(&g->cilkified, 0, memory_order_relaxed); - atomic_store_explicit(&g->disengaged_deprived, 0, memory_order_relaxed); + atomic_store_explicit(&g->disengaged_sentinel, 0, memory_order_relaxed); g->terminate = false; g->exiting_worker = 0; + g->worker_args = + (struct worker_args *)calloc(active_size, sizeof(struct worker_args)); g->workers = (__cilkrts_worker **)calloc(active_size, sizeof(__cilkrts_worker *)); g->deques = (ReadyDeque *)cilk_aligned_alloc( @@ -193,18 +203,22 @@ global_state *global_state_init(int argc, char *argv[]) { g->id_manager = NULL; + g->hyper_table = hyper_table_get_or_create(0); + return g; } void for_each_worker(global_state *g, void (*fn)(__cilkrts_worker *, void *), void *data) { for (unsigned i = 0; i < g->options.nproc; ++i) - fn(g->workers[i], data); + if (g->workers[i]) + fn(g->workers[i], data); } void for_each_worker_rev(global_state *g, void (*fn)(__cilkrts_worker *, 
void *), void *data) { unsigned i = g->options.nproc; while (i-- > 0) - fn(g->workers[i], data); + if (g->workers[i]) + fn(g->workers[i], data); } diff --git a/runtime/global.h b/runtime/global.h index e7571a89..5f6a5517 100644 --- a/runtime/global.h +++ b/runtime/global.h @@ -43,11 +43,14 @@ struct rts_options { unsigned int force_reduce; /* can be set via env variable CILK_FORCE_REDUCE */ }; +struct worker_args; + struct global_state { /* globally-visible options (read-only after init) */ struct rts_options options; - unsigned int nworkers; /* size of next 3 arrays */ + unsigned int nworkers; /* size of next 4 arrays */ + struct worker_args *worker_args; struct __cilkrts_worker **workers; /* dynamically-allocated array of deques, one per processor */ struct ReadyDeque *deques; @@ -91,11 +94,14 @@ struct global_state { worker_id *worker_to_index; cilk_mutex index_lock; - // Count of number of disengaged and deprived workers. Upper 32 bits count - // the disengaged workers. Lower 32 bits count the deprived workers. These + // Count of number of disengaged and sentinel workers. Upper 32 bits count + // the disengaged workers. Lower 32 bits count the sentinel workers. These // two counts are stored in a single word to make it easier to update both // counts atomically. 
- _Atomic uint64_t disengaged_deprived __attribute__((aligned(CILK_CACHE_LINE))); + _Atomic uint64_t disengaged_sentinel __attribute__((aligned(CILK_CACHE_LINE))); +#define GET_DISENGAGED(D) ((D) >> 32) +#define GET_SENTINEL(D) ((D) & 0xffffffff) +#define DISENGAGED_SENTINEL(A, B) (((uint64_t)(A) << 32) | (uint32_t)(B)) _Atomic uint32_t disengaged_thieves_futex __attribute__((aligned(CILK_CACHE_LINE))); @@ -106,11 +112,20 @@ struct global_state { struct reducer_id_manager *id_manager; /* null while Cilk is running */ + struct hyper_table *hyper_table; + struct global_sched_stats stats; }; -extern global_state *default_cilkrts; +CHEETAH_INTERNAL extern global_state *default_cilkrts; + +struct worker_args { + worker_id id; + global_state *g; +}; +CHEETAH_INTERNAL +__cilkrts_worker *__cilkrts_init_tls_worker(worker_id i, global_state *g); CHEETAH_INTERNAL void set_nworkers(global_state *g, unsigned int nworkers); CHEETAH_INTERNAL void set_force_reduce(global_state *g, unsigned int force_reduce); diff --git a/runtime/hyperobject_base.h b/runtime/hyperobject_base.h new file mode 100644 index 00000000..5b328088 --- /dev/null +++ b/runtime/hyperobject_base.h @@ -0,0 +1,34 @@ +#ifndef _HYPEROBJECT_BASE +#define _HYPEROBJECT_BASE + +#include +#include +#include +#include + +typedef uint32_t hyper_id_t; + +struct hyperobject_base; + +typedef struct hyperobject_base { + __cilk_identity_fn identity_fn; + __cilk_reduce_fn reduce_fn; + size_t view_size; // rounded to CACHE_LINE + hyper_id_t id_num; + int valid; + void *key; + /* 3 words left in cache line */ +} hyperobject_base; + +// This needs to be exported so cilksan can preempt it. +__attribute__((weak)) void *__cilkrts_hyper_alloc(size_t size); +// This needs to be exported so cilksan can preempt it. 
+__attribute__((weak)) void __cilkrts_hyper_dealloc(void *view, size_t size); +CHEETAH_INTERNAL +void cilkrts_hyper_register(hyperobject_base *hyper); +CHEETAH_INTERNAL +void cilkrts_hyper_unregister(hyperobject_base *hyper); +CHEETAH_INTERNAL +void *cilkrts_hyper_lookup(hyperobject_base *key); + +#endif /* _HYPEROBJECT_BASE */ diff --git a/runtime/hypertable.c b/runtime/hypertable.c new file mode 100644 index 00000000..783d65da --- /dev/null +++ b/runtime/hypertable.c @@ -0,0 +1,976 @@ +/* Open hash table with linear probing, mapping pointers to pointers. + Null keys and values are not allowed. Internally, EMPTY (null) + indicates an empty slot and DELETED a deleted entry. When an entry + is deleted an attempt is made to move another value into the vacated + slot to reduce chain lengths. When the total of chain lengths is too + large the table is rehashed. + + It is an error to delete a key that is not in the table. + + It is an error to insert a key that is already in the table. + (But a deleted key can be re-inserted with a different value.) + + Lookups are attempted without locks; this will not crash but may + give an incorrect result if the table changes during the lookup. + If the table changes, based on a modification count, the lookup + is retried with the lock held. + + In multithreaded environments readers can use a hyper_table_cache, + a one entry cache. (TODO: With lock-free reading this may not + be needed any more.) + + The table includes a count of the number of modifications to signal + that a previous successful lookup has become stale. This is + necessary to avoid the ABA problem. (TODO: Separate insert and + delete counters would allow hits to be invalidated by delete + and misses to be invalidated by insert, potentially halving the + number of cache misses.) + + TODO: Prove that if there are duplicate keys the first key is + paired with the correct value.
*/ + +#include "hypertable.h" + +#include +#include +#include +#include +#include +#include /* for fls() */ + +#define CACHE_LINE 64 + +#ifndef HYPER_TABLE_ASSERT +#define HYPER_TABLE_ASSERT HYPER_TABLE_DEBUG +#endif + +/* For debugging only, to examine code generation for static functions. */ +#ifndef HYPER_TABLE_CODEGEN +#define HYPER_TABLE_CODEGEN 0 +#endif + +/* This should normally be set for performance. */ +#ifndef LOCK_FREE_LOOKUP +#define LOCK_FREE_LOOKUP 1 +#endif + +/* This should normally be set for performance. */ +#ifndef ENABLE_CACHE +#define ENABLE_CACHE 1 +#endif + +/* Emit a store-store barrier that prevents stores from moving + in either direction. atomic_signal_fence(memory_order_release) + is sufficient but slightly stronger than needed. Override it + on ARM to use a store-store barrier instead. + LLVM docs/Atomics.html explains: + "store-store fences are generally not exposed to IR + because they are extremely difficult to use correctly". */ +#ifdef __aarch64__ +#define MEMBAR_ST_ST { \ + atomic_signal_fence(memory_order_acquire); \ + asm ("dmb ishst" : : : "memory"); \ + } +#else +#define MEMBAR_ST_ST \ + atomic_thread_fence(memory_order_release) +#endif + +#define EMPTY 0 +/* For strict C compliance, because 1 might convert to a valid pointer, + define this to be the address of a file scope variable. */ +#define DELETED 1 + +/* Alignment isn't important without a fast atomic 128 bit write + (available on ARM from v8.4). Tell the compiler about alignment + anyway just in case it can do something with the information. */ +struct bucket { + uintptr_t key; /* EMPTY, DELETED, or a user-provided pointer. */ + void *value; +} __attribute__((aligned(2 * sizeof(void*)))); + +#define LOG2_MIN_BUCKETS 5 +#define LOG2_MAX_BUCKETS 14 /* inclusive */ + +#define BUCKET(TABLE, SIZE) \ + &(TABLE)->buckets[(SIZE) - LOG2_MIN_BUCKETS] + +/* An integer big enough to hold 2^LOG2_MAX_BUCKETS (inclusive), + for internal use only. The API uses size_t. 
Making it signed + allows reserving negative values for invalid indices. An + unsigned integer could hold an extra bit. There are small + differences in code generation for unsigned or 16 bit indices. */ + +#if 0 /* signed implementation */ +typedef int32_t index_t; +#define INVALID_INDEX ((index_t)-1) /* or (index_t)~(index_t)0 */ +#define IS_INVALID(INDEX) ((INDEX) < 0) /* or !!(index_t)~(INDEX) */ +#define IS_VALID(INDEX) ((INDEX) >=0) /* or !(index_t)~(INDEX) */ +#else /* unsigned implementation */ +typedef uint32_t index_t; +#define INVALID_INDEX ((index_t)~(index_t)0) +#define IS_INVALID(INDEX) __builtin_expect(((INDEX) >> (sizeof(index_t) * 8 - 1)), 0) +#define IS_VALID(INDEX) !IS_INVALID(INDEX) +#endif + +#define BUSY(GEN) __builtin_expect((GEN) & 1U, 0) + +#define ALLOC_FAILED(PTR) __builtin_expect(!(PTR), 0) +#define NO_BUCKET(PTR) __builtin_expect(!(PTR), 0) + +struct hyper_table { + /* A count of changes to the table, with the low bit meaning + the table is busy and readers should wait or acquire the + lock. The value increments once at the beginning and once + at the end of each modification. */ + unsigned long _Atomic gen; + + /* Log base 2 of capacity. This field is an index into buckets[] + after subtracting LOG2_MIN_BUCKETS. */ + int _Atomic log_capacity; + + /* Number of values in the table. */ + index_t entries; + + /* A measure of total chain length added since last rehash, used + to decide when another rehash is required. */ + index_t waste; + + /* For statistics. */ + unsigned int rehashes; + + /* The cost of being lock-free most of the time is having to + keep around old storage. Each array element is null or a + pointer to an array of size 2^(index + LOG2_MIN_BUCKETS). */ + struct bucket *_Atomic buckets[LOG2_MAX_BUCKETS + 1 - LOG2_MIN_BUCKETS]; + + /* Number of child caches. Currently unused. */ + unsigned int _Atomic caches; + + /* The lock serializes additions, deletions, and rehashes. 
The only + field that can be modified without holding the lock is caches. + Lookups only take a lock if lock-free lookup detects a race. */ + pthread_mutex_t lock; + +} __attribute__((aligned(CACHE_LINE))); + +static enum hyper_table_error +hyper_table_insert_locked(struct hyper_table *, const void *, void *) + __attribute__((nonnull)); +static struct bucket *hyper_table_lookup_locked(struct hyper_table *, + const void *) + __attribute__((nonnull)); +static void *hyper_table_remove_locked(struct hyper_table *, const void *_p) + __attribute__((nonnull)); + +#if defined(_ISOC11_SOURCE) || __FreeBSD__ >= 10 || \ + __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 101500 +#define hyper_aligned_alloc(A, S) aligned_alloc(A, S) +#else +static void *hyper_aligned_alloc(size_t alignment, size_t size) +{ + void *ptr; + if (posix_memalign(&ptr, alignment, size) == 0) + return ptr; + return 0; +} +#endif + +static void lock_table(struct hyper_table *table) +{ + int error = pthread_mutex_lock(&table->lock); + assert(!error); +} + +static void unlock_table(struct hyper_table *table) +{ +#if HYPER_TABLE_ASSERT + assert(!BUSY(table->gen)); +#endif + int error = pthread_mutex_unlock(&table->lock); + assert(!error); +} + +#if HYPER_TABLE_ASSERT +__attribute__((noinline)) +static enum hyper_table_error fail(struct hyper_table *table, + enum hyper_table_error code) +{ + fprintf(stdout, "Operation failure code %d\n", (int)code); + hyper_table_dump(stdout, table); + fflush(stdout); + return code; +} +#else +#define fail(table, code) (code) +#endif + +/* Return a hash function where the low bits are hopefully random. + The caller is responsible for reducing the hash to the desired range. */ +static index_t calc_hash(uintptr_t key_in) +{ + uintptr_t key = key_in; + /* TODO: Improve this. Knuth likes the golden ratio for hashing. */ + /* Mac on x86 has addresses like 0x0000602000000210 with lots + of consecutive zero bits between groups of nonzero. 
*/ + if (sizeof key > 4) + key += __builtin_rotateleft64(key, 21); + else + key += __builtin_rotateleft32(key, 21); + if (sizeof key > 4) + return (key * 0x595a5b5c5d5e5f53) >> 30; + else + return (key * 0x5a5a5a5b) >> 10; +} + +struct hyper_table *hyper_table_create(size_t capacity_req) +{ + int start_size = LOG2_MIN_BUCKETS; + if (3 * capacity_req >= (size_t)1U << (LOG2_MAX_BUCKETS + 1)) { + start_size = LOG2_MAX_BUCKETS; + } else if (capacity_req <= ((size_t)3U << (LOG2_MIN_BUCKETS - 1))) { + start_size = LOG2_MIN_BUCKETS; + } else { + /* Multiply by 3/2 for rounding. */ + long adjusted = capacity_req * 3; /* / 2 implied by -2 below */ +#if defined __linux__ || defined __APPLE__ /* No inlined flsl. */ + start_size = 8 * sizeof(long) - 2 - __builtin_clzl(adjusted); +#else + start_size = flsl(adjusted) - 2; +#endif + assert(start_size >= LOG2_MIN_BUCKETS && start_size < LOG2_MAX_BUCKETS); + } + + size_t capacity = (size_t)1 << start_size; + /* This needs to be a multiple of CACHE_LINE or aligned_alloc will fail. */ + size_t bucket_bytes = capacity * sizeof(struct bucket); + struct bucket *buckets = hyper_aligned_alloc(CACHE_LINE, bucket_bytes); + if (ALLOC_FAILED(buckets)) + return 0; + struct hyper_table *table = + hyper_aligned_alloc(CACHE_LINE, sizeof(struct hyper_table)); + if (ALLOC_FAILED(table)) + goto cleanup; + memset(table, 0, sizeof(struct hyper_table)); + if (pthread_mutex_init(&table->lock, 0)) + goto cleanup; + memset(buckets, 0, bucket_bytes); + atomic_store_explicit(&table->log_capacity, start_size, memory_order_relaxed); + /* Integer fields were set to zero by memset above. Also assume memset + nulled pointers, which is not strictly required by the C standard. 
*/ + atomic_store_explicit(BUCKET(table, start_size), buckets, + memory_order_relaxed); + atomic_store_explicit(&table->gen, 2, memory_order_release); + return table; + cleanup: + free(buckets); + free(table); + return 0; +} + +void hyper_table_destroy(struct hyper_table *table) +{ + pthread_mutex_destroy(&table->lock); + for (unsigned i = 0; i < sizeof table->buckets / sizeof table->buckets[0]; ++i) { + struct bucket *b = + atomic_load_explicit(&table->buckets[i], memory_order_relaxed); + atomic_store_explicit(&table->buckets[i], 0, memory_order_relaxed); + free(b); + } + free(table); +} + +/* Called by remove and lookup to find a bucket that holds the given key, + stopping the search when an empty bucket is found. The caller must + handle a null value. */ +static struct bucket * +find_bucket(struct bucket *buckets, int log_capacity, + index_t hash, uintptr_t key) +{ + index_t capacity = (index_t)1 << log_capacity; + index_t mask = capacity - 1U; + hash &= mask; + index_t index = hash; + do { + struct bucket *bucket = &buckets[index]; + /* With reasonable load factors the first bucket will match. */ + if (__builtin_expect(bucket->key == key, 1)) + return bucket; + /* Predicted false for remove and true for lookup. */ + if (bucket->key == EMPTY) + return 0; + index = (index + 1) & mask; + } while (index != hash); + return 0; +} + +/* Set the busy flag in the low bit of table->gen to inform readers + that the table is being modified. Return the old generation number + before setting the flag. */ +static unsigned long mark_busy(struct hyper_table *table) +{ + unsigned long gen = atomic_load_explicit(&table->gen, memory_order_relaxed); +#if HYPER_TABLE_ASSERT + assert(!BUSY(gen)); +#endif + /* The store below is meant to act like a store with acquire semantics + (which does not exist in isolation). The store-store barrier ensures + that the set of the busy bit is visible before any changes to data. 
*/ + atomic_store_explicit(&table->gen, gen + 1, memory_order_relaxed); + MEMBAR_ST_ST; + return gen; +} + +static void mark_free(struct hyper_table *table, unsigned long old_gen) +{ + /* Readers will load table->gen with acquire semantics. If the value + is unchanged from the start of the read operation then the table + has not been modified. */ + atomic_store_explicit(&table->gen, old_gen + 2, memory_order_release); +} + +/* Find a bucket that is empty or deleted. This function is called to + insert a key that is known not to be in the table. If insert instead + meant modify the function would need to continue past deleted buckets + to look for a matching key. */ +static struct bucket * +find_insert_point(struct bucket *buckets, index_t capacity, + index_t start, index_t *waste) +{ + index_t mask = capacity - 1U; + index_t index = start; + index_t waste0 = *waste; + for (index_t i = 0; i < capacity; ++i) { + struct bucket *bucket = &buckets[index++ & mask]; + uintptr_t key = bucket->key; + /* With reasonable load factors the first bucket will be available. */ + if (__builtin_expect(key == EMPTY || key == DELETED, 1)) { + *waste = waste0 + i; + return bucket; + } + } + return 0; +} + +/* Copy all valid entries to a new bucket list. Assert that the + number of entries copied is the same number thought to be in + the table. Return a measure of wasted space. */ +static void copy(struct bucket *restrict to_ptr, index_t to_size, + const struct bucket *restrict from_ptr, index_t from_size, + index_t expected) +{ + index_t waste = 0; + index_t new_entries = 0; + for (index_t from = 0; from < from_size; ++from) { + const struct bucket *b = &from_ptr[from]; + uintptr_t key = b->key; + if (key == EMPTY || key == DELETED) + continue; + index_t hash = calc_hash(key); + /* In this function find_insert_point should never fail because the + new table is guaranteed to be big enough. 
*/ + *find_insert_point(to_ptr, to_size, hash, &waste) = *b; + ++new_entries; + } + assert(new_entries == expected); +} + +/* Effectively, erase the table and re-insert all entries in an attempt + to reduce chain lengths. */ +static void table_rehash(struct hyper_table *table) +{ + /* The lock is held so loads can use relaxed order. */ + int log_capacity = + atomic_load_explicit(&table->log_capacity, memory_order_relaxed); + struct bucket *old_buckets = + atomic_load_explicit(BUCKET(table, log_capacity), memory_order_relaxed); + assert(log_capacity <= LOG2_MAX_BUCKETS); + size_t capacity = (size_t)1 << log_capacity; + + struct bucket *tmp = + hyper_aligned_alloc(CACHE_LINE, capacity * sizeof(struct bucket)); + if (ALLOC_FAILED(tmp)) { + table->waste = 0; /* avoid repeated futile rehash attempts */ + return; + } + memset(tmp, 0, capacity * sizeof(struct bucket)); + copy(tmp, capacity, old_buckets, capacity, table->entries); + unsigned long old_gen = mark_busy(table); + memcpy(old_buckets, tmp, capacity * sizeof(struct bucket)); + table->waste = 0; + ++table->rehashes; + mark_free(table, old_gen); /* includes release fence */ + free(tmp); + return; +} + +/* Return null on failure, otherwise the new bucket list. */ +static struct bucket * +table_grow(struct hyper_table *table) +{ + /* The lock is held so loads can use relaxed order. */ + int old_log_capacity = + atomic_load_explicit(&table->log_capacity, memory_order_relaxed); + struct bucket *old_buckets = + atomic_load_explicit(BUCKET(table, old_log_capacity), memory_order_relaxed); + assert(old_log_capacity < LOG2_MAX_BUCKETS); + int new_log_capacity = old_log_capacity + 1; + size_t old_capacity = (size_t)1 << old_log_capacity; + size_t new_capacity = (size_t)1 << new_log_capacity; + + assert(new_log_capacity > old_log_capacity); + + /* Reuse an old array if there is one. This could happen + when shrinking is implemented. 
*/ + struct bucket *new_buckets = + atomic_load_explicit(BUCKET(table, new_log_capacity), memory_order_relaxed); + if (!new_buckets) { + new_buckets = + hyper_aligned_alloc(CACHE_LINE, + new_capacity * sizeof(struct bucket)); + if (ALLOC_FAILED(new_buckets)) + return 0; + memset(new_buckets, 0, new_capacity * sizeof(struct bucket)); + /* Publish the new pointer after the memory is cleared. */ + atomic_store_explicit(BUCKET(table, new_log_capacity), + new_buckets, memory_order_release); + } + + copy(new_buckets, new_capacity, old_buckets, old_capacity, table->entries); + + table->waste = 0; + ++table->rehashes; + + /* First, mark the table busy so no readers come in between the + next two stores. */ + unsigned long old_gen = mark_busy(table); + + /* Force all writes to complete before the bucket pointer goes live. */ + atomic_store_explicit(&table->log_capacity, new_log_capacity, + memory_order_release); + + mark_free(table, old_gen); + + return new_buckets; +} + +enum hyper_table_error +hyper_table_insert(struct hyper_table *table, const void *key, void *value) +{ + if (__builtin_expect(!key, 0) || __builtin_expect(!value, 0)) + return HYPER_NULL; + lock_table(table); + enum hyper_table_error error = hyper_table_insert_locked(table, key, value); + unlock_table(table); + return error; +} + +/* Unlike lookup, this must be called with the lock held. */ +static enum hyper_table_error +hyper_table_insert_locked(struct hyper_table *restrict table, const void *key_p, + void *restrict value) +{ + /* Lock is held, relaxed order is fine. */ + int log_capacity = + atomic_load_explicit(&table->log_capacity, memory_order_relaxed); + struct bucket *buckets = + atomic_load_explicit(BUCKET(table, log_capacity), memory_order_relaxed); + index_t capacity = (index_t)1 << log_capacity; + /* Keep the load factor .5 or less if possible. If chain lengths are + growing long, which should be rare, rehash in place. 
*/ + if (log_capacity < LOG2_MAX_BUCKETS && + __builtin_expect(table->entries > capacity / 2, 0)) { + capacity *= 2; + buckets = table_grow(table); + /* Strictly speaking this error is recoverable, but inability to + allocate a new hash table indicates memory is about to run out. + Also, inability to recreate the old hash table is very unlikely. */ + if (ALLOC_FAILED(buckets)) + return fail(table, HYPER_NOMEM); + } else if (__builtin_expect(table->waste * 3UL > capacity, 0)) { + table_rehash(table); /* bucket pointer unchanged */ + } + uintptr_t key = (uintptr_t)key_p; + index_t hash = calc_hash(key); + index_t waste = table->waste; + struct bucket *bucket = find_insert_point(buckets, capacity, hash, &waste); + if (NO_BUCKET(bucket)) + return fail(table, HYPER_FULL); + index_t entries = table->entries; + /* Up to now lookups can proceed in parallel with this insertion, + but filling the bucket is not atomic. */ + unsigned long old_gen = mark_busy(table); + /* These stores (before the release in mark_free) can happen in any order. */ + bucket->key = key; + bucket->value = value; + table->entries = entries + 1; + table->waste = waste; + mark_free(table, old_gen); + return HYPER_OK; +} + +void *hyper_table_remove(struct hyper_table *table, const void *key) +{ + lock_table(table); + void *value = hyper_table_remove_locked(table, key); + unlock_table(table); + return value; +} + +void *hyper_table_remove_locked(struct hyper_table *table, const void *key_p) +{ + /* Lock is held, relaxed order is fine for both loads. */ + int log_capacity = + atomic_load_explicit(&table->log_capacity, memory_order_relaxed); + struct bucket *buckets = + atomic_load_explicit(BUCKET(table, log_capacity), memory_order_relaxed); + uintptr_t key = (uintptr_t)key_p; + index_t hash = calc_hash(key); + struct bucket *bucket = find_bucket(buckets, log_capacity, hash, key); +#if HYPER_TABLE_ASSERT + /* In anticipated uses of this table, the entry must exist or outside + bookkeeping has gone wrong. 
*/ + assert(!NO_BUCKET(bucket)); +#endif + if (NO_BUCKET(bucket)) + return 0; + + index_t index = bucket - buckets; + index_t mask = ((index_t)1 << log_capacity) - 1; + index_t this_target = hash & mask; + index_t entries = table->entries; + index_t waste = table->waste; + if (this_target != index && waste > 0) { + --waste; + } + unsigned long old_gen = mark_busy(table); + + table->entries = entries - 1; + + void *value = bucket->value; + bucket->key = DELETED; + bucket->value = 0; + /* While the lock is held do some cleanup in the vicinity of the + deleted entry: + 1. If the next bucket is empty mark this one empty, and also + the previous bucket if that bucket is deleted. + 2. If the next bucket wants to be earlier in the chain, move it up + to the newly vacated slot. */ + index_t prev = (index - 1) & mask; + index_t next = (index + 1) & mask; + uintptr_t next_key = buckets[next].key; + if (next_key == EMPTY) { + buckets[index].key = EMPTY; /* deleted -> empty */ + if (buckets[prev].key == DELETED) + buckets[prev].key = EMPTY; /* deleted -> empty */ + goto done; + } + ++waste; /* a new deleted bucket has been created */ + if (buckets[next].key == DELETED) + goto done; + /* Where does the next bucket want to be? */ + index_t next_target = calc_hash(next_key) & mask; + /* If the next bucket wants to be earlier, advance it into this slot. + A simple equality test is good enough here. If the bucket doesn't + want to be where it is, one place earlier is better. */ + if (next_target != next) { + buckets[index] = buckets[next]; + buckets[next].key = DELETED; /* full -> deleted */ + } + done: + table->waste = waste; + mark_free(table, old_gen); + return value; +} + +void *hyper_table_lookup(struct hyper_table *table, const void *key) +{ +#if LOCK_FREE_LOOKUP + unsigned long gen1 = + atomic_load_explicit(&table->gen, memory_order_acquire); + /* See hyper_table_cache_lookup for comments. 
*/
+    if (!BUSY(gen1)) {
+        int log_capacity =
+            atomic_load_explicit(&table->log_capacity, memory_order_relaxed);
+        struct bucket *buckets =
+            atomic_load_explicit(BUCKET(table, log_capacity),
+                                 memory_order_relaxed);
+        uintptr_t key_i = (uintptr_t)key;
+        struct bucket *bucket =
+            find_bucket(buckets, log_capacity, calc_hash(key_i), key_i);
+        if (!NO_BUCKET(bucket)) {
+            void *result = bucket->value;
+            atomic_thread_fence(memory_order_acquire);
+            unsigned long gen2 =
+                atomic_load_explicit(&table->gen, memory_order_relaxed);
+            if (__builtin_expect(gen1 == gen2, 1)) {
+                return result;
+            }
+        }
+    }
+#endif
+
+    /* Slow path: the table was busy, the generation changed during the
+       read, or the key was not found without the lock. */
+    lock_table(table);
+    struct bucket *bucket = hyper_table_lookup_locked(table, key);
+    void *value = bucket ? bucket->value : 0;
+    unlock_table(table);
+    return value;
+}
+
+/* Return the bucket holding key_p, or null if find_bucket reports no match.
+   Callers in this file take the table lock first, which is why relaxed
+   loads are used here. */
+static struct bucket *
+hyper_table_lookup_locked(struct hyper_table *table, const void *key_p)
+{
+    int log_capacity =
+        atomic_load_explicit(&table->log_capacity, memory_order_relaxed);
+    struct bucket *buckets =
+        atomic_load_explicit(BUCKET(table, log_capacity), memory_order_relaxed);
+    uintptr_t key = (uintptr_t)key_p;
+    return find_bucket(buckets, log_capacity, calc_hash(key), key);
+}
+
+/* Call fn(arg, key, value) once for every entry in the table.  The entries
+   are copied to a local array while the lock is held and fn is invoked
+   only after the lock has been released. */
+void hyper_table_iter(struct hyper_table *table,
+                      void (*fn)(void *, const void *, void *),
+                      void *arg)
+{
+    lock_table(table);
+    index_t size = table->entries;
+    if (size == 0) {
+        /* Nothing to visit.  Returning here also avoids declaring a
+           zero-length variable length array, which is undefined in C. */
+        unlock_table(table);
+        return;
+    }
+    int log_capacity =
+        atomic_load_explicit(&table->log_capacity, memory_order_relaxed);
+    struct bucket *buckets =
+        atomic_load_explicit(BUCKET(table, log_capacity), memory_order_relaxed);
+    struct bucket tmp[size];
+    index_t out = 0;
+    index_t capacity = (index_t)1 << log_capacity;
+    for (index_t i = 0; i < capacity; ++i) {
+        if (buckets[i].key != EMPTY && buckets[i].key != DELETED)
+            tmp[out++] = buckets[i];
+    }
+    assert(out == table->entries);
+    unlock_table(table);
+    for (index_t i = 0; i < size; ++i)
+        fn(arg, (const void *)tmp[i].key, tmp[i].value);
+}
+
+#if HYPER_TABLE_DEBUG
+/* Write a text description of the table to out: one summary line, then one
+   line per non-empty bucket.  Deleted buckets print as "link"; an entry that
+   is not in the slot its hash selects also prints that target slot.
+   This function does not take the table lock. */
+void hyper_table_dump(FILE *out, const struct hyper_table *table)
+{
+    int log_capacity =
+        atomic_load_explicit(&table->log_capacity, memory_order_acquire);
+    struct bucket *buckets =
+        atomic_load_explicit(BUCKET(table, log_capacity), memory_order_consume);
+    index_t capacity = (index_t)1 << log_capacity;
+    /* %p requires a pointer to void, so the table pointer is cast. */
+    fprintf(out,
+            "Table %p size %lu capacity %lu waste %lu rehash %u gen %lu\n",
+            (const void *)table, (unsigned long)table->entries,
+            (unsigned long)capacity,
+            (unsigned long)table->waste,
+            table->rehashes, table->gen);
+    for (index_t i = 0; i < capacity; ++i) {
+        if (buckets[i].key == EMPTY)
+            continue;
+        if (buckets[i].key == DELETED) {
+            fprintf(out, "[%5u] = link\n", (unsigned)i);
+            continue;
+        }
+        index_t target = calc_hash((uintptr_t)buckets[i].key) & (capacity - 1);
+        fprintf(out, "[%5u]: %p -> %p", (unsigned)i,
+                (void *)buckets[i].key, buckets[i].value);
+        if (target != i)
+            fprintf(out, " (target %3lu)", (unsigned long)target);
+        fputc('\n', out);
+    }
+    fflush(out);
+}
+#endif
+
+/* Once global_table is non-null its value will not change.
+
+   In order to ensure that readers of the table see the
+   initialization of the mutex, the value is published
+   with release semantics and loaded with consume semantics.
+   Consume order tells the compiler to tell the processor
+   not to allow any loads based off of [table] to be moved
+   before [table] is loaded.  In practice (1) most processors
+   do this automatically, (2) the stupid compiler promotes
+   consume to acquire anyway.
+
+   This ordering has no effect on x86 code generation: atomic
+   compare and exchange is always a full barrier, and causality
+   prevents any accesses based off of [global_table] from being
+   moved before the load.
+
+   TODO: Performance should be tested on ARM.  */
+// FIXME: Memory leak of *global_table.
+static struct hyper_table *_Atomic global_table; + +struct hyper_table *hyper_table_get_or_create(size_t capacity) +{ + struct hyper_table *table = + atomic_load_explicit(&global_table, memory_order_consume); + if (!ALLOC_FAILED(table)) + return table; + table = hyper_table_create(capacity); + if (ALLOC_FAILED(table)) + return 0; + /* If [global_table] is still null, store [table] into [global_table]. + Otherwise, copy [global_table] into [tmp]. */ + struct hyper_table *tmp = 0; + if (__c11_atomic_compare_exchange_strong(&global_table, &tmp, table, + memory_order_release, + memory_order_consume)) + return table; + hyper_table_destroy(table); + return tmp; +} + +/* A simple two entry cache. In order to prevent an ABA problem a + cache lookup reads a word from the parent table to check whether + the table has changed. If a lookup races with a entry creation + or deletion the result is undefined. + + The structure needs to fill a cache line to prevent false sharing. */ +struct hyper_table_cache { + struct hyper_table *parent; +#if ENABLE_CACHE + unsigned long gen; + struct bucket entry[2]; + unsigned int count; +#endif +} __attribute__((aligned(CACHE_LINE))); + +static void hyper_table_cache_invalidate(struct hyper_table_cache *c) +{ +#if ENABLE_CACHE + c->entry[0].key = 0; + c->entry[0].value = 0; + c->entry[1].key = 0; + c->entry[1].value = 0; + c->count = 0; /* any value will do */ + c->gen = 1; /* 1 is never valid because the busy bit is set */ +#endif +} + +struct hyper_table_cache *hyper_table_cache_create(struct hyper_table *parent) +{ + struct hyper_table_cache *c = + hyper_aligned_alloc(__alignof__(struct hyper_table_cache), + sizeof(struct hyper_table_cache)); + if (ALLOC_FAILED(c)) + return 0; + c->parent = parent; + hyper_table_cache_invalidate(c); + atomic_fetch_add_explicit(&parent->caches, 1, memory_order_acquire); + return c; +} + +void hyper_table_cache_destroy(struct hyper_table_cache *c) +{ + struct hyper_table *parent = c->parent; + c->parent = 0; 
+ hyper_table_cache_invalidate(c); + atomic_fetch_sub_explicit(&parent->caches, 1, memory_order_release); + free(c); +} + +static void * +hyper_table_cache_lookup_slow(struct hyper_table_cache *cache, + struct hyper_table *table, + const void *key_p) +{ + lock_table(table); + struct bucket *bucket = hyper_table_lookup_locked(table, key_p); + /* Relaxed order is fine with the lock held. */ + unsigned long gen = atomic_load_explicit(&table->gen, memory_order_relaxed); + void *value = 0; + if (bucket) { +#if ENABLE_CACHE + unsigned int e = 1U & ++cache->count; + cache->entry[e] = *bucket; + cache->gen = gen; +#endif + value = bucket->value; + } + unlock_table(table); + return value; +} + +void *hyper_table_cache_lookup(struct hyper_table_cache *cache, const void *key_p) +{ + if (__builtin_expect(!key_p, 0)) + return 0; + + /* On memory ordering: + + Table writers guarantee that there are no writes to the table + between a write of table->gen with low bit clear and the next + write to table->gen. Writes to table->gen with low bit clear + have release semantics. + + The first load of table->gen can find the low bit set or clear. + + If the bit is clear, the acquire pairs with the store-release of + the last write to table->gen to ensure the table is consistent. + + If the bit is set, a lock is taken to ensure consistency with + writers. Writers also take the lock. + + Following a load of table->gen with low bit clear, a second + load is issued at the end of the lookup fast path. If it finds + a different value, a lock is taken as above. + + What remains is to ensure that if both loads of table->gen + return the same value then values read in between them are + a consistent view of the table with no writes to it. + + An acquire fence before the second load pairs with the store-release + of the new value of table->gen. If the load of table->gen does not + see the new value, then none of the earlier loads saw stores that + preceded the write to table->gen. 
*/ + + struct hyper_table *table = cache->parent; + + /* The cache hit case can use a relaxed load because it makes + no other accesses to the main table. The cache miss flow + depends on acquire semantics. */ + unsigned long gen1 = + atomic_load_explicit(&table->gen, memory_order_acquire); + +#if ENABLE_CACHE + if (__builtin_expect(cache->gen == gen1, 1)) { + uintptr_t key0 = cache->entry[0].key; + uintptr_t key1 = cache->entry[1].key; + if (key0 == (uintptr_t)key_p) + return cache->entry[0].value; + if (key1 == (uintptr_t)key_p) + return cache->entry[1].value; + } else { + hyper_table_cache_invalidate(cache); + } +#endif + +#if LOCK_FREE_LOOKUP + /* Attempt lock-free lookup first. */ + if (!BUSY(gen1)) { + /* Arguably the load of log_capacity should have memory_order_consume, but + 1: That only matters for a few unsupported DEC ALPHA chips + (where stores issued in order remotely may appear out of order locally). + 2: Consume is promoted to acquire, which has a cost. */ + int log_capacity = + atomic_load_explicit(&table->log_capacity, memory_order_relaxed); + struct bucket *buckets = + atomic_load_explicit(BUCKET(table, log_capacity), + memory_order_relaxed); + uintptr_t key = (uintptr_t)key_p; + struct bucket *bucket = + find_bucket(buckets, log_capacity, calc_hash(key), key); + if (!NO_BUCKET(bucket)) { +#if ENABLE_CACHE + /* Optimistically save the value in the cache. The cache invalidate + call below will clean up if the value is incorrect. */ + unsigned int e = 1U & ++cache->count; + cache->entry[e] = *bucket; + cache->gen = gen1; +#endif + void *result = bucket->value; + /* See comment above on memory ordering. 
*/ + atomic_thread_fence(memory_order_acquire); + unsigned long gen2 = + atomic_load_explicit(&table->gen, memory_order_relaxed); + if (__builtin_expect(gen1 == gen2, 1)) { + return result; + } + } + } +#endif + + return hyper_table_cache_lookup_slow(cache, table, key_p); +} + +void *hyper_table_cache_remove(struct hyper_table_cache *cache, const void *key) +{ + if (__builtin_expect(!key, 0)) + return 0; + struct hyper_table *table = cache->parent; + lock_table(table); + hyper_table_cache_invalidate(cache); + void *value = hyper_table_remove_locked(table, key); + unlock_table(table); + return value; +} + +enum hyper_table_error +hyper_table_cache_insert(struct hyper_table_cache *cache, const void *key, + void *value) +{ + if (__builtin_expect(!key, 0) || __builtin_expect(!value, 0)) + return HYPER_NULL; + struct hyper_table *table = cache->parent; + lock_table(table); + enum hyper_table_error error = + hyper_table_insert_locked(table, key, value); + unsigned long gen = atomic_load_explicit(&table->gen, memory_order_relaxed); + unlock_table(table); +#if ENABLE_CACHE + /* Reset the cache to hold only the newly added entry in slot 0, + with slot 1 being the next used. 
*/
+    if (error == HYPER_OK) {
+        cache->count = 0;
+        cache->gen = gen;
+        cache->entry[0].key = (uintptr_t)key;
+        cache->entry[0].value = value;
+        cache->entry[1].key = 0;
+        cache->entry[1].value = 0;
+    } else {
+        /* The insert failed, so the pair is not in the table.  Caching it
+           with the current generation would make later cached lookups
+           return a value the table does not hold. */
+        hyper_table_cache_invalidate(cache);
+    }
+#endif
+    return error;
+}
+
+#if HYPER_TABLE_CODEGEN
+/* Non-static wrappers that forward to the file-local helpers of the same
+   name; presumably present so the helpers' generated code can be examined
+   (TODO confirm intent). */
+void copy_debug(struct bucket *restrict to_ptr, index_t to_size,
+                const struct bucket *restrict from_ptr, index_t from_size,
+                index_t expected)
+{
+    copy(to_ptr, to_size, from_ptr, from_size, expected);
+}
+
+struct bucket *find_bucket_debug(struct bucket *buckets, int log_capacity,
+                                 index_t hash, uintptr_t key)
+{
+    return find_bucket(buckets, log_capacity, hash, key);
+}
+
+unsigned long mark_busy_debug(struct hyper_table *table)
+{
+    return mark_busy(table);
+}
+#endif
+
+/* Map an error code to a constant description; never returns null. */
+const char *hyper_table_error_string(enum hyper_table_error code)
+{
+    switch (code) {
+    case HYPER_OK: return "no error";
+    case HYPER_NOT_FOUND: return "key not found";
+    case HYPER_NULL: return "null key";
+    case HYPER_NOMEM: return "out of memory";
+    case HYPER_FULL: return "table full";
+    default: return "unknown error";
+    }
+}
+
+/* Return the entry count.  The field is read without taking the lock. */
+size_t hyper_table_size(const struct hyper_table *table)
+{
+    return table->entries;
+}
+
+/* Return the slot the key hashes to at the table's current capacity. */
+size_t hyper_table_index(const struct hyper_table *table, const void *key)
+{
+    int log_capacity =
+        atomic_load_explicit(&table->log_capacity, memory_order_relaxed);
+    return calc_hash((uintptr_t)key) & (((index_t)1 << log_capacity) - 1);
+}
+
+/* Return the number of buckets in the current bucket array. */
+size_t hyper_table_capacity(const struct hyper_table *table)
+{
+    int log_capacity =
+        atomic_load_explicit(&table->log_capacity, memory_order_relaxed);
+    return (index_t)1 << log_capacity;
+}
diff --git a/runtime/hypertable.h b/runtime/hypertable.h
new file mode 100644
index 00000000..d5b379d4
--- /dev/null
+++ b/runtime/hypertable.h
@@ -0,0 +1,108 @@
+#ifdef HYPER_TABLE_DEBUG
+#include <stdio.h>
+#endif
+#include <stddef.h>
+
+#ifdef HYPER_TABLE_HIDDEN
+#define HYPER_TABLE_HIDE __attribute__((visibility("hidden"), nothrow))
+#define HYPER_TABLE_OP __attribute__((visibility("hidden"), nothrow, nonnull(1)))
+#define 
HYPER_TABLE_CHECK __attribute__((visibility("hidden"), nothrow, warn_unused_result, nonnull(1))) +#define HYPER_TABLE_ALLOC __attribute__((visibility("hidden"), nothrow, malloc)) +#else +#define HYPER_TABLE_HIDE __attribute__((nothrow)) +#define HYPER_TABLE_OP __attribute__((nothrow, nonnull(1))) +#define HYPER_TABLE_CHECK __attribute__((warn_unused_result, nothrow, nonnull(1))) +#define HYPER_TABLE_ALLOC __attribute__((malloc, nothrow)) +#endif + +enum hyper_table_error { + HYPER_OK, + HYPER_NOT_FOUND, + HYPER_FULL, + HYPER_NULL, /* user error: null key */ + HYPER_NOMEM, /* unable to allocate memory */ +}; + +struct hyper_table; + +/* Get the unique global hyperobject table, creating it if it does + not already exist. */ +HYPER_TABLE_HIDE +struct hyper_table *hyper_table_get_or_create(size_t capacity); + +/* Create a new hyperobject table. */ +HYPER_TABLE_ALLOC +struct hyper_table *hyper_table_create(size_t capacity); + +/* Destroy a hyperobject table created by hyper_table_create. */ +HYPER_TABLE_OP +void hyper_table_destroy(struct hyper_table *); + +/* Insert a new entry. The key must not be in the table already. */ +HYPER_TABLE_CHECK +enum hyper_table_error +hyper_table_insert(struct hyper_table *, const void *key, void *value); + +/* Remove a key and return the old value. The key must be in the table. */ +HYPER_TABLE_OP +void *hyper_table_remove(struct hyper_table *, const void *key); + +/* Return the value for a key, or null if it is not present. */ +HYPER_TABLE_OP +void *hyper_table_lookup(struct hyper_table *, const void *key); + +/* Return the number of keys in the table. */ +HYPER_TABLE_OP +size_t hyper_table_size(const struct hyper_table *); + +/* Apply a function to every table entry. */ +HYPER_TABLE_OP +void hyper_table_iter(struct hyper_table *, + void (*fn)(void *, const void *, void *), + void *); + +/* Return the current bucket list length, which will not be less + than hyper_table_size(). This is intended for testing. 
*/ +HYPER_TABLE_OP +size_t hyper_table_capacity(const struct hyper_table *); + +/* Return the index where a key belongs. The value will be less + than hyper_table_capacity(). This is intended for testing. */ +HYPER_TABLE_OP +size_t hyper_table_index(const struct hyper_table *, const void *); + +#if HYPER_TABLE_DEBUG +/* Print a text representation of the table. */ +HYPER_TABLE_HIDE +void hyper_table_dump(FILE *out, const struct hyper_table *table); +#endif + +struct hyper_table_cache; + +/* Create, destroy, insert, lookup, and remove work like the + functions above except they use a cache. */ + +HYPER_TABLE_CHECK +struct hyper_table_cache *hyper_table_cache_create(struct hyper_table *); + +HYPER_TABLE_OP +void hyper_table_cache_destroy(struct hyper_table_cache *); + +HYPER_TABLE_CHECK +enum hyper_table_error +hyper_table_cache_insert(struct hyper_table_cache *, const void *key, + void *value); + +HYPER_TABLE_OP +void *hyper_table_cache_lookup(struct hyper_table_cache *, const void *key); + +HYPER_TABLE_OP +void *hyper_table_cache_remove(struct hyper_table_cache *, const void *key); + +HYPER_TABLE_HIDE +const char *hyper_table_error_string(enum hyper_table_error error) + __attribute__((returns_nonnull)); + +#undef HYPER_TABLE_HIDE +#undef HYPER_TABLE_CHECK +#undef HYPER_TABLE_ALLOC diff --git a/runtime/init.c b/runtime/init.c index a07f3ad7..f76c5e7f 100644 --- a/runtime/init.c +++ b/runtime/init.c @@ -19,9 +19,11 @@ #endif #include +#include "cilk-internal.h" #include "debug.h" #include "fiber.h" #include "global.h" +#include "hypertable.h" #include "init.h" #include "local.h" #include "readydeque.h" @@ -31,31 +33,35 @@ #include "reducer_impl.h" -extern __thread bool is_boss_thread; - -#ifdef __FreeBSD__ +#if defined __FreeBSD__ && __FreeBSD__ < 13 typedef cpuset_t cpu_set_t; #endif -static local_state *worker_local_init(global_state *g) { - local_state *l = (local_state *)calloc(1, sizeof(local_state)); +static local_state *worker_local_init(local_state *l, 
global_state *g) { l->shadow_stack = (__cilkrts_stack_frame **)calloc( g->options.deqdepth, sizeof(struct __cilkrts_stack_frame *)); for (int i = 0; i < JMPBUF_SIZE; i++) { l->rts_ctx[i] = NULL; } + l->hyper_table = + g->hyper_table ? hyper_table_cache_create(g->hyper_table) : NULL; l->fiber_to_free = NULL; + l->ext_fiber_to_free = NULL; l->state = WORKER_IDLE; - l->lock_wait = false; l->provably_good_steal = false; l->rand_next = 0; /* will be reset in scheduler loop */ - l->index_to_worker = - (worker_id *)calloc(g->options.nproc, sizeof(worker_id)); cilk_sched_stats_init(&(l->stats)); return l; } +static void worker_local_destroy(local_state *l, global_state *g) { + if (l->hyper_table) { + hyper_table_cache_destroy(l->hyper_table); + l->hyper_table = NULL; + } +} + static void deques_init(global_state *g) { cilkrts_alert(BOOT, NULL, "(deques_init) Initializing deques"); for (unsigned int i = 0; i < g->options.nproc; i++) { @@ -69,30 +75,49 @@ static void deques_init(global_state *g) { static void workers_init(global_state *g) { cilkrts_alert(BOOT, NULL, "(workers_init) Initializing workers"); for (unsigned int i = 0; i < g->options.nproc; i++) { - cilkrts_alert(BOOT, NULL, "(workers_init) Initializing worker %u", i); - __cilkrts_worker *w = (__cilkrts_worker *)cilk_aligned_alloc( - __alignof__(__cilkrts_worker), sizeof(__cilkrts_worker)); - w->self = i; - w->g = g; - w->l = worker_local_init(g); - - w->ltq_limit = w->l->shadow_stack + g->options.deqdepth; - g->workers[i] = w; - __cilkrts_stack_frame **init = w->l->shadow_stack + 1; - atomic_store_explicit(&w->tail, init, memory_order_relaxed); - atomic_store_explicit(&w->head, init, memory_order_relaxed); - atomic_store_explicit(&w->exc, init, memory_order_relaxed); - w->current_stack_frame = NULL; - w->reducer_map = NULL; - // initialize internal malloc first - cilk_internal_malloc_per_worker_init(w); + if (i == 0) { + // Initialize worker 0, so we always have a worker structure to fall + // back on. 
+ __cilkrts_init_tls_worker(0, g); + } // Initialize index-to-worker map entry for this worker. + g->worker_args[i].id = i; + g->worker_args[i].g = g; g->index_to_worker[i] = i; g->worker_to_index[i] = i; } } +__cilkrts_worker *__cilkrts_init_tls_worker(worker_id i, global_state *g) { + cilkrts_alert(BOOT, NULL, "(workers_init) Initializing worker %u", i); + size_t alignment = 2 * __alignof__(__cilkrts_worker); + void *mem = cilk_aligned_alloc( + alignment, round_size_to_alignment(alignment, sizeof(__cilkrts_worker) + + sizeof(local_state))); + __cilkrts_worker *w = (__cilkrts_worker *)mem; + w->self = i; + w->extension = NULL; + w->ext_stack = NULL; + w->g = g; + w->l = worker_local_init(mem + sizeof(__cilkrts_worker), g); + + w->ltq_limit = w->l->shadow_stack + g->options.deqdepth; + g->workers[i] = w; + __cilkrts_stack_frame **init = w->l->shadow_stack + 1; + atomic_store_explicit(&w->tail, init, memory_order_relaxed); + atomic_store_explicit(&w->head, init, memory_order_relaxed); + atomic_store_explicit(&w->exc, init, memory_order_relaxed); + w->current_stack_frame = NULL; + w->reducer_map = NULL; + // initialize internal malloc first + cilk_internal_malloc_per_worker_init(w); + // zero-initialize the worker's fiber pool. 
+ cilk_fiber_pool_per_worker_zero_init(w); + + return w; +} + #ifdef CPU_SETSIZE static void move_bit(int cpu, cpu_set_t *to, cpu_set_t *from) { if (CPU_ISSET(cpu, from)) { @@ -185,7 +210,7 @@ static void threads_init(global_state *g) { ; for (int w = worker_start; w < n_threads; w++) { int status = pthread_create(&g->threads[w], NULL, scheduler_thread_proc, - g->workers[w]); + &g->worker_args[w]); if (status != 0) cilkrts_bug(NULL, "Cilk: thread creation (%u) failed: %s", w, @@ -225,11 +250,11 @@ global_state *__cilkrts_startup(int argc, char *argv[]) { cilkrts_alert(BOOT, NULL, "(__cilkrts_startup) argc %d", argc); global_state *g = global_state_init(argc, argv); reducers_init(g); - __cilkrts_init_tls_variables(); + /* __cilkrts_init_tls_variables(); */ workers_init(g); deques_init(g); CILK_ASSERT_G(0 == g->exiting_worker); - reducers_import(g, g->workers[g->exiting_worker]); + reducers_import(g, g->workers[0]); // Create the root closure and a fiber to go with it. Use worker 0 to // allocate the closure and fiber. @@ -315,14 +340,14 @@ static inline __attribute__((noinline)) void boss_wait_helper(void) { // function arguments and local variables in this function. Get // fresh copies of these arguments from the runtime's global // state. 
- global_state *g = tls_worker->g; + global_state *g = __cilkrts_tls_worker->g; __cilkrts_stack_frame *sf = g->root_closure->frame; CILK_BOSS_START_TIMING(g); #if !BOSS_THIEF - worker_id self = tls_worker->self; + worker_id self = __cilkrts_tls_worker->self; #endif - tls_worker = NULL; + __cilkrts_tls_worker = NULL; #if !BOSS_THIEF // Wake up the worker the boss was impersonating, to let it take @@ -334,8 +359,13 @@ static inline __attribute__((noinline)) void boss_wait_helper(void) { wait_until_cilk_done(g); #if BOSS_THIEF - g->workers[0]->reducer_map = g->workers[g->exiting_worker]->reducer_map; - g->workers[g->exiting_worker]->reducer_map = NULL; + __cilkrts_worker **workers = g->workers; + __cilkrts_worker *w0 = workers[0]; + __cilkrts_worker *wexit = workers[g->exiting_worker]; + w0->reducer_map = wexit->reducer_map; + wexit->reducer_map = NULL; + w0->extension = wexit->extension; + wexit->extension = NULL; g->exiting_worker = 0; #endif @@ -357,13 +387,10 @@ static inline __attribute__((noinline)) void boss_wait_helper(void) { // Setup runtime structures to start a new Cilkified region. Executed by the // Cilkifying thread in cilkify(). 
-void __cilkrts_internal_invoke_cilkified_root(global_state *g, - __cilkrts_stack_frame *sf) { +void __cilkrts_internal_invoke_cilkified_root(__cilkrts_stack_frame *sf) { + global_state *g = default_cilkrts; + CILK_ASSERT_G(!__cilkrts_get_tls_worker()); - /* CILK_ASSERT_G( */ - /* !atomic_load_explicit(&g->start_thieves, memory_order_acquire)); */ - /* CILK_ASSERT_G( */ - /* !atomic_load_explicit(&g->start_thieves_futex, memory_order_acquire)); */ // Start the workers if necessary if (__builtin_expect(!g->workers_started, false)) { @@ -376,17 +403,29 @@ void __cilkrts_internal_invoke_cilkified_root(global_state *g, // rts_srand(g->workers[0], (0 + 1) * 162347); g->workers[0]->l->rand_next = 162347; #endif + if (USE_EXTENSION) { + g->root_closure->ext_fiber = + cilk_fiber_allocate(g->workers[0], g->options.stacksize); + } is_boss_thread = true; } // The boss thread will impersonate the last exiting worker until it tries // to become a thief. #if BOSS_THIEF - tls_worker = g->workers[0]; + __cilkrts_tls_worker = g->workers[0]; #else - tls_worker = g->workers[g->exiting_worker]; + __cilkrts_tls_worker = g->workers[g->exiting_worker]; #endif - CILK_START_TIMING(tls_worker, INTERVAL_CILKIFY_ENTER); + if (USE_EXTENSION) { + // Initialize sf->extension, to appease the later call to + // setup_for_execution. + sf->extension = __cilkrts_tls_worker->extension; + // Initialize worker->ext_stack. + __cilkrts_tls_worker->ext_stack = + sysdep_get_stack_start(g->root_closure->ext_fiber); + } + CILK_START_TIMING(__cilkrts_tls_worker, INTERVAL_CILKIFY_ENTER); // Mark the root closure as not initialized g->root_closure_initialized = false; @@ -413,6 +452,8 @@ void __cilkrts_internal_invoke_cilkified_root(global_state *g, // flags. /* reset_disengaged_var(g); */ + CILK_ASSERT_G(!atomic_load_explicit(&g->cilkified, memory_order_relaxed) && + "OpenCilk runtime already executing a Cilk computation."); set_cilkified(g); // Set g->done = 0, so Cilk workers will continue trying to steal. 
@@ -428,8 +469,9 @@ void __cilkrts_internal_invoke_cilkified_root(global_state *g, /* request_more_thieves(g, g->nworkers); */ if (__builtin_setjmp(g->boss_ctx) == 0) { - CILK_SWITCH_TIMING(tls_worker, INTERVAL_CILKIFY_ENTER, INTERVAL_SCHED); - do_what_it_says_boss(tls_worker, g->root_closure); + CILK_SWITCH_TIMING(__cilkrts_tls_worker, INTERVAL_CILKIFY_ENTER, + INTERVAL_SCHED); + do_what_it_says_boss(__cilkrts_tls_worker, g->root_closure); } else { // The stack on which // __cilkrts_internal_invoke_cilkified_root() was called may @@ -453,6 +495,7 @@ void __cilkrts_internal_exit_cilkified_root(global_state *g, // exiting_worker. worker_id self = w->self; g->exiting_worker = self; + ReadyDeque *deques = g->deques; // Mark the computation as done. Also "sleep" the workers: update global // flags so workers who exit the work-stealing loop will return to waiting @@ -476,11 +519,11 @@ void __cilkrts_internal_exit_cilkified_root(global_state *g, // Cilkified region to start with an empty deque. We go ahead and grab the // deque lock to make sure no other worker has a lingering pointer to the // closure. - deque_lock_self(w); - g->deques[w->self].bottom = (Closure *)NULL; - g->deques[w->self].top = (Closure *)NULL; + deque_lock_self(deques, w); + deques[self].bottom = (Closure *)NULL; + deques[self].top = (Closure *)NULL; WHEN_CILK_DEBUG(g->root_closure->owner_ready_deque = NO_WORKER); - deque_unlock_self(w); + deque_unlock_self(deques, w); // Clear the flags in sf. This routine runs before leave_frame in a Cilk // function, but leave_frame is executed conditionally in Cilk functions @@ -494,13 +537,18 @@ void __cilkrts_internal_exit_cilkified_root(global_state *g, // We finished the computation on the boss thread. No need to jump to // the runtime in this case; just return normally. 
/* CILK_ASSERT(w, w->l->fiber_to_free == NULL); */ - if (w->l->fiber_to_free) { - cilk_fiber_deallocate_to_pool(w, w->l->fiber_to_free); + local_state *l = w->l; + if (l->fiber_to_free) { + cilk_fiber_deallocate_to_pool(w, l->fiber_to_free); + l->fiber_to_free = NULL; + } + if (l->ext_fiber_to_free) { + cilk_fiber_deallocate_to_pool(w, l->ext_fiber_to_free); + l->ext_fiber_to_free = NULL; } - w->l->fiber_to_free = NULL; atomic_store_explicit(&g->cilkified, 0, memory_order_release); - w->l->state = WORKER_IDLE; - tls_worker = NULL; + l->state = WORKER_IDLE; + __cilkrts_tls_worker = NULL; // Restore the boss's original rsp, so the boss completes the Cilk // function on its original stack. @@ -537,6 +585,8 @@ static void global_state_deinit(global_state *g) { pthread_cond_destroy(&g->start_root_worker_cond_var); pthread_mutex_destroy(&g->disengaged_lock); pthread_cond_destroy(&g->disengaged_cond_var); + free(g->worker_args); + g->worker_args = NULL; free(g->workers); g->workers = NULL; g->nworkers = 0; @@ -569,6 +619,7 @@ static void worker_terminate(__cilkrts_worker *w, void *data) { if (rm) { cilkred_map_destroy_map(w, rm); } + worker_local_destroy(w->l, w->g); cilk_internal_malloc_per_worker_terminate(w); // internal malloc last } @@ -605,12 +656,11 @@ static void workers_deinit(global_state *g) { while (i-- > 0) { __cilkrts_worker *w = g->workers[i]; g->workers[i] = NULL; + if (!w) + continue; cilk_internal_malloc_per_worker_destroy(w); // internal malloc last free(w->l->shadow_stack); w->l->shadow_stack = NULL; - free(w->l->index_to_worker); - w->l->index_to_worker = NULL; - free(w->l); w->l = NULL; free(w); } @@ -628,6 +678,8 @@ CHEETAH_INTERNAL void __cilkrts_shutdown(global_state *g) { // Deallocate the root closure and its fiber cilk_fiber_deallocate_global(g, g->root_closure->fiber); + if (USE_EXTENSION) + cilk_fiber_deallocate_global(g, g->root_closure->ext_fiber); Closure_destroy_global(g, g->root_closure); // Cleanup the global state diff --git 
a/runtime/init.h b/runtime/init.h index 34c55720..2df4f8b3 100644 --- a/runtime/init.h +++ b/runtime/init.h @@ -3,7 +3,8 @@ #include "cilk-internal.h" -void __cilkrts_internal_invoke_cilkified_root(global_state *g, __cilkrts_stack_frame *sf); +// For invoke, the global state is implied. +void __cilkrts_internal_invoke_cilkified_root(__cilkrts_stack_frame *sf); void __cilkrts_internal_exit_cilkified_root(global_state *g, __cilkrts_stack_frame *sf); // Used by Cilksan to set nworkers to 1 and force reduction diff --git a/runtime/internal-malloc.h b/runtime/internal-malloc.h index 2bde0900..a512ce0a 100644 --- a/runtime/internal-malloc.h +++ b/runtime/internal-malloc.h @@ -18,6 +18,12 @@ enum im_tag { CHEETAH_INTERNAL const char *name_for_im_tag(enum im_tag); +/* Helper routine to round sizes to alignments, for use with cilk_aligned_alloc. + */ +static inline size_t round_size_to_alignment(size_t alignment, size_t size) { + return ((size + alignment - 1) / alignment) * alignment; +} + /* Custom implementation of aligned_alloc. */ static inline void *cilk_aligned_alloc(size_t alignment, size_t size) { #if defined(_ISOC11_SOURCE) diff --git a/runtime/local.h b/runtime/local.h index 0be88c9f..aafd7947 100644 --- a/runtime/local.h +++ b/runtime/local.h @@ -3,20 +3,23 @@ #include +#include "internal-malloc-impl.h" /* for cilk_im_desc */ + +struct hyper_table_cache; + struct local_state { struct __cilkrts_stack_frame **shadow_stack; + struct hyper_table_cache *hyper_table; unsigned short state; /* __cilkrts_worker_state */ - bool lock_wait; bool provably_good_steal; unsigned int rand_next; - // Local copy of the index-to-worker map. 
- worker_id *index_to_worker; jmpbuf rts_ctx; struct cilk_fiber_pool fiber_pool; struct cilk_im_desc im_desc; struct cilk_fiber *fiber_to_free; + struct cilk_fiber *ext_fiber_to_free; struct sched_stats stats; }; diff --git a/runtime/pedigree-internal.h b/runtime/pedigree-internal.h new file mode 100644 index 00000000..bd6a794e --- /dev/null +++ b/runtime/pedigree-internal.h @@ -0,0 +1,73 @@ +#ifndef _PEDIGREE_INTERNAL_H +#define _PEDIGREE_INTERNAL_H + +#include +#include + +#include "cilk-internal.h" + +static const uint64_t DPRNG_PRIME = (uint64_t)(-59); +extern uint64_t *__pedigree_dprng_m_array; +extern uint64_t __pedigree_dprng_seed; + +typedef struct __pedigree_frame { + __cilkrts_pedigree pedigree; // Fields for pedigrees. + int64_t rank; + uint64_t dprng_dotproduct; + int64_t dprng_depth; +} __pedigree_frame; + +typedef struct __pedigree_frame_storage_t { + size_t next_pedigree_frame; + __pedigree_frame* frames; +} __pedigree_frame_storage_t; + + +/////////////////////////////////////////////////////////////////////////// +// Helper methods + +static inline __attribute__((malloc)) __pedigree_frame * +push_pedigree_frame(__cilkrts_worker *w) { + return __cilkrts_push_ext_stack(w, sizeof(__pedigree_frame)); +} + +static inline void pop_pedigree_frame(__cilkrts_worker *w) { + __cilkrts_pop_ext_stack(w, sizeof(__pedigree_frame)); +} + +static inline uint64_t __cilkrts_dprng_swap_halves(uint64_t x) { + return (x >> (4 * sizeof(uint64_t))) | (x << (4 * sizeof(uint64_t))); +} + +static inline uint64_t __cilkrts_dprng_mix(uint64_t x) { + for (int i = 0; i < 4; i++) { + x = x * (2*x+1); + x = __cilkrts_dprng_swap_halves(x); + } + return x; +} + +static inline uint64_t __cilkrts_dprng_mix_mod_p(uint64_t x) { + x = __cilkrts_dprng_mix(x); + return x - (DPRNG_PRIME & -(x >= DPRNG_PRIME)); +} + +static inline uint64_t __cilkrts_dprng_sum_mod_p(uint64_t a, uint64_t b) { + uint64_t z = a + b; + if ((z < a) || (z >= DPRNG_PRIME)) { + z -= DPRNG_PRIME; + } + return z; +} + 
+// Helper method to advance the pedigree and dprng states. +static inline __attribute__((always_inline)) __pedigree_frame * +bump_worker_rank(void) { + __pedigree_frame *frame = (__pedigree_frame *)(__cilkrts_get_extension()); + frame->rank++; + frame->dprng_dotproduct = __cilkrts_dprng_sum_mod_p( + frame->dprng_dotproduct, __pedigree_dprng_m_array[frame->dprng_depth]); + return frame; +} + +#endif // _PEDIGREE_INTERNAL_H diff --git a/runtime/pedigree_ext.c b/runtime/pedigree_ext.c new file mode 100644 index 00000000..b99d2cd2 --- /dev/null +++ b/runtime/pedigree_ext.c @@ -0,0 +1,49 @@ +#include "pedigree-internal.h" + +// Pedigree-extension code, included in the runtime as part of the bitcode file. + +void __cilkrts_extend_spawn(__cilkrts_worker *w, void **parent_extension, + void **child_extension) { + // Copy the child extension into the parent, and create a new + // __pedigree_frame for the child. + *parent_extension = *child_extension; + + // Get a new pedigree frame for the child extension. + __pedigree_frame *frame = push_pedigree_frame(w); + *child_extension = frame; + + // Initialize the new frame. + __pedigree_frame *parent_frame = (__pedigree_frame *)(*parent_extension); + // Copy the parent's rank into the child frame's pedigree.rank. + frame->pedigree.rank = parent_frame->rank; + // Append the child frame's pedigree onto the linked list. + frame->pedigree.parent = &(parent_frame->pedigree); + // Initialize the child frame's rank to 0. + frame->rank = 0; + + // Increment the dprng_depth in the child frame. + frame->dprng_depth = parent_frame->dprng_depth + 1; + // Update the child frame's dprng_dotproduct. + uint64_t parent_dprng_dotproduct = parent_frame->dprng_dotproduct; + frame->dprng_dotproduct = __cilkrts_dprng_sum_mod_p( + parent_dprng_dotproduct, __pedigree_dprng_m_array[frame->dprng_depth]); + + // Update the rank and dprng_dotproduct in the parent frame. 
+ parent_frame->rank++; + parent_frame->dprng_dotproduct = __cilkrts_dprng_sum_mod_p( + parent_dprng_dotproduct, + __pedigree_dprng_m_array[parent_frame->dprng_depth]); +} + +void __cilkrts_extend_return_from_spawn(__cilkrts_worker *w, void **extension) { + // Free the pedigree frame. + pop_pedigree_frame(w); +} + +void __cilkrts_extend_sync(void **extension) { + // Update the rank and dprng_dotproduct. + __pedigree_frame *frame = (__pedigree_frame *)(*extension); + frame->rank++; + frame->dprng_dotproduct = __cilkrts_dprng_sum_mod_p( + frame->dprng_dotproduct, __pedigree_dprng_m_array[frame->dprng_depth]); +} diff --git a/runtime/pedigree_globals.c b/runtime/pedigree_globals.c index 5c17960d..a3d31b6c 100644 --- a/runtime/pedigree_globals.c +++ b/runtime/pedigree_globals.c @@ -1,44 +1,5 @@ -#include -#include -#include -#define ENABLE_CILKRTS_PEDIGREE -#include - -__cilkrts_pedigree cilkrts_root_pedigree_node; -uint64_t DPRNG_PRIME = (uint64_t)(-59); -uint64_t* dprng_m_array; -uint64_t dprng_m_X = 0; - -uint64_t __cilkrts_dprng_swap_halves(uint64_t x) { - return (x >> (4 * sizeof(uint64_t))) | (x << (4 * sizeof(uint64_t))); -} - -uint64_t __cilkrts_dprng_mix(uint64_t x) { - for (int i = 0; i < 4; i++) { - x = x * (2*x+1); - x = __cilkrts_dprng_swap_halves(x); - } - return x; -} - -uint64_t __cilkrts_dprng_mix_mod_p(uint64_t x) { - x = __cilkrts_dprng_mix(x); - return x - (DPRNG_PRIME & -(x >= DPRNG_PRIME)); -} - -uint64_t __cilkrts_dprng_sum_mod_p(uint64_t a, uint64_t b) { - uint64_t z = a+b; - if ((z < a) || (z >= DPRNG_PRIME)) { - z -= DPRNG_PRIME; - } - return z; -} - -void __cilkrts_init_dprng(void) { - dprng_m_array = (uint64_t*) malloc(sizeof(uint64_t*) * 4096); - for (int i = 0; i < 4096; i++) { - dprng_m_array[i] = __cilkrts_dprng_mix_mod_p(0x8c679c168e6bf733ul + i); - } - dprng_m_X = __cilkrts_dprng_mix_mod_p(0x8c679c168e6bf733ul + 4096); -} +#include "pedigree-internal.h" +// This variable needs to be accessed both from the external pedigree library +// 
and the pedigree-extension code in the core runtime library.
+uint64_t *__pedigree_dprng_m_array = NULL;
diff --git a/runtime/pedigree_lib.c b/runtime/pedigree_lib.c
new file mode 100644
index 00000000..66474964
--- /dev/null
+++ b/runtime/pedigree_lib.c
@@ -0,0 +1,101 @@
+#include "pedigree-internal.h"
+
+// External pedigree library code. Linking this code with a Cilk program
+// enables pedigrees.
+
+////////////////////////////////////////////////////////////////////////////////
+// Global variables local to the library.
+
+uint64_t __pedigree_dprng_seed = 0x8c679c168e6bf733ul;
+uint64_t __pedigree_dprng_m_X = 0;
+CHEETAH_INTERNAL
+__pedigree_frame root_frame = {.pedigree = {.rank = 0, .parent = NULL},
+                               .rank = 0,
+                               .dprng_depth = 0,
+                               .dprng_dotproduct = 0};
+
+////////////////////////////////////////////////////////////////////////////////
+// Initialization and deinitialization
+
+// Free the DPRNG multiplier table, if any. Registered with atexit() by
+// __cilkrts_init_dprng() when it allocates the table.
+CHEETAH_INTERNAL
+void __cilkrts_deinit_dprng(void) {
+    if (__pedigree_dprng_m_array) {
+        free(__pedigree_dprng_m_array);
+        __pedigree_dprng_m_array = NULL;
+    }
+}
+
+// (Re)fill the 4096-entry multiplier table and __pedigree_dprng_m_X from the
+// current __pedigree_dprng_seed, allocating the table if it does not exist.
+void __cilkrts_init_dprng(void) {
+    // TODO: Disallow __cilkrts_init_dprng() from being called in parallel.
+    if (!__pedigree_dprng_m_array) {
+        // Size the table by its element type: sizeof(uint64_t *) would
+        // under-allocate wherever pointers are narrower than 64 bits.
+        __pedigree_dprng_m_array =
+            (uint64_t *)malloc(sizeof(uint64_t) * 4096);
+        atexit(__cilkrts_deinit_dprng);
+    }
+
+    for (int i = 0; i < 4096; i++) {
+        __pedigree_dprng_m_array[i] =
+            __cilkrts_dprng_mix_mod_p(__pedigree_dprng_seed + i);
+    }
+    __pedigree_dprng_m_X =
+        __cilkrts_dprng_mix_mod_p(__pedigree_dprng_seed + 4096);
+}
+
+// Seed the root frame's dot product with __pedigree_dprng_m_X and hand the
+// root frame to __cilkrts_register_extension().
+CHEETAH_INTERNAL
+void __pedigree_init(void) {
+    root_frame.dprng_dotproduct = __pedigree_dprng_m_X;
+
+    __cilkrts_register_extension(&root_frame);
+}
+
+// Library constructor: build the DPRNG table, then run __pedigree_init()
+// immediately if __cilkrts_is_initialized() reports true, or pass it to
+// __cilkrts_atinit() otherwise.
+CHEETAH_INTERNAL
+__attribute__((constructor)) void __pedigree_startup(void) {
+    __cilkrts_init_dprng();
+
+    if (!__cilkrts_is_initialized())
+        __cilkrts_atinit(__pedigree_init);
+    else
+        __pedigree_init();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// API methods, callable from user code.
+//
+// These methods are included here so that, if a Cilk program attempts to use
+// one of these routines without incorporating this library, the user will get
+// sensible-looking linker errors.
+
+// Helper method to advance the pedigree and dprng states.
+void __cilkrts_bump_worker_rank(void) { bump_worker_rank(); }
+
+// Set the seed for the dprand DPRNG.
+void __cilkrts_dprand_set_seed(uint64_t seed) {
+    __pedigree_dprng_seed = seed;
+    __cilkrts_init_dprng();
+}
+
+// Get the current value of the dprand DPRNG.
+uint64_t __cilkrts_get_dprand(void) {
+    __pedigree_frame *frame = bump_worker_rank();
+    return __cilkrts_dprng_mix_mod_p(frame->dprng_dotproduct);
+}
+
+// Get the current pedigree, in the form of a pointer to its leaf node.
+__cilkrts_pedigree __cilkrts_get_pedigree(void) { + __cilkrts_pedigree ret_ped; + __pedigree_frame *frame = (__pedigree_frame *)(__cilkrts_get_extension()); + ret_ped.parent = &(frame->pedigree); + ret_ped.rank = frame->rank; + return ret_ped; +} diff --git a/runtime/personality.c b/runtime/personality.c index 187bd4f3..4ffa03b2 100644 --- a/runtime/personality.c +++ b/runtime/personality.c @@ -45,6 +45,7 @@ _Unwind_Reason_Code __cilk_personality_internal( __cilkrts_worker *w = __cilkrts_get_tls_worker(); __cilkrts_stack_frame *sf = w->current_stack_frame; + ReadyDeque *deques = w->g->deques; if (actions & _UA_SEARCH_PHASE) { // don't do anything out of the ordinary during search phase. @@ -60,8 +61,9 @@ _Unwind_Reason_Code __cilk_personality_internal( sysdep_save_fp_ctrl_state(sf); if (__builtin_setjmp(sf->ctx) == 0) { - deque_lock_self(w); - Closure *t = deque_peek_bottom(w, w->self); + + deque_lock_self(deques, w); + Closure *t = deque_peek_bottom(deques, w, w->self); // ensure that we return here after a cilk_sync. t->parent_rsp = t->orig_rsp; @@ -70,7 +72,7 @@ _Unwind_Reason_Code __cilk_personality_internal( // set closure_exception t->user_exn.exn = (char *)ue_header; - deque_unlock_self(w); + deque_unlock_self(deques, w); // For now, use this flag to indicate that we are setjmping from // the personality function. This will "disable" some asserts in @@ -83,9 +85,9 @@ _Unwind_Reason_Code __cilk_personality_internal( // after longjmping back, the worker may have changed. 
w = __cilkrts_get_tls_worker(); - deque_lock_self(w); - Closure *t = deque_peek_bottom(w, w->self); - deque_unlock_self(w); + deque_lock_self(deques, w); + Closure *t = deque_peek_bottom(deques, w, w->self); + deque_unlock_self(deques, w); bool in_reraised_cfa = (t->reraise_cfa == (char *)get_cfa(context)); bool skip_leaveframe = ((t->reraise_cfa != NULL) && !in_reraised_cfa); if (in_reraised_cfa) diff --git a/runtime/readydeque.h b/runtime/readydeque.h index ba48eeb5..a4b7e1ce 100644 --- a/runtime/readydeque.h +++ b/runtime/readydeque.h @@ -26,55 +26,45 @@ struct ReadyDeque { * Management of ReadyDeques *********************************************************/ -static inline -void deque_assert_ownership(__cilkrts_worker *const w, worker_id pn) { - CILK_ASSERT(w, w->g->deques[pn].mutex_owner == w->self); +static inline void deque_assert_ownership(ReadyDeque *deques, + __cilkrts_worker *const w, + worker_id pn) { + CILK_ASSERT(w, deques[pn].mutex_owner == w->self); } -static inline -void deque_lock_self(__cilkrts_worker *const w) { - struct local_state *l = w->l; +static inline void deque_lock_self(ReadyDeque *deques, + __cilkrts_worker *const w) { worker_id id = w->self; - global_state *g = w->g; - l->lock_wait = true; - cilk_mutex_lock(&g->deques[id].mutex); - l->lock_wait = false; - g->deques[id].mutex_owner = id; + cilk_mutex_lock(&deques[id].mutex); + deques[id].mutex_owner = id; } -static inline -void deque_unlock_self(__cilkrts_worker *const w) { +static inline void deque_unlock_self(ReadyDeque *deques, + __cilkrts_worker *const w) { worker_id id = w->self; - global_state *g = w->g; - g->deques[id].mutex_owner = NO_WORKER; - cilk_mutex_unlock(&g->deques[id].mutex); + deques[id].mutex_owner = NO_WORKER; + cilk_mutex_unlock(&deques[id].mutex); } -static inline -int deque_trylock(__cilkrts_worker *const w, worker_id pn) { - global_state *g = w->g; - int ret = cilk_mutex_try(&g->deques[pn].mutex); +static inline int deque_trylock(ReadyDeque *deques, 
__cilkrts_worker *const w, + worker_id pn) { + int ret = cilk_mutex_try(&deques[pn].mutex); if (ret) { - g->deques[pn].mutex_owner = w->self; + deques[pn].mutex_owner = w->self; } return ret; } -static inline -void deque_lock(__cilkrts_worker *const w, worker_id pn) { - global_state *g = w->g; - struct local_state *l = w->l; - l->lock_wait = true; - cilk_mutex_lock(&g->deques[pn].mutex); - l->lock_wait = false; - g->deques[pn].mutex_owner = w->self; +static inline void deque_lock(ReadyDeque *deques, __cilkrts_worker *const w, + worker_id pn) { + cilk_mutex_lock(&deques[pn].mutex); + deques[pn].mutex_owner = w->self; } -static inline -void deque_unlock(__cilkrts_worker *const w, worker_id pn) { - global_state *g = w->g; - g->deques[pn].mutex_owner = NO_WORKER; - cilk_mutex_unlock(&w->g->deques[pn].mutex); +static inline void deque_unlock(ReadyDeque *deques, __cilkrts_worker *const w, + worker_id pn) { + deques[pn].mutex_owner = NO_WORKER; + cilk_mutex_unlock(&deques[pn].mutex); } /* @@ -84,44 +74,44 @@ void deque_unlock(__cilkrts_worker *const w, worker_id pn) { * ANGE: the precondition of these functions is that the worker w -> self * must have locked worker pn's deque before entering the function */ -static inline -Closure *deque_xtract_top(__cilkrts_worker *const w, worker_id pn) { +static inline Closure * +deque_xtract_top(ReadyDeque *deques, __cilkrts_worker *const w, worker_id pn) { Closure *cl; /* ANGE: make sure w has the lock on worker pn's deque */ - deque_assert_ownership(w, pn); + deque_assert_ownership(deques, w, pn); - cl = w->g->deques[pn].top; + cl = deques[pn].top; if (cl) { CILK_ASSERT(w, cl->owner_ready_deque == pn); - w->g->deques[pn].top = cl->next_ready; + deques[pn].top = cl->next_ready; /* ANGE: if there is only one entry in the deque ... 
*/ - if (cl == w->g->deques[pn].bottom) { + if (cl == deques[pn].bottom) { CILK_ASSERT(w, cl->next_ready == (Closure *)NULL); - w->g->deques[pn].bottom = (Closure *)NULL; + deques[pn].bottom = (Closure *)NULL; } else { CILK_ASSERT(w, cl->next_ready); (cl->next_ready)->prev_ready = (Closure *)NULL; } WHEN_CILK_DEBUG(cl->owner_ready_deque = NO_WORKER); } else { - CILK_ASSERT(w, w->g->deques[pn].bottom == (Closure *)NULL); + CILK_ASSERT(w, deques[pn].bottom == (Closure *)NULL); } return cl; } -static inline -Closure *deque_peek_top(__cilkrts_worker *const w, worker_id pn) { +static inline Closure *deque_peek_top(ReadyDeque *deques, + __cilkrts_worker *const w, worker_id pn) { Closure *cl; /* ANGE: make sure w has the lock on worker pn's deque */ - deque_assert_ownership(w, pn); + deque_assert_ownership(deques, w, pn); /* ANGE: return the top but does not unlink it from the rest */ - cl = w->g->deques[pn].top; + cl = deques[pn].top; if (cl) { // If w is stealing, then it may peek the top of the deque of the worker // who is in the midst of exiting a Cilkified region. 
In that case, cl @@ -130,27 +120,28 @@ Closure *deque_peek_top(__cilkrts_worker *const w, worker_id pn) { CILK_ASSERT(w, cl->owner_ready_deque == pn || (w->self != pn && cl == w->g->root_closure)); } else { - CILK_ASSERT(w, w->g->deques[pn].bottom == (Closure *)NULL); + CILK_ASSERT(w, deques[pn].bottom == (Closure *)NULL); } return cl; } -static inline -Closure *deque_xtract_bottom(__cilkrts_worker *const w, worker_id pn) { +static inline Closure *deque_xtract_bottom(ReadyDeque *deques, + __cilkrts_worker *const w, + worker_id pn) { Closure *cl; /* ANGE: make sure w has the lock on worker pn's deque */ - deque_assert_ownership(w, pn); + deque_assert_ownership(deques, w, pn); - cl = w->g->deques[pn].bottom; + cl = deques[pn].bottom; if (cl) { CILK_ASSERT(w, cl->owner_ready_deque == pn); - w->g->deques[pn].bottom = cl->prev_ready; - if (cl == w->g->deques[pn].top) { + deques[pn].bottom = cl->prev_ready; + if (cl == deques[pn].top) { CILK_ASSERT(w, cl->prev_ready == (Closure *)NULL); - w->g->deques[pn].top = (Closure *)NULL; + deques[pn].top = (Closure *)NULL; } else { CILK_ASSERT(w, cl->prev_ready); (cl->prev_ready)->next_ready = (Closure *)NULL; @@ -158,58 +149,60 @@ Closure *deque_xtract_bottom(__cilkrts_worker *const w, worker_id pn) { WHEN_CILK_DEBUG(cl->owner_ready_deque = NO_WORKER); } else { - CILK_ASSERT(w, w->g->deques[pn].top == (Closure *)NULL); + CILK_ASSERT(w, deques[pn].top == (Closure *)NULL); } return cl; } -static inline -Closure *deque_peek_bottom(__cilkrts_worker *const w, worker_id pn) { +static inline Closure * +deque_peek_bottom(ReadyDeque *deques, __cilkrts_worker *const w, worker_id pn) { Closure *cl; /* ANGE: make sure w has the lock on worker pn's deque */ - deque_assert_ownership(w, pn); + deque_assert_ownership(deques, w, pn); - cl = w->g->deques[pn].bottom; + cl = deques[pn].bottom; if (cl) { CILK_ASSERT(w, cl->owner_ready_deque == pn); } else { - CILK_ASSERT(w, w->g->deques[pn].top == (Closure *)NULL); + CILK_ASSERT(w, deques[pn].top == 
(Closure *)NULL); } return cl; } -static inline -void deque_assert_is_bottom(__cilkrts_worker *const w, Closure *t) { +static inline void deque_assert_is_bottom(ReadyDeque *deques, + __cilkrts_worker *const w, + Closure *t) { /* ANGE: still need to make sure the worker self has the lock */ - deque_assert_ownership(w, w->self); - CILK_ASSERT(w, t == deque_peek_bottom(w, w->self)); + deque_assert_ownership(deques, w, w->self); + CILK_ASSERT(w, t == deque_peek_bottom(deques, w, w->self)); } /* * ANGE: this allow w -> self to append Closure cl onto worker pn's ready * deque (i.e. make cl the new bottom). */ -static inline -void deque_add_bottom(__cilkrts_worker *const w, Closure *cl, worker_id pn) { +static inline void deque_add_bottom(ReadyDeque *deques, + __cilkrts_worker *const w, Closure *cl, + worker_id pn) { - deque_assert_ownership(w, pn); + deque_assert_ownership(deques, w, pn); CILK_ASSERT(w, cl->owner_ready_deque == NO_WORKER); - cl->prev_ready = w->g->deques[pn].bottom; + cl->prev_ready = deques[pn].bottom; cl->next_ready = (Closure *)NULL; - w->g->deques[pn].bottom = cl; + deques[pn].bottom = cl; WHEN_CILK_DEBUG(cl->owner_ready_deque = pn); - if (w->g->deques[pn].top) { + if (deques[pn].top) { CILK_ASSERT(w, cl->prev_ready); (cl->prev_ready)->next_ready = cl; } else { - w->g->deques[pn].top = cl; + deques[pn].top = cl; } } diff --git a/runtime/reducer_api.c b/runtime/reducer_api.c new file mode 100644 index 00000000..704ab66c --- /dev/null +++ b/runtime/reducer_api.c @@ -0,0 +1,113 @@ + +/* Begin new reducer interface */ + +#include +#include +#include +#include "rts-config.h" +#include "hyperobject_base.h" +#include "cilk-internal.h" +#include "hypertable.h" +#include "local.h" + +static const size_t HSIZE = 0; // meaning use default + +hyperobject_base * +__cilkrts_add_key(void *leftmost, size_t size, + __cilk_identity_fn id, + __cilk_reduce_fn reduce) { + __cilkrts_worker *w = __cilkrts_get_tls_worker(); + + if (size <= 0) + cilkrts_bug(w, "User error: 
reducer size not positive"); + + size = size + (CILK_CACHE_LINE - 1) & ~(size_t)(CILK_CACHE_LINE - 1); + + /* TODO: Internal malloc (which wants a non-null worker) */ + hyperobject_base *hyper = + cilk_aligned_alloc(CILK_CACHE_LINE, sizeof (hyperobject_base)); + if (!hyper) + cilkrts_bug(w, "unable to allocate hyperobject"); + hyper->identity_fn = id; + hyper->reduce_fn = reduce; + hyper->key = leftmost; + hyper->view_size = size; + hyper->id_num = 0; + cilkrts_hyper_register(hyper); + + if (w && w->l->hyper_table) { + enum hyper_table_error error = + hyper_table_cache_insert(w->l->hyper_table, leftmost, hyper); + if (error != HYPER_OK) { + cilkrts_bug(w, "unable to insert hyperobject in table (%s)", + hyper_table_error_string(error)); + cilkrts_hyper_unregister(hyper); + return 0; + } + return hyper; + } + + struct hyper_table *table = hyper_table_get_or_create(HSIZE); + if (hyper_table_insert(table, leftmost, hyper) != HYPER_OK) { + cilkrts_bug(w, "unable to insert hyperobject in table"); + cilkrts_hyper_unregister(hyper); + return 0; + } + return hyper; +} + +void __cilkrts_drop_key(void *key) { + __cilkrts_worker *w = __cilkrts_get_tls_worker(); + hyperobject_base *hyper; + if (w && w->l->hyper_table) { + hyper = hyper_table_cache_remove(w->l->hyper_table, key); + } else { + struct hyper_table *table = hyper_table_get_or_create(HSIZE); + hyper = hyper_table_remove(table, key); + } + if (!hyper) + return; + cilkrts_hyper_unregister(hyper); + free(hyper); +} + +hyperobject_base *__cilkrts_hyper_key(void *key) { + __cilkrts_worker *w = __cilkrts_get_tls_worker(); + if (w && w->l->hyper_table) + return hyper_table_cache_lookup(w->l->hyper_table, key); + struct hyper_table *table = hyper_table_get_or_create(HSIZE); + return hyper_table_lookup(table, key); +} + +/* ABI, declared in cilk_api.h */ +void *__cilkrts_reducer_lookup(void *key) { + hyperobject_base *hyper = __cilkrts_hyper_key(key); + if (hyper) + return cilkrts_hyper_lookup(hyper); + return key; +} + +void 
+__cilkrts_reducer_register(void *key, size_t size, + __cilk_identity_fn id, + __cilk_reduce_fn reduce) { + __cilkrts_add_key(key, size, id, reduce); +} + +void +__cilkrts_reducer_register_32(void *key, uint32_t size, + __cilk_identity_fn id, + __cilk_reduce_fn reduce) { + __cilkrts_add_key(key, size, id, reduce); +} + +void +__cilkrts_reducer_register_64(void *key, uint64_t size, + __cilk_identity_fn id, + __cilk_reduce_fn reduce) { + __cilkrts_add_key(key, size, id, reduce); +} + +void __cilkrts_reducer_unregister(void *key) { + __cilkrts_drop_key(key); +} diff --git a/runtime/reducer_impl.c b/runtime/reducer_impl.c index 67d69934..41ffc48a 100644 --- a/runtime/reducer_impl.c +++ b/runtime/reducer_impl.c @@ -3,7 +3,7 @@ #define _GNU_SOURCE #endif #include "reducer_impl.h" -#include "cilk/hyperobject_base.h" +#include "hyperobject_base.h" #include "global.h" #include "init.h" #include "internal-malloc.h" @@ -11,6 +11,7 @@ #include "scheduler.h" #include #include +#include #include #include #include @@ -19,7 +20,6 @@ #define USE_INTERNAL_MALLOC 1 -#define REDUCER_LIMIT 1024U #define GLOBAL_REDUCER_LIMIT 100U // ================================================================= @@ -38,9 +38,71 @@ typedef struct reducer_id_manager { /* When Cilk is not running, global holds all the registered hyperobjects so they can be imported into the first worker. Size is GLOBAL_REDUCER_LIMIT, regardless of spa_cap. */ - __cilkrts_hyperobject_base **global; + hyperobject_base **global; } reducer_id_manager; +/* A table of hyperobjects + TODO: Use the bitmap logic from local reducer maps. 
*/
+static struct {
+    pthread_mutex_t lock; /* held while any field below is read or written */
+    uint32_t size, count, hint; /* capacity, used slots, probe hint */
+    hyperobject_base **list; /* list[i] is the entry with id_num i, or 0 */
+} global_reducers __attribute__((aligned(32)))
+  = {PTHREAD_MUTEX_INITIALIZER, 0, 0, 0, 0};
+
+void remove_global_reducer(hyperobject_base *hyper) {
+    int error = pthread_mutex_lock(&global_reducers.lock);
+    if (error)
+        cilkrts_bug(0, "mutex lock error");
+    uint32_t index = hyper->id_num; /* slot assigned by add_global_reducer */
+    CILK_ASSERT_G(index < global_reducers.size);
+    CILK_ASSERT_G(global_reducers.list[index] == hyper);
+    global_reducers.list[index] = 0; /* mark the slot free */
+    --global_reducers.count;
+    uint32_t hint = global_reducers.hint;
+    global_reducers.hint = hint > index ? hint : index;
+    pthread_mutex_unlock(&global_reducers.lock);
+}
+
+void add_global_reducer(hyperobject_base *hyper) {
+    int error = pthread_mutex_lock(&global_reducers.lock);
+    if (error)
+        cilkrts_bug(0, "mutex lock error");
+    hyperobject_base **list = global_reducers.list;
+    size_t size = global_reducers.size;
+    size_t count = global_reducers.count;
+    size_t hint = global_reducers.hint;
+    uint32_t index = 0;
+    if (!list) { /* first registration: create a 32-slot table */
+        list = calloc(32, sizeof(hyperobject_base *)); /* already zeroed */
+        size = 32;
+        index = 0;
+        if (!list)
+            cilkrts_bug(0, "unable to allocate global reducer table");
+    } else if (count == size) { /* table full: grow by half */
+        size_t new_size = size * 3 / 2;
+        CILK_ASSERT_G((uint32_t)new_size == new_size);
+        list = realloc(list, new_size * sizeof(hyperobject_base *));
+        index = size; /* first new slot; must stay below new_size */
+        for (size_t i = size; i < new_size; ++i)
+            list[i] = 0; /* realloc leaves the new tail uninitialized */
+        size = new_size;
+    } else if (!list[hint]) { /* hinted slot is free */
+        index = hint;
+    } else { /* scan downward; count < size, so a free slot should exist */
+        index = size;
+        while (index-- > 0)
+            if (!list[index])
+                break;
+    }
+    hyper->id_num = index;
+    list[index] = hyper;
+    global_reducers.list = list;
+    global_reducers.count = count + 1;
+    global_reducers.size = size;
+    global_reducers.hint = (size == index + 1) ? 0 : index + 1;
+    pthread_mutex_unlock(&global_reducers.lock);
+}

 static void reducer_id_manager_assert_ownership(reducer_id_manager *m,
                                                 __cilkrts_worker *const w) {
@@ -93,7 +155,7 @@ static void free_reducer_id_manager(reducer_id_manager *m) {
         m->used = NULL;
         free(old);
     }
-    __cilkrts_hyperobject_base **global = m->global;
+    hyperobject_base **global = m->global;
     if (global) {
         m->global = NULL;
         free(global);
@@ -148,6 +210,14 @@ static void reducer_id_free(__cilkrts_worker *const ws, hyper_id_t id) {
     reducer_id_manager_unlock(m, ws);
 }

+static void *get_or_init_leftmost(__cilkrts_worker *w,
+                                  hyperobject_base *hyper) {
+    void *left = hyper->key;
+    if (!left)
+        cilkrts_bug(w, "User error: hyperobject has no leftmost object");
+    return left;
+}
+
 // =================================================================
 // Init / deinit functions
 // =================================================================
@@ -158,7 +228,7 @@ void reducers_init(global_state *g) {
     if (g->id_manager) {
         return;
     } else {
-        g->id_manager = init_reducer_id_manager(REDUCER_LIMIT);
+        g->id_manager = init_reducer_id_manager(DEFAULT_REDUCER_LIMIT);
     }
 }

@@ -177,14 +247,13 @@ CHEETAH_INTERNAL void reducers_import(global_state *g, __cilkrts_worker *w) {
        should be exported when Cilk exits.
*/ cilkred_map *map = cilkred_map_make_map(w, m->spa_cap); for (hyper_id_t i = 0; i < m->hwm; ++i) { - __cilkrts_hyperobject_base *h = m->global[i]; - if (h) { - map->vinfo[i].key = h; - map->vinfo[i].val = (char *)h + (ptrdiff_t)h->__view_offset; + hyperobject_base *hyper = m->global[i]; + if (hyper) { + map->vinfo[i].hyper = hyper; + map->vinfo[i].view = get_or_init_leftmost(w, hyper); + CILK_ASSERT(w, hyper->valid); + cilkred_map_log_id(w, map, hyper->id_num); } - hyper_id_t id = h->__id_num; - CILK_ASSERT(w, id & HYPER_ID_VALID); - cilkred_map_log_id(w, map, id & ~HYPER_ID_VALID); } w->reducer_map = map; } @@ -209,7 +278,7 @@ static cilkred_map *install_new_reducer_map(__cilkrts_worker *w) { /* remove the reducer from the current reducer map. If the reducer exists in maps other than the current one, the behavior is undefined. */ -void __cilkrts_hyper_destroy(__cilkrts_hyperobject_base *key) { +void cilkrts_hyper_unregister(hyperobject_base *hyper) { __cilkrts_worker *w = __cilkrts_get_tls_worker(); // If we don't have a worker, use instead the last exiting worker from the @@ -217,14 +286,13 @@ void __cilkrts_hyper_destroy(__cilkrts_hyperobject_base *key) { if (!w) w = default_cilkrts->workers[default_cilkrts->exiting_worker]; - hyper_id_t id = key->__id_num; - cilkrts_alert(REDUCE_ID, w, "Destroy reducer %x at %p", (unsigned)id, key); - if (!__builtin_expect(id & HYPER_ID_VALID, HYPER_ID_VALID)) { - cilkrts_bug(w, "unregistering unregistered hyperobject %p", key); + hyper_id_t id = hyper->id_num; + cilkrts_alert(REDUCE_ID, w, "Destroy reducer %x at %p", (unsigned)id, hyper); + if (__builtin_expect(!hyper->valid, 0)) { + cilkrts_bug(w, "unregistering unregistered hyperobject %p", hyper); return; } - id &= ~HYPER_ID_VALID; - key->__id_num = id; + hyper->id_num = id; if (w) { #define UNSYNCED_REDUCER_MSG \ @@ -243,7 +311,7 @@ void __cilkrts_hyper_destroy(__cilkrts_hyperobject_base *key) { reducer_id_free(w, id); } -void 
__cilkrts_hyper_create(__cilkrts_hyperobject_base *key) { +void cilkrts_hyper_register(hyperobject_base *hyper) { // This function registers the specified hyperobject in the current // reducer map and registers the initial value of the hyperobject as the // leftmost view of the reducer. @@ -260,18 +328,13 @@ void __cilkrts_hyper_create(__cilkrts_hyperobject_base *key) { } hyper_id_t id = reducer_id_get(m, w); - key->__id_num = id | HYPER_ID_VALID; + hyper->id_num = id; + hyper->valid = 1; - cilkrts_alert(REDUCE_ID, w, "Create reducer %x at %p", (unsigned)id, key); + cilkrts_alert(REDUCE_ID, w, "Create reducer %x at %p", (unsigned)id, hyper); if (__builtin_expect(!w, 0)) { - if (id >= GLOBAL_REDUCER_LIMIT) { - cilkrts_bug(w, "Global reducer pool exhausted"); - } - if (!m->global) { - m->global = calloc(GLOBAL_REDUCER_LIMIT, sizeof *m->global); - } - m->global[id] = key; + add_global_reducer(hyper); return; } @@ -282,7 +345,7 @@ void __cilkrts_hyper_create(__cilkrts_hyperobject_base *key) { } /* Must not exist. 
*/ - CILK_ASSERT(w, cilkred_map_lookup(h, key) == NULL); + CILK_ASSERT(w, cilkred_map_lookup(h, hyper) == NULL); if (h->merging) cilkrts_bug(w, "User error: hyperobject used by another hyperobject"); @@ -290,27 +353,26 @@ void __cilkrts_hyper_create(__cilkrts_hyperobject_base *key) { CILK_ASSERT(w, w->reducer_map == h); ViewInfo *vinfo = &h->vinfo[id]; - vinfo->key = key; + vinfo->hyper = hyper; // init with left most view - vinfo->val = (char *)key + (ptrdiff_t)key->__view_offset; + vinfo->view = get_or_init_leftmost(w, hyper); cilkred_map_log_id(w, h, id); - static_assert(sizeof(__cilkrts_hyperobject_base) <= 64, + static_assert(sizeof(hyperobject_base) <= 64, "hyperobject base is too large"); } -void *__cilkrts_hyper_lookup(__cilkrts_hyperobject_base *key) { +void *cilkrts_hyper_lookup(hyperobject_base *hyper) { __cilkrts_worker *w = __cilkrts_get_tls_worker(); - hyper_id_t id = key->__id_num; + hyper_id_t id = hyper->id_num; - if (!__builtin_expect(id & HYPER_ID_VALID, HYPER_ID_VALID)) { + if (__builtin_expect(!hyper->valid, 0)) { cilkrts_bug(w, "User error: reference to unregistered hyperobject %p", - key); + hyper); } - id &= ~HYPER_ID_VALID; if (__builtin_expect(!w, 0)) { - return (char *)key + key->__view_offset; + return hyper->key; } /* TODO: If this is the first reference to a reducer created at @@ -330,24 +392,25 @@ void *__cilkrts_hyper_lookup(__cilkrts_hyperobject_base *key) { if (h->merging) cilkrts_bug(w, "User error: hyperobject used by another hyperobject"); - ViewInfo *vinfo = cilkred_map_lookup(h, key); + ViewInfo *vinfo = cilkred_map_lookup(h, hyper); if (vinfo == NULL) { CILK_ASSERT(w, id < h->spa_cap); vinfo = &h->vinfo[id]; - CILK_ASSERT(w, vinfo->key == NULL && vinfo->val == NULL); + CILK_ASSERT(w, vinfo->hyper == NULL && vinfo->view == NULL); - void *val = key->__c_monoid.allocate_fn(key, key->__view_size); - key->__c_monoid.identity_fn(key, val); + void *view = __cilkrts_hyper_alloc(hyper->view_size); + hyper->identity_fn(view); // 
allocate space for the val and initialize it to identity - vinfo->key = key; - vinfo->val = val; + vinfo->hyper = hyper; + vinfo->view = view; cilkred_map_log_id(w, h, id); } - return vinfo->val; + return vinfo->view; } -void *__cilkrts_hyper_alloc(__cilkrts_hyperobject_base *key, size_t bytes) { +__attribute__((noinline)) +void *__cilkrts_hyper_alloc(size_t bytes) { if (USE_INTERNAL_MALLOC) { __cilkrts_worker *w = __cilkrts_get_tls_worker(); if (!w) @@ -355,18 +418,20 @@ void *__cilkrts_hyper_alloc(__cilkrts_hyperobject_base *key, size_t bytes) { // a Cilkified region w = default_cilkrts->workers[default_cilkrts->exiting_worker]; return cilk_internal_malloc(w, bytes, IM_REDUCER_MAP); - } else - return cilk_aligned_alloc(16, bytes); + } else { + return cilk_aligned_alloc(CILK_CACHE_LINE, bytes); + } } -void __cilkrts_hyper_dealloc(__cilkrts_hyperobject_base *key, void *view) { +__attribute__((noinline)) +void __cilkrts_hyper_dealloc(void *view, size_t bytes) { if (USE_INTERNAL_MALLOC) { __cilkrts_worker *w = __cilkrts_get_tls_worker(); if (!w) // Use instead the worker from the default CilkRTS that last exited // a Cilkified region w = default_cilkrts->workers[default_cilkrts->exiting_worker]; - cilk_internal_free(w, view, key->__view_size, IM_REDUCER_MAP); + cilk_internal_free(w, view, bytes, IM_REDUCER_MAP); } else free(view); } diff --git a/runtime/rts-config.h b/runtime/rts-config.h index 63bc2978..f04daef5 100644 --- a/runtime/rts-config.h +++ b/runtime/rts-config.h @@ -18,7 +18,7 @@ #endif #define __CILKRTS_VERSION 0x0 -#define __CILKRTS_ABI_VERSION 3 +#define __CILKRTS_ABI_VERSION 4 #ifndef CILK_DEBUG #define CILK_DEBUG 1 @@ -38,6 +38,12 @@ #define PROC_SPEED_IN_GHZ 2.2 +#define BUSY_LOOP_SPIN 4096 + +#define ENABLE_THIEF_SLEEP 1 + +#define ENABLE_EXTENSION 1 + #if defined __linux__ #define CILK_PAGE_SIZE 0 /* page size not available at compile time */ #elif defined __APPLE__ @@ -56,9 +62,11 @@ #define DEFAULT_NPROC 0 // 0 for # of cores available #define 
DEFAULT_DEQ_DEPTH 1024 #define DEFAULT_STACK_SIZE 0x100000 // 1 MBytes -#define DEFAULT_FIBER_POOL_CAP 3 // initial per-worker fiber pool capacity +#define DEFAULT_FIBER_POOL_CAP 8 // initial per-worker fiber pool capacity #define DEFAULT_REDUCER_LIMIT 1024 #define DEFAULT_FORCE_REDUCE 0 // do not self steal to force reduce #define MAX_CALLBACKS 32 // Maximum number of init or exit callbacks + +#define HYPER_TABLE_HIDDEN 1 #endif // _CONFIG_H diff --git a/runtime/sched_stats.c b/runtime/sched_stats.c index e5d16e0d..bb8e60c9 100644 --- a/runtime/sched_stats.c +++ b/runtime/sched_stats.c @@ -1,3 +1,4 @@ +#include #include #include @@ -30,35 +31,11 @@ static const char *enum_to_str(enum timing_type t) { } } -static inline double cycles_to_micro_sec(uint64_t cycle) { - return (double)cycle / ((double)PROC_SPEED_IN_GHZ * 1000.0); -} - __attribute__((unused)) static inline double micro_sec_to_sec(double micro_sec) { return micro_sec / 1000000.0; } -static inline uint64_t begin_cycle_count() { - unsigned int low, high; - __asm__ volatile("cpuid\n\t" - "rdtsc\n\t" - "mov %%edx, %0\n\t" - "mov %%eax, %1\n\t" - : "=r"(high), "=r"(low)::"%rax", "%rbx", "%rcx", "%rdx"); - return ((uint64_t)high << 32) | low; -} - -static inline uint64_t end_cycle_count() { - unsigned int low, high; - __asm__ volatile("rdtscp\n\t" - "mov %%edx, %0\n\t" - "mov %%eax, %1\n\t" - "cpuid\n\t" - : "=r"(high), "=r"(low)::"%rax", "%rbx", "%rcx", "%rdx"); - return ((uint64_t)high << 32) | low; -} - static inline double nsec_to_sec(uint64_t nsec) { return nsec / 1.0e9; } static inline uint64_t begin_time() { @@ -83,6 +60,8 @@ void cilk_global_sched_stats_init(struct global_sched_stats *s) { s->exit_time = 0; s->steals = 0; s->repos = 0; + s->reeng_rqsts = 0; + s->onesen_rqsts = 0; for (int i = 0; i < NUMBER_OF_STATS; ++i) { s->time[i] = 0.0; s->count[i] = 0; @@ -98,6 +77,8 @@ void cilk_sched_stats_init(struct sched_stats *s) { } s->steals = 0; s->repos = 0; + s->reeng_rqsts = 0; + s->onesen_rqsts = 0; } 
void cilk_start_timing(__cilkrts_worker *w, enum timing_type t) { @@ -182,14 +163,16 @@ static void sched_stats_reset_worker(__cilkrts_worker *w, } w->l->stats.steals = 0; w->l->stats.repos = 0; + w->l->stats.reeng_rqsts = 0; + w->l->stats.onesen_rqsts = 0; } #define COL_DESC "%15s" -#define HDR_DESC "%18s %8s" +#define HDR_DESC "%18s %10s" #define WORKER_HDR_DESC "%10s %3u:" -#define FIELD_DESC "%18.6f %8ld" -#define COUNT_HDR_DESC "%8s" -#define COUNT_DESC "%8ld" +#define FIELD_DESC "%18.6f %10" PRIu64 +#define COUNT_HDR_DESC "%10s" +#define COUNT_DESC "%10" PRIu64 static void sched_stats_print_worker(__cilkrts_worker *w, void *data) { FILE *fp = (FILE *)data; @@ -203,9 +186,13 @@ static void sched_stats_print_worker(__cilkrts_worker *w, void *data) { } w->g->stats.steals += w->l->stats.steals; w->g->stats.repos += w->l->stats.repos; + w->g->stats.reeng_rqsts += w->l->stats.reeng_rqsts; + w->g->stats.onesen_rqsts += w->l->stats.onesen_rqsts; fprintf(stderr, COUNT_DESC, w->l->stats.steals); fprintf(stderr, COUNT_DESC, w->l->stats.repos); + fprintf(stderr, COUNT_DESC, w->l->stats.reeng_rqsts); + fprintf(stderr, COUNT_DESC, w->l->stats.onesen_rqsts); fprintf(fp, "\n"); } @@ -216,6 +203,8 @@ void cilk_sched_stats_print(struct global_state *g) { } g->stats.steals = 0; g->stats.repos = 0; + g->stats.reeng_rqsts = 0; + g->stats.onesen_rqsts = 0; fprintf(stderr, "\nSCHEDULING STATS (SECONDS):\n"); { @@ -232,6 +221,8 @@ void cilk_sched_stats_print(struct global_state *g) { } fprintf(stderr, COUNT_HDR_DESC, "steals"); fprintf(stderr, COUNT_HDR_DESC, "reposses"); + fprintf(stderr, COUNT_HDR_DESC, "reengs"); + fprintf(stderr, COUNT_HDR_DESC, "onesen"); fprintf(stderr, "\n"); for_each_worker(g, &sched_stats_print_worker, stderr); @@ -242,6 +233,8 @@ void cilk_sched_stats_print(struct global_state *g) { } fprintf(stderr, COUNT_DESC, g->stats.steals); fprintf(stderr, COUNT_DESC, g->stats.repos); + fprintf(stderr, COUNT_DESC, g->stats.reeng_rqsts); + fprintf(stderr, COUNT_DESC, 
g->stats.onesen_rqsts); fprintf(stderr, "\n"); for_each_worker(g, &sched_stats_reset_worker, NULL); diff --git a/runtime/sched_stats.h b/runtime/sched_stats.h index a2df087a..bf4cb4ee 100644 --- a/runtime/sched_stats.h +++ b/runtime/sched_stats.h @@ -27,6 +27,8 @@ struct sched_stats { uint64_t steals; uint64_t repos; + uint64_t reeng_rqsts; + uint64_t onesen_rqsts; }; struct global_sched_stats { @@ -38,6 +40,8 @@ struct global_sched_stats { uint64_t boss_end; uint64_t steals; uint64_t repos; + uint64_t reeng_rqsts; + uint64_t onesen_rqsts; double time[NUMBER_OF_STATS]; // Total time measured for all stats uint64_t count[NUMBER_OF_STATS]; }; diff --git a/runtime/scheduler.c b/runtime/scheduler.c index 0695a72b..cc24c4a1 100644 --- a/runtime/scheduler.c +++ b/runtime/scheduler.c @@ -7,6 +7,10 @@ #include #include +#ifdef __APPLE__ +#include +#endif + #include "cilk-internal.h" #include "closure.h" #include "fiber.h" @@ -16,11 +20,14 @@ #include "readydeque.h" #include "scheduler.h" #include "worker_coord.h" +#include "worker_sleep.h" #include "reducer_impl.h" -__thread __cilkrts_worker *tls_worker = NULL; -__thread bool is_boss_thread = false; +bool __cilkrts_use_extension = false; + +__thread __cilkrts_worker *__cilkrts_tls_worker = NULL; +CHEETAH_INTERNAL __thread bool is_boss_thread = false; // ============================================== // Misc. 
helper functions @@ -33,9 +40,12 @@ static void rts_srand(__cilkrts_worker *const w, unsigned int seed) { w->l->rand_next = seed; } -static unsigned int rts_rand(local_state *l) { - l->rand_next = l->rand_next * 1103515245 + 12345; - return (l->rand_next >> 16); +static unsigned int update_rand_state(unsigned int state) { + return state * 1103515245 + 12345; +} + +static unsigned int get_rand(unsigned int state) { + return state >> 16; } static void worker_change_state(__cilkrts_worker *w, @@ -145,6 +155,12 @@ static void setup_for_sync(__cilkrts_worker *w, Closure *t) { w->l->fiber_to_free = t->fiber; t->fiber = t->fiber_child; t->fiber_child = NULL; + + if (USE_EXTENSION) { + w->l->ext_fiber_to_free = t->ext_fiber; + t->ext_fiber = t->ext_fiber_child; + t->ext_fiber_child = NULL; + } } CILK_ASSERT(w, t->fiber); @@ -163,16 +179,16 @@ static void setup_for_sync(__cilkrts_worker *w, Closure *t) { // ============================================== // TLS related functions // ============================================== -static pthread_key_t worker_key; +/* static pthread_key_t worker_key; */ CHEETAH_INTERNAL void __cilkrts_init_tls_variables() { - int status = pthread_key_create(&worker_key, NULL); - USE_UNUSED(status); - CILK_ASSERT_G(status == 0); + /* int status = pthread_key_create(&worker_key, NULL); */ + /* USE_UNUSED(status); */ + /* CILK_ASSERT_G(status == 0); */ } CHEETAH_INTERNAL void __cilkrts_set_tls_worker(__cilkrts_worker *w) { - tls_worker = w; + __cilkrts_tls_worker = w; } // ============================================== @@ -180,10 +196,11 @@ CHEETAH_INTERNAL void __cilkrts_set_tls_worker(__cilkrts_worker *w) { // ============================================== /* Doing an "unconditional steal" to steal back the call parent closure */ -static Closure *setup_call_parent_resumption(__cilkrts_worker *const w, +static Closure *setup_call_parent_resumption(ReadyDeque *deques, + __cilkrts_worker *const w, Closure *t) { - deque_assert_ownership(w, w->self); 
+ deque_assert_ownership(deques, w, w->self); Closure_assert_ownership(w, t); CILK_ASSERT_POINTER_EQUAL(w, w, __cilkrts_get_tls_worker()); @@ -192,6 +209,9 @@ static Closure *setup_call_parent_resumption(__cilkrts_worker *const w, CILK_ASSERT(w, ((intptr_t)t->frame->worker) & 1); CILK_ASSERT_POINTER_EQUAL(w, w->head, w->tail); CILK_ASSERT_POINTER_EQUAL(w, w->current_stack_frame, t->frame); + if (USE_EXTENSION) { + w->extension = t->frame->extension; + } Closure_change_status(w, t, CLOSURE_SUSPENDED, CLOSURE_RUNNING); atomic_store_explicit(&t->frame->worker, w, memory_order_relaxed); @@ -205,9 +225,11 @@ void Cilk_set_return(__cilkrts_worker *const w) { Closure *t; cilkrts_alert(RETURN, w, "(Cilk_set_return)"); + ReadyDeque *deques = w->g->deques; + worker_id self = w->self; - deque_lock_self(w); - t = deque_peek_bottom(w, w->self); + deque_lock_self(deques, w); + t = deque_peek_bottom(deques, w, self); Closure_lock(w, t); CILK_ASSERT(w, t->status == CLOSURE_RUNNING); @@ -222,7 +244,7 @@ void Cilk_set_return(__cilkrts_worker *const w) { CILK_ASSERT(w, t->simulated_stolen == false); Closure *call_parent = t->call_parent; - Closure *t1 = deque_xtract_bottom(w, w->self); + Closure *t1 = deque_xtract_bottom(deques, w, self); USE_UNUSED(t1); CILK_ASSERT(w, t == t1); @@ -234,9 +256,13 @@ void Cilk_set_return(__cilkrts_worker *const w) { Closure_lock(w, call_parent); CILK_ASSERT(w, call_parent->fiber == t->fiber); t->fiber = NULL; + if (USE_EXTENSION) { + CILK_ASSERT(w, call_parent->ext_fiber == t->ext_fiber); + t->ext_fiber = NULL; + } Closure_remove_callee(w, call_parent); - setup_call_parent_resumption(w, call_parent); + setup_call_parent_resumption(deques, w, call_parent); Closure_unlock(w, call_parent); if (t->saved_throwing_fiber) { @@ -244,9 +270,9 @@ void Cilk_set_return(__cilkrts_worker *const w) { t->saved_throwing_fiber = NULL; } Closure_destroy(w, t); - deque_add_bottom(w, call_parent, w->self); + deque_add_bottom(deques, w, call_parent, self); - 
deque_unlock_self(w); + deque_unlock_self(deques, w); } /*** @@ -271,6 +297,10 @@ static Closure *unconditional_steal(__cilkrts_worker *const w, CILK_ASSERT(w, (parent->fiber == NULL) && parent->fiber_child); parent->fiber = parent->fiber_child; parent->fiber_child = NULL; + if (USE_EXTENSION) { + parent->ext_fiber = parent->ext_fiber_child; + parent->ext_fiber_child = NULL; + } Closure_make_ready(parent); return parent; @@ -465,12 +495,19 @@ static Closure *Closure_return(__cilkrts_worker *const w, Closure *child) { // Case where we are not the leftmost stack. CILK_ASSERT(w, parent->fiber_child != child->fiber); cilk_fiber_deallocate_to_pool(w, child->fiber); + if (USE_EXTENSION) { + cilk_fiber_deallocate_to_pool(w, child->ext_fiber); + } } else { // We are leftmost, pass stack/fiber up to parent. // Thus, no stack/fiber to free. parent->fiber_child = child->fiber; + if (USE_EXTENSION) { + parent->ext_fiber_child = child->ext_fiber; + } } child->fiber = NULL; + child->ext_fiber = NULL; Closure_remove_child(w, parent, child); // unlink child from tree // we have deposited our views and unlinked; we can quit now @@ -586,9 +623,10 @@ void Cilk_exception_handler(char *exn) { Closure *t; __cilkrts_worker *w = __cilkrts_get_tls_worker(); + ReadyDeque *deques = w->g->deques; - deque_lock_self(w); - t = deque_peek_bottom(w, w->self); + deque_lock_self(deques, w); + t = deque_peek_bottom(deques, w, w->self); CILK_ASSERT(w, t); Closure_lock(w, t); @@ -618,13 +656,13 @@ void Cilk_exception_handler(char *exn) { } Closure_unlock(w, t); - deque_unlock_self(w); + deque_unlock_self(deques, w); sanitizer_unpoison_fiber(t->fiber); longjmp_to_runtime(w); // NOT returning back to user code } else { // not steal, not abort; false alarm Closure_unlock(w, t); - deque_unlock_self(w); + deque_unlock_self(deques, w); return; } @@ -661,8 +699,7 @@ oldest_non_stolen_frame_in_stacklet(__cilkrts_stack_frame *head) { static Closure *setup_call_parent_closure_helper( __cilkrts_worker *const w, 
__cilkrts_worker *const victim_w, - __cilkrts_stack_frame *frame, Closure *oldest) { - + __cilkrts_stack_frame *frame, void *extension, Closure *oldest) { Closure *call_parent, *curr_cl; if (oldest->frame == frame) { @@ -670,9 +707,8 @@ static Closure *setup_call_parent_closure_helper( CILK_ASSERT(w, oldest->fiber); return oldest; } - - call_parent = setup_call_parent_closure_helper(w, victim_w, - frame->call_parent, oldest); + call_parent = setup_call_parent_closure_helper( + w, victim_w, frame->call_parent, extension, oldest); __cilkrts_set_stolen(frame); curr_cl = Closure_create(w); curr_cl->frame = frame; @@ -685,6 +721,11 @@ static Closure *setup_call_parent_closure_helper( memory_order_relaxed); curr_cl->fiber = call_parent->fiber; + if (USE_EXTENSION) { + curr_cl->frame->extension = extension; + curr_cl->ext_fiber = call_parent->ext_fiber; + } + Closure_add_callee(w, call_parent, curr_cl); return curr_cl; @@ -705,8 +746,8 @@ static void setup_closures_in_stacklet(__cilkrts_worker *const w, Closure *call_parent; Closure *oldest_cl = youngest_cl->call_parent; __cilkrts_stack_frame *youngest, *oldest; - youngest = youngest_cl->frame; + void *extension = USE_EXTENSION ? 
youngest->extension : NULL; oldest = oldest_non_stolen_frame_in_stacklet(youngest); CILK_ASSERT(w, youngest == youngest_cl->frame); @@ -722,13 +763,16 @@ static void setup_closures_in_stacklet(__cilkrts_worker *const w, CILK_ASSERT(w, oldest->flags & CILK_FRAME_DETACHED); __cilkrts_set_stolen(oldest); oldest_cl->frame = oldest; + if (USE_EXTENSION) { + oldest_cl->frame->extension = extension; + } } CILK_ASSERT(w, oldest->worker == victim_w); atomic_store_explicit(&oldest_cl->frame->worker, INVALID, memory_order_relaxed); call_parent = setup_call_parent_closure_helper( - w, victim_w, youngest->call_parent, oldest_cl); + w, victim_w, youngest->call_parent, extension, oldest_cl); CILK_ASSERT(w, youngest_cl->fiber != oldest_cl->fiber); CILK_ASSERT(w, youngest->worker == victim_w); @@ -788,13 +832,13 @@ static int do_dekker_on(__cilkrts_worker *const w, * deque to get the parent closure. This is the only time I can * think of, where the ready deque contains more than one frame. ***/ -static Closure *promote_child(__cilkrts_worker *const w, +static Closure *promote_child(ReadyDeque *deques, __cilkrts_worker *const w, __cilkrts_worker *const victim_w, Closure *cl, Closure **res) { worker_id pn = victim_w->self; - deque_assert_ownership(w, pn); + deque_assert_ownership(deques, w, pn); Closure_assert_ownership(w, cl); CILK_ASSERT(w, cl->status == CLOSURE_RUNNING); @@ -813,7 +857,6 @@ static Closure *promote_child(__cilkrts_worker *const w, __cilkrts_stack_frame **head = atomic_load_explicit(&victim_w->head, memory_order_acquire); __cilkrts_stack_frame *frame_to_steal = *head; - // ANGE: this must be true if we get this far // Note that it can be that H == T here; victim could have done T-- // after the thief passes Dekker; in which case, thief gets the last @@ -861,7 +904,7 @@ static Closure *promote_child(__cilkrts_worker *const w, spawn_parent->call_parent = cl; // suspend cl & remove it from deque - Closure_suspend_victim(w, victim_w, cl); + 
Closure_suspend_victim(deques, w, victim_w, cl); Closure_unlock(w, cl); Closure_lock(w, spawn_parent); @@ -880,7 +923,7 @@ static Closure *promote_child(__cilkrts_worker *const w, /*** * Register this child, which sets up its sibling links. - * We do this here intead of in finish_promote, because we must setup + * We do this here instead of in finish_promote, because we must setup * the sib links for the new child before its pointer escapses. ***/ Closure_add_child(w, spawn_parent, spawn_child); @@ -893,7 +936,7 @@ static Closure *promote_child(__cilkrts_worker *const w, spawn_child->frame = (__cilkrts_stack_frame *)NULL; /* insert the closure on the victim processor's deque */ - deque_add_bottom(w, spawn_child, pn); + deque_add_bottom(deques, w, spawn_child, pn); /* at this point the child can be freely executed */ return spawn_child; @@ -947,13 +990,16 @@ static void finish_promote(__cilkrts_worker *const w, * NOTE: this function assumes that w holds the lock on victim_w's deque * and Closure cl and releases them before returning. 
***/ -static Closure *extract_top_spawning_closure(__cilkrts_worker *const w, +static Closure *extract_top_spawning_closure(ReadyDeque *deques, + __cilkrts_worker *const w, __cilkrts_worker *const victim_w, Closure *cl) { Closure *res = NULL, *child; struct cilk_fiber *parent_fiber = cl->fiber; + struct cilk_fiber *parent_ext_fiber = cl->ext_fiber; + worker_id victim_id = victim_w->self; - deque_assert_ownership(w, victim_w->self); + deque_assert_ownership(deques, w, victim_id); Closure_assert_ownership(w, cl); CILK_ASSERT(w, parent_fiber); @@ -961,7 +1007,7 @@ static Closure *extract_top_spawning_closure(__cilkrts_worker *const w, * if dekker passes, promote the child to a full closure, * and steal the parent */ - child = promote_child(w, victim_w, cl, &res); + child = promote_child(deques, w, victim_w, cl, &res); cilkrts_alert(STEAL, w, "(Closure_steal) promote gave cl/res/child = %p/%p/%p", (void *)cl, (void *)res, (void *)child); @@ -971,7 +1017,7 @@ static Closure *extract_top_spawning_closure(__cilkrts_worker *const w, // ANGE: in this case, the spawning parent to steal / resume // is simply cl (i.e., there is only one frame in the stacklet), // so we didn't set res in promote_child. - res = deque_xtract_top(w, victim_w->self); + res = deque_xtract_top(deques, w, victim_id); CILK_ASSERT(w, cl == res); } @@ -979,13 +1025,20 @@ static Closure *extract_top_spawning_closure(__cilkrts_worker *const w, // only create a new fiber if it's a real steal if (w == victim_w) { res->fiber = NULL; + res->ext_fiber = NULL; } else { res->fiber = cilk_fiber_allocate_from_pool(w); + if (USE_EXTENSION) { + res->ext_fiber = cilk_fiber_allocate_from_pool(w); + } } // make sure we are not hold lock on child Closure_assert_alienation(w, child); child->fiber = parent_fiber; + if (USE_EXTENSION) { + child->ext_fiber = parent_ext_fiber; + } return res; } @@ -994,12 +1047,16 @@ static Closure *extract_top_spawning_closure(__cilkrts_worker *const w, * stealing protocol. 
Tries to steal from the victim; returns a * stolen closure, or NULL if none. */ -static Closure *Closure_steal(__cilkrts_worker *const w, int victim) { +static Closure *Closure_steal(__cilkrts_worker **workers, ReadyDeque *deques, + __cilkrts_worker *const w, int victim) { Closure *cl; Closure *res = (Closure *)NULL; __cilkrts_worker *victim_w; - victim_w = w->g->workers[victim]; + victim_w = workers[victim]; + + if (victim_w == NULL) + return NULL; // Fast test for an unsuccessful steal attempt using only read operations. // This fast test seems to improve parallel performance. @@ -1014,15 +1071,15 @@ static Closure *Closure_steal(__cilkrts_worker *const w, int victim) { } //----- EVENT_STEAL_ATTEMPT - if (deque_trylock(w, victim) == 0) { + if (deque_trylock(deques, w, victim) == 0) { return NULL; } - cl = deque_peek_top(w, victim); + cl = deque_peek_top(deques, w, victim); if (cl) { if (Closure_trylock(w, cl) == 0) { - deque_unlock(w, victim); + deque_unlock(deques, w, victim); return NULL; } @@ -1037,10 +1094,10 @@ static Closure *Closure_steal(__cilkrts_worker *const w, int victim) { cilkrts_alert(STEAL, w, "(Closure_steal) can steal from W%d; cl=%p", victim, (void *)cl); - res = extract_top_spawning_closure(w, victim_w, cl); + res = extract_top_spawning_closure(deques, w, victim_w, cl); // at this point, more steals can happen from the victim. 
- deque_unlock(w, victim); + deque_unlock(deques, w, victim); CILK_ASSERT(w, res->fiber); CILK_ASSERT(w, res->frame->worker == victim_w); @@ -1070,7 +1127,7 @@ static Closure *Closure_steal(__cilkrts_worker *const w, int victim) { // MUST unlock the closure before the queue; // see rule D in the file PROTOCOLS Closure_unlock(w, cl); - deque_unlock(w, victim); + deque_unlock(deques, w, victim); break; default: @@ -1082,7 +1139,7 @@ static Closure *Closure_steal(__cilkrts_worker *const w, int victim) { Closure_status_to_str(cl->status)); } } else { - deque_unlock(w, victim); + deque_unlock(deques, w, victim); //----- EVENT_STEAL_EMPTY_DEQUE } @@ -1102,7 +1159,9 @@ static Closure *Closure_steal(__cilkrts_worker *const w, int victim) { ***/ void promote_own_deque(__cilkrts_worker *w) { - if (deque_trylock(w, w->self) == 0) { + ReadyDeque *deques = w->g->deques; + worker_id self = w->self; + if (deque_trylock(deques, w, self) == 0) { cilkrts_bug( w, "Bug: failed to acquire deque lock when promoting own deque"); return; @@ -1110,12 +1169,12 @@ void promote_own_deque(__cilkrts_worker *w) { bool done = false; while (!done) { - Closure *cl = deque_peek_top(w, w->self); + Closure *cl = deque_peek_top(deques, w, self); CILK_ASSERT(w, cl); CILK_ASSERT(w, cl->status == CLOSURE_RUNNING); if (Closure_trylock(w, cl) == 0) { - deque_unlock(w, w->self); + deque_unlock(deques, w, self); cilkrts_bug( w, "Bug: failed to acquire deque lock when promoting own deque"); @@ -1123,7 +1182,7 @@ void promote_own_deque(__cilkrts_worker *w) { } if (do_dekker_on(w, w, cl)) { // unfortunately this function releases both locks - Closure *res = extract_top_spawning_closure(w, w, cl); + Closure *res = extract_top_spawning_closure(deques, w, w, cl); CILK_ASSERT(w, res); CILK_ASSERT(w, res->fiber == NULL); CILK_ASSERT(w, res->frame->worker == w); @@ -1142,7 +1201,7 @@ void promote_own_deque(__cilkrts_worker *w) { } else { Closure_unlock(w, cl); - deque_unlock(w, w->self); + deque_unlock(deques, w, 
self); done = true; // we can break out; no more frames to promote } } @@ -1181,13 +1240,18 @@ void longjmp_to_user_code(__cilkrts_worker *w, Closure *t) { // This is the first time we run the root closure in this Cilkified // region. The closure has been completely setup at this point by // invoke_cilkified_root(). We just need jump to the user code. - volatile bool *initialized = &w->g->root_closure_initialized; - if (t == w->g->root_closure && *initialized == false) { + global_state *g = w->g; + volatile bool *initialized = &g->root_closure_initialized; + if (t == g->root_closure && *initialized == false) { *initialized = true; } else if (!t->simulated_stolen) { void *new_rsp = sysdep_reset_stack_for_resume(fiber, sf); USE_UNUSED(new_rsp); CILK_ASSERT(w, SP(sf) == new_rsp); + if (USE_EXTENSION) { + w->extension = sf->extension; + w->ext_stack = sysdep_get_stack_start(t->ext_fiber); + } } } CILK_SWITCH_TIMING(w, INTERVAL_SCHED, INTERVAL_WORK); @@ -1218,9 +1282,10 @@ int Cilk_sync(__cilkrts_worker *const w, __cilkrts_stack_frame *frame) { int res = SYNC_READY; //----- EVENT_CILK_SYNC + ReadyDeque *deques = w->g->deques; - deque_lock_self(w); - t = deque_peek_bottom(w, w->self); + deque_lock_self(deques, w); + t = deque_peek_bottom(deques, w, w->self); Closure_lock(w, t); /* assert we are really at the top of the stack */ CILK_ASSERT(w, Closure_at_top_of_stack(w)); @@ -1244,12 +1309,18 @@ int Cilk_sync(__cilkrts_worker *const w, __cilkrts_stack_frame *frame) { // gotten back to runtime but returning to another ancestor that needs // to sync ... in which case we might have a fiber to free, but it's // never the same fiber that we are on right now. 
- if (w->l->fiber_to_free) { - CILK_ASSERT(w, w->l->fiber_to_free != t->fiber); + local_state *l = w->l; + if (l->fiber_to_free) { + CILK_ASSERT(w, l->fiber_to_free != t->fiber); // we should free this fiber now and we can as long as we are not on // it - cilk_fiber_deallocate_to_pool(w, w->l->fiber_to_free); - w->l->fiber_to_free = NULL; + cilk_fiber_deallocate_to_pool(w, l->fiber_to_free); + l->fiber_to_free = NULL; + } + if (USE_EXTENSION && l->ext_fiber_to_free) { + CILK_ASSERT(w, l->ext_fiber_to_free != t->ext_fiber); + cilk_fiber_deallocate_to_pool(w, l->ext_fiber_to_free); + l->ext_fiber_to_free = NULL; } if (Closure_has_children(t)) { @@ -1261,17 +1332,21 @@ int Cilk_sync(__cilkrts_worker *const w, __cilkrts_stack_frame *frame) { // exception in the continuation was thrown), we still need this // fiber for unwinding. if (t->user_exn.exn == NULL) { - w->l->fiber_to_free = t->fiber; + l->fiber_to_free = t->fiber; } else { t->saved_throwing_fiber = t->fiber; } + if (USE_EXTENSION) { + l->ext_fiber_to_free = t->ext_fiber; + } t->fiber = NULL; + t->ext_fiber = NULL; // place holder for reducer map; the view in tlmm (if any) are // updated by the last strand in Closure t before sync; need to // reduce these when successful provably good steal occurs cilkred_map *reducers = w->reducer_map; w->reducer_map = NULL; - Closure_suspend(w, t); + Closure_suspend(deques, w, t); t->user_rmap = reducers; /* set this after state change to suspended */ res = SYNC_NOT_READY; } else { @@ -1281,7 +1356,7 @@ int Cilk_sync(__cilkrts_worker *const w, __cilkrts_stack_frame *frame) { } Closure_unlock(w, t); - deque_unlock_self(w); + deque_unlock_self(deques, w); if (res == SYNC_READY) { struct closure_exception child_exn = t->child_exn; @@ -1311,8 +1386,11 @@ int Cilk_sync(__cilkrts_worker *const w, __cilkrts_stack_frame *frame) { return res; } -static void do_what_it_says(__cilkrts_worker *w, Closure *t) { +static void do_what_it_says(ReadyDeque *deques, __cilkrts_worker *w, + 
Closure *t) { __cilkrts_stack_frame *f; + worker_id self = w->self; + local_state *l = w->l; do { cilkrts_alert(SCHED, w, "(do_what_it_says) closure %p", (void *)t); @@ -1321,7 +1399,8 @@ static void do_what_it_says(__cilkrts_worker *w, Closure *t) { switch (t->status) { case CLOSURE_READY: // ANGE: anything we need to free must have been freed at this point - CILK_ASSERT(w, w->l->fiber_to_free == NULL); + CILK_ASSERT(w, l->fiber_to_free == NULL); + CILK_ASSERT(w, l->ext_fiber_to_free == NULL); cilkrts_alert(SCHED, w, "(do_what_it_says) CLOSURE_READY"); /* just execute it */ @@ -1336,9 +1415,9 @@ static void do_what_it_says(__cilkrts_worker *w, Closure *t) { // MUST unlock the closure before locking the queue // (rule A in file PROTOCOLS) - deque_lock_self(w); - deque_add_bottom(w, t, w->self); - deque_unlock_self(w); + deque_lock_self(deques, w); + deque_add_bottom(deques, w, t, self); + deque_unlock_self(deques, w); /* now execute it */ cilkrts_alert(SCHED, w, "(do_what_it_says) Jump into user code"); @@ -1350,24 +1429,29 @@ static void do_what_it_says(__cilkrts_worker *w, Closure *t) { // code"); // longjmp invalidates non-volatile variables __cilkrts_worker *volatile w_save = w; - if (__builtin_setjmp(w->l->rts_ctx) == 0) { + if (__builtin_setjmp(l->rts_ctx) == 0) { worker_change_state(w, WORKER_RUN); longjmp_to_user_code(w, t); } else { w = w_save; + l = w->l; CILK_ASSERT_POINTER_EQUAL(w, w, __cilkrts_get_tls_worker()); sanitizer_finish_switch_fiber(); worker_change_state(w, WORKER_SCHED); // CILK_ASSERT(w, t->fiber == w->l->fiber_to_free); - if (w->l->fiber_to_free) { - cilk_fiber_deallocate_to_pool(w, w->l->fiber_to_free); + if (l->fiber_to_free) { + cilk_fiber_deallocate_to_pool(w, l->fiber_to_free); + l->fiber_to_free = NULL; + } + if (USE_EXTENSION && l->ext_fiber_to_free) { + cilk_fiber_deallocate_to_pool(w, l->ext_fiber_to_free); + l->ext_fiber_to_free = NULL; } - w->l->fiber_to_free = NULL; // Attempt to get a closure from the bottom of our deque. 
- deque_lock_self(w); - t = deque_xtract_bottom(w, w->self); - deque_unlock_self(w); + deque_lock_self(deques, w); + t = deque_xtract_bottom(deques, w, self); + deque_unlock_self(deques, w); } break; // ? @@ -1388,7 +1472,7 @@ static void do_what_it_says(__cilkrts_worker *w, Closure *t) { break; } if (t) { - WHEN_SCHED_STATS(w->l->stats.repos++); + WHEN_SCHED_STATS(l->stats.repos++); } } while (t); } @@ -1397,7 +1481,7 @@ static void do_what_it_says(__cilkrts_worker *w, Closure *t) { // Cilk computation until it would enter the work-stealing loop. void do_what_it_says_boss(__cilkrts_worker *w, Closure *t) { - do_what_it_says(w, t); + do_what_it_says(w->g->deques, w, t); // At this point, the boss has run out of work to do. Rather than become a // thief itself, the boss wakes up the root worker to become a thief. @@ -1411,234 +1495,6 @@ void do_what_it_says_boss(__cilkrts_worker *w, Closure *t) { #endif } -// Update the index-to-worker map to swap self with the worker at the target -// index. -static void swap_worker_with_target(global_state *g, worker_id self, - worker_id target_index) { - worker_id self_index = g->worker_to_index[self]; - worker_id target_worker = g->index_to_worker[target_index]; - - // Update the index-to-worker map. - g->index_to_worker[self_index] = target_worker; - g->index_to_worker[target_index] = self; - - // Update the worker-to-index map. - g->worker_to_index[target_worker] = self_index; - g->worker_to_index[self] = target_index; -} - -// Called by a thief thread. Causes the thief thread to try to sleep, that is, -// to wait for a signal to resume work-stealing. -static bool try_to_disengage_thief(global_state *g, worker_id self, - uint64_t disengaged_deprived) { - // Try to grab the lock on the index structure. - if (!cilk_mutex_try(&g->index_lock)) { - return false; - } - - // Increment the number of disengaged thieves and decrement number of deprived - // thieves. 
- const uint64_t disengaged_mask = ((uint64_t)-1) << 32; - uint64_t disengaged = disengaged_deprived & disengaged_mask; - uint64_t new_disengaged_deprived = - ((disengaged + (1UL << 32)) & disengaged_mask) | - ((disengaged_deprived - 1) & ~disengaged_mask); - // Try to update the number of disengaged workers. This step synchronizes - // with parallel calls to reengage thieves, calls to reengage thieves, and - // updates to the number of deprived workers. - // First atomically update the number of disengaged workers. - if (atomic_compare_exchange_strong_explicit( - &g->disengaged_deprived, &disengaged_deprived, - new_disengaged_deprived, memory_order_release, - memory_order_acquire)) { - // Update the index-to-worker map. - worker_id last_index = g->nworkers - (new_disengaged_deprived >> 32); - if (g->worker_to_index[self] < last_index) { - swap_worker_with_target(g, self, last_index); - } - // Release the lock on the index structure - cilk_mutex_unlock(&g->index_lock); - - // Disengage this thread. - thief_disengage(g); - - // The thread is now reengaged. Grab the lock on the index structure. - cilk_mutex_lock(&g->index_lock); - - // Decrement the number of disengaged workers. - while (true) { - // Atomically decrement the number of disengaged workers. - uint64_t disengaged_deprived = atomic_load_explicit( - &g->disengaged_deprived, memory_order_acquire); - disengaged = disengaged_deprived & disengaged_mask; - new_disengaged_deprived = ((disengaged - (1UL << 32)) & disengaged_mask) | - ((disengaged_deprived + 1) & ~disengaged_mask); - if (atomic_compare_exchange_strong_explicit( - &g->disengaged_deprived, &disengaged_deprived, - new_disengaged_deprived, memory_order_release, - memory_order_acquire)) { - // Update the index structure. - last_index = g->nworkers - (disengaged_deprived >> 32); - if (g->worker_to_index[self] > last_index) { - swap_worker_with_target(g, self, last_index); - } - - // Release the lock on the index structure. 
- cilk_mutex_unlock(&g->index_lock); - return true; - } - } - } else { - // Release the lock on the index structure. - cilk_mutex_unlock(&g->index_lock); - return false; - } -} - -// Attempt to disengage this thief thread. The __cilkrts_worker parameter is only -// used for debugging. -static bool maybe_disengage_thief(global_state *g, worker_id self, - unsigned int nworkers, __cilkrts_worker *w) { - // Check the number of active and deprived workers, and disengage this worker - // if there are too many deprived workers. - while (true) { - // Check if this deprived thread should sleep. - uint64_t disengaged_deprived = - atomic_load_explicit(&g->disengaged_deprived, memory_order_acquire); - const uint64_t disengaged_mask = ((uint64_t)-1) << 32; - uint32_t disengaged = (uint32_t)(disengaged_deprived >> 32); - uint32_t deprived = (uint32_t)(disengaged_deprived & ~disengaged_mask); - - CILK_ASSERT(w, disengaged < nworkers); - CILK_ASSERT(w, deprived < nworkers); - int32_t active = - (int32_t)nworkers - (int32_t)disengaged - (int32_t)deprived; - CILK_ASSERT(w, active >= 1); - // TODO: Investigate whether it's better to keep the number of deprived - // workers less than the number of active workers. - if (active < (int32_t)deprived) { - // Too many deprived thieves. Try to disengage this worker. If it - // fails, repeat the loop. - if (try_to_disengage_thief(g, self, disengaged_deprived)) { - // The thief was successfully disengaged. It has since been - // taken out of disengage. - return true; - } - } else { - // We have enough active workers to keep this worker out of disengage, - // but this worker was still unable to steal work. Put this thief - // to sleep for a while using the conventional way. - // In testing, a nanosleep(0) takes approximately 50 us. 
- const struct timespec sleeptime = {.tv_sec = 0, .tv_nsec = 50000}; - /* const struct timespec sleeptime = {.tv_sec = 0, .tv_nsec = - * 25000}; */ - nanosleep(&sleeptime, NULL); - break; - } - } - return false; -} - -// Threshold for number of consective failed steal attempts to declare a -// thief as deprived. Must be a power of 2. -#define DEPRIVED_THRESHOLD 2048 - -// Number of attempted steals the thief should do each time it copies the -// worker state. ATTEMPTS must divide DEPRIVED_THRESHOLD. -#define ATTEMPTS 4 - -static unsigned int go_to_sleep_maybe(global_state *const rts, worker_id self, - unsigned int nworkers, - __cilkrts_worker *const w, - Closure *const t, unsigned int fails) { - - // Threshold for number of consecutive failed steal attempts to try - // disengaging this worker. Must be a multiple of DEPRIVED_THRESHOLD and a - // power of 2. - const unsigned int DISENGAGE_THRESHOLD = 4 * DEPRIVED_THRESHOLD; - // Threshold for number of failed steal attempts to put this thief to sleep - // for an extended amount of time. Must be larger than DISENGAGE_THRESHOLD. - const unsigned int SLEEP_THRESHOLD = 32 * DEPRIVED_THRESHOLD; - - if (t) { - if (fails >= DEPRIVED_THRESHOLD) { - // This thief is no longer deprived. Decrement the number - // of deprived thieves. - atomic_fetch_sub_explicit(&rts->disengaged_deprived, 1, - memory_order_release); - - // Request to reengage at most 2 thieves. - // TODO: Investigate whether it's better to keep the number - // less than the number of active workers. - request_more_thieves(rts, 2); - } - fails = 0; - - } else { - CILK_START_TIMING(w, INTERVAL_SLEEP); - fails += ATTEMPTS; - - // Every DEPRIVED_THRESHOLD consecutive failed steal attempts, - // update the set of deprived workers, and maybe disengage this - // worker if there are too many deprived workers. 
- if (fails % DEPRIVED_THRESHOLD == 0) { - if (fails > (1 << 25)) { - // Prevent the fail count from exceeding this maximum, so we - // don't have to worry about the fail count overflowing. - // - // This maximum bound is chosen based on the maximum sleep - // time when fails > SLEEP_THRESHOLD, which specifies the - // time to sleep in nanoseconds. Because the specification - // to nanosleep() disallows times with more than 1e9 - // nanoseconds, we set the maximum fails value here - // accordinly and, in this case, simply sleep for 1 second. - fails = (1 << 25); - const struct timespec sleeptime = {.tv_sec = 1, .tv_nsec = 0}; - nanosleep(&sleeptime, NULL); - } else if (DEPRIVED_THRESHOLD == fails) { - // This thief is now considered deprived. Increment the - // number of deprived workers. - atomic_fetch_add_explicit(&rts->disengaged_deprived, 1, - memory_order_release); - } else if (fails % DISENGAGE_THRESHOLD == 0) { -#if BOSS_THIEF - if (is_boss_thread) { - // The boss thread should never disengage. Sleep instead. - const struct timespec sleeptime = {.tv_sec = 0, - .tv_nsec = 50000}; - nanosleep(&sleeptime, NULL); - } else -#endif - if (maybe_disengage_thief(rts, self, nworkers, w)) { - // The semaphore for reserving workers may have been - // non-zero due to past successful steals, rather than a - // recent successful steal. Decrement fails so we try - // to disengage this again sooner, in case there is - // still nothing to steal. - fails -= (DISENGAGE_THRESHOLD / 2); - } - - } else if (fails > SLEEP_THRESHOLD) { - // This thief has failed a lot of consecutive steal - // attempts, but it's not disengaged. Sleep for increasing - // lengths of time. - const struct timespec sleeptime = {.tv_sec = 0, - .tv_nsec = 16 * fails}; - nanosleep(&sleeptime, NULL); - } else if (fails % DISENGAGE_THRESHOLD != 0) { - // This thief has failed many consecutive steal attempts, - // but it's not disengaged. Sleep for a short time. 
- const struct timespec sleeptime = {.tv_sec = 0, - .tv_nsec = 50000}; - nanosleep(&sleeptime, NULL); - } - } - CILK_STOP_TIMING(w, INTERVAL_SLEEP); - } - - return fails; -} - void worker_scheduler(__cilkrts_worker *w) { Closure *t = NULL; CILK_ASSERT(w, w == __cilkrts_get_tls_worker()); @@ -1647,11 +1503,12 @@ void worker_scheduler(__cilkrts_worker *w) { worker_change_state(w, WORKER_SCHED); global_state *rts = w->g; worker_id self = w->self; + const bool is_boss = is_boss_thread; // Get this worker's local_state pointer, to avoid rereading it // unnecessarily during the work-stealing loop. This optimization helps // reduce sharing on the worker structure. - local_state *l = w->l; + unsigned int rand_state = w->l->rand_next; // Get the number of workers. We don't currently support changing the // number of workers dynamically during execution of a Cilkified region. @@ -1659,8 +1516,20 @@ void worker_scheduler(__cilkrts_worker *w) { // Initialize count of consecutive failed steal attempts. Effectively, // every worker is active upon entering this routine. unsigned int fails = 0; + unsigned int request_threshold = SENTINEL_THRESHOLD; + // Local history information of the state of the system, for sentinel + // workers to use to determine when to disengage and how many workers to + // reengage. + history_t inefficient_history = 0; + history_t efficient_history = 0; + unsigned int sentinel_count_history[SENTINEL_COUNT_HISTORY] = { 1 }; + unsigned int sentinel_count_history_tail = 0; + unsigned int recent_sentinel_count = SENTINEL_COUNT_HISTORY; + // Get pointers to the local and global copies of the index-to-worker map. 
- worker_id *local_index_to_worker = l->index_to_worker; + worker_id *index_to_worker = rts->index_to_worker; + __cilkrts_worker **workers = rts->workers; + ReadyDeque *deques = rts->deques; while (!atomic_load_explicit(&rts->done, memory_order_acquire)) { /* A worker entering the steal loop must have saved its reducer map into @@ -1672,39 +1541,44 @@ void worker_scheduler(__cilkrts_worker *w) { while (!t && !atomic_load_explicit(&rts->done, memory_order_acquire)) { CILK_START_TIMING(w, INTERVAL_SCHED); CILK_START_TIMING(w, INTERVAL_IDLE); +#if ENABLE_THIEF_SLEEP // Get the set of workers we can steal from and a local copy of the // index-to-worker map. We'll attempt a few steals using these // local copies to minimize memory traffic. - uint64_t disengaged_deprived = atomic_load_explicit( - &rts->disengaged_deprived, memory_order_relaxed); - uint32_t disengaged = (uint32_t)(disengaged_deprived >> 32); + uint64_t disengaged_sentinel = atomic_load_explicit( + &rts->disengaged_sentinel, memory_order_relaxed); + uint32_t disengaged = GET_DISENGAGED(disengaged_sentinel); uint32_t stealable = nworkers - disengaged; - // TODO: Technically, ATTEMPTS should scale with the number of - // workers, to amortize the memcpy, which takes O(P)-time. However, - // in testing, ATTEMPTS = 4 works well even on large worker counts - // (e.g., 96) in a NUMA environment. I suspect the total cost of - // the memcpy is too small for such worker counts to worry about. - memcpy(local_index_to_worker, rts->index_to_worker, - sizeof(worker_id) * stealable); + + if (__builtin_expect(stealable == 1, false)) + // If this worker detects only 1 stealable worker, then its the + // only worker in the work-stealing loop. + continue; + +#else // ENABLE_THIEF_SLEEP + uint32_t stealable = nworkers; +#endif // ENABLE_THIEF_SLEEP int attempt = ATTEMPTS; do { // Choose a random victim not equal to self. 
worker_id victim = - local_index_to_worker[rts_rand(l) % stealable]; + index_to_worker[get_rand(rand_state) % stealable]; + rand_state = update_rand_state(rand_state); while (victim == self) { - victim = local_index_to_worker[rts_rand(l) % stealable]; + busy_loop_pause(); + victim = index_to_worker[get_rand(rand_state) % stealable]; + rand_state = update_rand_state(rand_state); } // Attempt to steal from that victim. - t = Closure_steal(w, victim); + t = Closure_steal(workers, deques, w, victim); if (!t) { - // Pause inside this busy loop. -#ifdef __SSE__ - __builtin_ia32_pause(); -#endif -#ifdef __aarch64__ - __builtin_arm_yield(); -#endif + // Pause inside this busy loop. We perform many pause + // instructions in order to limit how much memory bandwidth + // the theif consumes. + for (int i = 0; i < STEAL_BUSY_PAUSE; ++i) { + busy_loop_pause(); + } } } while (!t && --attempt > 0); @@ -1718,37 +1592,80 @@ void worker_scheduler(__cilkrts_worker *w) { CILK_DROP_TIMING(w, INTERVAL_SCHED); } #endif - fails = go_to_sleep_maybe(rts, self, nworkers, w, t, fails); + fails = go_to_sleep_maybe( + rts, self, nworkers, w, t, fails, &request_threshold, + &inefficient_history, &efficient_history, + sentinel_count_history, &sentinel_count_history_tail, + &recent_sentinel_count); } CILK_START_TIMING(w, INTERVAL_SCHED); // If one Cilkified region stops and another one starts, then a worker // can reach this point with t == NULL and w->g->done == false. Check // that t is not NULL before calling do_what_it_says. if (t) { - // if provably-good steal happens, do_what_it_says will return - // the next closure to execute - do_what_it_says(w, t); +#if ENABLE_THIEF_SLEEP + const unsigned int MIN_FAILS = 2 * ATTEMPTS; + uint64_t start, end; + // Executing do_what_it_says involves some minimum amount of work, + // which can be used to amortize the cost of some failed steal + // attempts. Therefore, avoid measuring the elapsed cycles if we + // haven't failed many steal attempts. 
+ if (fails > MIN_FAILS) { + start = gettime_fast(); + } +#endif // ENABLE_THIEF_SLEEP + do_what_it_says(deques, w, t); +#if ENABLE_THIEF_SLEEP + if (fails > MIN_FAILS) { + end = gettime_fast(); + uint64_t elapsed = end - start; + // Decrement the count of failed steal attempts based on the + // amount of work done. + fails = decrease_fails_by_work(rts, w, fails, elapsed, + &request_threshold); + if (fails < SENTINEL_THRESHOLD) { + inefficient_history = 0; + efficient_history = 0; + } + } else { + fails = 0; + request_threshold = SENTINEL_THRESHOLD; + } +#endif // ENABLE_THIEF_SLEEP t = NULL; + } else if (!is_boss && + atomic_load_explicit(&rts->done, memory_order_acquire)) { + // If it appears the computation is done, busy-wait for a while + // before exiting the work-stealing loop, in case another cilkified + // region is started soon. + unsigned int busy_fail = 0; + while (busy_fail++ < 2 * BUSY_LOOP_SPIN && + atomic_load_explicit(&rts->done, memory_order_acquire)) { + busy_loop_pause(); + } + if (thief_should_wait(rts)) { + break; + } } } - if (fails >= DEPRIVED_THRESHOLD) { - // If this worker was deprived, decrement the number of deprived - // workers, essentially making this worker active. - atomic_fetch_sub_explicit(&rts->disengaged_deprived, 1, - memory_order_release); - } + // Reset the fail count. 
+ reset_fails(rts, fails); + w->l->rand_next = rand_state; + CILK_STOP_TIMING(w, INTERVAL_SCHED); worker_change_state(w, WORKER_IDLE); #if BOSS_THIEF - if (is_boss_thread) { + if (is_boss) { __builtin_longjmp(w->g->boss_ctx, 1); } #endif } void *scheduler_thread_proc(void *arg) { - __cilkrts_worker *w = (__cilkrts_worker *)arg; + struct worker_args *w_arg = (struct worker_args *)arg; + __cilkrts_worker *w = __cilkrts_init_tls_worker(w_arg->id, w_arg->g); + cilkrts_alert(BOOT, w, "scheduler_thread_proc"); __cilkrts_set_tls_worker(w); @@ -1762,6 +1679,7 @@ void *scheduler_thread_proc(void *arg) { // Avoid redundant lookups of these commonly accessed worker fields. const worker_id self = w->self; global_state *rts = w->g; + const unsigned int nworkers = rts->nworkers; // Initialize worker's random-number generator. rts_srand(w, (self + 1) * 162347); @@ -1776,7 +1694,11 @@ void *scheduler_thread_proc(void *arg) { root_worker_wait(rts, self); } else { #endif - thief_wait(rts); + if (thief_should_wait(rts)) { + disengage_worker(rts, nworkers, self); + thief_wait(rts); + reengage_worker(rts, nworkers, self); + } #if !BOSS_THIEF } #endif @@ -1811,15 +1733,10 @@ void *scheduler_thread_proc(void *arg) { signal_uncilkified(rts); #if BOSS_THIEF unsigned int fail = 0; - while (fail++ < 2048 && + while (fail++ < BUSY_LOOP_SPIN && !atomic_load_explicit(&rts->disengaged_thieves_futex, memory_order_acquire)) { -#ifdef __SSE__ - __builtin_ia32_pause(); -#endif -#ifdef __aarch64__ - __builtin_arm_yield(); -#endif + busy_loop_pause(); } #endif // BOSS_THIEF } else { @@ -1827,15 +1744,10 @@ void *scheduler_thread_proc(void *arg) { // Busy-wait for a while to amortize the cost of syscalls to put // thief threads to sleep. 
unsigned int fail = 0; - while (fail++ < 2048 && + while (fail++ < BUSY_LOOP_SPIN && !atomic_load_explicit(&rts->disengaged_thieves_futex, memory_order_acquire)) { -#ifdef __SSE__ - __builtin_ia32_pause(); -#endif -#ifdef __aarch64__ - __builtin_arm_yield(); -#endif + busy_loop_pause(); } } } while (true); diff --git a/runtime/worker.h b/runtime/worker.h new file mode 100644 index 00000000..cf1f3023 --- /dev/null +++ b/runtime/worker.h @@ -0,0 +1,55 @@ +#ifndef _CILK_WORKER_H +#define _CILK_WORKER_H + +#include "rts-config.h" + +struct __cilkrts_stack_frame; +struct local_state; +struct global_state; + +enum __cilkrts_worker_state { + WORKER_IDLE = 10, + WORKER_SCHED, + WORKER_STEAL, + WORKER_RUN +}; + +struct __cilkrts_worker { + // T, H, and E pointers in the THE protocol. + // T and E are frequently accessed and should be in a hot cache line. + // H could be moved elsewhere because it is only touched when stealing. + _Atomic(struct __cilkrts_stack_frame **) head; + _Atomic(struct __cilkrts_stack_frame **) tail; + _Atomic(struct __cilkrts_stack_frame **) exc; + + // Worker id, a small integer + worker_id self; + + // 4 byte hole on 64 bit systems + + // A slot that points to the currently executing Cilk frame. + struct __cilkrts_stack_frame *current_stack_frame; + + // Map from reducer names to reducer values + cilkred_map *reducer_map; + + // Global state of the runtime system, opaque to the client. + struct global_state *g; + + // Additional per-worker state hidden from the client. + struct local_state *l; + + // Cache line boundary on 64 bit systems with 64 byte cache lines + + // Optional state, only maintained if __cilkrts_use_extension == true. + void *extension; + void *ext_stack; + + // Limit of the Lazy Task Queue, to detect queue overflow (debug only) + struct __cilkrts_stack_frame **ltq_limit; + +} __attribute__((aligned(1024))); // This alignment reduces false sharing + // induced by hardware prefetchers on some + // systems, such as Intel CPUs. 
+
+#endif /* _CILK_WORKER_H */
diff --git a/runtime/worker_coord.h b/runtime/worker_coord.h
index 1421d0b8..ebde70e2 100644
--- a/runtime/worker_coord.h
+++ b/runtime/worker_coord.h
@@ -134,13 +134,22 @@ static inline void worker_clear_start(volatile atomic_bool *start) {
 // Common internal interface for managing execution of workers.
 //=========================================================
 
+// Spin-wait hint for busy loops.  Emits the x86 pause builtin when __SSE__
+// is defined and the AArch64 yield builtin when __aarch64__ is defined; on
+// any other target the body is empty.
+static inline __attribute__((always_inline)) void busy_loop_pause(void) {
+#ifdef __SSE__
+    __builtin_ia32_pause();
+#endif
+#ifdef __aarch64__
+    __builtin_arm_yield();
+#endif
+}
+
 // Called by a root-worker thread, that is, the worker w where w->self ==
 // g->exiting_worker.  Causes the root-worker thread to wait for a signal to
 // start work-stealing.
 static inline void root_worker_wait(global_state *g, const uint32_t id) {
     _Atomic uint32_t *root_worker_p = &g->start_root_worker;
 /* unsigned int fail = 0; */
-/* while (fail++ < 2048) { */
+/* while (fail++ < BUSY_LOOP_SPIN) { */
 /* if (id != atomic_load_explicit(root_worker_p, memory_order_acquire)) { */
 /* return; */
 /* } */
@@ -244,16 +253,11 @@ static inline void signal_uncilkified(global_state *g) {
 // region.
 static inline void wait_while_cilkified(global_state *g) {
     unsigned int fail = 0;
-    while (fail++ < 2048) {
+    while (fail++ < BUSY_LOOP_SPIN) {
         if (!atomic_load_explicit(&g->cilkified, memory_order_acquire)) {
             return;
         }
-#ifdef __SSE__
-        __builtin_ia32_pause();
-#endif
-#ifdef __aarch64__
-        __builtin_arm_yield();
-#endif
+        busy_loop_pause();
     }
 #if USE_FUTEX
     while (atomic_load_explicit(&g->cilkified, memory_order_acquire)) {
@@ -296,6 +300,11 @@ static inline void reset_disengaged_var(global_state *g) {
 static inline void request_more_thieves(global_state *g, uint32_t count) {
     CILK_ASSERT_G(count > 0);
 
+    // Don't allow this routine increment the futex beyond half the number of
+    // workers on the system.
This bounds how many successful steals can + // possibly keep thieves engaged unnecessarily in the future, when there may + // not be as much parallelism. + int32_t max_requests = (int32_t)(g->nworkers / 2); #if USE_FUTEX // This step synchronizes with concurrent calls to request_more_thieves and // concurrent calls to try_to_disengage_thief. @@ -303,12 +312,7 @@ static inline void request_more_thieves(global_state *g, uint32_t count) { uint32_t disengaged_thieves_futex = atomic_load_explicit( &g->disengaged_thieves_futex, memory_order_acquire); - // Don't allow this routine increment the futex beyond half the number - // of workers on the system. This bounds how many successful steals can - // possibly keep thieves engaged unnecessarily in the future, when there - // may not be as much parallelism. - int32_t max_to_wake = - (int32_t)(g->nworkers / 2) - disengaged_thieves_futex; + int32_t max_to_wake = max_requests - disengaged_thieves_futex; if (max_to_wake <= 0) return; uint64_t to_wake = max_to_wake < (int32_t)count ? max_to_wake : count; @@ -331,11 +335,7 @@ static inline void request_more_thieves(global_state *g, uint32_t count) { uint32_t disengaged_thieves_futex = atomic_load_explicit( &g->disengaged_thieves_futex, memory_order_acquire); - // Don't allow this routine increment the futex beyond half the number - // of workers on the system. This bounds how many successful steals can - // possibly keep thieves engaged unnecessarily in the future, when there - // may not be as much parallelism. - int32_t max_to_wake = (int32_t)(g->nworkers / 2) - disengaged_thieves_futex; + int32_t max_to_wake = max_requests - disengaged_thieves_futex; if (max_to_wake <= 0) { pthread_mutex_unlock(&g->disengaged_lock); return; @@ -359,7 +359,7 @@ static inline void thief_disengage_futex(_Atomic uint32_t *futexp) { // designed to handle cases where multiple threads waiting on the futex // were woken up and where there may be spurious wakeups. 
uint32_t val;
-    while ((val = atomic_load_explicit(futexp, memory_order_relaxed)) > 0) {
+    while ((val = atomic_load_explicit(futexp, memory_order_acquire)) > 0) {
         if (atomic_compare_exchange_strong_explicit(futexp, &val, val - 1,
                                                     memory_order_release,
                                                     memory_order_acquire)) {
@@ -429,6 +429,39 @@ static inline void thief_wait(global_state *g) {
     thief_disengage(g);
 }
+
+// Called by a thief thread.  Check if the thief should start waiting for the
+// start of a cilkified region.  If a new cilkified region has been started
+// already, update the global state to indicate that this worker is engaged in
+// work stealing.
+//
+// Returns true when g->disengaged_thieves_futex is observed to be 0 (the
+// caller should wait).  Returns false after successfully decrementing that
+// counter by one (the caller should keep stealing).
+static inline bool thief_should_wait(global_state *g) {
+    _Atomic uint32_t *futexp = &g->disengaged_thieves_futex;
+    uint32_t val = atomic_load_explicit(futexp, memory_order_acquire);
+#if USE_FUTEX
+    // Try to take one unit from the counter.  A failed compare-exchange
+    // writes the value it observed back into val, so the loop condition
+    // re-tests current data without issuing a separate load.
+    while (val > 0) {
+        if (atomic_compare_exchange_strong_explicit(futexp, &val, val - 1,
+                                                    memory_order_release,
+                                                    memory_order_acquire))
+            return false;
+    }
+    return true;
+#else
+    // Unlocked fast path: nothing to take.
+    if (val == 0)
+        return true;
+
+    // Re-read the counter under disengaged_lock before decrementing it.
+    pthread_mutex_t *lock = &g->disengaged_lock;
+    pthread_mutex_lock(lock);
+    val = atomic_load_explicit(futexp, memory_order_relaxed);
+    if (val > 0) {
+        atomic_store_explicit(futexp, val - 1, memory_order_release);
+        pthread_mutex_unlock(lock);
+        return false;
+    }
+    pthread_mutex_unlock(lock);
+    return true;
+#endif
+}
+
 // Signal the thief threads to start work-stealing (or terminate, if
 // g->terminate == 1).
static inline void wake_thieves(global_state *g) {
diff --git a/runtime/worker_sleep.h b/runtime/worker_sleep.h
new file mode 100644
index 00000000..236c2774
--- /dev/null
+++ b/runtime/worker_sleep.h
@@ -0,0 +1,592 @@
+#ifndef _WORKER_SLEEP_H
+#define _WORKER_SLEEP_H
+
+#include "cilk-internal.h"
+#include "worker_coord.h"
+
+#if defined(__APPLE__) && defined(__aarch64__)
+#define APPLE_ARM64
+#endif
+
+#ifdef APPLE_ARM64
+#include <time.h>
+#endif // APPLE_ARM64
+
+// NOTE: every expression-valued macro below is fully parenthesized so that it
+// expands safely next to higher-precedence operators such as % and *.
+
+// Nanoseconds that a sentinel worker should sleep if it reaches the disengage
+// threshold but does not disengage.
+/* #define SLEEP_NSEC 12500 */
+#define NAP_NSEC 25000
+/* #define SLEEP_NSEC 50000 */
+#define SLEEP_NSEC (4 * NAP_NSEC)
+
+// Ratio of active workers over sentinels that the system aims to maintain.
+#define AS_RATIO 2
+
+// Threshold for number of consecutive failed steal attempts to declare a
+// thief as sentinel.  Must be a power of 2.
+#define SENTINEL_THRESHOLD 256
+
+// Number of steal attempts a thief makes per pass of its inner steal loop,
+// i.e., per sample of the stealable-worker count.  ATTEMPTS must divide
+// SENTINEL_THRESHOLD.
+#define ATTEMPTS 8
+
+// Information for histories of efficient and inefficient worker-count samples
+// and for sentinel counts.
+typedef uint32_t history_t;
+#define HISTORY_LENGTH 32
+#define SENTINEL_COUNT_HISTORY 8
+
+// Amount of history that must be efficient/inefficient to reengage/disengage
+// workers.
+#define HISTORY_THRESHOLD (HISTORY_LENGTH / 2)
+
+// Threshold for number of consecutive failed steal attempts to try disengaging
+// this worker.  Must be a multiple of SENTINEL_THRESHOLD and a power of 2.
+#define DISENGAGE_THRESHOLD (HISTORY_THRESHOLD * SENTINEL_THRESHOLD)
+
+// Number of pauses to perform per steal attempt, to ensure failed steal
+// attempts don't take too much memory bandwidth away from the workers doing
+// work.
+#define STEAL_BUSY_PAUSE 16
+
+// Read a cheap timestamp for measuring elapsed work.  The unit differs by
+// platform: nanoseconds from CLOCK_MONOTONIC_RAW on Apple ARM64, and the
+// value of __builtin_readcyclecounter() everywhere else.
+static inline __attribute__((always_inline)) uint64_t gettime_fast(void) {
+#ifdef APPLE_ARM64
+    // __builtin_readcyclecounter triggers "illegal instruction" runtime errors
+    // on Apple M1s.
+    return clock_gettime_nsec_np(CLOCK_MONOTONIC_RAW);
+#else
+    return __builtin_readcyclecounter();
+#endif // APPLE_ARM64
+}
+
+// Snapshot of how the workers are currently partitioned.
+typedef struct worker_counts {
+    int32_t active;     // nworkers - disengaged - sentinels
+    int32_t sentinels;  // sentinel field of disengaged_sentinel
+    int32_t disengaged; // disengaged field of disengaged_sentinel
+} worker_counts;
+
+// Update the index-to-worker map to swap self with the worker at the target
+// index.  Both g->index_to_worker and g->worker_to_index are updated so they
+// remain inverses of each other.
+// NOTE(review): the caller in this file holds g->index_lock around the call;
+// confirm that every caller does.
+static void swap_worker_with_target(global_state *g, worker_id self,
+                                    worker_id target_index) {
+    worker_id *worker_to_index = g->worker_to_index;
+    worker_id *index_to_worker = g->index_to_worker;
+
+    worker_id self_index = worker_to_index[self];
+    worker_id target_worker = index_to_worker[target_index];
+
+    // Update the index-to-worker map.
+    index_to_worker[self_index] = target_worker;
+    index_to_worker[target_index] = self;
+
+    // Update the worker-to-index map.
+    worker_to_index[target_worker] = self_index;
+    worker_to_index[self] = target_index;
+}
+
+// Called by a thief thread.  Causes the thief thread to try to sleep, that is,
+// to wait for a signal to resume work-stealing.
+//
+// disengaged_sentinel is the value of g->disengaged_sentinel the caller based
+// its decision on.  Returns false without sleeping if g->index_lock cannot be
+// taken immediately or if g->disengaged_sentinel no longer holds that value.
+// Returns true after the thread has disengaged and been reengaged.
+static bool try_to_disengage_thief(global_state *g, worker_id self,
+                                   uint64_t disengaged_sentinel) {
+    // Try to grab the lock on the index structure.
+    if (!cilk_mutex_try(&g->index_lock)) {
+        return false;
+    }
+
+    // Increment the number of disengaged thieves and decrement number of
+    // sentinels.
+    uint32_t disengaged = GET_DISENGAGED(disengaged_sentinel);
+    uint32_t sentinel = GET_SENTINEL(disengaged_sentinel);
+    uint64_t new_disengaged_sentinel =
+        DISENGAGED_SENTINEL(disengaged + 1, sentinel - 1);
+
+    unsigned int nworkers = g->nworkers;
+    worker_id *worker_to_index = g->worker_to_index;
+
+    // Try to update the number of disengaged workers.  This step synchronizes
+    // with parallel calls to disengage thieves, calls to reengage thieves, and
+    // updates to the number of sentinel workers.
+    // First atomically update the number of disengaged workers.
+    if (!atomic_compare_exchange_strong_explicit(
+            &g->disengaged_sentinel, &disengaged_sentinel,
+            new_disengaged_sentinel, memory_order_release,
+            memory_order_acquire)) {
+        // The counts changed under us.  Release the lock on the index
+        // structure and let the caller re-evaluate.
+        cilk_mutex_unlock(&g->index_lock);
+        return false;
+    }
+
+    // Update the index-to-worker map: move self to the first slot past the
+    // stealable prefix if it is currently inside that prefix.
+    worker_id last_index = nworkers - GET_DISENGAGED(new_disengaged_sentinel);
+    if (worker_to_index[self] < last_index) {
+        swap_worker_with_target(g, self, last_index);
+    }
+    // Release the lock on the index structure.
+    cilk_mutex_unlock(&g->index_lock);
+
+    // Disengage this thread.
+    thief_disengage(g);
+
+    // The thread is now reengaged.  Grab the lock on the index structure.
+    cilk_mutex_lock(&g->index_lock);
+
+    // Decrement the number of disengaged workers and increment the number of
+    // sentinels, retrying until the compare-exchange succeeds.
+    while (true) {
+        uint64_t current = atomic_load_explicit(&g->disengaged_sentinel,
+                                                memory_order_relaxed);
+        uint32_t cur_disengaged = GET_DISENGAGED(current);
+        uint32_t cur_sentinel = GET_SENTINEL(current);
+        new_disengaged_sentinel =
+            DISENGAGED_SENTINEL(cur_disengaged - 1, cur_sentinel + 1);
+        if (atomic_compare_exchange_strong_explicit(
+                &g->disengaged_sentinel, &current, new_disengaged_sentinel,
+                memory_order_release, memory_order_acquire)) {
+            // Update the index structure.  On success `current` still holds
+            // the pre-decrement value, so last_index is the first slot that
+            // was outside the stealable prefix before this update.
+            last_index = nworkers - GET_DISENGAGED(current);
+            if (worker_to_index[self] > last_index) {
+                swap_worker_with_target(g, self, last_index);
+            }
+
+            // Release the lock on the index structure.
+            cilk_mutex_unlock(&g->index_lock);
+            return true;
+        }
+        busy_loop_pause();
+    }
+}
+
+// Helper function to parse the given value of disengaged_sentinel to determine
+// the number of active, sentinel, and disengaged workers.  The worker w is
+// used only by the assertions.
+// NOTE(review): this is declared `const` yet evaluates CILK_ASSERT with w;
+// confirm the assertion macro is compatible with that attribute.
+__attribute__((const, always_inline)) static inline worker_counts
+get_worker_counts(__cilkrts_worker *const w, uint64_t disengaged_sentinel,
+                  unsigned int nworkers) {
+    uint32_t disengaged = GET_DISENGAGED(disengaged_sentinel);
+    uint32_t sentinel = GET_SENTINEL(disengaged_sentinel);
+    CILK_ASSERT(w, disengaged < nworkers);
+    CILK_ASSERT(w, sentinel <= nworkers);
+    int32_t active =
+        (int32_t)nworkers - (int32_t)disengaged - (int32_t)sentinel;
+
+    worker_counts counts = {
+        .active = active, .sentinels = sentinel, .disengaged = disengaged};
+    return counts;
+}
+
+// Check if the given worker counts are inefficient, i.e., if there is more
+// than one sentinel, at least one active worker, and
+// active * AS_RATIO < sentinels.  Returns 1 if so and 0 otherwise.
+__attribute__((const, always_inline)) static inline history_t
+is_inefficient(worker_counts counts) {
+    return counts.sentinels > 1 && counts.active >= 1 &&
+           counts.active * AS_RATIO < counts.sentinels * 1;
+}
+
+// Check if the given worker counts are efficient, i.e., if
+// active >= AS_RATIO * sentinels or there is at most one sentinel.  Returns 1
+// if so and 0 otherwise.
+__attribute__((const, always_inline)) static inline history_t
+is_efficient(worker_counts counts) {
+    return (counts.active * 1 >= counts.sentinels * AS_RATIO) ||
+           (counts.sentinels <= 1);
+}
+
+// Convert the elapsed time spent working into a fail count.  The result is
+// elapsed * SENTINEL_THRESHOLD / divisor, rounded down to a multiple of
+// ATTEMPTS.  The divisor differs on Apple ARM64, where gettime_fast()
+// reports nanoseconds instead of cycles.
+__attribute__((const, always_inline)) static inline unsigned int
+get_scaled_elapsed(unsigned int elapsed) {
+#ifdef APPLE_ARM64
+    const uint64_t divisor = 16 * 65536;
+#else
+    const uint64_t divisor = 2 * 65536;
+#endif // APPLE_ARM64
+    // Multiply in 64 bits: with SENTINEL_THRESHOLD == 256 the 32-bit product
+    // would wrap as soon as elapsed reached 2^24, turning long stretches of
+    // work into small fail counts.  The quotient always fits in 32 bits.
+    const uint64_t scaled = ((uint64_t)elapsed * SENTINEL_THRESHOLD) / divisor;
+    return (unsigned int)((scaled / ATTEMPTS) * ATTEMPTS);
+}
+
+// If steal attempts found work, update histories as appropriate and possibly
+// reengage workers.
+__attribute__((always_inline)) static inline unsigned int
+maybe_reengage_workers(global_state *const rts, worker_id self,
+                       unsigned int nworkers, __cilkrts_worker *const w,
+                       unsigned int fails,
+                       unsigned int *const request_threshold,
+                       history_t *const inefficient_history,
+                       history_t *const efficient_history,
+                       unsigned int *const sentinel_count_history,
+                       unsigned int *const sentinel_count_history_tail,
+                       unsigned int *const recent_sentinel_count) {
+    // Returns the (possibly capped) fail count.  Nothing is done unless this
+    // worker had accumulated at least SENTINEL_THRESHOLD fails, i.e., unless
+    // it was counted as a sentinel.
+    //
+    // The history pointers refer to state owned by the caller:
+    //   - efficient_history / inefficient_history: bit vectors of recent
+    //     is_efficient() / is_inefficient() samples, newest sample in bit
+    //     HISTORY_LENGTH - 1.
+    //   - sentinel_count_history, sentinel_count_history_tail,
+    //     recent_sentinel_count: a circular buffer of SENTINEL_COUNT_HISTORY
+    //     sentinel-count samples, the next slot to overwrite, and the running
+    //     sum of the buffer.
+    if (fails >= SENTINEL_THRESHOLD) {
+        // This thief is no longer a sentinel.  Decrement the number of
+        // sentinels.  The fetch-sub returns the value from before the
+        // decrement.
+        uint64_t disengaged_sentinel = atomic_fetch_sub_explicit(
+            &rts->disengaged_sentinel, 1, memory_order_release);
+#if !ENABLE_THIEF_SLEEP
+        // Without thief sleep there is no one to reengage; just reset the
+        // fail count.
+        return 0;
+#endif
+        // Get the current worker counts, with this sentinel now active.
+        // Subtracting 1 accounts for the decrement performed above.
+        worker_counts counts =
+            get_worker_counts(w, disengaged_sentinel - 1, nworkers);
+        CILK_ASSERT(w, counts.active >= 1);
+
+        history_t my_efficient_history = *efficient_history;
+        history_t my_inefficient_history = *inefficient_history;
+        unsigned int my_sentinel_count = *recent_sentinel_count;
+        // Only add a new sample once the fail count has reached the request
+        // threshold set by the previous call.
+        if (fails >= *request_threshold) {
+            // Update the inefficient history.
+            history_t curr_ineff = is_inefficient(counts);
+            my_inefficient_history = (my_inefficient_history >> 1) |
+                                     (curr_ineff << (HISTORY_LENGTH - 1));
+
+            // Update the efficient history.
+            history_t curr_eff = is_efficient(counts);
+            my_efficient_history = (my_efficient_history >> 1) |
+                                   (curr_eff << (HISTORY_LENGTH - 1));
+
+            // Update the sentinel count.  The sample is counts.sentinels + 1,
+            // i.e., the sentinel count including this worker, which was a
+            // sentinel until the decrement above.  The running sum is kept
+            // consistent by removing the sample that is overwritten.
+            unsigned int current_sentinel_count = counts.sentinels + 1;
+            unsigned int tail = *sentinel_count_history_tail;
+            my_sentinel_count = my_sentinel_count -
+                                sentinel_count_history[tail] +
+                                current_sentinel_count;
+            *recent_sentinel_count = my_sentinel_count;
+            sentinel_count_history[tail] = current_sentinel_count;
+            *sentinel_count_history_tail = (tail + 1) % SENTINEL_COUNT_HISTORY;
+        }
+
+        // Request to reengage some thieves, depending on whether there are
+        // too many active workers compared to sentinel workers.
+
+        // Compute a number of additional workers to request, based on the
+        // efficiency history divided by the average recent sentinel count.
+        //
+        // Dividing by the average recent sentinel count is intended to
+        // handle the case where sentinels request more workers in parallel,
+        // based on the same independently collected history.
+        int32_t request;
+        int32_t eff_steps = __builtin_popcount(my_efficient_history);
+        int32_t ineff_steps = __builtin_popcount(my_inefficient_history);
+        int32_t eff_diff = eff_steps - ineff_steps;
+        if (eff_diff < HISTORY_THRESHOLD) {
+            // Not enough evidence of efficiency yet: request nothing and keep
+            // accumulating history.
+            request = 0;
+            *efficient_history = my_efficient_history;
+            *inefficient_history = my_inefficient_history;
+        } else {
+            // NOTE(review): avg_sentinels is used as a divisor, so the running
+            // sum must be at least SENTINEL_COUNT_HISTORY here; presumably the
+            // caller initializes the sentinel-count history accordingly --
+            // confirm.
+            unsigned int avg_sentinels =
+                my_sentinel_count / SENTINEL_COUNT_HISTORY;
+            request = eff_diff / avg_sentinels;
+            int32_t remainder = eff_diff % avg_sentinels;
+            // Request one extra worker unless this worker's ID is a multiple
+            // of the remainder.  NOTE(review): presumably meant to spread the
+            // remainder across the sentinels making requests -- confirm.
+            if (remainder)
+                request += (self % remainder != 0);
+            // Charge the request for more workers against the efficiency
+            // history by resetting that history.
+            *efficient_history = 0;
+            *inefficient_history = 0;
+        }
+        WHEN_SCHED_STATS(w->l->stats.reeng_rqsts += request);
+
+        // Make sure at least 1 worker is requested if we're about to run
+        // out of sentinels.  This applies only when some workers are not
+        // active and the disengaged_thieves_futex value is zero.
+        if (request == 0 && counts.sentinels == 0 &&
+            counts.active < (int32_t)nworkers &&
+            !atomic_load_explicit(&rts->disengaged_thieves_futex,
+                                  memory_order_relaxed)) {
+            // Request a quarter of the active-worker count, rounded up.
+            request = (counts.active + 3) / 4;
+            WHEN_SCHED_STATS(w->l->stats.onesen_rqsts += request);
+        }
+
+        if (request > 0) {
+            request_more_thieves(rts, request);
+        }
+
+        // Set a cap on the fail count.
+        if (fails > DISENGAGE_THRESHOLD)
+            fails = DISENGAGE_THRESHOLD;
+
+        // Update request threshold so that, in case this worker ends up
+        // executing a small task, it still adds samples to its history that
+        // are spread out in time.
+        *request_threshold = fails + (SENTINEL_THRESHOLD / 1);
+    }
+
+    return fails;
+}
+
+// Attempt to disengage this thief thread.  The __cilkrts_worker parameter is
+// only used for debugging.
+//
+// Returns true if this thief was disengaged (and has since been reengaged),
+// and false if it was not disengaged.  Always returns false when
+// ENABLE_THIEF_SLEEP is disabled.
+static bool maybe_disengage_thief(global_state *g, worker_id self,
+                                  unsigned int nworkers,
+                                  __cilkrts_worker *const w) {
+#if !ENABLE_THIEF_SLEEP
+    return false;
+#endif // !ENABLE_THIEF_SLEEP
+    // Check the number of active and sentinel workers, and disengage this
+    // worker if there are too many sentinel workers.
+    while (true) {
+        // Check if this sentinel thread should sleep.
+        uint64_t disengaged_sentinel =
+            atomic_load_explicit(&g->disengaged_sentinel, memory_order_acquire);
+
+        worker_counts counts = get_worker_counts(w, disengaged_sentinel, nworkers);
+
+        // Make sure that we don't inadvertently disengage the last sentinel:
+        // is_inefficient() only holds when there is more than one sentinel.
+        if (is_inefficient(counts)) {
+            // Too many sentinels.  Try to disengage this worker.  If it fails,
+            // repeat the loop with a fresh snapshot of the counts.
+            if (try_to_disengage_thief(g, self, disengaged_sentinel)) {
+                // The thief was successfully disengaged.  It has since been
+                // reengaged.
+                return true;
+            }
+        } else {
+            break;
+        }
+        busy_loop_pause();
+    }
+    return false;
+}
+
+// If steal attempts did not find work, update histories as appropriate and
+// possibly disengage this worker.
+__attribute__((always_inline)) static inline unsigned int +handle_failed_steal_attempts(global_state *const rts, worker_id self, + unsigned int nworkers, __cilkrts_worker *const w, + unsigned int fails, + unsigned int *const request_threshold, + history_t *const inefficient_history, + history_t *const efficient_history, + unsigned int *const sentinel_count_history, + unsigned int *const sentinel_count_history_tail, + unsigned int *const recent_sentinel_count) { + // Threshold for number of failed steal attempts to put this thief to sleep + // for an extended amount of time. Must be at least SENTINEL_THRESHOLD and + // a power of 2. + const unsigned int NAP_THRESHOLD = 8 * SENTINEL_THRESHOLD; + const unsigned int SLEEP_THRESHOLD = 16 * NAP_THRESHOLD; + + CILK_START_TIMING(w, INTERVAL_SLEEP); + fails += ATTEMPTS; + + // Every SENTINEL_THRESHOLD consecutive failed steal attempts, update the + // set of sentinel workers, and maybe disengage this worker if there are too + // many sentinel workers. + if (fails % SENTINEL_THRESHOLD == 0) { + if (fails > (1 << 30)) { + // Prevent the fail count from exceeding this maximum, so we don't + // have to worry about the fail count overflowing. + fails = (1 << 30); + const struct timespec sleeptime = {.tv_sec = 0, .tv_nsec = SLEEP_NSEC}; + nanosleep(&sleeptime, NULL); + } else { + if (SENTINEL_THRESHOLD == fails) + atomic_fetch_add_explicit(&rts->disengaged_sentinel, 1, + memory_order_release); +#if BOSS_THIEF + if (is_boss_thread) { + if (fails % NAP_THRESHOLD == 0) { + // The boss thread should never disengage. Sleep + // instead. + const struct timespec sleeptime = { + .tv_sec = 0, + .tv_nsec = + (fails > SLEEP_THRESHOLD) ? SLEEP_NSEC : NAP_NSEC}; + nanosleep(&sleeptime, NULL); + } + } else { +#else + { +#endif + // Check if the current worker counts. 
+ uint64_t disengaged_sentinel = atomic_load_explicit( + &rts->disengaged_sentinel, memory_order_acquire); + worker_counts counts = + get_worker_counts(w, disengaged_sentinel, nworkers); + + // Update the efficient history. + history_t curr_eff = is_efficient(counts); + history_t my_efficient_history = *efficient_history; + my_efficient_history = (my_efficient_history >> 1) | + (curr_eff << (HISTORY_LENGTH - 1)); + int32_t eff_steps = __builtin_popcount(my_efficient_history); + *efficient_history = my_efficient_history; + + // Update the sentinel count. + unsigned int current_sentinel_count = counts.sentinels; + unsigned int tail = *sentinel_count_history_tail; + *recent_sentinel_count = *recent_sentinel_count - + sentinel_count_history[tail] + + current_sentinel_count; + sentinel_count_history[tail] = current_sentinel_count; + *sentinel_count_history_tail = + (tail + 1) % SENTINEL_COUNT_HISTORY; + + // Update the inefficient history. + history_t curr_ineff = is_inefficient(counts); + history_t my_inefficient_history = *inefficient_history; + my_inefficient_history = (my_inefficient_history >> 1) | + (curr_ineff << (HISTORY_LENGTH - 1)); + int32_t ineff_steps = + __builtin_popcount(my_inefficient_history); + *inefficient_history = my_inefficient_history; + + if (ENABLE_THIEF_SLEEP && curr_ineff && + (ineff_steps - eff_steps) > HISTORY_THRESHOLD) { + uint64_t start, end; + start = gettime_fast(); + if (maybe_disengage_thief(rts, self, nworkers, w)) { + // The semaphore for reserving workers may have been + // non-zero due to past successful steals, rather than a + // recent successful steal. Decrement fails so we try + // to disengage this again sooner, in case there is + // still nothing to steal. 
+ end = gettime_fast(); + unsigned int scaled_elapsed = + get_scaled_elapsed(end - start); + + // Update histories + if (scaled_elapsed > SENTINEL_THRESHOLD) { + uint32_t samples = + scaled_elapsed / SENTINEL_THRESHOLD; + if (samples >= HISTORY_LENGTH) { + *efficient_history = 0; + *inefficient_history = 0; + } else { + *efficient_history >>= samples; + *inefficient_history >>= samples; + } + } + + // Update fail count + if (scaled_elapsed < SENTINEL_THRESHOLD) + fails -= scaled_elapsed; + else { + fails = DISENGAGE_THRESHOLD - SENTINEL_THRESHOLD; + } + *request_threshold = SENTINEL_THRESHOLD; + } + } else if (fails % NAP_THRESHOLD == 0) { + // We have enough active workers to keep this worker out of + // disengage, but this worker was still unable to steal + // work. Put this thief to sleep for a while using the + // conventional way. In testing, a nanosleep(0) takes + // approximately 50 us. + const struct timespec sleeptime = { + .tv_sec = 0, + .tv_nsec = + (fails > SLEEP_THRESHOLD) ? SLEEP_NSEC : NAP_NSEC}; + nanosleep(&sleeptime, NULL); + } else { + // We perform many pause instructions to reduce the thief's + // load on the system in a lightweight manner. + for (int i = 0; i < 8 * ATTEMPTS; ++i) { + busy_loop_pause(); + } + } + } + } + } else { + // We perform many pause instructions to reduce the thief's load on + // the system in a lightweight manner. 
+ for (int i = 0; i < 32 * ATTEMPTS; ++i) { + busy_loop_pause(); + } + } + CILK_STOP_TIMING(w, INTERVAL_SLEEP); + return fails; +} + +__attribute__((always_inline)) +static unsigned int go_to_sleep_maybe(global_state *const rts, worker_id self, + unsigned int nworkers, + __cilkrts_worker *const w, + Closure *const t, unsigned int fails, + unsigned int *const request_threshold, + history_t *const inefficient_history, + history_t *const efficient_history, + unsigned int *const sentinel_count_history, + unsigned int *const sentinel_count_history_tail, + unsigned int *const recent_sentinel_count) { + if (t) { + return maybe_reengage_workers( + rts, self, nworkers, w, fails, request_threshold, + inefficient_history, efficient_history, sentinel_count_history, + sentinel_count_history_tail, recent_sentinel_count); + } else { + return handle_failed_steal_attempts( + rts, self, nworkers, w, fails, request_threshold, + inefficient_history, efficient_history, sentinel_count_history, + sentinel_count_history_tail, recent_sentinel_count); + } +} + +#if ENABLE_THIEF_SLEEP +__attribute__((always_inline)) static unsigned int +decrease_fails_by_work(global_state *const rts, __cilkrts_worker *const w, + unsigned int fails, uint64_t elapsed, + unsigned int *const request_threshold) { + uint64_t scaled_elapsed = get_scaled_elapsed(elapsed); + + // Decrease the number of fails based on the work done. + if (scaled_elapsed > (uint64_t)fails) + fails = 0; + else { + fails -= scaled_elapsed; + } + + // The fail count must be a multiple of ATTEMPTS for the sleep logic to + // work. + CILK_ASSERT(w, fails % ATTEMPTS == 0); + + if (scaled_elapsed > (uint64_t)(*request_threshold) - SENTINEL_THRESHOLD) + *request_threshold = SENTINEL_THRESHOLD; + else + *request_threshold -= scaled_elapsed; + + // If this worker is still sentinel, update sentinel-worker count. 
+ if (fails >= SENTINEL_THRESHOLD) + atomic_fetch_add_explicit(&rts->disengaged_sentinel, 1, + memory_order_release); + return fails; +} +#endif // ENABLE_THIEF_SLEEP + +__attribute__((always_inline)) static unsigned int +reset_fails(global_state *rts, unsigned int fails) { + if (fails >= SENTINEL_THRESHOLD) { + // If this worker was sentinel, decrement the number of sentinel + // workers, effectively making this worker active. + atomic_fetch_sub_explicit(&rts->disengaged_sentinel, 1, + memory_order_release); + } + return 0; +} + +__attribute__((always_inline)) static inline void +disengage_worker(global_state *g, unsigned int nworkers, worker_id self) { + cilk_mutex_lock(&g->index_lock); + uint64_t disengaged_sentinel = atomic_fetch_add_explicit( + &g->disengaged_sentinel, (1UL << 32), memory_order_release); + // Update the index-to-worker map. We derive last_index from the new value + // of disengaged_sentinel, because the index is now invalid. + worker_id last_index = nworkers - ((disengaged_sentinel >> 32) + 1); + if (g->worker_to_index[self] < last_index) { + swap_worker_with_target(g, self, last_index); + } + // Release the lock on the index structure + cilk_mutex_unlock(&g->index_lock); +} + + __attribute__((always_inline)) static inline void +reengage_worker(global_state *g, unsigned int nworkers, worker_id self) { + cilk_mutex_lock(&g->index_lock); + uint64_t disengaged_sentinel = atomic_fetch_sub_explicit( + &g->disengaged_sentinel, (1UL << 32), memory_order_release); + // Update the index-to-worker map. We derive last_index from the old value + // of disengaged_sentinel, because the index is now valid. + worker_id last_index = nworkers - (disengaged_sentinel >> 32); + if (g->worker_to_index[self] > last_index) { + swap_worker_with_target(g, self, last_index); + } + // Release the lock on the index structure + cilk_mutex_unlock(&g->index_lock); +} + +#endif /* _WORKER_SLEEP_H */