diff --git a/Foundation/include/Poco/BlockingConcurrentQueue.h b/Foundation/include/Poco/BlockingConcurrentQueue.h deleted file mode 100644 index 3aedd5b583..0000000000 --- a/Foundation/include/Poco/BlockingConcurrentQueue.h +++ /dev/null @@ -1,621 +0,0 @@ -// -// BlockingConcurrentQueue.h -// -// Library: Foundation -// Package: Core -// Module: BlockingConcurrentQueue -// -// Copyright Copyright (c) 2013-2020, Cameron Desrochers. All rights reserved. -// Extracted from https://github.com/cameron314/concurrentqueue lib and adapted for poco: Bychuk Alexander 2023 -// -// SPDX-License-Identifier: BSL-1.0 -// - -// Provides an efficient blocking version of Poco::ConcurrentQueue. -// ©2015-2020 Cameron Desrochers. Distributed under the terms of the simplified -// BSD license, available at the top of concurrentqueue.h. -// Also dual-licensed under the Boost Software License (see LICENSE.md) -// Uses Jeff Preshing's semaphore implementation (under the terms of its -// separate zlib license, see lightweightsemaphore.h). - -#pragma once - -#include "ConcurrentQueue.h" -#include "Poco/LightWeightSemaphore.h" - -#include -#include -#include -#include -#include - -namespace Poco -{ -// This is a blocking version of the queue. It has an almost identical interface to -// the normal non-blocking version, with the addition of various wait_dequeue() methods -// and the removal of producer-specific dequeue methods. -template -class BlockingConcurrentQueue -{ -private: - using ConcurrentQueue = ::Poco::ConcurrentQueue; - using LightweightSemaphore = ::Poco::LightweightSemaphore; - -public: - using producer_token_t = typename ConcurrentQueue::producer_token_t; - using consumer_token_t = typename ConcurrentQueue::consumer_token_t; - - using index_t = typename ConcurrentQueue::index_t; - using size_t = typename ConcurrentQueue::size_t; - using ssize_t = typename std::make_signed::type; - - static const size_t BLOCK_SIZE = ConcurrentQueue::BLOCK_SIZE; - static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = ConcurrentQueue::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD; - static const size_t EXPLICIT_INITIAL_INDEX_SIZE = ConcurrentQueue::EXPLICIT_INITIAL_INDEX_SIZE; - static const size_t IMPLICIT_INITIAL_INDEX_SIZE = ConcurrentQueue::IMPLICIT_INITIAL_INDEX_SIZE; - static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = ConcurrentQueue::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; - static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = ConcurrentQueue::EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE; - static const size_t MAX_SUBQUEUE_SIZE = ConcurrentQueue::MAX_SUBQUEUE_SIZE; - -public: - // Creates a queue with at least `capacity` element slots; note that the - // actual number of elements that can be inserted without additional memory - // allocation depends on the number of producers and the block size (e.g. if - // the block size is equal to `capacity`, only a single block will be allocated - // up-front, which means only a single producer will be able to enqueue elements - // without an extra allocation -- blocks aren't shared between producers). - // This method is not thread safe -- it is up to the user to ensure that the - // queue is fully constructed before it starts being used by other threads (this - // includes making the memory effects of construction visible, possibly with a - // memory barrier). 
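For orientation, here is a minimal usage sketch of the capacity constructor described above (and declared just below). It is illustrative only: it assumes the header is included as Poco/BlockingConcurrentQueue.h, uses the default traits, and joins both threads before the queue is destroyed, so the non-thread-safe construction happens-before any concurrent use.

```cpp
#include "Poco/BlockingConcurrentQueue.h"
#include <thread>

int main()
{
    // Pre-allocates blocks for at least 1024 slots; per the comment above, how many
    // elements actually fit without further allocation also depends on BLOCK_SIZE and
    // the number of producers, since blocks are not shared between producers.
    Poco::BlockingConcurrentQueue<int> queue(1024);

    std::thread producer([&queue] {
        for (int i = 0; i < 100; ++i)
            queue.enqueue(i);             // allocates additional blocks only if needed
    });

    std::thread consumer([&queue] {
        int item = 0;
        for (int i = 0; i < 100; ++i)
            queue.wait_dequeue(item);     // blocks until an element is available
    });

    producer.join();
    consumer.join();
    return 0;
}
```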
- explicit BlockingConcurrentQueue(size_t capacity = 6 * BLOCK_SIZE) - : inner(capacity), sema(create(0, (int)Traits::MAX_SEMA_SPINS), &BlockingConcurrentQueue::template destroy) - { - assert(reinterpret_cast((BlockingConcurrentQueue*)1) == &((BlockingConcurrentQueue*)1)->inner && "BlockingConcurrentQueue must have ConcurrentQueue as its first member"); - } - - BlockingConcurrentQueue(size_t minCapacity, size_t maxExplicitProducers, size_t maxImplicitProducers) - : inner(minCapacity, maxExplicitProducers, maxImplicitProducers), sema(create(0, (int)Traits::MAX_SEMA_SPINS), &BlockingConcurrentQueue::template destroy) - { - assert(reinterpret_cast((BlockingConcurrentQueue*)1) == &((BlockingConcurrentQueue*)1)->inner && "BlockingConcurrentQueue must have ConcurrentQueue as its first member"); - } - - // Disable copying and copy assignment - BlockingConcurrentQueue(BlockingConcurrentQueue const&) = delete; - BlockingConcurrentQueue& operator=(BlockingConcurrentQueue const&) = delete; - - // Moving is supported, but note that it is *not* a thread-safe operation. - // Nobody can use the queue while it's being moved, and the memory effects - // of that move must be propagated to other threads before they can use it. - // Note: When a queue is moved, its tokens are still valid but can only be - // used with the destination queue (i.e. semantically they are moved along - // with the queue itself). - BlockingConcurrentQueue(BlockingConcurrentQueue&& other) noexcept - : inner(std::move(other.inner)), sema(std::move(other.sema)) - { } - - inline BlockingConcurrentQueue& operator=(BlockingConcurrentQueue&& other) noexcept - { - return swap_internal(other); - } - - // Swaps this queue's state with the other's. Not thread-safe. - // Swapping two queues does not invalidate their tokens, however - // the tokens that were created for one queue must be used with - // only the swapped queue (i.e. the tokens are tied to the - // queue's movable state, not the object itself). - inline void swap(BlockingConcurrentQueue& other) noexcept - { - swap_internal(other); - } - -private: - BlockingConcurrentQueue& swap_internal(BlockingConcurrentQueue& other) - { - if (this == &other) - { - return *this; - } - - inner.swap(other.inner); - sema.swap(other.sema); - return *this; - } - -public: - // Enqueues a single item (by copying it). - // Allocates memory if required. Only fails if memory allocation fails (or implicit - // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, - // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). - // Thread-safe. - inline bool enqueue(T const& item) - { - if ((Details::likely)(inner.enqueue(item))) - { - sema->signal(); - return true; - } - return false; - } - - // Enqueues a single item (by moving it, if possible). - // Allocates memory if required. Only fails if memory allocation fails (or implicit - // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, - // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). - // Thread-safe. - inline bool enqueue(T&& item) - { - if ((Details::likely)(inner.enqueue(std::move(item)))) - { - sema->signal(); - return true; - } - return false; - } - - // Enqueues a single item (by copying it) using an explicit producer token. - // Allocates memory if required. Only fails if memory allocation fails (or - // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). - // Thread-safe. 
- inline bool enqueue(producer_token_t const& token, T const& item) - { - if ((Details::likely)(inner.enqueue(token, item))) - { - sema->signal(); - return true; - } - return false; - } - - // Enqueues a single item (by moving it, if possible) using an explicit producer token. - // Allocates memory if required. Only fails if memory allocation fails (or - // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). - // Thread-safe. - inline bool enqueue(producer_token_t const& token, T&& item) - { - if ((Details::likely)(inner.enqueue(token, std::move(item)))) - { - sema->signal(); - return true; - } - return false; - } - - // Enqueues several items. - // Allocates memory if required. Only fails if memory allocation fails (or - // implicit production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE - // is 0, or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). - // Note: Use std::make_move_iterator if the elements should be moved instead of copied. - // Thread-safe. - template - inline bool enqueue_bulk(It itemFirst, size_t count) - { - if ((Details::likely)(inner.enqueue_bulk(std::forward(itemFirst), count))) - { - sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); - return true; - } - return false; - } - - // Enqueues several items using an explicit producer token. - // Allocates memory if required. Only fails if memory allocation fails - // (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). - // Note: Use std::make_move_iterator if the elements should be moved - // instead of copied. - // Thread-safe. - template - inline bool enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) - { - if ((Details::likely)(inner.enqueue_bulk(token, std::forward(itemFirst), count))) - { - sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); - return true; - } - return false; - } - - // Enqueues a single item (by copying it). - // Does not allocate memory. Fails if not enough room to enqueue (or implicit - // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE - // is 0). - // Thread-safe. - inline bool try_enqueue(T const& item) - { - if (inner.try_enqueue(item)) - { - sema->signal(); - return true; - } - return false; - } - - // Enqueues a single item (by moving it, if possible). - // Does not allocate memory (except for one-time implicit producer). - // Fails if not enough room to enqueue (or implicit production is - // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). - // Thread-safe. - inline bool try_enqueue(T&& item) - { - if (inner.try_enqueue(std::move(item))) - { - sema->signal(); - return true; - } - return false; - } - - // Enqueues a single item (by copying it) using an explicit producer token. - // Does not allocate memory. Fails if not enough room to enqueue. - // Thread-safe. - inline bool try_enqueue(producer_token_t const& token, T const& item) - { - if (inner.try_enqueue(token, item)) - { - sema->signal(); - return true; - } - return false; - } - - // Enqueues a single item (by moving it, if possible) using an explicit producer token. - // Does not allocate memory. Fails if not enough room to enqueue. - // Thread-safe. - inline bool try_enqueue(producer_token_t const& token, T&& item) - { - if (inner.try_enqueue(token, std::move(item))) - { - sema->signal(); - return true; - } - return false; - } - - // Enqueues several items. - // Does not allocate memory (except for one-time implicit producer). 
- // Fails if not enough room to enqueue (or implicit production is - // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). - // Note: Use std::make_move_iterator if the elements should be moved - // instead of copied. - // Thread-safe. - template - inline bool try_enqueue_bulk(It itemFirst, size_t count) - { - if (inner.try_enqueue_bulk(std::forward(itemFirst), count)) - { - sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); - return true; - } - return false; - } - - // Enqueues several items using an explicit producer token. - // Does not allocate memory. Fails if not enough room to enqueue. - // Note: Use std::make_move_iterator if the elements should be moved - // instead of copied. - // Thread-safe. - template - inline bool try_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) - { - if (inner.try_enqueue_bulk(token, std::forward(itemFirst), count)) - { - sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); - return true; - } - return false; - } - - - // Attempts to dequeue from the queue. - // Returns false if all producer streams appeared empty at the time they - // were checked (so, the queue is likely but not guaranteed to be empty). - // Never allocates. Thread-safe. - template - inline bool try_dequeue(U& item) - { - if (sema->tryWait()) - { - while (!inner.try_dequeue(item)) - { - continue; - } - return true; - } - return false; - } - - // Attempts to dequeue from the queue using an explicit consumer token. - // Returns false if all producer streams appeared empty at the time they - // were checked (so, the queue is likely but not guaranteed to be empty). - // Never allocates. Thread-safe. - template - inline bool try_dequeue(consumer_token_t& token, U& item) - { - if (sema->tryWait()) - { - while (!inner.try_dequeue(token, item)) - { - continue; - } - return true; - } - return false; - } - - // Attempts to dequeue several elements from the queue. - // Returns the number of items actually dequeued. - // Returns 0 if all producer streams appeared empty at the time they - // were checked (so, the queue is likely but not guaranteed to be empty). - // Never allocates. Thread-safe. - template - inline size_t try_dequeue_bulk(It itemFirst, size_t max) - { - size_t count = 0; - max = (size_t)sema->tryWaitMany((LightweightSemaphore::ssize_t)(ssize_t)max); - while (count != max) - { - count += inner.template try_dequeue_bulk(itemFirst, max - count); - } - return count; - } - - // Attempts to dequeue several elements from the queue using an explicit consumer token. - // Returns the number of items actually dequeued. - // Returns 0 if all producer streams appeared empty at the time they - // were checked (so, the queue is likely but not guaranteed to be empty). - // Never allocates. Thread-safe. - template - inline size_t try_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max) - { - size_t count = 0; - max = (size_t)sema->tryWaitMany((LightweightSemaphore::ssize_t)(ssize_t)max); - while (count != max) - { - count += inner.template try_dequeue_bulk(token, itemFirst, max - count); - } - return count; - } - - - - // Blocks the current thread until there's something to dequeue, then - // dequeues it. - // Never allocates. Thread-safe. - template - inline void wait_dequeue(U& item) - { - while (!sema->wait()) - { - continue; - } - while (!inner.try_dequeue(item)) - { - continue; - } - } - - // Blocks the current thread until either there's something to dequeue - // or the timeout (specified in microseconds) expires. 
Returns false - // without setting `item` if the timeout expires, otherwise assigns - // to `item` and returns true. - // Using a negative timeout indicates an indefinite timeout, - // and is thus functionally equivalent to calling wait_dequeue. - // Never allocates. Thread-safe. - template - inline bool wait_dequeue_timed(U& item, std::int64_t timeout_usecs) - { - if (!sema->wait(timeout_usecs)) - { - return false; - } - while (!inner.try_dequeue(item)) - { - continue; - } - return true; - } - - // Blocks the current thread until either there's something to dequeue - // or the timeout expires. Returns false without setting `item` if the - // timeout expires, otherwise assigns to `item` and returns true. - // Never allocates. Thread-safe. - template - inline bool wait_dequeue_timed(U& item, std::chrono::duration const& timeout) - { - return wait_dequeue_timed(item, std::chrono::duration_cast(timeout).count()); - } - - // Blocks the current thread until there's something to dequeue, then - // dequeues it using an explicit consumer token. - // Never allocates. Thread-safe. - template - inline void wait_dequeue(consumer_token_t& token, U& item) - { - while (!sema->wait()) - { - continue; - } - while (!inner.try_dequeue(token, item)) - { - continue; - } - } - - // Blocks the current thread until either there's something to dequeue - // or the timeout (specified in microseconds) expires. Returns false - // without setting `item` if the timeout expires, otherwise assigns - // to `item` and returns true. - // Using a negative timeout indicates an indefinite timeout, - // and is thus functionally equivalent to calling wait_dequeue. - // Never allocates. Thread-safe. - template - inline bool wait_dequeue_timed(consumer_token_t& token, U& item, std::int64_t timeout_usecs) - { - if (!sema->wait(timeout_usecs)) - { - return false; - } - while (!inner.try_dequeue(token, item)) - { - continue; - } - return true; - } - - // Blocks the current thread until either there's something to dequeue - // or the timeout expires. Returns false without setting `item` if the - // timeout expires, otherwise assigns to `item` and returns true. - // Never allocates. Thread-safe. - template - inline bool wait_dequeue_timed(consumer_token_t& token, U& item, std::chrono::duration const& timeout) - { - return wait_dequeue_timed(token, item, std::chrono::duration_cast(timeout).count()); - } - - // Attempts to dequeue several elements from the queue. - // Returns the number of items actually dequeued, which will - // always be at least one (this method blocks until the queue - // is non-empty) and at most max. - // Never allocates. Thread-safe. - template - inline size_t wait_dequeue_bulk(It itemFirst, size_t max) - { - size_t count = 0; - max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max); - while (count != max) - { - count += inner.template try_dequeue_bulk(itemFirst, max - count); - } - return count; - } - - // Attempts to dequeue several elements from the queue. - // Returns the number of items actually dequeued, which can - // be 0 if the timeout expires while waiting for elements, - // and at most max. - // Using a negative timeout indicates an indefinite timeout, - // and is thus functionally equivalent to calling wait_dequeue_bulk. - // Never allocates. Thread-safe. 
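A hedged illustration of the timed waits documented above (the bulk timed overloads are declared just below); the queue methods are those of this header, while the function name, element type, and timeout values are placeholders:

```cpp
#include "Poco/BlockingConcurrentQueue.h"
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <string>

void drain_some(Poco::BlockingConcurrentQueue<std::string>& queue)
{
    std::string item;

    // Waits up to 200 ms; returns false (leaving `item` unchanged) on timeout.
    if (queue.wait_dequeue_timed(item, std::chrono::milliseconds(200)))
    {
        // ... process item ...
    }

    // A negative microsecond timeout means "wait indefinitely",
    // functionally equivalent to calling wait_dequeue().
    queue.wait_dequeue_timed(item, static_cast<std::int64_t>(-1));

    // Bulk variant: dequeues up to 16 items, returning how many were actually
    // taken (possibly 0 if the 50 ms timeout expires first).
    std::string items[16];
    std::size_t n = queue.wait_dequeue_bulk_timed(items, 16, std::chrono::milliseconds(50));
    (void)n;
}
```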
- template - inline size_t wait_dequeue_bulk_timed(It itemFirst, size_t max, std::int64_t timeout_usecs) - { - size_t count = 0; - max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max, timeout_usecs); - while (count != max) - { - count += inner.template try_dequeue_bulk(itemFirst, max - count); - } - return count; - } - - // Attempts to dequeue several elements from the queue. - // Returns the number of items actually dequeued, which can - // be 0 if the timeout expires while waiting for elements, - // and at most max. - // Never allocates. Thread-safe. - template - inline size_t wait_dequeue_bulk_timed(It itemFirst, size_t max, std::chrono::duration const& timeout) - { - return wait_dequeue_bulk_timed(itemFirst, max, std::chrono::duration_cast(timeout).count()); - } - - // Attempts to dequeue several elements from the queue using an explicit consumer token. - // Returns the number of items actually dequeued, which will - // always be at least one (this method blocks until the queue - // is non-empty) and at most max. - // Never allocates. Thread-safe. - template - inline size_t wait_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max) - { - size_t count = 0; - max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max); - while (count != max) - { - count += inner.template try_dequeue_bulk(token, itemFirst, max - count); - } - return count; - } - - // Attempts to dequeue several elements from the queue using an explicit consumer token. - // Returns the number of items actually dequeued, which can - // be 0 if the timeout expires while waiting for elements, - // and at most max. - // Using a negative timeout indicates an indefinite timeout, - // and is thus functionally equivalent to calling wait_dequeue_bulk. - // Never allocates. Thread-safe. - template - inline size_t wait_dequeue_bulk_timed(consumer_token_t& token, It itemFirst, size_t max, std::int64_t timeout_usecs) - { - size_t count = 0; - max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max, timeout_usecs); - while (count != max) - { - count += inner.template try_dequeue_bulk(token, itemFirst, max - count); - } - return count; - } - - // Attempts to dequeue several elements from the queue using an explicit consumer token. - // Returns the number of items actually dequeued, which can - // be 0 if the timeout expires while waiting for elements, - // and at most max. - // Never allocates. Thread-safe. - template - inline size_t wait_dequeue_bulk_timed(consumer_token_t& token, It itemFirst, size_t max, std::chrono::duration const& timeout) - { - return wait_dequeue_bulk_timed(token, itemFirst, max, std::chrono::duration_cast(timeout).count()); - } - - - // Returns an estimate of the total number of elements currently in the queue. This - // estimate is only accurate if the queue has completely stabilized before it is called - // (i.e. all enqueue and dequeue operations have completed and their memory effects are - // visible on the calling thread, and no further operations start while this method is - // being called). - // Thread-safe. - inline size_t size_approx() const - { - return (size_t)sema->availableApprox(); - } - - - // Returns true if the underlying atomic variables used by - // the queue are lock-free (they should be on most platforms). - // Thread-safe. 
- static constexpr bool is_lock_free() - { - return ConcurrentQueue::is_lock_free(); - } - - -private: - template - static inline U* create(A1&& a1, A2&& a2) - { - void* p = (Traits::malloc)(sizeof(U)); - return p != nullptr ? new (p) U(std::forward(a1), std::forward(a2)) : nullptr; - } - - template - static inline void destroy(U* p) - { - if (p != nullptr) - { - p->~U(); - } - (Traits::free)(p); - } - -private: - ConcurrentQueue inner; - std::unique_ptr sema; -}; - - -template -inline void swap(BlockingConcurrentQueue& a, BlockingConcurrentQueue& b) noexcept -{ - a.swap(b); -} - -} // end namespace Poco diff --git a/Foundation/include/Poco/ConcurrentQueue.h b/Foundation/include/Poco/ConcurrentQueue.h deleted file mode 100644 index ec506f381c..0000000000 --- a/Foundation/include/Poco/ConcurrentQueue.h +++ /dev/null @@ -1,3635 +0,0 @@ -// -// ConcurrentQueue.h -// -// Library: Foundation -// Package: Core -// Module: ConcurrentQueue -// -// Copyright Copyright (c) 2013-2020, Cameron Desrochers. All rights reserved. -// Extracted from https://github.com/cameron314/concurrentqueue lib and adapted for poco: Bychuk Alexander 2023 -// -// SPDX-License-Identifier: BSL-1.0 -// - -// Provides a C++11 implementation of a multi-producer, multi-consumer lock-free queue. -// based on -// http://moodycamel.com/blog/2014/a-fast-general-purpose-lock-free-queue-for-c++ -// The full design is also described in excruciating detail at: -// http://moodycamel.com/blog/2014/detailed-design-of-a-lock-free-queue - -// Simplified BSD license: -// Copyright (c) 2013-2020, Cameron Desrochers. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// - Redistributions of source code must retain the above copyright notice, this list of -// conditions and the following disclaimer. -// - Redistributions in binary form must reproduce the above copyright notice, this list of -// conditions and the following disclaimer in the documentation and/or other materials -// provided with the distribution. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL -// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT -// OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR -// TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, -// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -// Also dual-licensed under the Boost Software License (see LICENSE.md) - -#pragma once - -#if defined(__GNUC__) && !defined(__INTEL_COMPILER) -// Disable -Wconversion warnings (spuriously triggered when Traits::size_t and -// Traits::index_t are set to < 32 bits, causing integer promotion, causing warnings -// upon assigning any computed values) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wconversion" -#endif - -#if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17) -// VS2019 with /W4 warns about constant conditional expressions but unless /std=c++17 or higher -// does not support `if constexpr`, so we have no choice but to simply disable the warning -#pragma warning(push) -#pragma warning(disable: 4127) // conditional expression is constant -#endif - -#include "Poco/Thread.h" -#include "Poco/ThreadLocal.h" -#include "Poco/Mutex.h" -#include "Poco/Nullable.h" - -#if POCO_OS == POCO_OS_MAC_OS_X -#include "TargetConditionals.h" -#endif - -#include // Requires C++11. Sorry VS2010. -#include - -#include // for max_align_t -#include -#include -#include -#include -#include -#include -#include // for CHAR_BIT -#include - -// Platform-specific definitions of a numeric thread ID type and an invalid value -namespace Poco { namespace Details { - template struct thread_id_converter - { - using thread_id_numeric_size_t = thread_id_t; - using thread_id_hash_t = thread_id_t; - static inline thread_id_hash_t prehash(thread_id_t const& x) { return x; } - }; -} } - -namespace Poco { namespace Details { - using thread_id_t = long; - static const thread_id_t invalid_thread_id = 0; // Address can't be nullptr - static const thread_id_t invalid_thread_id2 = 1; // Member accesses off a null pointer are also generally invalid. Plus it's not aligned. 
- inline thread_id_t thread_id() - { - static Poco::ThreadLocal> x; - if ((*x).isNull()) - { - (*x) = Poco::Thread::currentOsTid(); - } - return x->value(); - } -} } - -// Constexpr if -#ifndef POCO_CONSTEXPR_IF -#if (defined(_MSC_VER) && defined(_HAS_CXX17) && _HAS_CXX17) || __cplusplus > 201402L -#define POCO_CONSTEXPR_IF if constexpr -#else -#define POCO_CONSTEXPR_IF if -#endif -#endif - -namespace Poco { namespace Details { -#ifndef POCO_ALIGNAS -// VS2013 doesn't support alignas or alignof, and align() requires a constant literal -#if defined(_MSC_VER) && _MSC_VER <= 1800 -#define POCO_ALIGNAS(alignment) __declspec(align(alignment)) -#define POCO_ALIGNOF(obj) __alignof(obj) -#define POCO_ALIGNED_TYPE_LIKE(T, obj) typename Details::Vs2013Aligned::value, T>::type - template struct Vs2013Aligned { }; // default, unsupported alignment - template struct Vs2013Aligned<1, T> { typedef __declspec(align(1)) T type; }; - template struct Vs2013Aligned<2, T> { typedef __declspec(align(2)) T type; }; - template struct Vs2013Aligned<4, T> { typedef __declspec(align(4)) T type; }; - template struct Vs2013Aligned<8, T> { typedef __declspec(align(8)) T type; }; - template struct Vs2013Aligned<16, T> { typedef __declspec(align(16)) T type; }; - template struct Vs2013Aligned<32, T> { typedef __declspec(align(32)) T type; }; - template struct Vs2013Aligned<64, T> { typedef __declspec(align(64)) T type; }; - template struct Vs2013Aligned<128, T> { typedef __declspec(align(128)) T type; }; - template struct Vs2013Aligned<256, T> { typedef __declspec(align(256)) T type; }; -#else - template struct identity { using type = T; }; -#define POCO_ALIGNAS(alignment) alignas(alignment) -#define POCO_ALIGNOF(obj) alignof(obj) -#define POCO_ALIGNED_TYPE_LIKE(T, obj) alignas(alignof(obj)) typename Details::identity::type -#endif -#endif -} } - - -// TSAN can false report races in lock-free code. To enable TSAN to be used from projects that use this one, -// we can apply per-function compile-time suppression. -// See https://clang.llvm.org/docs/ThreadSanitizer.html#has-feature-thread-sanitizer -#define POCO_NO_TSAN -#if defined(__has_feature) - #if __has_feature(thread_sanitizer) - #undef POCO_NO_TSAN - #define POCO_NO_TSAN __attribute__((no_sanitize("thread"))) - #endif // TSAN -#endif // TSAN - -// Compiler-specific likely/unlikely hints -namespace Poco { namespace Details { -#if defined(__GNUC__) - static inline bool (likely)(bool x) { return __builtin_expect((x), true); } - static inline bool (unlikely)(bool x) { return __builtin_expect((x), false); } -#else - static inline bool (likely)(bool x) { return x; } - static inline bool (unlikely)(bool x) { return x; } -#endif -} } - -namespace Poco { -namespace Details { - template - struct const_numeric_max - { - static_assert(std::is_integral::value, "const_numeric_max can only be used with integers"); - static const T value = std::numeric_limits::is_signed - ? (static_cast(1) << (sizeof(T) * CHAR_BIT - 1)) - static_cast(1) - : static_cast(-1); - }; - -#if defined(__GLIBCXX__) - typedef ::max_align_t std_max_align_t; // libstdc++ forgot to add it to std:: for a while -#else - using std_max_align_t = std::max_align_t; // Others (e.g. MSVC) insist it can *only* be accessed via std:: -#endif - - // Some platforms have incorrectly set max_align_t to a type with <8 bytes alignment even while supporting - // 8-byte aligned scalar values (*cough* 32-bit iOS). Work around this with our own union. See issue #64. 
- using max_align_t = union { - std_max_align_t x; - long long y; - void* z; - }; -} - -// Default traits for the ConcurrentQueue. To change some of the -// traits without re-implementing all of them, inherit from this -// struct and shadow the declarations you wish to be different; -// since the traits are used as a template type parameter, the -// shadowed declarations will be used where defined, and the defaults -// otherwise. -struct ConcurrentQueueDefaultTraits -{ - // General-purpose size type. std::size_t is strongly recommended. - using size_t = std::size_t; - - // The type used for the enqueue and dequeue indices. Must be at least as - // large as size_t. Should be significantly larger than the number of elements - // you expect to hold at once, especially if you have a high turnover rate; - // for example, on 32-bit x86, if you expect to have over a hundred million - // elements or pump several million elements through your queue in a very - // short space of time, using a 32-bit type *may* trigger a race condition. - // A 64-bit int type is recommended in that case, and in practice will - // prevent a race condition no matter the usage of the queue. Note that - // whether the queue is lock-free with a 64-int type depends on the whether - // std::atomic is lock-free, which is platform-specific. - using index_t = std::size_t; - - // Internally, all elements are enqueued and dequeued from multi-element - // blocks; this is the smallest controllable unit. If you expect few elements - // but many producers, a smaller block size should be favoured. For few producers - // and/or many elements, a larger block size is preferred. A sane default - // is provided. Must be a power of 2. - static const size_t BLOCK_SIZE = 32; - - // For explicit producers (i.e. when using a producer token), the block is - // checked for being empty by iterating through a list of flags, one per element. - // For large block sizes, this is too inefficient, and switching to an atomic - // counter-based approach is faster. The switch is made for block sizes strictly - // larger than this threshold. - static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = 32; - - // How many full blocks can be expected for a single explicit producer? This should - // reflect that number's maximum for optimal performance. Must be a power of 2. - static const size_t EXPLICIT_INITIAL_INDEX_SIZE = 32; - - // How many full blocks can be expected for a single implicit producer? This should - // reflect that number's maximum for optimal performance. Must be a power of 2. - static const size_t IMPLICIT_INITIAL_INDEX_SIZE = 32; - - // The initial size of the hash table mapping thread IDs to implicit producers. - // Note that the hash is resized every time it becomes half full. - // Must be a power of two, and either 0 or at least 1. If 0, implicit production - // (using the enqueue methods without an explicit producer token) is disabled. - static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = 32; - - // Controls the number of items that an explicit consumer (i.e. one with a token) - // must consume before it causes all consumers to rotate and move on to the next - // internal queue. - static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = 256; - - // The maximum number of elements (inclusive) that can be enqueued to a sub-queue. - // Enqueue operations that would cause this limit to be surpassed will fail. Note - // that this limit is enforced at the block level (for performance reasons), i.e. 
- // it's rounded up to the nearest block size. - static const size_t MAX_SUBQUEUE_SIZE = Details::const_numeric_max::value; - - // The number of times to spin before sleeping when waiting on a semaphore. - // Recommended values are on the order of 1000-10000 unless the number of - // consumer threads exceeds the number of idle cores (in which case try 0-100). - // Only affects instances of the BlockingConcurrentQueue. - static const int MAX_SEMA_SPINS = 10000; - - // Whether to recycle dynamically-allocated blocks into an internal free list or - // not. If false, only pre-allocated blocks (controlled by the constructor - // arguments) will be recycled, and all others will be `free`d back to the heap. - // Note that blocks consumed by explicit producers are only freed on destruction - // of the queue (not following destruction of the token) regardless of this trait. - static const bool RECYCLE_ALLOCATED_BLOCKS = false; - - -#if defined(malloc) || defined(free) - // Gah, this is 2015, stop defining macros that break standard code already! - // Work around malloc/free being special macros: - static inline void* WORKAROUND_malloc(size_t size) { return malloc(size); } - static inline void WORKAROUND_free(void* ptr) { return free(ptr); } - static inline void* (malloc)(size_t size) { return WORKAROUND_malloc(size); } - static inline void (free)(void* ptr) { return WORKAROUND_free(ptr); } -#else - static inline void* malloc(size_t size) { return std::malloc(size); } - static inline void free(void* ptr) { return std::free(ptr); } -#endif -}; - - -// When producing or consuming many elements, the most efficient way is to: -// 1) Use one of the bulk-operation methods of the queue with a token -// 2) Failing that, use the bulk-operation methods without a token -// 3) Failing that, create a token and use that with the single-item methods -// 4) Failing that, use the single-parameter methods of the queue -// Having said that, don't create tokens willy-nilly -- ideally there should be -// a maximum of one token per thread (of each kind). 
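Following the guidance above, a sketch of token-based bulk production and consumption might look like this (one token per thread of each kind; the element type, buffer sizes, and function names are illustrative):

```cpp
#include "Poco/ConcurrentQueue.h"
#include <cstddef>
#include <vector>

void producer_thread(Poco::ConcurrentQueue<int>& queue, const std::vector<int>& batch)
{
    // One producer token per producing thread (option 1 in the list above).
    Poco::ProducerToken ptok(queue);
    if (!queue.enqueue_bulk(ptok, batch.begin(), batch.size()))
    {
        // Fails only if memory allocation fails (or MAX_SUBQUEUE_SIZE would be exceeded).
    }
}

void consumer_thread(Poco::ConcurrentQueue<int>& queue)
{
    // Likewise, one consumer token per consuming thread.
    Poco::ConsumerToken ctok(queue);
    int items[64];
    // Dequeues up to 64 items in one call; returns how many were actually taken.
    std::size_t n = queue.try_dequeue_bulk(ctok, items, 64);
    (void)n;
}
```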
-struct ProducerToken; -struct ConsumerToken; - -template class ConcurrentQueue; -template class BlockingConcurrentQueue; -class ConcurrentQueueTests; - - -namespace Details { - struct ConcurrentQueueProducerTypelessBase - { - ConcurrentQueueProducerTypelessBase* next{nullptr}; - std::atomic inactive{false}; - ProducerToken* token{nullptr}; - - ConcurrentQueueProducerTypelessBase() = default; - }; - - struct hash_32_or_64 - { - template = true> - static inline std::uint32_t hash(T h) - { - // MurmurHash3 finalizer -- see https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp - // Since the thread ID is already unique, all we really want to do is propagate that - // uniqueness evenly across all the bits, so that we can use a subset of the bits while - // reducing collisions significantly - h ^= h >> 16; - h *= 0x85ebca6b; - h ^= h >> 13; - h *= 0xc2b2ae35; - return h ^ (h >> 16); - } - template 4), bool> = true> - static inline std::uint32_t hash(T h) - { - h ^= h >> 33; - h *= 0xff51afd7ed558ccd; - h ^= h >> 33; - h *= 0xc4ceb9fe1a85ec53; - return h ^ (h >> 33); - } - }; - - static inline size_t hash_thread_id(thread_id_t id) - { - static_assert(sizeof(thread_id_t) <= 8, "Expected a platform where thread IDs are at most 64-bit values"); - return static_cast(hash_32_or_64::hash(thread_id_converter::prehash(id))); - } - - template - static inline bool circular_less_than(T a, T b) - { - static_assert(std::is_integral::value && !std::numeric_limits::is_signed, "circular_less_than is intended to be used only with unsigned integer types"); - return static_cast(a - b) > static_cast(static_cast(1) << (static_cast(sizeof(T) * CHAR_BIT - 1))); - // Note: extra parens around rhs of operator<< is MSVC bug: https://developercommunity2.visualstudio.com/t/C4554-triggers-when-both-lhs-and-rhs-is/10034931 - // silencing the bug requires #pragma warning(disable: 4554) around the calling code and has no effect when done here. 
- } - - template - static inline char* align_for(char* ptr) - { - const std::size_t alignment = std::alignment_of::value; - return ptr + (alignment - (reinterpret_cast(ptr) % alignment)) % alignment; - } - - template - static inline T ceil_to_pow_2(T x) - { - static_assert(std::is_integral::value && !std::numeric_limits::is_signed, "ceil_to_pow_2 is intended to be used only with unsigned integer types"); - - // Adapted from http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 - --x; - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - for (std::size_t i = 1; i < sizeof(T); i <<= 1) { - x |= x >> (i << 3); - } - ++x; - return x; - } - - template - static inline void swap_relaxed(std::atomic& left, std::atomic& right) - { - T temp = std::move(left.load(std::memory_order_relaxed)); - left.store(std::move(right.load(std::memory_order_relaxed)), std::memory_order_relaxed); - right.store(std::move(temp), std::memory_order_relaxed); - } - - template - static inline T const& nomove(T const& x) - { - return x; - } - - template - struct nomove_if - { - template - static inline T const& eval(T const& x) - { - return x; - } - }; - - template<> - struct nomove_if - { - template - static inline auto eval(U&& x) -> decltype(std::forward(x)) - { - return std::forward(x); - } - }; - - template - static inline auto deref_noexcept(It& it) noexcept -> decltype(*it) - { - return *it; - } - -#if defined(__clang__) || !defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8) - template struct is_trivially_destructible : std::is_trivially_destructible { }; -#else - template struct is_trivially_destructible : std::has_trivial_destructor { }; -#endif - class ThreadExitNotifier; - - struct ThreadExitListener - { - using callback_t = void (*)(void *); - callback_t callback{}; - void* userData{nullptr}; - - ThreadExitListener* next{nullptr}; // reserved for use by the ThreadExitNotifier - ThreadExitNotifier* chain{nullptr}; // reserved for use by the ThreadExitNotifier - }; - - class ThreadExitNotifier - { - public: - static void subscribe(ThreadExitListener* listener) - { - auto& tlsInst = instance(); - Poco::FastMutex::ScopedLock guard(mutex()); - listener->next = tlsInst.tail; - listener->chain = &tlsInst; - tlsInst.tail = listener; - } - - static void unsubscribe(ThreadExitListener* listener) - { - Poco::FastMutex::ScopedLock guard(mutex()); - if (!listener->chain) - { - return; // race with ~ThreadExitNotifier - } - auto& tlsInst = *listener->chain; - listener->chain = nullptr; - ThreadExitListener** prev = &tlsInst.tail; - for (auto ptr = tlsInst.tail; ptr != nullptr; ptr = ptr->next) - { - if (ptr == listener) - { - *prev = ptr->next; - break; - } - prev = &ptr->next; - } - } - - private: - ThreadExitNotifier() = default; - ThreadExitNotifier(ThreadExitNotifier const&) = delete; - ThreadExitNotifier& operator=(ThreadExitNotifier const&) = delete; - - ~ThreadExitNotifier() - { - // This thread is about to exit, let everyone know! - assert(this == &instance() && "If this assert fails, you likely have a buggy compiler! 
Thread local support problem."); - Poco::FastMutex::ScopedLock guard(mutex()); - for (auto ptr = tail; ptr != nullptr; ptr = ptr->next) - { - ptr->chain = nullptr; - ptr->callback(ptr->userData); - } - } - - // Thread-local - static inline ThreadExitNotifier& instance() - { - static thread_local ThreadExitNotifier notifier; - return notifier; - } - - static inline Poco::FastMutex& mutex() - { - // Must be static because the ThreadExitNotifier could be destroyed while unsubscribe is called - static Poco::FastMutex mutex; - return mutex; - } - - private: - ThreadExitListener* tail{nullptr}; - }; - - - template struct static_is_lock_free_num { enum { value = 0 }; }; - template<> struct static_is_lock_free_num { enum { value = ATOMIC_CHAR_LOCK_FREE }; }; - template<> struct static_is_lock_free_num { enum { value = ATOMIC_SHORT_LOCK_FREE }; }; - template<> struct static_is_lock_free_num { enum { value = ATOMIC_INT_LOCK_FREE }; }; - template<> struct static_is_lock_free_num { enum { value = ATOMIC_LONG_LOCK_FREE }; }; - template<> struct static_is_lock_free_num { enum { value = ATOMIC_LLONG_LOCK_FREE }; }; - template struct static_is_lock_free : static_is_lock_free_num::type> { }; - template<> struct static_is_lock_free { enum { value = ATOMIC_BOOL_LOCK_FREE }; }; - template struct static_is_lock_free { enum { value = ATOMIC_POINTER_LOCK_FREE }; }; -} - -struct ProducerToken -{ - template - explicit ProducerToken(ConcurrentQueue& queue); - - template - explicit ProducerToken(BlockingConcurrentQueue& queue); - - ProducerToken(ProducerToken&& other) noexcept - : producer(other.producer) - { - other.producer = nullptr; - if (producer != nullptr) - { - producer->token = this; - } - } - - inline ProducerToken& operator=(ProducerToken&& other) noexcept - { - swap(other); - return *this; - } - - void swap(ProducerToken& other) noexcept - { - std::swap(producer, other.producer); - if (producer != nullptr) - { - producer->token = this; - } - if (other.producer != nullptr) - { - other.producer->token = &other; - } - } - - // A token is always valid unless: - // 1) Memory allocation failed during construction - // 2) It was moved via the move constructor - // (Note: assignment does a swap, leaving both potentially valid) - // 3) The associated queue was destroyed - // Note that if valid() returns true, that only indicates - // that the token is valid for use with a specific queue, - // but not which one; that's up to the user to track. 
- inline bool valid() const { return producer != nullptr; } - - ~ProducerToken() - { - if (producer != nullptr) - { - producer->token = nullptr; - producer->inactive.store(true, std::memory_order_release); - } - } - - // Disable copying and assignment - ProducerToken(ProducerToken const&) = delete; - ProducerToken& operator=(ProducerToken const&) = delete; - -private: - template friend class ConcurrentQueue; - friend class ConcurrentQueueTests; - -protected: - Details::ConcurrentQueueProducerTypelessBase* producer; -}; - - -struct ConsumerToken -{ - template - explicit ConsumerToken(ConcurrentQueue& q); - - template - explicit ConsumerToken(BlockingConcurrentQueue& q); - - ConsumerToken(ConsumerToken&& other) noexcept - : initialOffset(other.initialOffset), lastKnownGlobalOffset(other.lastKnownGlobalOffset), itemsConsumedFromCurrent(other.itemsConsumedFromCurrent), currentProducer(other.currentProducer), desiredProducer(other.desiredProducer) - { } - - inline ConsumerToken& operator=(ConsumerToken&& other) noexcept - { - swap(other); - return *this; - } - - void swap(ConsumerToken& other) noexcept - { - std::swap(initialOffset, other.initialOffset); - std::swap(lastKnownGlobalOffset, other.lastKnownGlobalOffset); - std::swap(itemsConsumedFromCurrent, other.itemsConsumedFromCurrent); - std::swap(currentProducer, other.currentProducer); - std::swap(desiredProducer, other.desiredProducer); - } - - // Disable copying and assignment - ConsumerToken(ConsumerToken const&) = delete; - ConsumerToken& operator=(ConsumerToken const&) = delete; - -private: - template friend class ConcurrentQueue; - friend class ConcurrentQueueTests; - -private: // but shared with ConcurrentQueue - std::uint32_t initialOffset{0}; - std::uint32_t lastKnownGlobalOffset{0}; - std::uint32_t itemsConsumedFromCurrent{0}; - Details::ConcurrentQueueProducerTypelessBase* currentProducer{nullptr}; - Details::ConcurrentQueueProducerTypelessBase* desiredProducer{nullptr}; -}; - -// Need to forward-declare this swap because it's in a namespace. -// See http://stackoverflow.com/questions/4492062/why-does-a-c-friend-class-need-a-forward-declaration-only-in-other-namespaces -template -inline void swap(typename ConcurrentQueue::ImplicitProducerKVP& a, typename ConcurrentQueue::ImplicitProducerKVP& b) noexcept; - - -template -class ConcurrentQueue -{ -public: - using producer_token_t = ::Poco::ProducerToken; - using consumer_token_t = ::Poco::ConsumerToken; - - using index_t = typename Traits::index_t; - using size_t = typename Traits::size_t; - - static const size_t BLOCK_SIZE = static_cast(Traits::BLOCK_SIZE); - static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = static_cast(Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD); - static const size_t EXPLICIT_INITIAL_INDEX_SIZE = static_cast(Traits::EXPLICIT_INITIAL_INDEX_SIZE); - static const size_t IMPLICIT_INITIAL_INDEX_SIZE = static_cast(Traits::IMPLICIT_INITIAL_INDEX_SIZE); - static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = static_cast(Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE); - static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = static_cast(Traits::EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE); -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable: 4307) // + integral constant overflow (that's what the ternary expression is for!) 
-#pragma warning(disable: 4309) // static_cast: Truncation of constant value -#endif - static const size_t MAX_SUBQUEUE_SIZE = (Details::const_numeric_max::value - static_cast(Traits::MAX_SUBQUEUE_SIZE) < BLOCK_SIZE) ? Details::const_numeric_max::value : ((static_cast(Traits::MAX_SUBQUEUE_SIZE) + (BLOCK_SIZE - 1)) / BLOCK_SIZE * BLOCK_SIZE); -#ifdef _MSC_VER -#pragma warning(pop) -#endif - - static_assert(!std::numeric_limits::is_signed && std::is_integral::value, "Traits::size_t must be an unsigned integral type"); - static_assert(!std::numeric_limits::is_signed && std::is_integral::value, "Traits::index_t must be an unsigned integral type"); - static_assert(sizeof(index_t) >= sizeof(size_t), "Traits::index_t must be at least as wide as Traits::size_t"); - static_assert((BLOCK_SIZE > 1) && !(BLOCK_SIZE & (BLOCK_SIZE - 1)), "Traits::BLOCK_SIZE must be a power of 2 (and at least 2)"); - static_assert((EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD > 1) && !(EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD & (EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD - 1)), "Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD must be a power of 2 (and greater than 1)"); - static_assert((EXPLICIT_INITIAL_INDEX_SIZE > 1) && !(EXPLICIT_INITIAL_INDEX_SIZE & (EXPLICIT_INITIAL_INDEX_SIZE - 1)), "Traits::EXPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1)"); - static_assert((IMPLICIT_INITIAL_INDEX_SIZE > 1) && !(IMPLICIT_INITIAL_INDEX_SIZE & (IMPLICIT_INITIAL_INDEX_SIZE - 1)), "Traits::IMPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1)"); - static_assert((INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) || !(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE & (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE - 1)), "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be a power of 2"); - static_assert(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0 || INITIAL_IMPLICIT_PRODUCER_HASH_SIZE >= 1, "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be at least 1 (or 0 to disable implicit enqueueing)"); - -public: - // Creates a queue with at least `capacity` element slots; note that the - // actual number of elements that can be inserted without additional memory - // allocation depends on the number of producers and the block size (e.g. if - // the block size is equal to `capacity`, only a single block will be allocated - // up-front, which means only a single producer will be able to enqueue elements - // without an extra allocation -- blocks aren't shared between producers). - // This method is not thread safe -- it is up to the user to ensure that the - // queue is fully constructed before it starts being used by other threads (this - // includes making the memory effects of construction visible, possibly with a - // memory barrier). - explicit ConcurrentQueue(size_t capacity = 32 * BLOCK_SIZE) - : producerListTail(nullptr), - producerCount(0), - initialBlockPoolIndex(0), - nextExplicitConsumerId(0), - globalExplicitConsumerOffset(0) - { - implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); - populate_initial_implicit_producer_hash(); - populate_initial_block_list(capacity / BLOCK_SIZE + ((capacity & (BLOCK_SIZE - 1)) == 0 ? 0 : 1)); - } - - // Computes the correct amount of pre-allocated blocks for you based - // on the minimum number of elements you want available at any given - // time, and the maximum concurrent number of each type of producer. 
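A hedged sketch of how the sized constructor described above (and declared just below) might be used; the sizing numbers are purely illustrative:

```cpp
#include "Poco/ConcurrentQueue.h"

// Pre-allocate blocks for at least 10000 elements, assuming at most
// 4 explicit (token-based) producers and 8 implicit (token-less) producers.
Poco::ConcurrentQueue<int> sizedQueue(10000, 4, 8);

// With this pre-sizing, try_enqueue() -- which never allocates -- should keep
// succeeding until the pre-allocated blocks are exhausted, provided the
// producer limits above are respected.
```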
- ConcurrentQueue(size_t minCapacity, size_t maxExplicitProducers, size_t maxImplicitProducers) - : producerListTail(nullptr), - producerCount(0), - initialBlockPoolIndex(0), - nextExplicitConsumerId(0), - globalExplicitConsumerOffset(0) - { - implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); - populate_initial_implicit_producer_hash(); - size_t blocks = (((minCapacity + BLOCK_SIZE - 1) / BLOCK_SIZE) - 1) * (maxExplicitProducers + 1) + 2 * (maxExplicitProducers + maxImplicitProducers); - populate_initial_block_list(blocks); - } - - // Note: The queue should not be accessed concurrently while it's - // being deleted. It's up to the user to synchronize this. - // This method is not thread safe. - ~ConcurrentQueue() - { - // Destroy producers - auto ptr = producerListTail.load(std::memory_order_relaxed); - while (ptr != nullptr) - { - auto next = ptr->next_prod(); - if (ptr->token != nullptr) - { - ptr->token->producer = nullptr; - } - destroy(ptr); - ptr = next; - } - - // Destroy implicit producer hash tables - POCO_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE != 0) - { - auto hash = implicitProducerHash.load(std::memory_order_relaxed); - while (hash != nullptr) - { - auto prev = hash->prev; - if (prev != nullptr) // The last hash is part of this object and was not allocated dynamically - { - for (size_t i = 0; i != hash->capacity; ++i) - { - hash->entries[i].~ImplicitProducerKVP(); - } - hash->~ImplicitProducerHash(); - (Traits::free)(hash); - } - hash = prev; - } - } - - // Destroy global free list - auto block = freeList.head_unsafe(); - while (block != nullptr) - { - auto next = block->freeListNext.load(std::memory_order_relaxed); - if (block->dynamicallyAllocated) - { - destroy(block); - } - block = next; - } - - // Destroy initial free list - destroy_array(initialBlockPool, initialBlockPoolSize); - } - - // Disable copying and copy assignment - ConcurrentQueue(ConcurrentQueue const&) = delete; - ConcurrentQueue& operator=(ConcurrentQueue const&) = delete; - - // Moving is supported, but note that it is *not* a thread-safe operation. - // Nobody can use the queue while it's being moved, and the memory effects - // of that move must be propagated to other threads before they can use it. - // Note: When a queue is moved, its tokens are still valid but can only be - // used with the destination queue (i.e. semantically they are moved along - // with the queue itself). 
- ConcurrentQueue(ConcurrentQueue&& other) noexcept - : producerListTail(other.producerListTail.load(std::memory_order_relaxed)), - producerCount(other.producerCount.load(std::memory_order_relaxed)), - initialBlockPoolIndex(other.initialBlockPoolIndex.load(std::memory_order_relaxed)), - initialBlockPool(other.initialBlockPool), - initialBlockPoolSize(other.initialBlockPoolSize), - freeList(std::move(other.freeList)), - nextExplicitConsumerId(other.nextExplicitConsumerId.load(std::memory_order_relaxed)), - globalExplicitConsumerOffset(other.globalExplicitConsumerOffset.load(std::memory_order_relaxed)) - { - // Move the other one into this, and leave the other one as an empty queue - implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); - populate_initial_implicit_producer_hash(); - swap_implicit_producer_hashes(other); - - other.producerListTail.store(nullptr, std::memory_order_relaxed); - other.producerCount.store(0, std::memory_order_relaxed); - other.nextExplicitConsumerId.store(0, std::memory_order_relaxed); - other.globalExplicitConsumerOffset.store(0, std::memory_order_relaxed); - - other.initialBlockPoolIndex.store(0, std::memory_order_relaxed); - other.initialBlockPoolSize = 0; - other.initialBlockPool = nullptr; - - reown_producers(); - } - - inline ConcurrentQueue& operator=(ConcurrentQueue&& other) noexcept - { - return swap_internal(other); - } - - // Swaps this queue's state with the other's. Not thread-safe. - // Swapping two queues does not invalidate their tokens, however - // the tokens that were created for one queue must be used with - // only the swapped queue (i.e. the tokens are tied to the - // queue's movable state, not the object itself). - inline void swap(ConcurrentQueue& other) noexcept - { - swap_internal(other); - } - -private: - ConcurrentQueue& swap_internal(ConcurrentQueue& other) - { - if (this == &other) - { - return *this; - } - - Details::swap_relaxed(producerListTail, other.producerListTail); - Details::swap_relaxed(producerCount, other.producerCount); - Details::swap_relaxed(initialBlockPoolIndex, other.initialBlockPoolIndex); - std::swap(initialBlockPool, other.initialBlockPool); - std::swap(initialBlockPoolSize, other.initialBlockPoolSize); - freeList.swap(other.freeList); - Details::swap_relaxed(nextExplicitConsumerId, other.nextExplicitConsumerId); - Details::swap_relaxed(globalExplicitConsumerOffset, other.globalExplicitConsumerOffset); - - swap_implicit_producer_hashes(other); - - reown_producers(); - other.reown_producers(); - - return *this; - } - -public: - // Enqueues a single item (by copying it). - // Allocates memory if required. Only fails if memory allocation fails (or implicit - // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, - // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). - // Thread-safe. - template - inline typename std::enable_if::type enqueue(U const& item) - { - return false; - } - template - inline typename std::enable_if::type enqueue(U const& item) - { - return inner_enqueue(item); - } - - // Enqueues a single item (by moving it, if possible). - // Allocates memory if required. Only fails if memory allocation fails (or implicit - // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, - // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). - // Thread-safe. 
- template - inline typename std::enable_if::type enqueue(U&& item) - { - return false; - } - template - inline typename std::enable_if::type enqueue(U&& item) - { - return inner_enqueue(std::move(item)); - } - - // Enqueues a single item (by copying it) using an explicit producer token. - // Allocates memory if required. Only fails if memory allocation fails (or - // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). - // Thread-safe. - inline bool enqueue(producer_token_t const& token, T const& item) - { - return inner_enqueue(token, item); - } - - // Enqueues a single item (by moving it, if possible) using an explicit producer token. - // Allocates memory if required. Only fails if memory allocation fails (or - // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). - // Thread-safe. - inline bool enqueue(producer_token_t const& token, T&& item) - { - return inner_enqueue(token, std::move(item)); - } - - // Enqueues several items. - // Allocates memory if required. Only fails if memory allocation fails (or - // implicit production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE - // is 0, or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). - // Note: Use std::make_move_iterator if the elements should be moved instead of copied. - // Thread-safe. - template - typename std::enable_if::type enqueue_bulk(It itemFirst, size_t count) - { - return false; - } - template - typename std::enable_if::type enqueue_bulk(It itemFirst, size_t count) - { - return inner_enqueue_bulk(itemFirst, count); - } - - // Enqueues several items using an explicit producer token. - // Allocates memory if required. Only fails if memory allocation fails - // (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). - // Note: Use std::make_move_iterator if the elements should be moved - // instead of copied. - // Thread-safe. - template - bool enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) - { - return inner_enqueue_bulk(token, itemFirst, count); - } - - // Enqueues a single item (by copying it). - // Does not allocate memory. Fails if not enough room to enqueue (or implicit - // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE - // is 0). - // Thread-safe. - template - inline typename std::enable_if::type try_enqueue(U const& item) - { - return false; - } - template - inline typename std::enable_if::type try_enqueue(U const& item) - { - return inner_enqueue(item); - } - - // Enqueues a single item (by moving it, if possible). - // Does not allocate memory (except for one-time implicit producer). - // Fails if not enough room to enqueue (or implicit production is - // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). - // Thread-safe. - template - inline typename std::enable_if::type try_enqueue(U&& item) - { - return false; - } - template - inline typename std::enable_if::type try_enqueue(U&& item) - { - return inner_enqueue(std::move(item)); - } - - // Enqueues a single item (by copying it) using an explicit producer token. - // Does not allocate memory. Fails if not enough room to enqueue. - // Thread-safe. - inline bool try_enqueue(producer_token_t const& token, T const& item) - { - return inner_enqueue(token, item); - } - - // Enqueues a single item (by moving it, if possible) using an explicit producer token. - // Does not allocate memory. Fails if not enough room to enqueue. - // Thread-safe. 
- inline bool try_enqueue(producer_token_t const& token, T&& item) - { - return inner_enqueue(token, std::move(item)); - } - - // Enqueues several items. - // Does not allocate memory (except for one-time implicit producer). - // Fails if not enough room to enqueue (or implicit production is - // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). - // Note: Use std::make_move_iterator if the elements should be moved - // instead of copied. - // Thread-safe. - template - typename std::enable_if::type try_enqueue_bulk(It itemFirst, size_t count) - { - return false; - } - template - typename std::enable_if::type try_enqueue_bulk(It itemFirst, size_t count) - { - return inner_enqueue_bulk(itemFirst, count); - } - - // Enqueues several items using an explicit producer token. - // Does not allocate memory. Fails if not enough room to enqueue. - // Note: Use std::make_move_iterator if the elements should be moved - // instead of copied. - // Thread-safe. - template - bool try_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) - { - return inner_enqueue_bulk(token, itemFirst, count); - } - - - - // Attempts to dequeue from the queue. - // Returns false if all producer streams appeared empty at the time they - // were checked (so, the queue is likely but not guaranteed to be empty). - // Never allocates. Thread-safe. - template - bool try_dequeue(U& item) - { - // Instead of simply trying each producer in turn (which could cause needless contention on the first - // producer), we score them heuristically. - size_t nonEmptyCount = 0; - ProducerBase* best = nullptr; - size_t bestSize = 0; - for (auto ptr = producerListTail.load(std::memory_order_acquire); nonEmptyCount < 3 && ptr != nullptr; ptr = ptr->next_prod()) - { - auto size = ptr->size_approx(); - if (size > 0) - { - if (size > bestSize) - { - bestSize = size; - best = ptr; - } - ++nonEmptyCount; - } - } - - // If there was at least one non-empty queue but it appears empty at the time - // we try to dequeue from it, we need to make sure every queue's been tried - if (nonEmptyCount > 0) - { - if ((Details::likely)(best->dequeue(item))) - { - return true; - } - for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) - { - if (ptr != best && ptr->dequeue(item)) { - return true; - } - } - } - return false; - } - - // Attempts to dequeue from the queue. - // Returns false if all producer streams appeared empty at the time they - // were checked (so, the queue is likely but not guaranteed to be empty). - // This differs from the try_dequeue(item) method in that this one does - // not attempt to reduce contention by interleaving the order that producer - // streams are dequeued from. So, using this method can reduce overall throughput - // under contention, but will give more predictable results in single-threaded - // consumer scenarios. This is mostly only useful for internal unit tests. - // Never allocates. Thread-safe. - template - bool try_dequeue_non_interleaved(U& item) - { - for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) - { - if (ptr->dequeue(item)) - { - return true; - } - } - return false; - } - - // Attempts to dequeue from the queue using an explicit consumer token. - // Returns false if all producer streams appeared empty at the time they - // were checked (so, the queue is likely but not guaranteed to be empty). - // Never allocates. Thread-safe. 
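Editor's note: a single-threaded model of the producer-selection heuristic used by try_dequeue() above: inspect up to three producers that appear non-empty, prefer the one with the largest approximate size, and fall back to trying every producer if the chosen one turns out empty. SimpleProducer and all names here are purely illustrative, not part of the queue.

#include <cstddef>
#include <deque>
#include <vector>

struct SimpleProducer
{
    std::deque<int> items;
    std::size_t size_approx() const { return items.size(); }
    bool dequeue(int& out)
    {
        if (items.empty()) return false;
        out = items.front();
        items.pop_front();
        return true;
    }
};

bool heuristic_dequeue(std::vector<SimpleProducer>& producers, int& out)
{
    std::size_t nonEmptyCount = 0, bestSize = 0;
    SimpleProducer* best = nullptr;
    for (auto& p : producers)
    {
        if (nonEmptyCount >= 3) break;
        std::size_t size = p.size_approx();
        if (size > 0)
        {
            if (size > bestSize) { bestSize = size; best = &p; }
            ++nonEmptyCount;
        }
    }
    if (nonEmptyCount == 0) return false;
    if (best->dequeue(out)) return true;
    for (auto& p : producers)                // fallback: make sure every producer was tried
        if (&p != best && p.dequeue(out)) return true;
    return false;
}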
- template - bool try_dequeue(consumer_token_t& token, U& item) - { - // The idea is roughly as follows: - // Every 256 items from one producer, make everyone rotate (increase the global offset) -> this means the highest efficiency consumer dictates the rotation speed of everyone else, more or less - // If you see that the global offset has changed, you must reset your consumption counter and move to your designated place - // If there's no items where you're supposed to be, keep moving until you find a producer with some items - // If the global offset has not changed but you've run out of items to consume, move over from your current position until you find an producer with something in it - - if (token.desiredProducer == nullptr || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) - { - if (!update_current_producer_after_rotation(token)) { - return false; - } - } - - // If there was at least one non-empty queue but it appears empty at the time - // we try to dequeue from it, we need to make sure every queue's been tried - if (static_cast(token.currentProducer)->dequeue(item)) - { - if (++token.itemsConsumedFromCurrent == EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) - { - globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed); - } - return true; - } - - auto tail = producerListTail.load(std::memory_order_acquire); - auto ptr = static_cast(token.currentProducer)->next_prod(); - if (ptr == nullptr) - { - ptr = tail; - } - while (ptr != static_cast(token.currentProducer)) - { - if (ptr->dequeue(item)) - { - token.currentProducer = ptr; - token.itemsConsumedFromCurrent = 1; - return true; - } - ptr = ptr->next_prod(); - if (ptr == nullptr) - { - ptr = tail; - } - } - return false; - } - - // Attempts to dequeue several elements from the queue. - // Returns the number of items actually dequeued. - // Returns 0 if all producer streams appeared empty at the time they - // were checked (so, the queue is likely but not guaranteed to be empty). - // Never allocates. Thread-safe. - template - size_t try_dequeue_bulk(It itemFirst, size_t max) - { - size_t count = 0; - for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) - { - count += ptr->dequeue_bulk(itemFirst, max - count); - if (count == max) - { - break; - } - } - return count; - } - - // Attempts to dequeue several elements from the queue using an explicit consumer token. - // Returns the number of items actually dequeued. - // Returns 0 if all producer streams appeared empty at the time they - // were checked (so, the queue is likely but not guaranteed to be empty). - // Never allocates. Thread-safe. 
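Editor's note: a usage sketch of the token-based consumer path explained above. Token construction from the owning queue is assumed to mirror the upstream moodycamel ProducerToken/ConsumerToken constructors.

#include "Poco/ConcurrentQueue.h"   // assumption: the header being removed in this diff

int main()
{
    using Queue = Poco::ConcurrentQueue<int>;
    Queue q;

    Queue::producer_token_t ptok(q);   // assumed constructors, mirroring upstream moodycamel
    Queue::consumer_token_t ctok(q);

    for (int i = 0; i < 1000; ++i)
        q.enqueue(ptok, i);            // explicit-producer enqueue

    int v = 0;
    while (q.try_dequeue(ctok, v))
    {
        // The token keeps draining its current producer and only rotates to the next
        // one after the per-producer quota described above, rather than re-scanning
        // all producers on every call.
    }
    return 0;
}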
- template - size_t try_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max) - { - if (token.desiredProducer == nullptr || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) - { - if (!update_current_producer_after_rotation(token)) - { - return 0; - } - } - - size_t count = static_cast(token.currentProducer)->dequeue_bulk(itemFirst, max); - if (count == max) - { - if ((token.itemsConsumedFromCurrent += static_cast(max)) >= EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) - { - globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed); - } - return max; - } - token.itemsConsumedFromCurrent += static_cast(count); - max -= count; - - auto tail = producerListTail.load(std::memory_order_acquire); - auto ptr = static_cast(token.currentProducer)->next_prod(); - if (ptr == nullptr) - { - ptr = tail; - } - while (ptr != static_cast(token.currentProducer)) - { - auto dequeued = ptr->dequeue_bulk(itemFirst, max); - count += dequeued; - if (dequeued != 0) - { - token.currentProducer = ptr; - token.itemsConsumedFromCurrent = static_cast(dequeued); - } - if (dequeued == max) - { - break; - } - max -= dequeued; - ptr = ptr->next_prod(); - if (ptr == nullptr) - { - ptr = tail; - } - } - return count; - } - - - - // Attempts to dequeue from a specific producer's inner queue. - // If you happen to know which producer you want to dequeue from, this - // is significantly faster than using the general-case try_dequeue methods. - // Returns false if the producer's queue appeared empty at the time it - // was checked (so, the queue is likely but not guaranteed to be empty). - // Never allocates. Thread-safe. - template - inline bool try_dequeue_from_producer(producer_token_t const& producer, U& item) - { - return static_cast(producer.producer)->dequeue(item); - } - - // Attempts to dequeue several elements from a specific producer's inner queue. - // Returns the number of items actually dequeued. - // If you happen to know which producer you want to dequeue from, this - // is significantly faster than using the general-case try_dequeue methods. - // Returns 0 if the producer's queue appeared empty at the time it - // was checked (so, the queue is likely but not guaranteed to be empty). - // Never allocates. Thread-safe. - template - inline size_t try_dequeue_bulk_from_producer(producer_token_t const& producer, It itemFirst, size_t max) - { - return static_cast(producer.producer)->dequeue_bulk(itemFirst, max); - } - - - // Returns an estimate of the total number of elements currently in the queue. This - // estimate is only accurate if the queue has completely stabilized before it is called - // (i.e. all enqueue and dequeue operations have completed and their memory effects are - // visible on the calling thread, and no further operations start while this method is - // being called). - // Thread-safe. - size_t size_approx() const - { - size_t size = 0; - for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) - { - size += ptr->size_approx(); - } - return size; - } - - - // Returns true if the underlying atomic variables used by - // the queue are lock-free (they should be on most platforms). - // Thread-safe. 
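Editor's note: when the producing token is known, try_dequeue_from_producer() above skips the producer-selection logic entirely. A short sketch, with the same assumptions about the removed class and its token constructors as in the examples above:

#include "Poco/ConcurrentQueue.h"   // assumption: the header being removed in this diff

int main()
{
    using Queue = Poco::ConcurrentQueue<int>;
    Queue q;
    Queue::producer_token_t ptok(q);   // assumed constructor

    q.enqueue(ptok, 7);

    int v = 0;
    bool got = q.try_dequeue_from_producer(ptok, v);   // reads straight from ptok's sub-queue
    return (got && v == 7) ? 0 : 1;
}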
- static constexpr bool is_lock_free() - { - return - Details::static_is_lock_free::value == 2 && - Details::static_is_lock_free::value == 2 && - Details::static_is_lock_free::value == 2 && - Details::static_is_lock_free::value == 2 && - Details::static_is_lock_free::value == 2 && - Details::static_is_lock_free::thread_id_numeric_size_t>::value == 2; - } - - -private: - friend struct ProducerToken; - friend struct ConsumerToken; - struct ExplicitProducer; - friend struct ExplicitProducer; - struct ImplicitProducer; - friend struct ImplicitProducer; - friend class ConcurrentQueueTests; - - enum AllocationMode { CanAlloc, CannotAlloc }; - - - /////////////////////////////// - // Queue methods - /////////////////////////////// - - template - inline bool inner_enqueue(producer_token_t const& token, U&& element) - { - return static_cast(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue(std::forward(element)); - } - - template - inline bool inner_enqueue(U&& element) - { - auto producer = get_or_add_implicit_producer(); - return producer == nullptr ? false : producer->ConcurrentQueue::ImplicitProducer::template enqueue(std::forward(element)); - } - - template - inline bool inner_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) - { - return static_cast(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue_bulk(itemFirst, count); - } - - template - inline bool inner_enqueue_bulk(It itemFirst, size_t count) - { - auto producer = get_or_add_implicit_producer(); - return producer == nullptr ? false : producer->ConcurrentQueue::ImplicitProducer::template enqueue_bulk(itemFirst, count); - } - - inline bool update_current_producer_after_rotation(consumer_token_t& token) - { - // Ah, there's been a rotation, figure out where we should be! - auto tail = producerListTail.load(std::memory_order_acquire); - if (token.desiredProducer == nullptr && tail == nullptr) - { - return false; - } - auto prodCount = producerCount.load(std::memory_order_relaxed); - auto globalOffset = globalExplicitConsumerOffset.load(std::memory_order_relaxed); - if ((Details::unlikely)(token.desiredProducer == nullptr)) - { - // Aha, first time we're dequeueing anything. - // Figure out our local position - // Note: offset is from start, not end, but we're traversing from end -- subtract from count first - std::uint32_t offset = prodCount - 1 - (token.initialOffset % prodCount); - token.desiredProducer = tail; - for (std::uint32_t i = 0; i != offset; ++i) - { - token.desiredProducer = static_cast(token.desiredProducer)->next_prod(); - if (token.desiredProducer == nullptr) - { - token.desiredProducer = tail; - } - } - } - - std::uint32_t delta = globalOffset - token.lastKnownGlobalOffset; - if (delta >= prodCount) - { - delta = delta % prodCount; - } - for (std::uint32_t i = 0; i != delta; ++i) - { - token.desiredProducer = static_cast(token.desiredProducer)->next_prod(); - if (token.desiredProducer == nullptr) - { - token.desiredProducer = tail; - } - } - - token.lastKnownGlobalOffset = globalOffset; - token.currentProducer = token.desiredProducer; - token.itemsConsumedFromCurrent = 0; - return true; - } - - - /////////////////////////// - // Free list - /////////////////////////// - - template - struct FreeListNode - { - FreeListNode() : freeListRefs(0), freeListNext(nullptr) { } - - std::atomic freeListRefs; - std::atomic freeListNext; - }; - - // A simple CAS-based lock-free free list. 
Not the fastest thing in the world under heavy contention, but - // simple and correct (assuming nodes are never freed until after the free list is destroyed), and fairly - // speedy under low contention. - template // N must inherit FreeListNode or have the same fields (and initialization of them) - struct FreeList - { - FreeList() = default; - FreeList(FreeList&& other) : freeListHead(other.freeListHead.load(std::memory_order_relaxed)) { other.freeListHead.store(nullptr, std::memory_order_relaxed); } - void swap(FreeList& other) { Details::swap_relaxed(freeListHead, other.freeListHead); } - - FreeList(FreeList const&) = delete; - FreeList& operator=(FreeList const&) = delete; - - inline void add(N* node) - { - // We know that the should-be-on-freelist bit is 0 at this point, so it's safe to - // set it using a fetch_add - if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST, std::memory_order_acq_rel) == 0) - { - // Oh look! We were the last ones referencing this node, and we know - // we want to add it to the free list, so let's do it! - add_knowing_refcount_is_zero(node); - } - } - - inline N* try_get() - { - auto head = freeListHead.load(std::memory_order_acquire); - while (head != nullptr) - { - auto prevHead = head; - auto refs = head->freeListRefs.load(std::memory_order_relaxed); - if ((refs & REFS_MASK) == 0 || !head->freeListRefs.compare_exchange_strong(refs, refs + 1, std::memory_order_acquire, std::memory_order_relaxed)) - { - head = freeListHead.load(std::memory_order_acquire); - continue; - } - - // Good, reference count has been incremented (it wasn't at zero), which means we can read the - // next and not worry about it changing between now and the time we do the CAS - auto next = head->freeListNext.load(std::memory_order_relaxed); - if (freeListHead.compare_exchange_strong(head, next, std::memory_order_acquire, std::memory_order_relaxed)) - { - // Yay, got the node. This means it was on the list, which means shouldBeOnFreeList must be false no - // matter the refcount (because nobody else knows it's been taken off yet, it can't have been put back on). - assert((head->freeListRefs.load(std::memory_order_relaxed) & SHOULD_BE_ON_FREELIST) == 0); - - // Decrease refcount twice, once for our ref, and once for the list's ref - head->freeListRefs.fetch_sub(2, std::memory_order_release); - return head; - } - - // OK, the head must have changed on us, but we still need to decrease the refcount we increased. - // Note that we don't need to release any memory effects, but we do need to ensure that the reference - // count decrement happens-after the CAS on the head. - refs = prevHead->freeListRefs.fetch_sub(1, std::memory_order_acq_rel); - if (refs == SHOULD_BE_ON_FREELIST + 1) - { - add_knowing_refcount_is_zero(prevHead); - } - } - - return nullptr; - } - - // Useful for traversing the list when there's no contention (e.g. to destroy remaining nodes) - N* head_unsafe() const { return freeListHead.load(std::memory_order_relaxed); } - - private: - inline void add_knowing_refcount_is_zero(N* node) - { - // Since the refcount is zero, and nobody can increase it once it's zero (except us, and we run - // only one copy of this method per node at a time, i.e. 
the single thread case), then we know - // we can safely change the next pointer of the node; however, once the refcount is back above - // zero, then other threads could increase it (happens under heavy contention, when the refcount - // goes to zero in between a load and a refcount increment of a node in try_get, then back up to - // something non-zero, then the refcount increment is done by the other thread) -- so, if the CAS - // to add the node to the actual list fails, decrease the refcount and leave the add operation to - // the next thread who puts the refcount back at zero (which could be us, hence the loop). - auto head = freeListHead.load(std::memory_order_relaxed); - while (true) - { - node->freeListNext.store(head, std::memory_order_relaxed); - node->freeListRefs.store(1, std::memory_order_release); - if (!freeListHead.compare_exchange_strong(head, node, std::memory_order_release, std::memory_order_relaxed)) - { - // Hmm, the add failed, but we can only try again when the refcount goes back to zero - if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST - 1, std::memory_order_release) == 1) - { - continue; - } - } - return; - } - } - - private: - // Implemented like a stack, but where node order doesn't matter (nodes are inserted out of order under contention) - std::atomic freeListHead{nullptr}; - - static const std::uint32_t REFS_MASK = 0x7FFFFFFF; - static const std::uint32_t SHOULD_BE_ON_FREELIST = 0x80000000; - }; - - - /////////////////////////// - // Block - /////////////////////////// - - enum InnerQueueContext { implicit_context = 0, explicit_context = 1 }; - - struct Block - { - Block() = default; - - template - inline typename std::enable_if<(context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD), bool>::type is_empty() const - { - - // Check flags - for (size_t i = 0; i < BLOCK_SIZE; ++i) - { - if (!emptyFlags[i].load(std::memory_order_relaxed)) - { - return false; - } - } - - // Aha, empty; make sure we have all other memory effects that happened before the empty flags were set - std::atomic_thread_fence(std::memory_order_acquire); - return true; - } - template - inline typename std::enable_if::type is_empty() const - { - // Check counter - if (elementsCompletelyDequeued.load(std::memory_order_relaxed) == BLOCK_SIZE) - { - std::atomic_thread_fence(std::memory_order_acquire); - return true; - } - assert(elementsCompletelyDequeued.load(std::memory_order_relaxed) <= BLOCK_SIZE); - return false; - } - - // Returns true if the block is now empty (does not apply in explicit context) - template - inline typename std::enable_if<(context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD), bool>::type set_empty(index_t i) - { - // Set flag - assert(!emptyFlags[BLOCK_SIZE - 1 - static_cast(i & static_cast(BLOCK_SIZE - 1))].load(std::memory_order_relaxed)); - emptyFlags[BLOCK_SIZE - 1 - static_cast(i & static_cast(BLOCK_SIZE - 1))].store(true, std::memory_order_release); - return false; - } - // Returns true if the block is now empty (does not apply in explicit context) - template - inline typename std::enable_if::type set_empty(POCO_UNUSED index_t i) - { - // Increment counter - auto prevVal = elementsCompletelyDequeued.fetch_add(1, std::memory_order_release); - assert(prevVal < BLOCK_SIZE); - return prevVal == BLOCK_SIZE - 1; - } - - // Sets multiple contiguous item statuses to 'empty' (assumes no wrapping and count > 0). - // Returns true if the block is now empty (does not apply in explicit context). 
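Editor's note: stepping back to the FreeList above, its core shape is a CAS-based stack on a single head pointer. The sketch below shows only that shape; it deliberately omits the freeListRefs reference-counting protocol (REFS_MASK / SHOULD_BE_ON_FREELIST) that the real free list uses to make try_get safe while nodes are concurrently re-added, so it is a teaching model, not a drop-in replacement.

#include <atomic>

struct Node
{
    Node* next = nullptr;
};

class NaiveFreeList
{
public:
    void add(Node* node)
    {
        Node* head = head_.load(std::memory_order_relaxed);
        do
        {
            node->next = head;
        } while (!head_.compare_exchange_weak(head, node,
                                              std::memory_order_release,
                                              std::memory_order_relaxed));
    }

    Node* try_get()
    {
        // Nodes are assumed never to be freed while the list is alive (as in the
        // real queue), so reading head->next is safe; ABA on re-added nodes is the
        // hazard the real refcount scheme exists to handle.
        Node* head = head_.load(std::memory_order_acquire);
        while (head != nullptr &&
               !head_.compare_exchange_weak(head, head->next,
                                            std::memory_order_acquire,
                                            std::memory_order_acquire))
        {
        }
        return head;
    }

private:
    std::atomic<Node*> head_{nullptr};
};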
- template - inline typename std::enable_if<(context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD), bool>::type set_many_empty(index_t i, size_t count) - { - // Set flags - std::atomic_thread_fence(std::memory_order_release); - i = BLOCK_SIZE - 1 - static_cast(i & static_cast(BLOCK_SIZE - 1)) - count + 1; - for (size_t j = 0; j != count; ++j) - { - assert(!emptyFlags[i + j].load(std::memory_order_relaxed)); - emptyFlags[i + j].store(true, std::memory_order_relaxed); - } - return false; - } - template - inline typename std::enable_if::type set_many_empty(POCO_UNUSED index_t i, size_t count) - { - // Increment counter - auto prevVal = elementsCompletelyDequeued.fetch_add(count, std::memory_order_release); - assert(prevVal + count <= BLOCK_SIZE); - return prevVal + count == BLOCK_SIZE; - } - - template - inline typename std::enable_if<(context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD), void>::type set_all_empty() - { - // Set all flags - for (size_t i = 0; i != BLOCK_SIZE; ++i) - { - emptyFlags[i].store(true, std::memory_order_relaxed); - } - } - template - inline typename std::enable_if::type set_all_empty() - { - // Reset counter - elementsCompletelyDequeued.store(BLOCK_SIZE, std::memory_order_relaxed); - } - - template - inline typename std::enable_if<(context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD), void>::type reset_empty() - { - // Reset flags - for (size_t i = 0; i != BLOCK_SIZE; ++i) - { - emptyFlags[i].store(false, std::memory_order_relaxed); - } - } - template - inline typename std::enable_if::type reset_empty() - { - // Reset counter - elementsCompletelyDequeued.store(0, std::memory_order_relaxed); - } - - inline T* operator[](index_t idx) noexcept - { - return static_cast(static_cast(elements)) + static_cast(idx & static_cast(BLOCK_SIZE - 1)); - } - inline T const* operator[](index_t idx) const noexcept - { - return static_cast(static_cast(elements)) + static_cast(idx & static_cast(BLOCK_SIZE - 1)); - } - - private: - static_assert(std::alignment_of::value <= sizeof(T), "The queue does not support types with an alignment greater than their size at this time"); - POCO_ALIGNED_TYPE_LIKE(char[sizeof(T) * BLOCK_SIZE], T) elements; - public: - Block* next{nullptr}; - std::atomic elementsCompletelyDequeued{0}; - std::atomic emptyFlags[BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD ? 
BLOCK_SIZE : 1]{}; - public: - std::atomic freeListRefs{0}; - std::atomic freeListNext{nullptr}; - bool dynamicallyAllocated{true}; // Perhaps a better name for this would be 'isNotPartOfInitialBlockPool' - }; - static_assert(std::alignment_of::value >= std::alignment_of::value, "Internal error: Blocks must be at least as aligned as the type they are wrapping"); - - /////////////////////////// - // Producer base - /////////////////////////// - - struct ProducerBase : public Details::ConcurrentQueueProducerTypelessBase - { - ProducerBase(ConcurrentQueue* parent_, bool isExplicit_) : - tailIndex(0), - headIndex(0), - dequeueOptimisticCount(0), - dequeueOvercommit(0), - tailBlock(nullptr), - isExplicit(isExplicit_), - parent(parent_) - { } - - virtual ~ProducerBase() = default; - - template - inline bool dequeue(U& element) - { - if (isExplicit) - { - return static_cast(this)->dequeue(element); - } - else - { - return static_cast(this)->dequeue(element); - } - } - - template - inline size_t dequeue_bulk(It& itemFirst, size_t max) - { - if (isExplicit) - { - return static_cast(this)->dequeue_bulk(itemFirst, max); - } - else - { - return static_cast(this)->dequeue_bulk(itemFirst, max); - } - } - - inline ProducerBase* next_prod() const { return static_cast(next); } - - inline size_t size_approx() const - { - auto tail = tailIndex.load(std::memory_order_relaxed); - auto head = headIndex.load(std::memory_order_relaxed); - return Details::circular_less_than(head, tail) ? static_cast(tail - head) : 0; - } - - inline index_t getTail() const { return tailIndex.load(std::memory_order_relaxed); } - protected: - std::atomic tailIndex; // Where to enqueue to next - std::atomic headIndex; // Where to dequeue from next - - std::atomic dequeueOptimisticCount; - std::atomic dequeueOvercommit; - - Block* tailBlock; - - public: - bool isExplicit; - ConcurrentQueue* parent; - }; - - - /////////////////////////// - // Explicit queue - /////////////////////////// - - struct ExplicitProducer : public ProducerBase - { - explicit ExplicitProducer(ConcurrentQueue* parent_) : - ProducerBase(parent_, true), - blockIndex(nullptr), - pr_blockIndexSlotsUsed(0), - pr_blockIndexSize(EXPLICIT_INITIAL_INDEX_SIZE >> 1), - pr_blockIndexFront(0), - pr_blockIndexEntries(nullptr), - pr_blockIndexRaw(nullptr) - { - size_t poolBasedIndexSize = Details::ceil_to_pow_2(parent_->initialBlockPoolSize) >> 1; - if (poolBasedIndexSize > pr_blockIndexSize) - { - pr_blockIndexSize = poolBasedIndexSize; - } - - new_block_index(0); // This creates an index with double the number of current entries, i.e. EXPLICIT_INITIAL_INDEX_SIZE - } - - ~ExplicitProducer() - { - // Destruct any elements not yet dequeued. - // Since we're in the destructor, we can assume all elements - // are either completely dequeued or completely not (no halfways). 
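Editor's note: two index tricks recur throughout the producers above: BLOCK_SIZE is a power of two, so masking yields an element's offset within its block (Block::operator[]) and the block's base index, and head/tail indices are free-running unsigned counters, so comparisons must be wrap-safe (as in ProducerBase::size_approx). A small self-contained sketch; names are illustrative, not the ones in Details.

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <type_traits>

constexpr std::uint64_t kBlockSize = 32;   // assumption: any power of two works

constexpr std::size_t offsetInBlock(std::uint64_t idx)
{
    return static_cast<std::size_t>(idx & (kBlockSize - 1));
}

constexpr std::uint64_t blockBase(std::uint64_t idx)
{
    return idx & ~(kBlockSize - 1);
}

// Wrap-safe "a is before b" for unsigned sequence numbers: true when a trails b by
// less than half the value range, even across wraparound.
template <typename T>
constexpr bool circularLessThan(T a, T b)
{
    static_assert(std::is_unsigned<T>::value, "indices are unsigned");
    return static_cast<T>(a - b) > (static_cast<T>(1) << (sizeof(T) * 8 - 1));
}

int main()
{
    assert(offsetInBlock(35) == 3 && blockBase(35) == 32);
    assert(circularLessThan<std::uint8_t>(250, 2));   // 250 has wrapped, still "before" 2
    assert(!circularLessThan<std::uint8_t>(10, 5));
    return 0;
}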
- if (this->tailBlock != nullptr) // Note this means there must be a block index too - { - // First find the block that's partially dequeued, if any - Block* halfDequeuedBlock = nullptr; - if ((this->headIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)) != 0) - { - // The head's not on a block boundary, meaning a block somewhere is partially dequeued - // (or the head block is the tail block and was fully dequeued, but the head/tail are still not on a boundary) - size_t i = (pr_blockIndexFront - pr_blockIndexSlotsUsed) & (pr_blockIndexSize - 1); - while (Details::circular_less_than(pr_blockIndexEntries[i].base + BLOCK_SIZE, this->headIndex.load(std::memory_order_relaxed))) - { - i = (i + 1) & (pr_blockIndexSize - 1); - } - assert(Details::circular_less_than(pr_blockIndexEntries[i].base, this->headIndex.load(std::memory_order_relaxed))); - halfDequeuedBlock = pr_blockIndexEntries[i].block; - } - - // Start at the head block (note the first line in the loop gives us the head from the tail on the first iteration) - auto block = this->tailBlock; - do - { - block = block->next; - if (block->ConcurrentQueue::Block::template is_empty()) - { - continue; - } - - size_t i = 0; // Offset into block - if (block == halfDequeuedBlock) - { - i = static_cast(this->headIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)); - } - - // Walk through all the items in the block; if this is the tail block, we need to stop when we reach the tail index - auto lastValidIndex = (this->tailIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)) == 0 ? BLOCK_SIZE : static_cast(this->tailIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)); - while (i != BLOCK_SIZE && (block != this->tailBlock || i != lastValidIndex)) - { - (*block)[i++]->~T(); - } - } - while (block != this->tailBlock); - } - - // Destroy all blocks that we own - if (this->tailBlock != nullptr) - { - auto block = this->tailBlock; - do - { - auto nextBlock = block->next; - this->parent->add_block_to_free_list(block); - block = nextBlock; - } - while (block != this->tailBlock); - } - - // Destroy the block indices - auto header = static_cast(pr_blockIndexRaw); - while (header != nullptr) - { - auto prev = static_cast(header->prev); - header->~BlockIndexHeader(); - (Traits::free)(header); - header = prev; - } - } - - template - inline bool enqueue(U&& element) - { - index_t currentTailIndex = this->tailIndex.load(std::memory_order_relaxed); - index_t newTailIndex = 1 + currentTailIndex; - if ((currentTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) - { - // We reached the end of a block, start a new one - auto startBlock = this->tailBlock; - auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed; - if (this->tailBlock != nullptr && this->tailBlock->next->ConcurrentQueue::Block::template is_empty()) - { - // We can re-use the block ahead of us, it's empty! - this->tailBlock = this->tailBlock->next; - this->tailBlock->ConcurrentQueue::Block::template reset_empty(); - - // We'll put the block on the block index (guaranteed to be room since we're conceptually removing the - // last block from it first -- except instead of removing then adding, we can just overwrite). - // Note that there must be a valid block index here, since even if allocation failed in the ctor, - // it would have been re-attempted when adding the first block to the queue; since there is such - // a block, a block index must have been successfully allocated. 
- } - else - { - // Whatever head value we see here is >= the last value we saw here (relatively), - // and <= its current value. Since we have the most recent tail, the head must be - // <= to it. - auto head = this->headIndex.load(std::memory_order_relaxed); - assert(!Details::circular_less_than(currentTailIndex, head)); - if (!Details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) - || (MAX_SUBQUEUE_SIZE != Details::const_numeric_max::value - && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) - { - // We can't enqueue in another block because there's not enough leeway -- the - // tail could surpass the head by the time the block fills up! (Or we'll exceed - // the size limit, if the second part of the condition was true.) - return false; - } - // We're going to need a new block; check that the block index has room - if (pr_blockIndexRaw == nullptr || pr_blockIndexSlotsUsed == pr_blockIndexSize) - { - // Hmm, the circular block index is already full -- we'll need - // to allocate a new index. Note pr_blockIndexRaw can only be nullptr if - // the initial allocation failed in the constructor. - - POCO_CONSTEXPR_IF (allocMode == CannotAlloc) - { - return false; - } - else if (!new_block_index(pr_blockIndexSlotsUsed)) - { - return false; - } - } - - // Insert a new block in the circular linked list - auto newBlock = this->parent->ConcurrentQueue::template requisition_block(); - if (newBlock == nullptr) - { - return false; - } - newBlock->ConcurrentQueue::Block::template reset_empty(); - if (this->tailBlock == nullptr) - { - newBlock->next = newBlock; - } - else - { - newBlock->next = this->tailBlock->next; - this->tailBlock->next = newBlock; - } - this->tailBlock = newBlock; - ++pr_blockIndexSlotsUsed; - } - - POCO_CONSTEXPR_IF (!noexcept(new (static_cast(nullptr)) T(std::forward(element)))) - { - // The constructor may throw. We want the element not to appear in the queue in - // that case (without corrupting the queue): - try - { - new ((*this->tailBlock)[currentTailIndex]) T(std::forward(element)); - } - catch (...) - { - // Revert change to the current block, but leave the new block available - // for next time - pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; - this->tailBlock = startBlock == nullptr ? 
this->tailBlock : startBlock; - throw; - } - } - else - { - (void)startBlock; - (void)originalBlockIndexSlotsUsed; - } - - // Add block to block index - auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront]; - entry.base = currentTailIndex; - entry.block = this->tailBlock; - blockIndex.load(std::memory_order_relaxed)->front.store(pr_blockIndexFront, std::memory_order_release); - pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); - - POCO_CONSTEXPR_IF (!noexcept(new (static_cast(nullptr)) T(std::forward(element)))) - { - this->tailIndex.store(newTailIndex, std::memory_order_release); - return true; - } - } - - // Enqueue - new ((*this->tailBlock)[currentTailIndex]) T(std::forward(element)); - - this->tailIndex.store(newTailIndex, std::memory_order_release); - return true; - } - - template - bool dequeue(U& element) - { - auto tail = this->tailIndex.load(std::memory_order_relaxed); - auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); - if (Details::circular_less_than(this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit, tail)) - { - // Might be something to dequeue, let's give it a try - - // Note that this if is purely for performance purposes in the common case when the queue is - // empty and the values are eventually consistent -- we may enter here spuriously. - - // Note that whatever the values of overcommit and tail are, they are not going to change (unless we - // change them) and must be the same value at this point (inside the if) as when the if condition was - // evaluated. - - // We insert an acquire fence here to synchronize-with the release upon incrementing dequeueOvercommit below. - // This ensures that whatever the value we got loaded into overcommit, the load of dequeueOptisticCount in - // the fetch_add below will result in a value at least as recent as that (and therefore at least as large). - // Note that I believe a compiler (signal) fence here would be sufficient due to the nature of fetch_add (all - // read-modify-write operations are guaranteed to work on the latest value in the modification order), but - // unfortunately that can't be shown to be correct using only the C++11 standard. - // See http://stackoverflow.com/questions/18223161/what-are-the-c11-memory-ordering-guarantees-in-this-corner-case - std::atomic_thread_fence(std::memory_order_acquire); - - // Increment optimistic counter, then check if it went over the boundary - auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed); - - // Note that since dequeueOvercommit must be <= dequeueOptimisticCount (because dequeueOvercommit is only ever - // incremented after dequeueOptimisticCount -- this is enforced in the `else` block below), and since we now - // have a version of dequeueOptimisticCount that is at least as recent as overcommit (due to the release upon - // incrementing dequeueOvercommit and the acquire above that synchronizes with it), overcommit <= myDequeueCount. - // However, we can't assert this since both dequeueOptimisticCount and dequeueOvercommit may (independently) - // overflow; in such a case, though, the logic still holds since the difference between the two is maintained. - - // Note that we reload tail here in case it changed; it will be the same value as before or greater, since - // this load is sequenced after (happens after) the earlier load above. 
This is supported by read-read - // coherency (as defined in the standard), explained here: http://en.cppreference.com/w/cpp/atomic/memory_order - tail = this->tailIndex.load(std::memory_order_acquire); - if ((Details::likely)(Details::circular_less_than(myDequeueCount - overcommit, tail))) - { - // Guaranteed to be at least one element to dequeue! - - // Get the index. Note that since there's guaranteed to be at least one element, this - // will never exceed tail. We need to do an acquire-release fence here since it's possible - // that whatever condition got us to this point was for an earlier enqueued element (that - // we already see the memory effects for), but that by the time we increment somebody else - // has incremented it, and we need to see the memory effects for *that* element, which is - // in such a case is necessarily visible on the thread that incremented it in the first - // place with the more current condition (they must have acquired a tail that is at least - // as recent). - auto index = this->headIndex.fetch_add(1, std::memory_order_acq_rel); - - - // Determine which block the element is in - - auto localBlockIndex = blockIndex.load(std::memory_order_acquire); - auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire); - - // We need to be careful here about subtracting and dividing because of index wrap-around. - // When an index wraps, we need to preserve the sign of the offset when dividing it by the - // block size (in order to get a correct signed block count offset in all cases): - auto headBase = localBlockIndex->entries[localBlockIndexHead].base; - auto blockBaseIndex = index & ~static_cast(BLOCK_SIZE - 1); - auto offset = static_cast(static_cast::type>(blockBaseIndex - headBase) / static_cast::type>(BLOCK_SIZE)); - auto block = localBlockIndex->entries[(localBlockIndexHead + offset) & (localBlockIndex->size - 1)].block; - - // Dequeue - auto& el = *((*block)[index]); - if (!noexcept(element = std::move(el))) - { - // Make sure the element is still fully dequeued and destroyed even if the assignment - // throws - struct Guard - { - Block* block; - index_t index; - - ~Guard() - { - (*block)[index]->~T(); - block->ConcurrentQueue::Block::template set_empty(index); - } - } guard = { block, index }; - - element = std::move(el); // NOLINT - } - else - { - element = std::move(el); // NOLINT - el.~T(); // NOLINT - block->ConcurrentQueue::Block::template set_empty(index); - } - - return true; - } - else - { - // Wasn't anything to dequeue after all; make the effective dequeue count eventually consistent - this->dequeueOvercommit.fetch_add(1, std::memory_order_release); // Release so that the fetch_add on dequeueOptimisticCount is guaranteed to happen before this write - } - } - - return false; - } - - template - bool POCO_NO_TSAN enqueue_bulk(It itemFirst, size_t count) - { - // First, we need to make sure we have enough room to enqueue all of the elements; - // this means pre-allocating blocks and putting them in the block index (but only if - // all the allocations succeeded). 
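Editor's note: a stripped-down model of the optimistic dequeue accounting explained in the comments above. "tail" counts items ever enqueued; optimistic is bumped before we know whether an item is really available, and overcommit is bumped afterwards to cancel claims that found nothing, so (optimistic - overcommit) tracks the true number of successful claims. The model uses plain 64-bit counters and ordinary comparison instead of the circular comparison the real code needs; names are illustrative.

#include <atomic>
#include <cstdint>

struct DequeueCounters
{
    std::atomic<std::uint64_t> tail{0};
    std::atomic<std::uint64_t> optimistic{0};
    std::atomic<std::uint64_t> overcommit{0};

    void enqueued() { tail.fetch_add(1, std::memory_order_release); }

    // Returns true if the caller is guaranteed to find an element.
    bool try_claim()
    {
        std::uint64_t over = overcommit.load(std::memory_order_relaxed);
        // Acquire fence: the optimistic count we are about to bump is at least as
        // recent as the overcommit value we just read.
        std::atomic_thread_fence(std::memory_order_acquire);
        std::uint64_t my = optimistic.fetch_add(1, std::memory_order_relaxed);

        if (my - over < tail.load(std::memory_order_acquire))
            return true;                                        // a slot is guaranteed to exist

        overcommit.fetch_add(1, std::memory_order_release);     // nothing there: undo the claim
        return false;
    }
};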
- index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed); - auto startBlock = this->tailBlock; - auto originalBlockIndexFront = pr_blockIndexFront; - auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed; - - Block* firstAllocatedBlock = nullptr; - - // Figure out how many blocks we'll need to allocate, and do so - size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast(BLOCK_SIZE - 1)) - ((startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1)); - index_t currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); - if (blockBaseDiff > 0) - { - // Allocate as many blocks as possible from ahead - while (blockBaseDiff > 0 && this->tailBlock != nullptr && this->tailBlock->next != firstAllocatedBlock && this->tailBlock->next->ConcurrentQueue::Block::template is_empty()) - { - blockBaseDiff -= static_cast(BLOCK_SIZE); - currentTailIndex += static_cast(BLOCK_SIZE); - - this->tailBlock = this->tailBlock->next; - firstAllocatedBlock = firstAllocatedBlock == nullptr ? this->tailBlock : firstAllocatedBlock; - - auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront]; - entry.base = currentTailIndex; - entry.block = this->tailBlock; - pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); - } - - // Now allocate as many blocks as necessary from the block pool - while (blockBaseDiff > 0) - { - blockBaseDiff -= static_cast(BLOCK_SIZE); - currentTailIndex += static_cast(BLOCK_SIZE); - - auto head = this->headIndex.load(std::memory_order_relaxed); - assert(!Details::circular_less_than(currentTailIndex, head)); - bool full = !Details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != Details::const_numeric_max::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head)); - if (pr_blockIndexRaw == nullptr || pr_blockIndexSlotsUsed == pr_blockIndexSize || full) - { - POCO_CONSTEXPR_IF (allocMode == CannotAlloc) - { - // Failed to allocate, undo changes (but keep injected blocks) - pr_blockIndexFront = originalBlockIndexFront; - pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; - this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock; - return false; - } - else if (full || !new_block_index(originalBlockIndexSlotsUsed)) - { - // Failed to allocate, undo changes (but keep injected blocks) - pr_blockIndexFront = originalBlockIndexFront; - pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; - this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock; - return false; - } - - // pr_blockIndexFront is updated inside new_block_index, so we need to - // update our fallback value too (since we keep the new index even if we - // later fail) - originalBlockIndexFront = originalBlockIndexSlotsUsed; - } - - // Insert a new block in the circular linked list - auto newBlock = this->parent->ConcurrentQueue::template requisition_block(); - if (newBlock == nullptr) - { - pr_blockIndexFront = originalBlockIndexFront; - pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; - this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock; - return false; - } - newBlock->ConcurrentQueue::Block::template set_all_empty(); - if (this->tailBlock == nullptr) - { - newBlock->next = newBlock; - } - else - { - newBlock->next = this->tailBlock->next; - this->tailBlock->next = newBlock; - } - this->tailBlock = newBlock; - firstAllocatedBlock = firstAllocatedBlock == nullptr ? 
this->tailBlock : firstAllocatedBlock; - - ++pr_blockIndexSlotsUsed; - - auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront]; - entry.base = currentTailIndex; - entry.block = this->tailBlock; - pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); - } - - // Excellent, all allocations succeeded. Reset each block's emptiness before we fill them up, and - // publish the new block index front - auto block = firstAllocatedBlock; - while (true) - { - block->ConcurrentQueue::Block::template reset_empty(); - if (block == this->tailBlock) { - break; - } - block = block->next; - } - - POCO_CONSTEXPR_IF (noexcept(new (static_cast(nullptr)) T(Details::deref_noexcept(itemFirst)))) - { - blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release); - } - } - - // Enqueue, one block at a time - index_t newTailIndex = startTailIndex + static_cast(count); - currentTailIndex = startTailIndex; - auto endBlock = this->tailBlock; - this->tailBlock = startBlock; - assert((startTailIndex & static_cast(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr || count == 0); - if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0 && firstAllocatedBlock != nullptr) - { - this->tailBlock = firstAllocatedBlock; - } - while (true) - { - index_t stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); - if (Details::circular_less_than(newTailIndex, stopIndex)) - { - stopIndex = newTailIndex; - } - POCO_CONSTEXPR_IF (noexcept(new (static_cast(nullptr)) T(Details::deref_noexcept(itemFirst)))) - { - while (currentTailIndex != stopIndex) { - new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++); - } - } - else - { - try - { - while (currentTailIndex != stopIndex) - { - // Must use copy constructor even if move constructor is available - // because we may have to revert if there's an exception. - // Sorry about the horrible templated next line, but it was the only way - // to disable moving *at compile time*, which is important because a type - // may only define a (noexcept) move constructor, and so calls to the - // cctor will not compile, even if they are in an if branch that will never - // be executed - new ((*this->tailBlock)[currentTailIndex]) T(Details::nomove_if(nullptr)) T(Details::deref_noexcept(itemFirst)))>::eval(*itemFirst)); - ++currentTailIndex; - ++itemFirst; - } - } - catch (...) { - // Oh dear, an exception's been thrown -- destroy the elements that - // were enqueued so far and revert the entire bulk operation (we'll keep - // any allocated blocks in our linked list for later, though). - auto constructedStopIndex = currentTailIndex; - auto lastBlockEnqueued = this->tailBlock; - - pr_blockIndexFront = originalBlockIndexFront; - pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; - this->tailBlock = startBlock == nullptr ? 
firstAllocatedBlock : startBlock; - - if (!Details::is_trivially_destructible::value) - { - auto block = startBlock; - if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) - { - block = firstAllocatedBlock; - } - currentTailIndex = startTailIndex; - while (true) - { - stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); - if (Details::circular_less_than(constructedStopIndex, stopIndex)) - { - stopIndex = constructedStopIndex; - } - while (currentTailIndex != stopIndex) - { - (*block)[currentTailIndex++]->~T(); - } - if (block == lastBlockEnqueued) - { - break; - } - block = block->next; - } - } - throw; - } - } - - if (this->tailBlock == endBlock) - { - assert(currentTailIndex == newTailIndex); - break; - } - this->tailBlock = this->tailBlock->next; - } - - POCO_CONSTEXPR_IF (!noexcept(new (static_cast(nullptr)) T(Details::deref_noexcept(itemFirst)))) - { - if (firstAllocatedBlock != nullptr) - blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release); - } - - this->tailIndex.store(newTailIndex, std::memory_order_release); - return true; - } - - template - size_t dequeue_bulk(It& itemFirst, size_t max) - { - auto tail = this->tailIndex.load(std::memory_order_relaxed); - auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); - auto desiredCount = static_cast(tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit)); - if (Details::circular_less_than(0, desiredCount)) - { - desiredCount = desiredCount < max ? desiredCount : max; - std::atomic_thread_fence(std::memory_order_acquire); - - auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed); - - tail = this->tailIndex.load(std::memory_order_acquire); - auto actualCount = static_cast(tail - (myDequeueCount - overcommit)); - if (Details::circular_less_than(0, actualCount)) { - actualCount = desiredCount < actualCount ? desiredCount : actualCount; - if (actualCount < desiredCount) - { - this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release); - } - - // Get the first index. Note that since there's guaranteed to be at least actualCount elements, this - // will never exceed tail. - auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel); - - // Determine which block the first element is in - auto localBlockIndex = blockIndex.load(std::memory_order_acquire); - auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire); - - auto headBase = localBlockIndex->entries[localBlockIndexHead].base; - auto firstBlockBaseIndex = firstIndex & ~static_cast(BLOCK_SIZE - 1); - auto offset = static_cast(static_cast::type>(firstBlockBaseIndex - headBase) / static_cast::type>(BLOCK_SIZE)); - auto indexIndex = (localBlockIndexHead + offset) & (localBlockIndex->size - 1); - - // Iterate the blocks and dequeue - auto index = firstIndex; - do - { - auto firstIndexInBlock = index; - index_t endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); - endIndex = Details::circular_less_than(firstIndex + static_cast(actualCount), endIndex) ? 
firstIndex + static_cast(actualCount) : endIndex; - auto block = localBlockIndex->entries[indexIndex].block; - if (noexcept(Details::deref_noexcept(itemFirst) = std::move((*(*block)[index])))) - { - while (index != endIndex) - { - auto& el = *((*block)[index]); - *itemFirst++ = std::move(el); - el.~T(); - ++index; - } - } - else - { - try - { - while (index != endIndex) - { - auto& el = *((*block)[index]); - *itemFirst = std::move(el); - ++itemFirst; - el.~T(); - ++index; - } - } - catch (...) - { - // It's too late to revert the dequeue, but we can make sure that all - // the dequeued objects are properly destroyed and the block index - // (and empty count) are properly updated before we propagate the exception - do - { - block = localBlockIndex->entries[indexIndex].block; - while (index != endIndex) - { - (*block)[index++]->~T(); - } - block->ConcurrentQueue::Block::template set_many_empty(firstIndexInBlock, static_cast(endIndex - firstIndexInBlock)); - indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1); - - firstIndexInBlock = index; - endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); - endIndex = Details::circular_less_than(firstIndex + static_cast(actualCount), endIndex) ? firstIndex + static_cast(actualCount) : endIndex; - } - while (index != firstIndex + actualCount); - - throw; - } - } - block->ConcurrentQueue::Block::template set_many_empty(firstIndexInBlock, static_cast(endIndex - firstIndexInBlock)); - indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1); - } - while (index != firstIndex + actualCount); - - return actualCount; - } - else - { - // Wasn't anything to dequeue after all; make the effective dequeue count eventually consistent - this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release); - } - } - - return 0; - } - - private: - struct BlockIndexEntry - { - index_t base; - Block* block; - }; - - struct BlockIndexHeader - { - size_t size; - std::atomic front; // Current slot (not next, like pr_blockIndexFront) - BlockIndexEntry* entries; - void* prev; - }; - - - bool new_block_index(size_t numberOfFilledSlotsToExpose) - { - auto prevBlockSizeMask = pr_blockIndexSize - 1; - - // Create the new block - pr_blockIndexSize <<= 1; - auto newRawPtr = static_cast((Traits::malloc)(sizeof(BlockIndexHeader) + std::alignment_of::value - 1 + sizeof(BlockIndexEntry) * pr_blockIndexSize)); - if (newRawPtr == nullptr) - { - pr_blockIndexSize >>= 1; // Reset to allow graceful retry - return false; - } - - auto newBlockIndexEntries = reinterpret_cast(Details::align_for(newRawPtr + sizeof(BlockIndexHeader))); - - // Copy in all the old indices, if any - size_t j = 0; - if (pr_blockIndexSlotsUsed != 0) - { - auto i = (pr_blockIndexFront - pr_blockIndexSlotsUsed) & prevBlockSizeMask; - do - { - newBlockIndexEntries[j++] = pr_blockIndexEntries[i]; - i = (i + 1) & prevBlockSizeMask; - } - while (i != pr_blockIndexFront); - } - - // Update everything - auto header = new (newRawPtr) BlockIndexHeader; - header->size = pr_blockIndexSize; - header->front.store(numberOfFilledSlotsToExpose - 1, std::memory_order_relaxed); - header->entries = newBlockIndexEntries; - header->prev = pr_blockIndexRaw; // we link the new block to the old one so we can free it later - - pr_blockIndexFront = j; - pr_blockIndexEntries = newBlockIndexEntries; - pr_blockIndexRaw = newRawPtr; - blockIndex.store(header, std::memory_order_release); - - return true; - } - - private: - std::atomic blockIndex{nullptr}; - - // To be used by producer only -- consumer must use 
the ones in referenced by blockIndex - size_t pr_blockIndexSlotsUsed{0}; - size_t pr_blockIndexSize{0}; - size_t pr_blockIndexFront{0}; // Next slot (not current) - BlockIndexEntry* pr_blockIndexEntries{nullptr}; - void* pr_blockIndexRaw{nullptr}; - }; - - - ////////////////////////////////// - // Implicit queue - ////////////////////////////////// - - struct ImplicitProducer : public ProducerBase - { - ImplicitProducer(ConcurrentQueue* parent_) : - ProducerBase(parent_, false), - nextBlockIndexCapacity(IMPLICIT_INITIAL_INDEX_SIZE), - blockIndex(nullptr) - { - new_block_index(); - } - - ~ImplicitProducer() - { - // Note that since we're in the destructor we can assume that all enqueue/dequeue operations - // completed already; this means that all undequeued elements are placed contiguously across - // contiguous blocks, and that only the first and last remaining blocks can be only partially - // empty (all other remaining blocks must be completely full). - - // Unregister ourselves for thread termination notification - if (!this->inactive.load(std::memory_order_relaxed)) - { - Details::ThreadExitNotifier::unsubscribe(&threadExitListener); - } - // Destroy all remaining elements! - auto tail = this->tailIndex.load(std::memory_order_relaxed); - auto index = this->headIndex.load(std::memory_order_relaxed); - Block* block = nullptr; - assert(index == tail || Details::circular_less_than(index, tail)); - bool forceFreeLastBlock = index != tail; // If we enter the loop, then the last (tail) block will not be freed - while (index != tail) - { - if ((index & static_cast(BLOCK_SIZE - 1)) == 0 || block == nullptr) - { - if (block != nullptr) - { - // Free the old block - this->parent->add_block_to_free_list(block); - } - - block = get_block_index_entry_for_index(index)->value.load(std::memory_order_relaxed); - } - - ((*block)[index])->~T(); - ++index; - } - // Even if the queue is empty, there's still one block that's not on the free list - // (unless the head index reached the end of it, in which case the tail will be poised - // to create a new block). 
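Editor's note: looking back at ExplicitProducer::new_block_index() above, the block index grows by doubling; occupied slots are copied across in logical order and the old storage is kept on a prev chain so it can be freed only when the producer is destroyed. A single-threaded sketch of that growth strategy (sizes assumed to be powers of two; all names illustrative):

#include <cstddef>
#include <memory>
#include <vector>

struct Generation
{
    std::vector<std::size_t> slots;    // stands in for BlockIndexEntry[]
    std::unique_ptr<Generation> prev;  // older generations, kept until shutdown
};

struct GrowableCircularIndex
{
    std::unique_ptr<Generation> current;
    std::size_t front = 0;   // next slot to write
    std::size_t used = 0;    // slots currently occupied

    explicit GrowableCircularIndex(std::size_t initialSize)   // initialSize: power of two
        : current(new Generation{std::vector<std::size_t>(initialSize), nullptr}) {}

    void grow()
    {
        const std::size_t oldSize = current->slots.size();
        std::unique_ptr<Generation> bigger(
            new Generation{std::vector<std::size_t>(oldSize * 2), nullptr});

        // Copy the occupied slots in order, like the "Copy in all the old indices" loop above.
        std::size_t j = 0;
        for (std::size_t i = (front - used) & (oldSize - 1); j < used; i = (i + 1) & (oldSize - 1))
            bigger->slots[j++] = current->slots[i];

        bigger->prev = std::move(current);   // keep the old generation alive
        current = std::move(bigger);
        front = j;
    }
};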
- if (this->tailBlock != nullptr && (forceFreeLastBlock || (tail & static_cast(BLOCK_SIZE - 1)) != 0)) - { - this->parent->add_block_to_free_list(this->tailBlock); - } - - // Destroy block index - auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); - if (localBlockIndex != nullptr) - { - for (size_t i = 0; i != localBlockIndex->capacity; ++i) - { - localBlockIndex->index[i]->~BlockIndexEntry(); - } - do - { - auto prev = localBlockIndex->prev; - localBlockIndex->~BlockIndexHeader(); - (Traits::free)(localBlockIndex); - localBlockIndex = prev; - } - while (localBlockIndex != nullptr); - } - } - - template - inline bool enqueue(U&& element) - { - index_t currentTailIndex = this->tailIndex.load(std::memory_order_relaxed); - index_t newTailIndex = 1 + currentTailIndex; - if ((currentTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) - { - // We reached the end of a block, start a new one - auto head = this->headIndex.load(std::memory_order_relaxed); - assert(!Details::circular_less_than(currentTailIndex, head)); - if (!Details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != Details::const_numeric_max::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) - { - return false; - } - // Find out where we'll be inserting this block in the block index - BlockIndexEntry* idxEntry = nullptr; - if (!insert_block_index_entry(idxEntry, currentTailIndex)) - { - return false; - } - - // Get ahold of a new block - auto newBlock = this->parent->ConcurrentQueue::template requisition_block(); - if (newBlock == nullptr) - { - rewind_block_index_tail(); - idxEntry->value.store(nullptr, std::memory_order_relaxed); - return false; - } - newBlock->ConcurrentQueue::Block::template reset_empty(); - - POCO_CONSTEXPR_IF (!noexcept(new (static_cast(nullptr)) T(std::forward(element)))) - { - // May throw, try to insert now before we publish the fact that we have this new block - try - { - new ((*newBlock)[currentTailIndex]) T(std::forward(element)); - } - catch (...) 
- { - rewind_block_index_tail(); - idxEntry->value.store(nullptr, std::memory_order_relaxed); - this->parent->add_block_to_free_list(newBlock); - throw; - } - } - - // Insert the new block into the index - idxEntry->value.store(newBlock, std::memory_order_relaxed); - - this->tailBlock = newBlock; - - POCO_CONSTEXPR_IF (!noexcept(new (static_cast(nullptr)) T(std::forward(element)))) - { - this->tailIndex.store(newTailIndex, std::memory_order_release); - return true; - } - } - - // Enqueue - new ((*this->tailBlock)[currentTailIndex]) T(std::forward(element)); - - this->tailIndex.store(newTailIndex, std::memory_order_release); - return true; - } - - template - bool dequeue(U& element) - { - // See ExplicitProducer::dequeue for rationale and explanation - index_t tail = this->tailIndex.load(std::memory_order_relaxed); - index_t overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); - if (Details::circular_less_than(this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit, tail)) - { - std::atomic_thread_fence(std::memory_order_acquire); - - index_t myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed); - tail = this->tailIndex.load(std::memory_order_acquire); - if ((Details::likely)(Details::circular_less_than(myDequeueCount - overcommit, tail))) - { - index_t index = this->headIndex.fetch_add(1, std::memory_order_acq_rel); - - // Determine which block the element is in - auto entry = get_block_index_entry_for_index(index); - - // Dequeue - auto block = entry->value.load(std::memory_order_relaxed); - auto& el = *((*block)[index]); - - if (!noexcept(element = std::move(el))) - { - struct Guard - { - Block* block; - index_t index; - BlockIndexEntry* entry; - ConcurrentQueue* parent; - - ~Guard() - { - (*block)[index]->~T(); - if (block->ConcurrentQueue::Block::template set_empty(index)) - { - entry->value.store(nullptr, std::memory_order_relaxed); - parent->add_block_to_free_list(block); - } - } - } guard = { block, index, entry, this->parent }; - - element = std::move(el); // NOLINT - } - else - { - element = std::move(el); // NOLINT - el.~T(); // NOLINT - - if (block->ConcurrentQueue::Block::template set_empty(index)) - { - { - // Add the block back into the global free pool (and remove from block index) - entry->value.store(nullptr, std::memory_order_relaxed); - } - this->parent->add_block_to_free_list(block); // releases the above store - } - } - - return true; - } - else - { - this->dequeueOvercommit.fetch_add(1, std::memory_order_release); - } - } - - return false; - } - -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable: 4706) // assignment within conditional expression -#endif - template - bool enqueue_bulk(It itemFirst, size_t count) - { - // First, we need to make sure we have enough room to enqueue all of the elements; - // this means pre-allocating blocks and putting them in the block index (but only if - // all the allocations succeeded). - - // Note that the tailBlock we start off with may not be owned by us any more; - // this happens if it was filled up exactly to the top (setting tailIndex to - // the first index of the next block which is not yet allocated), then dequeued - // completely (putting it on the free list) before we enqueue again. 
- - index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed); - auto startBlock = this->tailBlock; - Block* firstAllocatedBlock = nullptr; - auto endBlock = this->tailBlock; - - // Figure out how many blocks we'll need to allocate, and do so - size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast(BLOCK_SIZE - 1)) - ((startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1)); - index_t currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); - if (blockBaseDiff > 0) - { - do - { - blockBaseDiff -= static_cast(BLOCK_SIZE); - currentTailIndex += static_cast(BLOCK_SIZE); - - // Find out where we'll be inserting this block in the block index - BlockIndexEntry* idxEntry = nullptr; // initialization here unnecessary but compiler can't always tell - Block* newBlock = nullptr; - bool indexInserted = false; - auto head = this->headIndex.load(std::memory_order_relaxed); - assert(!Details::circular_less_than(currentTailIndex, head)); - bool full = !Details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != Details::const_numeric_max::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head)); - - if (full || !(indexInserted = insert_block_index_entry(idxEntry, currentTailIndex)) || (newBlock = this->parent->ConcurrentQueue::template requisition_block()) == nullptr) - { - // Index allocation or block allocation failed; revert any other allocations - // and index insertions done so far for this operation - if (indexInserted) - { - rewind_block_index_tail(); - idxEntry->value.store(nullptr, std::memory_order_relaxed); - } - currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); - for (auto block = firstAllocatedBlock; block != nullptr; block = block->next) - { - currentTailIndex += static_cast(BLOCK_SIZE); - idxEntry = get_block_index_entry_for_index(currentTailIndex); - idxEntry->value.store(nullptr, std::memory_order_relaxed); - rewind_block_index_tail(); - } - this->parent->add_blocks_to_free_list(firstAllocatedBlock); - this->tailBlock = startBlock; - - return false; - } - newBlock->ConcurrentQueue::Block::template reset_empty(); - newBlock->next = nullptr; - - // Insert the new block into the index - idxEntry->value.store(newBlock, std::memory_order_relaxed); - - // Store the chain of blocks so that we can undo if later allocations fail, - // and so that we can find the blocks when we do the actual enqueueing - if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr) - { - assert(this->tailBlock != nullptr); - this->tailBlock->next = newBlock; - } - this->tailBlock = newBlock; - endBlock = newBlock; - firstAllocatedBlock = firstAllocatedBlock == nullptr ? 
newBlock : firstAllocatedBlock; - } - while (blockBaseDiff > 0); - } - - // Enqueue, one block at a time - index_t newTailIndex = startTailIndex + static_cast(count); - currentTailIndex = startTailIndex; - this->tailBlock = startBlock; - assert((startTailIndex & static_cast(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr || count == 0); - if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0 && firstAllocatedBlock != nullptr) - { - this->tailBlock = firstAllocatedBlock; - } - while (true) - { - index_t stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); - if (Details::circular_less_than(newTailIndex, stopIndex)) - { - stopIndex = newTailIndex; - } - POCO_CONSTEXPR_IF (noexcept(new (static_cast(nullptr)) T(Details::deref_noexcept(itemFirst)))) - { - while (currentTailIndex != stopIndex) - { - new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++); - } - } - else - { - try - { - while (currentTailIndex != stopIndex) - { - new ((*this->tailBlock)[currentTailIndex]) T(Details::nomove_if(nullptr)) T(Details::deref_noexcept(itemFirst)))>::eval(*itemFirst)); - ++currentTailIndex; - ++itemFirst; - } - } - catch (...) - { - auto constructedStopIndex = currentTailIndex; - auto lastBlockEnqueued = this->tailBlock; - - if (!Details::is_trivially_destructible::value) - { - auto block = startBlock; - if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) - { - block = firstAllocatedBlock; - } - currentTailIndex = startTailIndex; - while (true) - { - stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); - if (Details::circular_less_than(constructedStopIndex, stopIndex)) - { - stopIndex = constructedStopIndex; - } - while (currentTailIndex != stopIndex) - { - (*block)[currentTailIndex++]->~T(); - } - if (block == lastBlockEnqueued) - { - break; - } - block = block->next; - } - } - - currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); - for (auto block = firstAllocatedBlock; block != nullptr; block = block->next) - { - currentTailIndex += static_cast(BLOCK_SIZE); - auto idxEntry = get_block_index_entry_for_index(currentTailIndex); - idxEntry->value.store(nullptr, std::memory_order_relaxed); - rewind_block_index_tail(); - } - this->parent->add_blocks_to_free_list(firstAllocatedBlock); - this->tailBlock = startBlock; - throw; - } - } - - if (this->tailBlock == endBlock) - { - assert(currentTailIndex == newTailIndex); - break; - } - this->tailBlock = this->tailBlock->next; - } - this->tailIndex.store(newTailIndex, std::memory_order_release); - return true; - } -#ifdef _MSC_VER -#pragma warning(pop) -#endif - - template - size_t dequeue_bulk(It& itemFirst, size_t max) - { - auto tail = this->tailIndex.load(std::memory_order_relaxed); - auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); - auto desiredCount = static_cast(tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit)); - if (Details::circular_less_than(0, desiredCount)) - { - desiredCount = desiredCount < max ? desiredCount : max; - std::atomic_thread_fence(std::memory_order_acquire); - - auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed); - - tail = this->tailIndex.load(std::memory_order_acquire); - auto actualCount = static_cast(tail - (myDequeueCount - overcommit)); - if (Details::circular_less_than(0, actualCount)) - { - actualCount = desiredCount < actualCount ? 
desiredCount : actualCount; - if (actualCount < desiredCount) - { - this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release); - } - - // Get the first index. Note that since there's guaranteed to be at least actualCount elements, this - // will never exceed tail. - auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel); - - // Iterate the blocks and dequeue - auto index = firstIndex; - BlockIndexHeader* localBlockIndex = nullptr; - auto indexIndex = get_block_index_index_for_index(index, localBlockIndex); - do - { - auto blockStartIndex = index; - index_t endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); - endIndex = Details::circular_less_than(firstIndex + static_cast(actualCount), endIndex) ? firstIndex + static_cast(actualCount) : endIndex; - - auto entry = localBlockIndex->index[indexIndex]; - auto block = entry->value.load(std::memory_order_relaxed); - if (noexcept(Details::deref_noexcept(itemFirst) = std::move((*(*block)[index])))) - { - while (index != endIndex) - { - auto& el = *((*block)[index]); - *itemFirst++ = std::move(el); - el.~T(); - ++index; - } - } - else - { - try - { - while (index != endIndex) - { - auto& el = *((*block)[index]); - *itemFirst = std::move(el); - ++itemFirst; - el.~T(); - ++index; - } - } - catch (...) - { - do - { - entry = localBlockIndex->index[indexIndex]; - block = entry->value.load(std::memory_order_relaxed); - while (index != endIndex) - { - (*block)[index++]->~T(); - } - - if (block->ConcurrentQueue::Block::template set_many_empty(blockStartIndex, static_cast(endIndex - blockStartIndex))) - { - entry->value.store(nullptr, std::memory_order_relaxed); - this->parent->add_block_to_free_list(block); - } - indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1); - - blockStartIndex = index; - endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); - endIndex = Details::circular_less_than(firstIndex + static_cast(actualCount), endIndex) ? firstIndex + static_cast(actualCount) : endIndex; - } - while (index != firstIndex + actualCount); - - throw; - } - } - if (block->ConcurrentQueue::Block::template set_many_empty(blockStartIndex, static_cast(endIndex - blockStartIndex))) - { - { - // Note that the set_many_empty above did a release, meaning that anybody who acquires the block - // we're about to free can use it safely since our writes (and reads!) will have happened-before then. 
- entry->value.store(nullptr, std::memory_order_relaxed); - } - this->parent->add_block_to_free_list(block); // releases the above store - } - indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1); - } - while (index != firstIndex + actualCount); - - return actualCount; - } - else - { - this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release); - } - } - - return 0; - } - - private: - // The block size must be > 1, so any number with the low bit set is an invalid block base index - static const index_t INVALID_BLOCK_BASE = 1; - - struct BlockIndexEntry - { - std::atomic key; - std::atomic value; - }; - - struct BlockIndexHeader - { - size_t capacity; - std::atomic tail; - BlockIndexEntry* entries; - BlockIndexEntry** index; - BlockIndexHeader* prev; - }; - - template - inline bool insert_block_index_entry(BlockIndexEntry*& idxEntry, index_t blockStartIndex) - { - auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); // We're the only writer thread, relaxed is OK - if (localBlockIndex == nullptr) - { - return false; // this can happen if new_block_index failed in the constructor - } - size_t newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1); - idxEntry = localBlockIndex->index[newTail]; - if (idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE || - idxEntry->value.load(std::memory_order_relaxed) == nullptr) - { - - idxEntry->key.store(blockStartIndex, std::memory_order_relaxed); - localBlockIndex->tail.store(newTail, std::memory_order_release); - return true; - } - - // No room in the old block index, try to allocate another one! - POCO_CONSTEXPR_IF (allocMode == CannotAlloc) - { - return false; - } - else if (!new_block_index()) - { - return false; - } - else - { - localBlockIndex = blockIndex.load(std::memory_order_relaxed); - newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1); - idxEntry = localBlockIndex->index[newTail]; - assert(idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE); - idxEntry->key.store(blockStartIndex, std::memory_order_relaxed); - localBlockIndex->tail.store(newTail, std::memory_order_release); - return true; - } - } - - inline void rewind_block_index_tail() - { - auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); - localBlockIndex->tail.store((localBlockIndex->tail.load(std::memory_order_relaxed) - 1) & (localBlockIndex->capacity - 1), std::memory_order_relaxed); - } - - inline BlockIndexEntry* get_block_index_entry_for_index(index_t index) const - { - BlockIndexHeader* localBlockIndex = nullptr; - auto idx = get_block_index_index_for_index(index, localBlockIndex); - return localBlockIndex->index[idx]; - } - - inline size_t get_block_index_index_for_index(index_t index, BlockIndexHeader*& localBlockIndex) const - { - index &= ~static_cast(BLOCK_SIZE - 1); - localBlockIndex = blockIndex.load(std::memory_order_acquire); - auto tail = localBlockIndex->tail.load(std::memory_order_acquire); - auto tailBase = localBlockIndex->index[tail]->key.load(std::memory_order_relaxed); - assert(tailBase != INVALID_BLOCK_BASE); - // Note: Must use division instead of shift because the index may wrap around, causing a negative - // offset, whose negativity we want to preserve - auto offset = static_cast(static_cast::type>(index - tailBase) / static_cast::type>(BLOCK_SIZE)); - size_t idx = (tail + offset) & (localBlockIndex->capacity - 1); - 
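The lookup in get_block_index_entry_for_index()/get_block_index_index_for_index() treats the block index as a power-of-two ring: round the element index down to its block base, compute a signed block offset from the base stored at the ring's tail (a division rather than a shift, so a wrapped, negative offset keeps its sign), then mask with capacity - 1 instead of taking a modulo. A stand-alone sketch of just that arithmetic, with an assumed BLOCK_SIZE and illustrative names:

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <type_traits>

using index_t = std::uint32_t;
constexpr std::size_t BLOCK_SIZE = 32;   // assumption for the sketch; any power of two works

// Returns the ring slot holding the block that contains 'index'.
std::size_t slot_for(index_t index, index_t tailBase, std::size_t tail, std::size_t capacity)
{
    assert((capacity & (capacity - 1)) == 0 && (BLOCK_SIZE & (BLOCK_SIZE - 1)) == 0);
    index &= ~static_cast<index_t>(BLOCK_SIZE - 1);                   // round down to the block base
    auto offset = static_cast<std::make_signed<index_t>::type>(index - tailBase)
                  / static_cast<std::make_signed<index_t>::type>(BLOCK_SIZE);
    return (tail + static_cast<std::size_t>(offset)) & (capacity - 1); // mask, not modulo
}

Because both index and tailBase are block-aligned, the signed division is exact, and a wrapped (negative) offset still lands on the right slot after the two's-complement add and mask.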
assert(localBlockIndex->index[idx]->key.load(std::memory_order_relaxed) == index && localBlockIndex->index[idx]->value.load(std::memory_order_relaxed) != nullptr); - return idx; - } - - bool new_block_index() - { - auto prev = blockIndex.load(std::memory_order_relaxed); - size_t prevCapacity = prev == nullptr ? 0 : prev->capacity; - auto entryCount = prev == nullptr ? nextBlockIndexCapacity : prevCapacity; - auto raw = static_cast((Traits::malloc)( - sizeof(BlockIndexHeader) + - std::alignment_of::value - 1 + sizeof(BlockIndexEntry) * entryCount + - std::alignment_of::value - 1 + sizeof(BlockIndexEntry*) * nextBlockIndexCapacity)); - if (raw == nullptr) - { - return false; - } - - auto header = new (raw) BlockIndexHeader; - auto entries = reinterpret_cast(Details::align_for(raw + sizeof(BlockIndexHeader))); - auto index = reinterpret_cast(Details::align_for(reinterpret_cast(entries) + sizeof(BlockIndexEntry) * entryCount)); - if (prev != nullptr) - { - auto prevTail = prev->tail.load(std::memory_order_relaxed); - auto prevPos = prevTail; - size_t i = 0; - do - { - prevPos = (prevPos + 1) & (prev->capacity - 1); - index[i++] = prev->index[prevPos]; - } - while (prevPos != prevTail); - assert(i == prevCapacity); - } - for (size_t i = 0; i != entryCount; ++i) - { - new (entries + i) BlockIndexEntry; - entries[i].key.store(INVALID_BLOCK_BASE, std::memory_order_relaxed); - index[prevCapacity + i] = entries + i; - } - header->prev = prev; - header->entries = entries; - header->index = index; - header->capacity = nextBlockIndexCapacity; - header->tail.store((prevCapacity - 1) & (nextBlockIndexCapacity - 1), std::memory_order_relaxed); - - blockIndex.store(header, std::memory_order_release); - - nextBlockIndexCapacity <<= 1; - - return true; - } - - private: - size_t nextBlockIndexCapacity{IMPLICIT_INITIAL_INDEX_SIZE}; - std::atomic blockIndex{nullptr}; - public: - Details::ThreadExitListener threadExitListener; - }; - - - ////////////////////////////////// - // Block pool manipulation - ////////////////////////////////// - - void populate_initial_block_list(size_t blockCount) - { - initialBlockPoolSize = blockCount; - if (initialBlockPoolSize == 0) - { - initialBlockPool = nullptr; - return; - } - - initialBlockPool = create_array(blockCount); - if (initialBlockPool == nullptr) - { - initialBlockPoolSize = 0; - } - for (size_t i = 0; i < initialBlockPoolSize; ++i) - { - initialBlockPool[i].dynamicallyAllocated = false; - } - } - - inline Block* try_get_block_from_initial_pool() - { - if (initialBlockPoolIndex.load(std::memory_order_relaxed) >= initialBlockPoolSize) - { - return nullptr; - } - - auto index = initialBlockPoolIndex.fetch_add(1, std::memory_order_relaxed); - - return index < initialBlockPoolSize ? 
(initialBlockPool + index) : nullptr; - } - - inline void add_block_to_free_list(Block* block) - { - if (!Traits::RECYCLE_ALLOCATED_BLOCKS && block->dynamicallyAllocated) - { - destroy(block); - } - else - { - freeList.add(block); - } - } - - inline void add_blocks_to_free_list(Block* block) - { - while (block != nullptr) - { - auto next = block->next; - add_block_to_free_list(block); - block = next; - } - } - - inline Block* try_get_block_from_free_list() - { - return freeList.try_get(); - } - - // Gets a free block from one of the memory pools, or allocates a new one (if applicable) - template - Block* requisition_block() - { - auto block = try_get_block_from_initial_pool(); - if (block != nullptr) - { - return block; - } - - block = try_get_block_from_free_list(); - if (block != nullptr) - { - return block; - } - - POCO_CONSTEXPR_IF (canAlloc == CanAlloc) - { - return create(); - } - else - { - return nullptr; - } - } - - ////////////////////////////////// - // Producer list manipulation - ////////////////////////////////// - - ProducerBase* recycle_or_create_producer(bool isExplicit) - { - // Try to re-use one first - for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) - { - if (ptr->inactive.load(std::memory_order_relaxed) && ptr->isExplicit == isExplicit) - { - bool expected = true; - if (ptr->inactive.compare_exchange_strong(expected, /* desired */ false, std::memory_order_acquire, std::memory_order_relaxed)) - { - // We caught one! It's been marked as activated, the caller can have it - return ptr; - } - } - } - - return add_producer(isExplicit ? static_cast(create(this)) : create(this)); - } - - ProducerBase* add_producer(ProducerBase* producer) - { - // Handle failed memory allocation - if (producer == nullptr) - { - return nullptr; - } - - producerCount.fetch_add(1, std::memory_order_relaxed); - - // Add it to the lock-free list - auto prevTail = producerListTail.load(std::memory_order_relaxed); - do - { - producer->next = prevTail; - } - while (!producerListTail.compare_exchange_weak(prevTail, producer, std::memory_order_release, std::memory_order_relaxed)); - - return producer; - } - - void reown_producers() - { - // After another instance is moved-into/swapped-with this one, all the - // producers we stole still think their parents are the other queue. - // So fix them up! 
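recycle_or_create_producer() and add_producer() above boil down to two lock-free list idioms: claim a retired node by CAS-ing its inactive flag back to false, or prepend a fresh node with a compare_exchange_weak loop. Because producers are never unlinked from the list (only flagged inactive and reused), the push has no ABA hazard. A minimal sketch with illustrative names (Node and ProducerList are not from the removed header):

#include <atomic>

struct Node
{
    Node* next = nullptr;
    std::atomic<bool> inactive{false};
};

class ProducerList
{
public:
    // Treiber-style prepend, mirroring add_producer().
    void push(Node* n)
    {
        Node* prevTail = tail.load(std::memory_order_relaxed);
        do
        {
            n->next = prevTail;                      // link before publishing
        }
        while (!tail.compare_exchange_weak(prevTail, n,
                                           std::memory_order_release,
                                           std::memory_order_relaxed));
    }

    // Walk the list for an inactive node and claim it, mirroring recycle_or_create_producer().
    Node* try_recycle()
    {
        for (Node* p = tail.load(std::memory_order_acquire); p != nullptr; p = p->next)
        {
            bool expected = true;
            if (p->inactive.load(std::memory_order_relaxed) &&
                p->inactive.compare_exchange_strong(expected, false,
                                                    std::memory_order_acquire,
                                                    std::memory_order_relaxed))
                return p;
        }
        return nullptr;
    }

private:
    std::atomic<Node*> tail{nullptr};
};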
- for (auto ptr = producerListTail.load(std::memory_order_relaxed); ptr != nullptr; ptr = ptr->next_prod()) - { - ptr->parent = this; - } - } - - - ////////////////////////////////// - // Implicit producer hash - ////////////////////////////////// - - struct ImplicitProducerKVP - { - std::atomic key{0}; - ImplicitProducer* value{nullptr}; // No need for atomicity since it's only read by the thread that sets it in the first place - - ImplicitProducerKVP() = default; - - ImplicitProducerKVP(ImplicitProducerKVP&& other) noexcept - { - key.store(other.key.load(std::memory_order_relaxed), std::memory_order_relaxed); - value = other.value; - } - - inline ImplicitProducerKVP& operator=(ImplicitProducerKVP&& other) noexcept - { - swap(other); - return *this; - } - - inline void swap(ImplicitProducerKVP& other) noexcept - { - if (this != &other) - { - Details::swap_relaxed(key, other.key); - std::swap(value, other.value); - } - } - }; - - template - friend void Poco::swap(typename ConcurrentQueue::ImplicitProducerKVP&, typename ConcurrentQueue::ImplicitProducerKVP&) noexcept; - - struct ImplicitProducerHash - { - size_t capacity; - ImplicitProducerKVP* entries; - ImplicitProducerHash* prev; - }; - - template - inline typename std::enable_if::type populate_initial_implicit_producer_hash() - { - return; - } - template - inline typename std::enable_if::type populate_initial_implicit_producer_hash() - { - implicitProducerHashCount.store(0, std::memory_order_relaxed); - auto hash = &initialImplicitProducerHash; - hash->capacity = INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; - hash->entries = &initialImplicitProducerHashEntries[0]; - for (size_t i = 0; i != INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; ++i) - { - initialImplicitProducerHashEntries[i].key.store(Details::invalid_thread_id, std::memory_order_relaxed); - } - hash->prev = nullptr; - implicitProducerHash.store(hash, std::memory_order_relaxed); - } - - template - typename std::enable_if::type swap_implicit_producer_hashes(ConcurrentQueue& other) - { - return; - } - template - typename std::enable_if::type swap_implicit_producer_hashes(ConcurrentQueue& other) - { - // Swap (assumes our implicit producer hash is initialized) - initialImplicitProducerHashEntries.swap(other.initialImplicitProducerHashEntries); - initialImplicitProducerHash.entries = &initialImplicitProducerHashEntries[0]; - other.initialImplicitProducerHash.entries = &other.initialImplicitProducerHashEntries[0]; - - Details::swap_relaxed(implicitProducerHashCount, other.implicitProducerHashCount); - - Details::swap_relaxed(implicitProducerHash, other.implicitProducerHash); - if (implicitProducerHash.load(std::memory_order_relaxed) == &other.initialImplicitProducerHash) - { - implicitProducerHash.store(&initialImplicitProducerHash, std::memory_order_relaxed); - } - else - { - ImplicitProducerHash* hash = nullptr; - for (hash = implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &other.initialImplicitProducerHash; hash = hash->prev) - { - continue; - } - hash->prev = &initialImplicitProducerHash; - } - if (other.implicitProducerHash.load(std::memory_order_relaxed) == &initialImplicitProducerHash) - { - other.implicitProducerHash.store(&other.initialImplicitProducerHash, std::memory_order_relaxed); - } - else - { - ImplicitProducerHash* hash = nullptr; - for (hash = other.implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &initialImplicitProducerHash; hash = hash->prev) - { - continue; - } - hash->prev = &other.initialImplicitProducerHash; - } - } - - // Only fails 
(returns nullptr) if memory allocation fails - ImplicitProducer* get_or_add_implicit_producer() - { - // Note that since the data is essentially thread-local (key is thread ID), - // there's a reduced need for fences (memory ordering is already consistent - // for any individual thread), except for the current table itself. - - // Start by looking for the thread ID in the current and all previous hash tables. - // If it's not found, it must not be in there yet, since this same thread would - // have added it previously to one of the tables that we traversed. - - // Code and algorithm adapted from http://preshing.com/20130605/the-worlds-simplest-lock-free-hash-table - - auto id = Details::thread_id(); - auto hashedId = Details::hash_thread_id(id); - - auto mainHash = implicitProducerHash.load(std::memory_order_acquire); - assert(mainHash != nullptr); // silence clang-tidy and MSVC warnings (hash cannot be null) - for (auto hash = mainHash; hash != nullptr; hash = hash->prev) - { - // Look for the id in this hash - auto index = hashedId; - while (true) // Not an infinite loop because at least one slot is free in the hash table - { - index &= hash->capacity - 1u; - - auto probedKey = hash->entries[index].key.load(std::memory_order_relaxed); - if (probedKey == id) - { - // Found it! If we had to search several hashes deep, though, we should lazily add it - // to the current main hash table to avoid the extended search next time. - // Note there's guaranteed to be room in the current hash table since every subsequent - // table implicitly reserves space for all previous tables (there's only one - // implicitProducerHashCount). - auto value = hash->entries[index].value; - if (hash != mainHash) - { - index = hashedId; - while (true) - { - index &= mainHash->capacity - 1u; - auto empty = Details::invalid_thread_id; - auto reusable = Details::invalid_thread_id2; - if (mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_seq_cst, std::memory_order_relaxed) || - mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_seq_cst, std::memory_order_relaxed)) - { - mainHash->entries[index].value = value; - break; - } - ++index; - } - } - - return value; - } - if (probedKey == Details::invalid_thread_id) - { - break; // Not in this hash table - } - ++index; - } - } - - // Insert! - auto newCount = 1 + implicitProducerHashCount.fetch_add(1, std::memory_order_relaxed); - while (true) - { - // NOLINTNEXTLINE(clang-analyzer-core.NullDereference) - if (newCount >= (mainHash->capacity >> 1) && !implicitProducerHashResizeInProgress.test_and_set(std::memory_order_acquire)) - { - // We've acquired the resize lock, try to allocate a bigger hash table. - // Note the acquire fence synchronizes with the release fence at the end of this block, and hence when - // we reload implicitProducerHash it must be the most recent version (it only gets changed within this - // locked block). 
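The lookup/insert scheme described above follows Preshing's lock-free hash table: thread ids are the keys, the table uses open addressing with linear probing over a power-of-two capacity, and a slot is claimed by CAS-ing its key from the "empty" marker to the caller's id. A fixed-capacity sketch of that core (the chaining to older tables and the resize are omitted; EMPTY_KEY, CAPACITY, and find_or_claim are illustrative names, not part of the removed header):

#include <atomic>
#include <cstddef>
#include <cstdint>

constexpr std::size_t CAPACITY = 64;          // must be a power of two and never allowed to fill
constexpr std::uint64_t EMPTY_KEY = 0;        // assumption: 0 is never a real thread id

struct Slot
{
    std::atomic<std::uint64_t> key{EMPTY_KEY};
    void* value{nullptr};                     // written only by the thread that claims the key
};

Slot g_table[CAPACITY];

void*& find_or_claim(std::uint64_t id)
{
    std::size_t index = static_cast<std::size_t>(id * 0x9E3779B97F4A7C15ull);   // cheap hash
    while (true)
    {
        index &= CAPACITY - 1;
        std::uint64_t probed = g_table[index].key.load(std::memory_order_relaxed);
        if (probed == id)
            return g_table[index].value;      // this thread inserted itself earlier
        if (probed == EMPTY_KEY)
        {
            std::uint64_t expected = EMPTY_KEY;
            if (g_table[index].key.compare_exchange_strong(expected, id,
                                                           std::memory_order_seq_cst,
                                                           std::memory_order_relaxed))
                return g_table[index].value;  // slot claimed; the value is ours to set
            // CAS lost to another thread's id: fall through and keep probing.
        }
        ++index;                              // linear probing, wraps via the mask
    }
}

Usage is simply find_or_claim(myThreadId) = myProducer; the value field needs no atomicity because only the owning thread reads or writes its own entry, which is the same reasoning given for ImplicitProducerKVP above.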
- mainHash = implicitProducerHash.load(std::memory_order_acquire); - if (newCount >= (mainHash->capacity >> 1)) - { - size_t newCapacity = mainHash->capacity << 1; - while (newCount >= (newCapacity >> 1)) - { - newCapacity <<= 1; - } - auto raw = static_cast((Traits::malloc)(sizeof(ImplicitProducerHash) + std::alignment_of::value - 1 + sizeof(ImplicitProducerKVP) * newCapacity)); - if (raw == nullptr) - { - // Allocation failed - implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); - implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); - return nullptr; - } - - auto newHash = new (raw) ImplicitProducerHash; - newHash->capacity = static_cast(newCapacity); - newHash->entries = reinterpret_cast(Details::align_for(raw + sizeof(ImplicitProducerHash))); - for (size_t i = 0; i != newCapacity; ++i) - { - new (newHash->entries + i) ImplicitProducerKVP; - newHash->entries[i].key.store(Details::invalid_thread_id, std::memory_order_relaxed); - } - newHash->prev = mainHash; - implicitProducerHash.store(newHash, std::memory_order_release); - implicitProducerHashResizeInProgress.clear(std::memory_order_release); - mainHash = newHash; - } - else - { - implicitProducerHashResizeInProgress.clear(std::memory_order_release); - } - } - - // If it's < three-quarters full, add to the old one anyway so that we don't have to wait for the next table - // to finish being allocated by another thread (and if we just finished allocating above, the condition will - // always be true) - if (newCount < (mainHash->capacity >> 1) + (mainHash->capacity >> 2)) - { - auto producer = static_cast(recycle_or_create_producer(false)); - if (producer == nullptr) - { - implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); - return nullptr; - } - - producer->threadExitListener.callback = &ConcurrentQueue::implicit_producer_thread_exited_callback; - producer->threadExitListener.userData = producer; - Details::ThreadExitNotifier::subscribe(&producer->threadExitListener); - - auto index = hashedId; - while (true) - { - index &= mainHash->capacity - 1u; - auto empty = Details::invalid_thread_id; - auto reusable = Details::invalid_thread_id2; - if (mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_seq_cst, std::memory_order_relaxed)) - { - implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); // already counted as a used slot - mainHash->entries[index].value = producer; - break; - } - if (mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_seq_cst, std::memory_order_relaxed)) - { - mainHash->entries[index].value = producer; - break; - } - ++index; - } - return producer; - } - - // Hmm, the old hash is quite full and somebody else is busy allocating a new one. - // We need to wait for the allocating thread to finish (if it succeeds, we add, if not, - // we try to allocate ourselves). 
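The resize path uses std::atomic_flag::test_and_set as a try-lock: whichever thread wins allocates the bigger table and publishes it with a release store before clearing the flag; everyone else keeps using whatever pointer is currently published (or, in the real code, inserts into the old table while it is still under three-quarters full). A reduced sketch of that gate, with placeholder Table and grow_if_needed names and without the entry copying:

#include <atomic>
#include <cstddef>

struct Table { Table* prev = nullptr; std::size_t capacity = 0; };

std::atomic<Table*> g_current{new Table{nullptr, 32}};
std::atomic_flag g_resizing = ATOMIC_FLAG_INIT;

Table* grow_if_needed(std::size_t used)
{
    Table* t = g_current.load(std::memory_order_acquire);
    if (used < (t->capacity >> 1))
        return t;                                              // roomy enough, nothing to do

    if (!g_resizing.test_and_set(std::memory_order_acquire))
    {
        t = g_current.load(std::memory_order_acquire);         // re-check under the "lock"
        if (used >= (t->capacity >> 1))
        {
            Table* bigger = new Table{t, t->capacity << 1};    // old table stays chained via prev
            g_current.store(bigger, std::memory_order_release); // publish before unlocking
            t = bigger;
        }
        g_resizing.clear(std::memory_order_release);
    }
    else
    {
        // Someone else is resizing: just reload whatever is currently published.
        t = g_current.load(std::memory_order_acquire);
    }
    return t;
}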
- mainHash = implicitProducerHash.load(std::memory_order_acquire); - } - } - - - void implicit_producer_thread_exited(ImplicitProducer* producer) - { - // Remove from hash - auto hash = implicitProducerHash.load(std::memory_order_acquire); - assert(hash != nullptr); // The thread exit listener is only registered if we were added to a hash in the first place - auto id = Details::thread_id(); - auto hashedId = Details::hash_thread_id(id); - Details::thread_id_t probedKey{0}; - - // We need to traverse all the hashes just in case other threads aren't on the current one yet and are - // trying to add an entry thinking there's a free slot (because they reused a producer) - for (; hash != nullptr; hash = hash->prev) - { - auto index = hashedId; - do - { - index &= hash->capacity - 1u; - probedKey = id; - if (hash->entries[index].key.compare_exchange_strong(probedKey, Details::invalid_thread_id2, std::memory_order_seq_cst, std::memory_order_relaxed)) - { - break; - } - ++index; - } - while (probedKey != Details::invalid_thread_id); // Can happen if the hash has changed but we weren't put back in it yet, or if we weren't added to this hash in the first place - } - - // Mark the queue as being recyclable - producer->inactive.store(true, std::memory_order_release); - } - - static void implicit_producer_thread_exited_callback(void* userData) - { - auto producer = static_cast(userData); - auto queue = producer->parent; - queue->implicit_producer_thread_exited(producer); - } - - ////////////////////////////////// - // Utility functions - ////////////////////////////////// - - template - static inline typename std::enable_if<(std::alignment_of::value <= std::alignment_of::value), void*>::type aligned_malloc(size_t size) - { - return (Traits::malloc)(size); - } - template - static inline typename std::enable_if::value <= std::alignment_of::value), void*>::type aligned_malloc(size_t size) - { - size_t alignment = std::alignment_of::value; - void* raw = (Traits::malloc)(size + alignment - 1 + sizeof(void*)); - if (!raw) - { - return nullptr; - } - char* ptr = Details::align_for(reinterpret_cast(raw) + sizeof(void*)); - *(reinterpret_cast(ptr) - 1) = raw; - return ptr; - } - - template - static inline typename std::enable_if<(std::alignment_of::value <= std::alignment_of::value), void>::type aligned_free(void* ptr) - { - return (Traits::free)(ptr); - } - template - static inline typename std::enable_if::value <= std::alignment_of::value), void>::type aligned_free(void* ptr) - { - (Traits::free)(ptr ? *(reinterpret_cast(ptr) - 1) : nullptr); - } - - template - static inline U* create_array(size_t count) - { - assert(count > 0); - U* p = static_cast(aligned_malloc(sizeof(U) * count)); - if (p == nullptr) return nullptr; - - for (size_t i = 0; i != count; ++i) - { - new (p + i) U(); - } - return p; - } - - template - static inline void destroy_array(U* p, size_t count) - { - if (p != nullptr) - { - assert(count > 0); - for (size_t i = count; i != 0; ) - { - (p + --i)->~U(); - } - } - aligned_free(p); - } - - template - static inline U* create() - { - void* p = aligned_malloc(sizeof(U)); - return p != nullptr ? new (p) U : nullptr; - } - - template - static inline U* create(A1&& a1) - { - void* p = aligned_malloc(sizeof(U)); - return p != nullptr ? 
new (p) U(std::forward(a1)) : nullptr; - } - - template - static inline void destroy(U* p) - { - if (p != nullptr) p->~U(); - aligned_free(p); - } - -private: - std::atomic producerListTail{nullptr}; - std::atomic producerCount{0}; - - std::atomic initialBlockPoolIndex{0}; - Block* initialBlockPool{nullptr}; - size_t initialBlockPoolSize{0}; - - FreeList freeList; - - std::atomic implicitProducerHash{nullptr}; - std::atomic implicitProducerHashCount{0}; // Number of slots logically used - ImplicitProducerHash initialImplicitProducerHash; - std::array initialImplicitProducerHashEntries; - std::atomic_flag implicitProducerHashResizeInProgress{}; - - std::atomic nextExplicitConsumerId{0}; - std::atomic globalExplicitConsumerOffset{0}; -}; - - -template -ProducerToken::ProducerToken(ConcurrentQueue& queue) - : producer(queue.recycle_or_create_producer(true)) -{ - if (producer != nullptr) - { - producer->token = this; - } -} - -template -ProducerToken::ProducerToken(BlockingConcurrentQueue& queue) - : producer(reinterpret_cast*>(&queue)->recycle_or_create_producer(true)) -{ - if (producer != nullptr) - { - producer->token = this; - } -} - -template -ConsumerToken::ConsumerToken(ConcurrentQueue& queue) : - initialOffset(queue.nextExplicitConsumerId.fetch_add(1, std::memory_order_release)), - lastKnownGlobalOffset(static_cast(-1)) -{ } - -template -ConsumerToken::ConsumerToken(BlockingConcurrentQueue& queue) : - initialOffset(reinterpret_cast*>(&queue)->nextExplicitConsumerId.fetch_add(1, std::memory_order_release)), - lastKnownGlobalOffset(static_cast(-1)) -{ } - -template -inline void swap(ConcurrentQueue& a, ConcurrentQueue& b) noexcept -{ - a.swap(b); -} - -inline void swap(ProducerToken& a, ProducerToken& b) noexcept -{ - a.swap(b); -} - -inline void swap(ConsumerToken& a, ConsumerToken& b) noexcept -{ - a.swap(b); -} - -template -inline void swap(typename ConcurrentQueue::ImplicitProducerKVP& a, typename ConcurrentQueue::ImplicitProducerKVP& b) noexcept -{ - a.swap(b); -} - -} - -#if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17) -#pragma warning(pop) -#endif - -#if defined(__GNUC__) && !defined(__INTEL_COMPILER) -#pragma GCC diagnostic pop -#endif diff --git a/Foundation/include/Poco/LightWeightSemaphore.h b/Foundation/include/Poco/LightWeightSemaphore.h deleted file mode 100644 index 99f5323ed0..0000000000 --- a/Foundation/include/Poco/LightWeightSemaphore.h +++ /dev/null @@ -1,271 +0,0 @@ -// -// LightWeightSemaphore.h -// -// Library: Foundation -// Package: Core -// Module: LightWeightSemaphore -// -// Copyright Copyright (c) 2013-2020, Cameron Desrochers. All rights reserved. -// Extracted from https://github.com/cameron314/concurrentqueue lib and adapted for poco: Bychuk Alexander 2023 -// -// SPDX-License-Identifier: BSL-1.0 -// - -// Provides an efficient implementation of a semaphore (LightweightSemaphore). -// This is an extension of Jeff Preshing's sempahore implementation (licensed -// under the terms of its separate zlib license) that has been adapted and -// extended by Cameron Desrochers. 
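The class that follows builds exactly this kind of "spin first, park later" semaphore on top of Poco::Semaphore. The mechanism fits in a few lines; the sketch below substitutes C++20 std::counting_semaphore for Poco::Semaphore and drops the timeout and bulk-wait variants, so treat it as an illustration of the idea rather than the removed implementation:

#include <atomic>
#include <cstdint>
#include <semaphore>   // stands in for Poco::Semaphore in this sketch

class SpinThenBlockSemaphore
{
public:
    explicit SpinThenBlockSemaphore(std::int64_t initial = 0) : m_count(initial) {}

    void wait(int maxSpins = 10000)
    {
        std::int64_t old;
        for (int spin = 0; spin < maxSpins; ++spin)
        {
            old = m_count.load(std::memory_order_relaxed);
            if (old > 0 && m_count.compare_exchange_strong(old, old - 1,
                                                           std::memory_order_acquire,
                                                           std::memory_order_relaxed))
                return;                                    // got a unit without blocking
        }
        old = m_count.fetch_sub(1, std::memory_order_acquire);
        if (old <= 0)
            m_sema.acquire();                              // nothing left in user space: sleep
    }

    void signal(std::int64_t n = 1)
    {
        std::int64_t old = m_count.fetch_add(n, std::memory_order_release);
        std::int64_t toRelease = (-old < n ? -old : n);    // only wake threads that actually slept
        for (; toRelease > 0; --toRelease)
            m_sema.release();
    }

private:
    std::atomic<std::int64_t> m_count;
    std::counting_semaphore<> m_sema{0};
};

Keeping the signed count in user space means uncontended waits and signals never touch the kernel; the OS semaphore is only hit by the waiters that drove the count negative, which is exactly what signal()'s min(-oldCount, count) release below accounts for.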
- -#pragma once - -#include // For std::size_t -#include -#include // For std::make_signed -#include -#include "Poco/Semaphore.h" - -namespace Poco { - -// Code in the mpmc_sema namespace below is an adaptation of Jeff Preshing's -// portable + lightweight semaphore implementations, originally from -// https://github.com/preshing/cpp11-on-multicore/blob/master/common/sema.h -// LICENSE: -// Copyright (c) 2015 Jeff Preshing -// -// This software is provided 'as-is', without any express or implied -// warranty. In no event will the authors be held liable for any damages -// arising from the use of this software. -// -// Permission is granted to anyone to use this software for any purpose, -// including commercial applications, and to alter it and redistribute it -// freely, subject to the following restrictions: -// -// 1. The origin of this software must not be misrepresented; you must not -// claim that you wrote the original software. If you use this software -// in a product, an acknowledgement in the product documentation would be -// appreciated but is not required. -// 2. Altered source versions must be plainly marked as such, and must not be -// misrepresented as being the original software. -// 3. This notice may not be removed or altered from any source distribution. - - - -//--------------------------------------------------------- -// LightweightSemaphore -//--------------------------------------------------------- -class LightweightSemaphore -{ -public: - using ssize_t = std::make_signed::type; - -private: - std::atomic m_count; - Poco::Semaphore m_sema; - int m_maxSpins; - - bool waitWithPartialSpinning(std::int64_t timeout_millisecs = -1) - { - ssize_t oldCount = 0; - int spin = m_maxSpins; - while (--spin >= 0) - { - oldCount = m_count.load(std::memory_order_relaxed); - if ((oldCount > 0) && m_count.compare_exchange_strong(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed)) - return true; - std::atomic_signal_fence(std::memory_order_acquire); // Prevent the compiler from collapsing the loop. - } - oldCount = m_count.fetch_sub(1, std::memory_order_acquire); - if (oldCount > 0) - return true; - if (timeout_millisecs < 0) - { - - try - { - m_sema.wait(); - return true; - } - catch(Poco::SystemException &) {} - } - if (timeout_millisecs > 0) - { - try - { - m_sema.wait(timeout_millisecs); - return true; - } - catch(Poco::SystemException &) {} - } - // At this point, we've timed out waiting for the semaphore, but the - // count is still decremented indicating we may still be waiting on - // it. So we have to re-adjust the count, but only if the semaphore - // wasn't signaled enough times for us too since then. If it was, we - // need to release the semaphore too. - while (true) - { - oldCount = m_count.load(std::memory_order_acquire); - if (oldCount >= 0 && m_sema.tryWait(timeout_millisecs)) - return true; - if (oldCount < 0 && m_count.compare_exchange_strong(oldCount, oldCount + 1, std::memory_order_relaxed, std::memory_order_relaxed)) - return false; - } - } - - ssize_t waitManyWithPartialSpinning(ssize_t max, std::int64_t timeout_millisecs = -1) - { - assert(max > 0); - ssize_t oldCount = 0; - int spin = m_maxSpins; - while (--spin >= 0) - { - oldCount = m_count.load(std::memory_order_relaxed); - if (oldCount > 0) - { - ssize_t newCount = oldCount > max ? 
oldCount - max : 0; - if (m_count.compare_exchange_strong(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed)) - return oldCount - newCount; - } - std::atomic_signal_fence(std::memory_order_acquire); - } - oldCount = m_count.fetch_sub(1, std::memory_order_acquire); - if (oldCount <= 0) - { - auto calculateOldClount = [&timeout_millisecs, this]() -> ssize_t { - ssize_t oldCount = 0; - while (true) - { - oldCount = m_count.load(std::memory_order_acquire); - if (oldCount >= 0 && m_sema.tryWait(timeout_millisecs)) - break; - if (oldCount < 0 && m_count.compare_exchange_strong(oldCount, oldCount + 1, std::memory_order_relaxed, std::memory_order_relaxed)) - return 0; - } - return oldCount; - }; - if (timeout_millisecs == 0) - { - oldCount = calculateOldClount(); - if (oldCount == 0) return oldCount; - } - else if (timeout_millisecs < 0) - { - try - { - m_sema.wait(); - } - catch (Poco::SystemException &) - { - oldCount = calculateOldClount(); - if (oldCount == 0) return oldCount; - } - - } - else if (timeout_millisecs > 0) - { - try - { - m_sema.wait(timeout_millisecs); - } - catch (...) { - oldCount = calculateOldClount(); - if (oldCount == 0) return oldCount; - } - } - } - if (max > 1) - return 1 + tryWaitMany(max - 1); - return 1; - } - -public: - LightweightSemaphore(ssize_t initialCount = 0, int maxSpins = 10000) : m_count(initialCount), m_sema(initialCount), m_maxSpins(maxSpins) - { - assert(initialCount >= 0); - assert(maxSpins >= 0); - } - - bool tryWait() - { - ssize_t oldCount = m_count.load(std::memory_order_relaxed); - while (oldCount > 0) - { - if (m_count.compare_exchange_weak(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed)) - return true; - } - return false; - } - - bool wait() - { - return tryWait() || waitWithPartialSpinning(); - } - - bool wait(std::int64_t timeout_usecs) - { - return tryWait() || waitWithPartialSpinning(timeout_usecs); - } - - // Acquires between 0 and (greedily) max, inclusive - ssize_t tryWaitMany(ssize_t max) - { - assert(max >= 0); - ssize_t oldCount = m_count.load(std::memory_order_relaxed); - while (oldCount > 0) - { - ssize_t newCount = oldCount > max ? oldCount - max : 0; - if (m_count.compare_exchange_weak(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed)) - return oldCount - newCount; - } - return 0; - } - - // Acquires at least one, and (greedily) at most max - ssize_t waitMany(ssize_t max, std::int64_t timeout_usecs) - { - assert(max >= 0); - ssize_t result = tryWaitMany(max); - if (result == 0 && max > 0) - result = waitManyWithPartialSpinning(max, timeout_usecs); - return result; - } - - ssize_t waitMany(ssize_t max) - { - ssize_t result = waitMany(max, -1); - assert(result > 0); - return result; - } - - void signal(ssize_t count = 1) - { - assert(count >= 0); - ssize_t oldCount = m_count.fetch_add(count, std::memory_order_release); - ssize_t toRelease = -oldCount < count ? -oldCount : count; - if (toRelease > 0) - { - while (count-- > 0) - { - bool setOk = false; - do { - try - { - m_sema.set(); - setOk = true; - } - catch(Poco::SystemException &ex) - { } - } - while(!setOk); - - } - } - } - - std::size_t availableApprox() const - { - ssize_t count = m_count.load(std::memory_order_relaxed); - return count > 0 ? static_cast(count) : 0; - } -}; - -} // end namespace Poco
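For reference, the removed LightweightSemaphore was meant to pair with a queue in the usual producer/consumer way: push, then signal(); wait(), then pop. The sketch below substitutes a mutex-protected std::queue for the lock-free queue and assumes a source tree in which Poco/LightWeightSemaphore.h still exists (it is deleted by this commit):

#include <iostream>
#include <mutex>
#include <queue>
#include <thread>
#include "Poco/LightWeightSemaphore.h"

int main()
{
    Poco::LightweightSemaphore items;        // count starts at 0
    std::queue<int> q;
    std::mutex m;

    std::thread producer([&] {
        for (int i = 0; i < 100; ++i)
        {
            { std::lock_guard<std::mutex> lock(m); q.push(i); }
            items.signal();                  // publish one unit of work
        }
    });

    std::thread consumer([&] {
        for (int i = 0; i < 100; ++i)
        {
            items.wait();                    // spins briefly, then blocks until signaled
            std::lock_guard<std::mutex> lock(m);
            std::cout << q.front() << '\n';
            q.pop();
        }
    });

    producer.join();
    consumer.join();
}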