From cb096cc4602d9925044cdd9e0dde20e5c1c24031 Mon Sep 17 00:00:00 2001
From: Ashley Roll
Date: Wed, 8 Nov 2023 19:07:47 +1000
Subject: [PATCH] :art: RLE indices no longer materialise `bitset`s

The previous interim implementation of RLE encoded message indices
would decode data into fully materialised `bitset`s in order to fit
into the existing infrastructure easily.

This commit eliminates the need to materialise a `bitset` to compute
which callbacks are to be executed for a given message.

The `rle_intersect` type is a proxy object that collects together
multiple `rle_decoder`s created from the indexed RLE data during
message matching. This proxy object can then be used to virtually
combine the run length encoded bitset data and enumerate the
resulting `1` bits.

It is still possible to materialise the `bitset`s if needed.
---
 docs/implementation_details.adoc    | 157 +++++++++++++++
 docs/index.adoc                     |   1 +
 docs/message.adoc                   |  11 +-
 include/msg/detail/rle_codec.hpp    | 251 +++++++++++++++++++++---
 include/msg/rle_indexed_builder.hpp |   9 +-
 include/msg/rle_indexed_handler.hpp |   8 +-
 test/msg/detail/rle_codec.cpp       | 287 ++++++++++++++++++++++++++--
 7 files changed, 675 insertions(+), 49 deletions(-)
 create mode 100644 docs/implementation_details.adoc

diff --git a/docs/implementation_details.adoc b/docs/implementation_details.adoc
new file mode 100644
index 00000000..483ced7b
--- /dev/null
+++ b/docs/implementation_details.adoc
@@ -0,0 +1,157 @@
+
+== Implementation Details
+
+This section describes some of the internal implementation details to assist contributors.
+The details here are not required to use the `cib` library.
+
+=== Run Length Encoded Message Indices
+
+Switching to the RLE indices is as simple as converting your `msg::indexed_service` to a
+`msg::rle_indexed_service`.
+
+The initial building of the mapping indices proceeds the same as for
+the normal indices: a series of entries in an index is generated and
+the callbacks that match are encoded into a `stdx::bitset`.
+
+Once this initial representation is built, additional work is performed
+(at compile time) to encode the bitsets as RLE data; the index then
+stores just an offset into the blob of RLE data rather than the bitset
+itself.
+
+This benefits message maps that contain a large number of handlers,
+trading storage space for some decoding overhead.
+
+Once encoded, the lookup process proceeds as normal at run time and a
+set of candidate matches is collected; these are then _intersected_
+directly from the RLE data and the final set of callbacks invoked,
+without needing to materialise any of the underlying bitsets.
+
+==== RLE Data Encoding
+
+There are several options for encoding the bitset into an RLE pattern, many of which will result
+in smaller size, but a lot of bit-shifting to extract data. We have chosen to trade off encoded
+size for faster decoding, as it is likely the handling of the RLE data and index lookup will be
+in the critical path for system state changes.
+
+The encoding chosen is simply the number of consecutive `0` or `1` bits.
+
+Specifics:
+
+- The encoding runs from the least significant bit to the most significant bit
+- The number of consecutive bits is stored as a `std::byte` and ranges `0...255`
+- The first byte of the encoding counts the number of `0` bits
+- Runs of more than 255 consecutive identical bits are encoded in blocks of 255,
+  separated by a `0` byte indicating a zero-length run of the opposite value.
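+
+The sketch below is illustrative only (it is not the actual
+`msg::detail::rle_codec::encode` implementation, which works on `stdx` containers at
+compile time), but it shows the encoding rule in ordinary C++:
+
+[source,cpp]
+----
+// Illustrative sketch of the RLE encoding rule, LSB first.
+#include <cstddef>
+#include <cstdint>
+#include <vector>
+
+template <typename BitSet>
+auto encode_sketch(BitSet const &bits) -> std::vector<std::byte> {
+    std::vector<std::byte> out{};
+    bool last{false};    // the first byte always counts 0s
+    std::uint8_t run{0};
+    for (std::size_t i = 0; i < bits.size(); ++i) {
+        if (bits[i] != last or run == 255) {
+            out.push_back(static_cast<std::byte>(run));
+            if (bits[i] == last) {
+                // run capped at 255 without a value change: emit a
+                // zero-length run of the opposite value and carry on
+                out.push_back(std::byte{0});
+            }
+            last = bits[i];
+            run = 0;
+        }
+        ++run;
+    }
+    out.push_back(static_cast<std::byte>(run));
+    return out;
+}
+----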
+
+[ditaa, format="svg", scale=1.5]
+----
+   Bitset               RLE Data
+/-------------+    +---+
+| 0b0000`0000 |--->| 8 |
++-------------/    +---+
+
+/-------------+    +---+---+
+| 0b1111`1111 |--->| 0 | 8 |
++-------------/    +---+---+
+
+/-------------+    +---+---+---+
+| 0b0000`0001 |--->| 0 | 1 | 7 |
++-------------/    +---+---+---+
+
+/-------------+    +---+---+---+---+
+| 0b1000`0011 |--->| 0 | 2 | 5 | 1 |
++-------------/    +---+---+---+---+
+
+/-------------+    +---+---+---+---+
+| 0b1100`1110 |--->| 1 | 3 | 2 | 2 |
++-------------/    +---+---+---+---+
+
+
+/------------------------------+    +---+---+-----+---+-----+---+-----+---+-----+
+| 1000 `0`s and one `1` in LSB |--->| 0 | 1 | 255 | 0 | 255 | 0 | 255 | 0 | 235 |
++------------------------------/    +---+---+-----+---+-----+---+-----+---+-----+
+----
+
+The `msg::rle_indexed_builder` takes the indices and their bitset data and builds a single
+blob of RLE encoded data for all indices, stored in an instance of a
+`msg::detail::rle_storage`. It also generates a set of `msg::detail::rle_index` entries,
+one for each of the index entries, mapping the original bitmap to a location in the shared
+storage blob.
+
+The `rle_storage` object contains a simple array of all RLE data bytes. The `rle_index`
+contains a simple offset into that array. We compute the smallest type that can hold the
+largest offset and use that, to avoid wasted storage.
+
+NOTE: The specific `rle_storage` and `rle_index`s are locked together using a unique type
+so that an `rle_index` cannot be used with the wrong `rle_storage` object.
+
+When building the shared blob, the encoder will attempt to reduce the storage size by finding
+and reusing repeated patterns in the RLE data.
+
+The final `msg::indexed_handler` contains an instance of `msg::rle_indices`, which holds
+both the storage and the maps referring to all the `rle_index` objects.
+
+This means that the final compile time data generated consists of:
+
+- The message map lookups as per the normal implementation, except that they store a simple
+  offset rather than a bitset.
+- The blob of all RLE bitset data for all indices in the message handling map
+
+==== Runtime Handling
+
+The `msg::indexed_handler` implementation delegates the mapping call for an incoming
+message down to the `msg::rle_indices` implementation. That in turn calls into its
+storage indices and matches to the set of `rle_index` values for each mapping index.
+
+This set of `rle_index` values (which are just offsets) is then converted to instances of
+`msg::detail::rle_decoder` by the `rle_storage`. This converts each offset into a
+pointer to the sequence of `std::byte`s for the RLE encoding.
+
+All the collected `rle_decoder`s from the various maps in the set of indices are then passed
+to an instance of `msg::detail::rle_intersect` and returned from the `rle_indices`
+call operator.
+
+The `rle_decoder` provides a single-use enumerator that will step over the groups of
+`0`s or `1`s, providing a way to advance through them by arbitrary increments.
+
+The `rle_intersect` implementation wraps the variadic set of `rle_decoder`s so that
+the caller can iterate through all `1`s, calling the appropriate callback as it goes.
+
+===== Efficient Iteration of Bits
+
+The `msg::detail::rle_decoder::chunk_enumerator` provides a way to step through the RLE
+data for the encoded bitset an arbitrary number of bits at a time. It does this by exposing
+the length of the current run of consecutive identical bits.
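+
+As an illustration (a sketch modelled on the unit tests in `test/msg/detail/rle_codec.cpp`;
+it assumes `rle_decoder` is parameterised on the bitset type and spells the RLE bytes out
+by hand):
+
+[source,cpp]
+----
+using bs = stdx::bitset<8>;
+
+// RLE data for 0b0101`0101: no leading 0s, then alternating single bits
+auto const rle = std::array{std::byte{0}, std::byte{1}, std::byte{1},
+                            std::byte{1}, std::byte{1}, std::byte{1},
+                            std::byte{1}, std::byte{1}, std::byte{1}};
+
+auto dec = msg::detail::rle_decoder<bs>{rle.data()};
+auto chunk = dec.make_chunk_enumerator();
+
+std::size_t bit{0};
+chunk.bit_chunk();            // +1: a run of one 1-bit
+bit = chunk.advance(1, bit);  // consume it; bit is now 1
+chunk.bit_chunk();            // -1: a run of one 0-bit
+----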
+
+This is presented so that it is possible to efficiently find:
+
+- the longest run of `0`s
+- or, if none, the shortest run of `1`s.
+
+Remember that we are trying to compute the intersection of all the encoded bitsets:
+where all bitsets have a `1`, we call the associated callback; where any of the bitsets
+has a `0`, we skip that callback.
+
+The `chunk_enumerator` therefore returns a signed value (at least 16 bits wide) indicating:
+
+- a *negative* value - the number of `0`s
+- a *positive* value - the number of `1`s
+- *zero* when past the end (special case)
+
+The `rle_intersect` will initialise an array of `rle_decoder::chunk_enumerator`s
+when it is asked to run a lambda for each `1` bit using the `for_each()` method.
+
+This list is then searched for the _minimum_ chunk value. This will either
+be the largest negative value, and so the longest run of `0`s, or the smallest
+number of `1`s, representing the next run of bits that are set in all bitsets.
+
+The `for_each()` method will then either advance past all the `0`s, or execute the lambda
+for that many set bits, repeating until it has consumed all bits in the encoded bitsets.
+
+This means that the cost of intersecting `N` indices is `N` pointers plus a small amount
+of per-index state tracking the current run of bits and its value.
+
+There is no need to materialise a full bitset at all. This can be a significant memory
+saving if there is a large number of callbacks. The trade-off, of course, is more complex
+iteration of bits to discover the callbacks to run.
+
diff --git a/docs/index.adoc b/docs/index.adoc
index 4e255a29..15ec6dfa 100644
--- a/docs/index.adoc
+++ b/docs/index.adoc
@@ -11,3 +11,4 @@ include::flows.adoc[]
 include::interrupts.adoc[]
 include::match.adoc[]
 include::message.adoc[]
+include::implementation_details.adoc[]
diff --git a/docs/message.adoc b/docs/message.adoc
index 4ef63767..a8d44d27 100644
--- a/docs/message.adoc
+++ b/docs/message.adoc
@@ -181,7 +181,7 @@ cib::service->handle(my_message{"my field"_field = 0x80});
 
 Notice in this case that our callback is defined with a matcher that always
 matches, but also that the field in `my_message` has a matcher that requires it
-to equal `0x80`. Therefore handling the following message will not call the
+to equal `0x80`. Therefore, handling the following message will not call the
 callback:
 [source,cpp]
 ----
@@ -242,7 +242,12 @@ minimal effort at runtime.
 For each field in the `msg::index_spec`, we build a map from field values to
 bitsets, where the values in the bitsets represent callback indices.
 
-NOTE: The bitsets may be run-length encoded: this is a work in progress.
+NOTE: The bitsets may be run-length encoded by using the `rle_indexed_service`
+in place of the `indexed_service`. This may be useful if you have limited space
+and/or a large set of possible callbacks.
+See xref:implementation_details.adoc#run_length_encoded_message_indices[Run Length
+Encoding Implementation Details]
+
 
 Each `indexed_callback` has a matcher that may be an
 xref:match.adoc#_boolean_algebra_with_matchers[arbitrary Boolean matcher
@@ -442,4 +447,4 @@ compile time.
 
 For each callback, we now run the remaining matcher expression to deal with any
 unindexed but constrained fields, and call the callback if it passes. Bob's your
-uncle.
+uncle.
\ No newline at end of file
diff --git a/include/msg/detail/rle_codec.hpp b/include/msg/detail/rle_codec.hpp
index f0fe5019..b54d25f4 100644
--- a/include/msg/detail/rle_codec.hpp
+++ b/include/msg/detail/rle_codec.hpp
@@ -16,29 +16,112 @@
 
 namespace msg::detail {
 
-template <std::size_t N> struct smallest_storage {
-    // select a minimum sized type for indexing into the RLE data blob
-    static CONSTEVAL auto select_index_storage() {
-        if constexpr (N <= std::numeric_limits<std::uint8_t>::max()) {
-            return std::uint8_t{};
-        } else if constexpr (N <= std::numeric_limits<std::uint16_t>::max()) {
-            return std::uint16_t{};
-        } else if constexpr (N <= std::numeric_limits<std::uint32_t>::max()) {
-            return std::uint32_t{};
-        } else {
-            return std::size_t{};
+template <std::size_t N>
+using smallest_storage_type = decltype(stdx::detail::select_storage<N>());
+
+// Captures RLE data for decoding and provides a mechanism to
+// get a single-use enumerator to step through bits in the encoded
+// bitset.
+template <typename BitSetType> class rle_decoder {
+  public:
+    using bitset_type = BitSetType;
+    constexpr static auto num_bits = BitSetType::size();
+
+    constexpr explicit rle_decoder(std::byte const *start_rle_data)
+        : rle_data{start_rle_data} {}
+
+    // A type to allow "iteration" over the RLE encoded data in a way that
+    // can be efficiently used to decode the runs of bits for intersection.
+    //
+    // This will return
+    // - a negative number for the number of consecutive 0's
+    // - a positive number for the number of consecutive 1's
+    // - zero if there are no more bits to decode
+    //
+    // Can traverse only a single time.
+    class chunk_enumerator {
+      public:
+        constexpr static auto num_bits = BitSetType::size();
+
+        constexpr chunk_enumerator() : rle_data{nullptr} {}
+
+        constexpr explicit chunk_enumerator(std::byte const *start_rle_data)
+            : rle_data{start_rle_data} {
+            next_chunk();
+        }
+
+        // Get the current chunk of consecutive bits. -ve values are 0s,
+        // +ve values are 1s. Range is -255...255. Returns 0 if finished.
+        [[nodiscard]] constexpr auto bit_chunk() const -> std::int_fast16_t {
+            return bit_value ? current_run : -current_run;
+        }
+
+        // Advance the bit chunk by `bits` number of bits. This might consume
+        // only a portion of the remaining bits in the chunk, skip to the next
+        // chunk or skip over multiple chunks. The current bit position in the
+        // data stream must be provided by the caller to avoid reading past the
+        // end of the RLE data. We rely on the caller to avoid needing multiple
+        // bit counters when there are multiple chunk_enumerators in play.
+        //
+        // Returns the new current_bit (or num_bits if we pass the end of the
+        // data) after the bits are consumed.
+        constexpr auto advance(std::size_t bits, std::size_t current_bit)
+            -> std::size_t {
+            while (bits > 0 && current_bit < num_bits) {
+                // more available than we are consuming?
+                if (bits < current_run) {
+                    current_run -= static_cast<std::uint8_t>(bits);
+                    return current_bit + bits;
+                }
+
+                // consume all the currently available bits
+                // and get the next chunk
+                bits -= current_run;
+                current_bit += current_run;
+                // only load the next chunk if we are not at the end
+                if (current_bit < num_bits) {
+                    next_chunk();
+                } else {
+                    // no more bits.
+                    current_run = 0;
+                }
+            }
+
+            return current_bit;
+        }
+
+      private:
+        std::byte const *rle_data;
+        // initial load values so the first next_chunk() call in the
+        // constructor starts in the 0's state (pretend we just did some 1s).
+        std::uint8_t current_run{0};
+        bool bit_value{true};
+
+        constexpr void next_chunk() {
+            // skipping the next bit count for a > 255 run?
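+            // (A zero byte encodes a zero-length run of the opposite value; it
+            // only appears when the previous run hit the 255 cap, so the byte
+            // that follows continues the same bit value rather than switching.)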
+            if (*rle_data == std::byte{0}) {
+                // keep same bit_value and skip this encoded byte
+                ++rle_data;
+            } else {
+                // invert bit_value to generate run of opposite bits
+                bit_value = not bit_value;
+            }
+            current_run = static_cast<std::uint8_t>(*rle_data++);
+        }
+    };
+
+    [[nodiscard]] constexpr auto make_chunk_enumerator() const
+        -> chunk_enumerator {
+        return chunk_enumerator{rle_data};
     }
-    using type = decltype(select_index_storage());
+  private:
+    std::byte const *rle_data;
 };
 
-template <std::size_t N>
-using smallest_storage_type = typename smallest_storage<N>::type;
-
 template <typename BitSetType> struct rle_codec {
     using bitset_type = BitSetType;
-    constexpr static auto num_bits = BitSetType::size();
+    constexpr static auto const num_bits = BitSetType::size();
 
     // assume worst case of each bitmap being alternating bit values
     using max_rle_data_type = stdx::cx_vector<std::byte, num_bits + 1>;
@@ -48,6 +131,7 @@ template <typename BitSetType> struct rle_codec {
         max_rle_data_type data{};
         std::size_t count{0};
         bool last{false};
+
         for (std::size_t bit{0}; bit < num_bits; ++bit) {
             if (bitset[bit] != last) {
                 data.push_back(static_cast<std::byte>(count));
@@ -71,27 +155,140 @@ template <typename BitSetType> struct rle_codec {
     constexpr static auto decode(std::byte const *rle_data) -> bitset_type {
         bitset_type result{};
+        auto decoder =
+            rle_decoder<bitset_type>{rle_data}.make_chunk_enumerator();
         std::size_t bit{0};
-        bool bit_val{false};
-        // accumulate the correct total number of bits
+        while (bit < decoder.num_bits) {
+            auto chunk_bits = decoder.bit_chunk();
+            if (chunk_bits < 0) {
+                // skip 0's
+                bit =
+                    decoder.advance(static_cast<std::size_t>(-chunk_bits), bit);
+            } else {
+                auto temp_bit = bit;
+                bit =
+                    decoder.advance(static_cast<std::size_t>(chunk_bits), bit);
+
+                // add the 1s
+                while (chunk_bits-- > 0) {
+                    result.set(temp_bit++);
+                }
+            }
+        }
+
+        return result;
+    }
+};
+
+template <typename BitSetType, std::same_as<rle_decoder<BitSetType>>... Decoders>
+    requires(sizeof...(Decoders) > 0)
+struct rle_intersect {
+    using bitset_type = BitSetType;
+    using decoder_type = rle_decoder<BitSetType>;
+    using chunk_type = typename decoder_type::chunk_enumerator;
+    constexpr static auto const num_bits = decoder_type::num_bits;
+    constexpr static auto const num_decoders = sizeof...(Decoders);
+
+    std::array<decoder_type, num_decoders> decoder_list;
+
+    constexpr explicit rle_intersect(Decoders &&...decoders)
+        : decoder_list{std::forward<Decoders>(decoders)...} {}
+
+    // iterate over set bits, passing them to bool (&f)(auto bit_number).
+    // if f returns true, we abort and return true indicating early exit,
+    // otherwise return false to indicate no early abort of iteration
+    template <typename F> constexpr auto for_each_until(F &&f) const -> bool {
+        // allocate a set of chunk_enumerators, one for each decoder,
+        // so that we can traverse the bit set intersection
+        stdx::cx_vector<chunk_type, num_decoders> chunks{};
+        for (auto &d : decoder_list) {
+            chunks.push_back(d.make_chunk_enumerator());
+        }
+
+        // advance all chunks by a number of bits and return the new
+        // current bit position
+        auto advance = [&](std::size_t bits,
+                           std::size_t current_bit) -> std::size_t {
+            std::size_t new_current{current_bit};
+            for (auto &c : chunks) {
+                new_current = c.advance(bits, current_bit);
+            }
+            return new_current;
+        };
+
+        // comparison of chunk bit counts
+        auto min_chunk = [](chunk_type const &a, chunk_type const &b) -> bool {
+            return a.bit_chunk() < b.bit_chunk();
+        };
+
+        std::size_t bit{0};
         while (bit < num_bits) {
-            // get the next number of consecutive bits
-            auto cur_bits = static_cast<std::size_t>(*rle_data++);
-            if (bit_val) {
-                // write cur_bits of 1s
-                while (cur_bits-- > 0) {
-                    result.set(bit++);
+            // the min "bit_chunk" item in the chunk list is the smallest run
+            // length chunk value. If that value is -ve, this is a run of zeros,
+            // and so we can immediately consume that many bits, because
+            // 0 AND X = 0. Otherwise, if +ve, it is the smallest number of
+            // consecutive 1s, and all other chunks will contain more 1s, so
+            // we can add that many ones to the result.
+            //
+            // NOTE: zero length chunks are ignored as the bits counter will end
+            // the loop when each rle_decoder finishes at the same time
+            auto min_chunk_bits =
+                (*std::min_element(chunks.begin(), chunks.end(), min_chunk))
+                    .bit_chunk();
+
+            if (min_chunk_bits > 0) {
+                // this will be the minimum number of 1s and all other
+                // chunks must also be 1s.
+                auto temp_bit = bit;
+                bit = advance(static_cast<std::size_t>(min_chunk_bits), bit);
+                while (min_chunk_bits-- > 0) {
+                    // call F, but abort if it indicates abort requested
+                    if (f(temp_bit++)) {
+                        return true; // early abort
+                    }
                 }
             } else {
-                // skip cur_bits of 0s
-                bit += cur_bits;
+                // otherwise it was the maximum run of 0s; no need to invoke F
+                bit = advance(static_cast<std::size_t>(-min_chunk_bits), bit);
             }
-            bit_val = !bit_val;
         }
+        return false; // full traversal
+    }
+
+    template <typename F> constexpr auto for_each(F &&f) const -> F {
+        for_each_until([&](auto i) {
+            f(i);
+            return false;
+        });
+        return std::forward<F>(f);
+    }
+
+    template <typename F>
+    friend constexpr auto for_each(F &&f, rle_intersect const &intersect) -> F {
+        return intersect.for_each(std::forward<F>(f));
+    }
+
+    [[nodiscard]] constexpr auto any() const -> bool {
+        // iterate until we find at least a single bit.
+        return for_each_until([](auto /*unused*/) { return true; });
+    }
+
+    [[nodiscard]] constexpr auto none() const -> bool { return not any(); }
+
+    [[nodiscard]] constexpr auto get_bitset() const -> bitset_type {
+        bitset_type result{};
+        for_each([&](auto i) { result.set(i); });
         return result;
     }
 };
 
+// at least 1 decoder is required
+template <typename Decoder, std::same_as<Decoder>... Others>
+rle_intersect(Decoder d, Others... others)
+    -> rle_intersect<typename Decoder::bitset_type, Decoder, Others...>;
+
 } // namespace msg::detail
diff --git a/include/msg/rle_indexed_builder.hpp b/include/msg/rle_indexed_builder.hpp
index 3de8ffcc..d042ed17 100644
--- a/include/msg/rle_indexed_builder.hpp
+++ b/include/msg/rle_indexed_builder.hpp
@@ -80,10 +80,17 @@ template struct rle_storage {
         return codec_type::decode(std::next(data.begin(), idx.offset));
     }
 
+    template
+    constexpr auto
+    decode(rle_index idx) const
+        -> rle_decoder {
+        return rle_decoder{std::next(data.begin(), idx.offset)};
+    }
+
     storage_type data;
 };
 
-// Build the encoded RLE data with a max lenght of MaxDataLen
+// Build the encoded RLE data with a max length of MaxDataLen
 // Take the opportunity to reuse byte sequences where possible.
 template <std::size_t MaxDataLen> struct rle_storage_builder {
     using offset_type = detail::smallest_storage_type<MaxDataLen>;
diff --git a/include/msg/rle_indexed_handler.hpp b/include/msg/rle_indexed_handler.hpp
index d6952f43..bbd42af1 100644
--- a/include/msg/rle_indexed_handler.hpp
+++ b/include/msg/rle_indexed_handler.hpp
@@ -1,6 +1,7 @@
 #pragma once
 
 #include
+#include
 
 #include
 
@@ -12,10 +13,9 @@ struct rle_indices : IndicesT... {
         : IndicesT{index_args}..., storage{data} {}
 
     constexpr auto operator()(auto const &data) const {
-        // TODO: efficient bitand that doesn't need to materialise full bitset
-
-        // use the index to decode the bitset from storage
-        return (storage.get(this->IndicesT::operator()(data)) & ...);
+        // proxy to allow intersection without materialising a full bitset.
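+        // The returned proxy simply holds one rle_decoder per index; callers
+        // can walk the intersection with for_each()/for_each_until(), test it
+        // with any()/none(), or still materialise it via get_bitset().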
+ return detail::rle_intersect{ + storage.decode(this->IndicesT::operator()(data))...}; } // index entries will map into this storage to decode RLE data diff --git a/test/msg/detail/rle_codec.cpp b/test/msg/detail/rle_codec.cpp index c28fb72a..8e7813bd 100644 --- a/test/msg/detail/rle_codec.cpp +++ b/test/msg/detail/rle_codec.cpp @@ -1,5 +1,7 @@ #include +#include + #include #include @@ -9,7 +11,7 @@ using Catch::Matchers::RangeEquals; namespace { -auto operator"" _b(unsigned long long int v) -> std::byte { +CONSTEVAL auto operator"" _b(unsigned long long v) -> std::byte { return static_cast(v); } } // namespace @@ -24,45 +26,206 @@ TEST_CASE("rle_codec can encode all zeros", "[rle_codec]") { using bs = stdx::bitset<10>; using codec = rle_codec; - CHECK_THAT(codec::encode(bs{}), - RangeEquals(std::to_array({10_b}))); + CHECK_THAT(codec::encode(bs{}), RangeEquals(std::array{10_b})); } TEST_CASE("rle_codec can encode all ones", "[rle_codec]") { using bs = stdx::bitset<12>; using codec = rle_codec; - CHECK_THAT(codec::encode(~bs{}), - RangeEquals(std::to_array({0_b, 12_b}))); + CHECK_THAT(codec::encode(~bs{}), RangeEquals(std::array{0_b, 12_b})); } TEST_CASE("rle_codec can encode all zeros for large bit count", "[rle_codec]") { using bs = stdx::bitset<512>; using codec = rle_codec; - CHECK_THAT(codec::encode(bs{}), RangeEquals(std::to_array( - {255_b, 0_b, 255_b, 0_b, 2_b}))); + CHECK_THAT(codec::encode(bs{}), + RangeEquals(std::array{255_b, 0_b, 255_b, 0_b, 2_b})); } TEST_CASE("rle_codec can encode all ones for large bit count", "[rle_codec]") { using bs = stdx::bitset<512>; using codec = rle_codec; - CHECK_THAT(codec::encode(~bs{}), RangeEquals(std::to_array( - {0_b, 255_b, 0_b, 255_b, 0_b, 2_b}))); + CHECK_THAT(codec::encode(~bs{}), + RangeEquals(std::array{0_b, 255_b, 0_b, 255_b, 0_b, 2_b})); } TEST_CASE("rle_codec can encode alternating bits", "[rle_codec]") { using bs = stdx::bitset<8>; using codec = rle_codec; - CHECK_THAT(codec::encode(bs{stdx::place_bits, 0, 2, 4, 6}), - RangeEquals(std::to_array( - {0_b, 1_b, 1_b, 1_b, 1_b, 1_b, 1_b, 1_b, 1_b}))); + CHECK_THAT( + codec::encode(bs{stdx::place_bits, 0, 2, 4, 6}), + RangeEquals(std::array{0_b, 1_b, 1_b, 1_b, 1_b, 1_b, 1_b, 1_b, 1_b})); CHECK_THAT(codec::encode(bs{stdx::place_bits, 1, 3, 5, 7}), - RangeEquals(std::to_array( - {1_b, 1_b, 1_b, 1_b, 1_b, 1_b, 1_b, 1_b}))); + RangeEquals(std::array{1_b, 1_b, 1_b, 1_b, 1_b, 1_b, 1_b, 1_b})); +} + +TEST_CASE("rle_decoder can iterate chunks 1", "[rle_codec]") { + using bs = stdx::bitset<8>; + using decoder = rle_decoder; + + auto rle = std::array{1_b, 1_b, 1_b, 1_b, 1_b, 1_b, 1_b, 1_b}; + + decoder dec{rle.data()}; + auto chunk = dec.make_chunk_enumerator(); + std::size_t bit{0}; + + REQUIRE(chunk.bit_chunk() == -1); + + bit = chunk.advance(1, bit); + REQUIRE(chunk.bit_chunk() == 1); + REQUIRE(bit == 1); + + bit = chunk.advance(1, bit); + REQUIRE(chunk.bit_chunk() == -1); + REQUIRE(bit == 2); + + bit = chunk.advance(1, bit); + REQUIRE(chunk.bit_chunk() == 1); + REQUIRE(bit == 3); + + bit = chunk.advance(1, bit); + REQUIRE(chunk.bit_chunk() == -1); + REQUIRE(bit == 4); + + bit = chunk.advance(1, bit); + REQUIRE(chunk.bit_chunk() == 1); + REQUIRE(bit == 5); + + bit = chunk.advance(1, bit); + REQUIRE(chunk.bit_chunk() == -1); + REQUIRE(bit == 6); + + bit = chunk.advance(1, bit); + REQUIRE(chunk.bit_chunk() == 1); + REQUIRE(bit == 7); + + bit = chunk.advance(1, bit); + REQUIRE(chunk.bit_chunk() == 0); + REQUIRE(bit == 8); +} + +TEST_CASE("rle_decoder can iterate chunks 2", "[rle_codec]") { + using 
bs = stdx::bitset<8>; + using decoder = rle_decoder; + + auto rle = std::array{0_b, 2_b, 2_b, 2_b, 0_b, 2_b}; + decoder dec{rle.data()}; + auto chunk = dec.make_chunk_enumerator(); + std::size_t bit{0}; + + REQUIRE(chunk.bit_chunk() == 2); + + bit = chunk.advance(2, bit); + REQUIRE(chunk.bit_chunk() == -2); + REQUIRE(bit == 2); + + bit = chunk.advance(2, bit); + REQUIRE(chunk.bit_chunk() == 2); + REQUIRE(bit == 4); + + bit = chunk.advance(2, bit); + REQUIRE(chunk.bit_chunk() == 2); + REQUIRE(bit == 6); + + bit = chunk.advance(2, bit); + REQUIRE(chunk.bit_chunk() == 0); + REQUIRE(bit == 8); +} + +TEST_CASE("rle_decoder can iterate chunks 3", "[rle_codec]") { + using bs = stdx::bitset<1000>; + using decoder = rle_decoder; + + auto rle = std::array{255_b, 0_b, 255_b, 0_b, 255_b, 235_b}; + decoder dec{rle.data()}; + auto chunk = dec.make_chunk_enumerator(); + + std::size_t bit{0}; + + REQUIRE(chunk.bit_chunk() == -255); + + bit = chunk.advance(510, bit); + REQUIRE(chunk.bit_chunk() == -255); + REQUIRE(bit == 510); + + bit = chunk.advance(255, bit); + REQUIRE(chunk.bit_chunk() == 235); + REQUIRE(bit == 765); + + bit = chunk.advance(235, bit); + REQUIRE(chunk.bit_chunk() == 0); + REQUIRE(bit == 1000); +} + +TEST_CASE("rle_decoder can iterate sub-chunks", "[rle_codec]") { + using bs = stdx::bitset<8>; + using decoder = rle_decoder; + + auto rle = std::array{0_b, 2_b, 2_b, 2_b, 0_b, 2_b}; + decoder dec{rle.data()}; + auto chunk = dec.make_chunk_enumerator(); + std::size_t bit{0}; + + REQUIRE(chunk.bit_chunk() == 2); + + bit = chunk.advance(1, bit); + REQUIRE(chunk.bit_chunk() == 1); + REQUIRE(bit == 1); + + bit = chunk.advance(1, bit); + REQUIRE(chunk.bit_chunk() == -2); + REQUIRE(bit == 2); + + bit = chunk.advance(1, bit); + REQUIRE(chunk.bit_chunk() == -1); + REQUIRE(bit == 3); + + bit = chunk.advance(1, bit); + REQUIRE(chunk.bit_chunk() == 2); + REQUIRE(bit == 4); + + bit = chunk.advance(1, bit); + REQUIRE(chunk.bit_chunk() == 1); + REQUIRE(bit == 5); + + bit = chunk.advance(1, bit); + REQUIRE(chunk.bit_chunk() == 2); + REQUIRE(bit == 6); + + bit = chunk.advance(1, bit); + REQUIRE(chunk.bit_chunk() == 1); + REQUIRE(bit == 7); + + bit = chunk.advance(1, bit); + // we are now out of bits and should get zero back + // and not attempt to read beyond end of rle data + REQUIRE(chunk.bit_chunk() == 0); + REQUIRE(bit == 8); + + bit = chunk.advance(10, bit); + REQUIRE(chunk.bit_chunk() == 0); + REQUIRE(bit == 8); +} + +TEST_CASE("rle_decoder stops after num_bits", "[rle_codec]") { + using bs = stdx::bitset<8>; + using decoder = rle_decoder; + + // contains extra data which should be ignored + auto rle = std::array{0_b, 2_b, 2_b, 2_b, 0_b, 2_b, 255_b, 255_b}; + decoder dec{rle.data()}; + auto chunk = dec.make_chunk_enumerator(); + std::size_t bit{0}; + + REQUIRE(chunk.bit_chunk() == 2); + bit = chunk.advance(100, bit); + REQUIRE(chunk.bit_chunk() == 0); + REQUIRE(bit == 8); } TEST_CASE("rle_codec can decode", "[rle_codec]") { @@ -79,4 +242,100 @@ TEST_CASE("rle_codec can decode", "[rle_codec]") { CHECK(codec::decode(codec::encode(bone).cbegin()) == bone); } +TEST_CASE("rle_codec can decode multiple zero pads", "[rle_codec]") { + using bs = stdx::bitset<2000>; + using codec = rle_codec; + auto const b1 = bs{stdx::place_bits, 1, 2, 3, 499, 1999}; + auto const b2 = bs{stdx::place_bits, 2, 3, 5, 7, 11, 13, 17, 400, 1999}; + auto const bzero = bs{}; + auto const bone = ~bs{}; + + CHECK(codec::decode(codec::encode(b1).cbegin()) == b1); + CHECK(codec::decode(codec::encode(b2).cbegin()) == b2); + 
CHECK(codec::decode(codec::encode(bzero).cbegin()) == bzero); + CHECK(codec::decode(codec::encode(bone).cbegin()) == bone); +} + +TEST_CASE("rle_intersect works with 1 rle bitset", "[rle_intersect]") { + using bs = stdx::bitset<8>; + using codec = rle_codec; + using decoder_t = rle_decoder; + + auto rle_1 = codec::encode(bs{"00011100"}); + + auto dec_1 = decoder_t{rle_1.cbegin()}; + + auto intersection = + rle_intersect{std::forward(dec_1)}.get_bitset(); + + CHECK(intersection == bs{"00011100"}); +} + +TEST_CASE("rle_intersect works with 2 rle bitsets", "[rle_intersect]") { + using bs = stdx::bitset<8>; + using codec = rle_codec; + using decoder_t = rle_decoder; + + auto rle_1 = codec::encode(bs{"00011100"}); + auto rle_2 = codec::encode(bs{"11110101"}); + + auto dec_1 = decoder_t{rle_1.cbegin()}; + auto dec_2 = decoder_t{rle_2.cbegin()}; + + auto intersection = rle_intersect{std::forward(dec_1), + std::forward(dec_2)} + .get_bitset(); + + CHECK(intersection == bs{"00010100"}); +} + +TEST_CASE("rle_intersect works with many rle bitsets", "[rle_intersect]") { + using bs = stdx::bitset<32>; + using codec = rle_codec; + using decoder_t = rle_decoder; + + auto rle_1 = codec::encode(bs{"11111111111111110000000000000000"}); + auto rle_2 = codec::encode(bs{"11111111000000001111111100000000"}); + auto rle_3 = codec::encode(bs{"11110000111100001111000011110000"}); + auto rle_4 = codec::encode(bs{"11001100110011001100110011001100"}); + auto rle_5 = codec::encode(bs{"10101010101010101010101010101010"}); + auto expected = /* pad */ bs{"10000000000000000000000000000000"}; + + auto dec_1 = decoder_t{rle_1.cbegin()}; + auto dec_2 = decoder_t{rle_2.cbegin()}; + auto dec_3 = decoder_t{rle_3.cbegin()}; + auto dec_4 = decoder_t{rle_4.cbegin()}; + auto dec_5 = decoder_t{rle_5.cbegin()}; + + auto intersection = rle_intersect{std::forward(dec_1), + std::forward(dec_2), + std::forward(dec_3), + std::forward(dec_4), + std::forward(dec_5)} + .get_bitset(); + + CHECK(intersection == expected); +} + +TEST_CASE("rle_intersect for_each()", "[rle_intersect]") { + using bs = stdx::bitset<8>; + using codec = rle_codec; + using decoder_t = rle_decoder; + + auto rle_1 = codec::encode(bs{"00011100"}); + auto rle_2 = codec::encode(bs{"11110101"}); + auto expected = /* pad */ bs{"00010100"}; + + auto dec_1 = decoder_t{rle_1.cbegin()}; + auto dec_2 = decoder_t{rle_2.cbegin()}; + + auto intersection = rle_intersect{std::forward(dec_1), + std::forward(dec_2)}; + + bs result{}; + intersection.for_each([&](auto i) { result.set(i); }); + + CHECK(result == expected); +} + } // namespace msg::detail