Skip to content

Commit

Permalink
__len__
Browse files Browse the repository at this point in the history
  • Loading branch information
squeaky-pl committed Nov 5, 2024
1 parent c499c0c commit db189f0
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 29 deletions.
81 changes: 55 additions & 26 deletions inbox/uid_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,42 +7,65 @@
MAX_UINT32 = 2**32 - 1


class CompressRanges:
    """Iterable that collapses integers into sorted singletons and ranges.

    Iterating sorts the wrapped iterable, skips repeated values and yields
    either a bare ``int`` (an isolated value) or an inclusive
    ``(first, last)`` tuple (a run of consecutive values).

    The number of distinct values is only known once an iteration has run
    to completion; until then reading ``length`` raises ``TypeError``.
    """

    def __init__(self, iterable: "Iterable[int]"):
        self.iterable = iterable
        # Stays None until a full pass over __iter__ has completed.
        self.__length: "int | None" = None

    def __iter__(self) -> "Iterator[int | tuple[int, int]]":
        """Yield the compressed items in ascending order.

        Raises:
            AssertionError: if a value is outside ``1..MAX_UINT32``.
        """
        length = 0
        # Inclusive bounds of the run currently being accumulated.
        start: "int | None" = None
        end: "int | None" = None

        for element in sorted(self.iterable):
            assert 0 < element <= MAX_UINT32

            if start is None:
                # First value: open the initial run.
                start = end = element
                length += 1
            elif element == end:
                # Duplicate of the last value seen; not counted again.
                continue
            elif element - end == 1:
                # Directly adjacent: extend the current run.
                end = element
                length += 1
            else:
                # Gap: flush the finished run and open a new one.
                yield start if start == end else (start, end)
                start = end = element
                length += 1

        if start is not None:
            yield start if start == end else (start, end)

        # Only published after exhaustion; an abandoned iteration leaves the
        # previous value (or None) in place.
        self.__length = length

    @property
    def length(self) -> int:
        """Number of distinct values counted by the last completed iteration.

        Raises:
            TypeError: if no iteration has run to completion yet.
        """
        if self.__length is None:
            raise TypeError("Length is not available until the iterator is exhausted")
        return self.__length


def compress_ranges(iterable: "Iterable[int]") -> "Iterable[int | tuple[int, int]]":
    """Return an iterable of compressed singletons and ranges for *iterable*.

    Function-style entry point that wraps *iterable* in ``CompressRanges``.
    """
    return CompressRanges(iterable)


def decompress_ranges(
Expand Down Expand Up @@ -125,7 +148,9 @@ def tokenize_backwards(stream: bytes) -> "Iterable[bytes]":

class UidSet(Iterable[int]):
def __init__(self, iterable: "Iterable[int]", *, compress=True):
_data = b"".join(encode_compressed_ranges(compress_ranges(iterable)))
compress_ranges_iterator = CompressRanges(iterable)
_data = b"".join(encode_compressed_ranges(compress_ranges_iterator))
self.__length = compress_ranges_iterator.length

# TODO: use a generator here

Expand Down Expand Up @@ -161,6 +186,9 @@ def __reversed__(self) -> "Iterator[int]":
)
)

def __len__(self) -> int:
return self.__length


def make_data(length: int, ratio: float) -> list[int]:
    """Return a random ascending sample of integers for benchmarking.

    Candidates are ``1..int(length / ratio)``; each one is kept when a fresh
    ``random.random()`` draw is below *ratio*, so the result holds roughly
    *length* values on average.

    Raises:
        ZeroDivisionError: if *ratio* is zero.
    """
    upper = int(length / ratio)
    return [i for i in range(1, upper + 1) if random.random() < ratio]
Expand All @@ -176,6 +204,7 @@ def main():
start = time.monotonic()
ten_uid_set = UidSet(ten_list, compress=False)
end = time.monotonic()
assert len(ten_uid_set) == len(ten_list)
print(f"Time to create: {end - start:.2f}")
print("Uid set length:", asizeof(ten_uid_set))
print(f"Proportion: {asizeof(ten_uid_set) / asizeof(ten_list):.2f}")
Expand Down
12 changes: 9 additions & 3 deletions tests/test_uid_set.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import pytest

from inbox.uid_set import (
CompressRanges,
UidSet,
compress_ranges,
decode_compressed_ranges,
decompress_ranges,
encode_compressed_ranges,
Expand All @@ -25,7 +25,9 @@
),
)
def test_compress_decompress_ranges(example, expected):
    """Check compression output, the reported length and the round trip."""
    compressed = CompressRanges(example)
    assert list(compressed) == expected
    # CompressRanges skips repeated values when counting, so compare against
    # the deduplicated input rather than the raw example.
    assert compressed.length == len(set(example))
    assert list(decompress_ranges(expected)) == sorted(set(example))


Expand Down Expand Up @@ -54,6 +56,8 @@ def test_encode_compressed_ranges(compressed_ranges, encoded_ranges):
[1, 2],
[1, 2, 3],
[3, 1, 2],
[1, 1, 1],
[1, 2, 2, 5],
[1, 2, 4],
[1, 3, 4],
[1, 2, 4, 5],
Expand All @@ -62,4 +66,6 @@ def test_encode_compressed_ranges(compressed_ranges, encoded_ranges):
),
)
def test_uid_set(iterable):
    """Check that a UidSet holds exactly the distinct input values."""
    uid_set = UidSet(iterable)
    assert set(uid_set) == set(iterable)
    # Duplicates in the input must not be counted twice.
    assert len(uid_set) == len(set(iterable))

0 comments on commit db189f0

Please sign in to comment.