From 6602f70dd63703b9953d43edb9aca1a719a8f7a3 Mon Sep 17 00:00:00 2001 From: rudolfix Date: Tue, 10 Dec 2024 21:41:17 +0100 Subject: [PATCH 1/3] checks notebook presence before finding userdata (#2117) --- dlt/common/configuration/providers/toml.py | 6 ++++++ .../configuration/test_toml_provider.py | 21 ++++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/dlt/common/configuration/providers/toml.py b/dlt/common/configuration/providers/toml.py index 3636565fae..e586fef225 100644 --- a/dlt/common/configuration/providers/toml.py +++ b/dlt/common/configuration/providers/toml.py @@ -124,6 +124,12 @@ def _read_google_colab_secrets(self, name: str, file_name: str) -> tomlkit.TOMLD """Try to load the toml from google colab userdata object""" try: from google.colab import userdata + from dlt.common.runtime.exec_info import is_notebook + + # make sure we work in interactive mode (get_ipython() is available) + # when dlt cli is run, userdata is available but without a kernel + if not is_notebook(): + return None try: return tomlkit.loads(userdata.get(file_name)) diff --git a/tests/common/configuration/test_toml_provider.py b/tests/common/configuration/test_toml_provider.py index 481c21b7bb..9538849976 100644 --- a/tests/common/configuration/test_toml_provider.py +++ b/tests/common/configuration/test_toml_provider.py @@ -4,6 +4,7 @@ import yaml from typing import Any, Dict, Type import datetime # noqa: I251 +from unittest.mock import Mock import dlt from dlt.common import pendulum, json @@ -538,11 +539,28 @@ def loader() -> Dict[str, Any]: def test_colab_toml() -> None: + import builtins + # use a path without any settings files try: sys.path.append("tests/common/cases/modules") - # secrets are in user data + + # ipython not present provider: SettingsTomlProvider = SecretsTomlProvider("tests/common/null", global_dir=None) + assert provider.is_empty + + get_ipython_m = Mock() + get_ipython_m.return_value = "google.colab.Shell" + # make it available to all modules + builtins.get_ipython = get_ipython_m # type: ignore[attr-defined] + # test mock + assert get_ipython() == "google.colab.Shell" # type: ignore[name-defined] # noqa + from dlt.common.runtime.exec_info import is_notebook + + assert is_notebook() + + # secrets are in user data + provider = SecretsTomlProvider("tests/common/null", global_dir=None) assert provider.to_toml() == 'api_key="api"' # config is not in userdata provider = ConfigTomlProvider("tests/common/null", "unknown") @@ -551,4 +569,5 @@ def test_colab_toml() -> None: provider = SecretsTomlProvider("tests/common/cases/configuration/.dlt", global_dir=None) assert provider.get_value("secret_value", str, None) == ("2137", "secret_value") finally: + delattr(builtins, "get_ipython") sys.path.pop() From 51b11d24acf579d4f12abc15f2b661778f2995d9 Mon Sep 17 00:00:00 2001 From: Steinthor Palsson Date: Tue, 10 Dec 2024 17:35:22 -0500 Subject: [PATCH 2/3] Add open/closed range arguments for incremental (#1991) * Add open/closed range arguments for incremental * Docs for incremental range args * Docstring * Typo * Ensure deduplication is disabled when range_start=='open' * Cache transformer settings --- dlt/common/incremental/typing.py | 4 + dlt/extract/incremental/__init__.py | 60 +++-- dlt/extract/incremental/transform.py | 75 ++++-- dlt/sources/sql_database/helpers.py | 12 +- .../verified-sources/sql_database/advanced.md | 49 +++- .../docs/general-usage/incremental-loading.md | 5 +- tests/extract/test_incremental.py | 111 +++++++- 
.../load/sources/sql_database/test_helpers.py | 237 ++++++++++++------ .../sql_database/test_sql_database_source.py | 17 +- 9 files changed, 434 insertions(+), 136 deletions(-) diff --git a/dlt/common/incremental/typing.py b/dlt/common/incremental/typing.py index 460e2f234b..2ca981bff0 100644 --- a/dlt/common/incremental/typing.py +++ b/dlt/common/incremental/typing.py @@ -8,6 +8,8 @@ LastValueFunc = Callable[[Sequence[TCursorValue]], Any] OnCursorValueMissing = Literal["raise", "include", "exclude"] +TIncrementalRange = Literal["open", "closed"] + class IncrementalColumnState(TypedDict): initial_value: Optional[Any] @@ -26,3 +28,5 @@ class IncrementalArgs(TypedDict, total=False): allow_external_schedulers: Optional[bool] lag: Optional[Union[float, int]] on_cursor_value_missing: Optional[OnCursorValueMissing] + range_start: Optional[TIncrementalRange] + range_end: Optional[TIncrementalRange] diff --git a/dlt/extract/incremental/__init__.py b/dlt/extract/incremental/__init__.py index 28d33bb71f..5e7bae49c6 100644 --- a/dlt/extract/incremental/__init__.py +++ b/dlt/extract/incremental/__init__.py @@ -42,6 +42,7 @@ LastValueFunc, OnCursorValueMissing, IncrementalArgs, + TIncrementalRange, ) from dlt.extract.items import SupportsPipe, TTableHintTemplate, ItemTransform from dlt.extract.incremental.transform import ( @@ -104,6 +105,11 @@ class Incremental(ItemTransform[TDataItem], BaseConfiguration, Generic[TCursorVa Note that if a logical "end date" is present, "end_value" will also be set, which means that resource state is not used and exactly this range of dates will be loaded on_cursor_value_missing: Specify what happens when the cursor_path does not exist in a record or a record has `None` at the cursor_path: raise, include, exclude lag: Optional value used to define a lag or attribution window. For datetime cursors, this is interpreted as seconds. For other types, it uses the + or - operator depending on the last_value_func. + range_start: Decide whether the incremental filtering range is `open` or `closed` on the start value side. Default is `closed`. + Setting this to `open` means that items with the same cursor value as the last value from the previous run (or `initial_value`) are excluded from the result. + The `open` range disables deduplication logic, so it can serve as an optimization when you know cursors don't overlap between pipeline runs. + range_end: Decide whether the incremental filtering range is `open` or `closed` on the end value side. Default is `open` (the exact `end_value` is excluded). + Setting this to `closed` means that items with the exact same cursor value as the `end_value` are included in the result.
""" # this is config/dataclass so declare members @@ -116,6 +122,8 @@ class Incremental(ItemTransform[TDataItem], BaseConfiguration, Generic[TCursorVa on_cursor_value_missing: OnCursorValueMissing = "raise" lag: Optional[float] = None duplicate_cursor_warning_threshold: ClassVar[int] = 200 + range_start: TIncrementalRange = "closed" + range_end: TIncrementalRange = "open" # incremental acting as empty EMPTY: ClassVar["Incremental[Any]"] = None @@ -132,6 +140,8 @@ def __init__( allow_external_schedulers: bool = False, on_cursor_value_missing: OnCursorValueMissing = "raise", lag: Optional[float] = None, + range_start: TIncrementalRange = "closed", + range_end: TIncrementalRange = "open", ) -> None: # make sure that path is valid if cursor_path: @@ -174,9 +184,11 @@ def __init__( self.start_out_of_range: bool = False """Becomes true on the first item that is out of range of `start_value`. I.e. when using `max` this is a value that is lower than `start_value`""" - self._transformers: Dict[str, IncrementalTransform] = {} + self._transformers: Dict[Type[IncrementalTransform], IncrementalTransform] = {} self._bound_pipe: SupportsPipe = None """Bound pipe""" + self.range_start = range_start + self.range_end = range_end @property def primary_key(self) -> Optional[TTableHintTemplate[TColumnNames]]: @@ -190,22 +202,6 @@ def primary_key(self, value: str) -> None: for transform in self._transformers.values(): transform.primary_key = value - def _make_transforms(self) -> None: - types = [("arrow", ArrowIncremental), ("json", JsonIncremental)] - for dt, kls in types: - self._transformers[dt] = kls( - self.resource_name, - self.cursor_path, - self.initial_value, - self.start_value, - self.end_value, - self.last_value_func, - self._primary_key, - set(self._cached_state["unique_hashes"]), - self.on_cursor_value_missing, - self.lag, - ) - @classmethod def from_existing_state( cls, resource_name: str, cursor_path: str @@ -489,7 +485,8 @@ def bind(self, pipe: SupportsPipe) -> "Incremental[TCursorValue]": ) # cache state self._cached_state = self.get_state() - self._make_transforms() + # Clear transforms so we get new instances + self._transformers.clear() return self def can_close(self) -> bool: @@ -520,15 +517,34 @@ def __str__(self) -> str: f" {self.last_value_func}" ) + def _make_or_get_transformer(self, cls: Type[IncrementalTransform]) -> IncrementalTransform: + if transformer := self._transformers.get(cls): + return transformer + transformer = self._transformers[cls] = cls( + self.resource_name, + self.cursor_path, + self.initial_value, + self.start_value, + self.end_value, + self.last_value_func, + self._primary_key, + set(self._cached_state["unique_hashes"]), + self.on_cursor_value_missing, + self.lag, + self.range_start, + self.range_end, + ) + return transformer + def _get_transformer(self, items: TDataItems) -> IncrementalTransform: # Assume list is all of the same type for item in items if isinstance(items, list) else [items]: if is_arrow_item(item): - return self._transformers["arrow"] + return self._make_or_get_transformer(ArrowIncremental) elif pandas is not None and isinstance(item, pandas.DataFrame): - return self._transformers["arrow"] - return self._transformers["json"] - return self._transformers["json"] + return self._make_or_get_transformer(ArrowIncremental) + return self._make_or_get_transformer(JsonIncremental) + return self._make_or_get_transformer(JsonIncremental) def __call__(self, rows: TDataItems, meta: Any = None) -> Optional[TDataItems]: if rows is None: diff --git 
a/dlt/extract/incremental/transform.py b/dlt/extract/incremental/transform.py index 22b1194b51..1d213e26c2 100644 --- a/dlt/extract/incremental/transform.py +++ b/dlt/extract/incremental/transform.py @@ -13,7 +13,12 @@ IncrementalPrimaryKeyMissing, IncrementalCursorPathHasValueNone, ) -from dlt.common.incremental.typing import TCursorValue, LastValueFunc, OnCursorValueMissing +from dlt.common.incremental.typing import ( + TCursorValue, + LastValueFunc, + OnCursorValueMissing, + TIncrementalRange, +) from dlt.extract.utils import resolve_column_value from dlt.extract.items import TTableHintTemplate @@ -57,6 +62,8 @@ def __init__( unique_hashes: Set[str], on_cursor_value_missing: OnCursorValueMissing = "raise", lag: Optional[float] = None, + range_start: TIncrementalRange = "closed", + range_end: TIncrementalRange = "open", ) -> None: self.resource_name = resource_name self.cursor_path = cursor_path @@ -71,6 +78,9 @@ def __init__( self.start_unique_hashes = set(unique_hashes) self.on_cursor_value_missing = on_cursor_value_missing self.lag = lag + self.range_start = range_start + self.range_end = range_end + # compile jsonpath self._compiled_cursor_path = compile_path(cursor_path) # for simple column name we'll fallback to search in dict @@ -107,6 +117,8 @@ def __call__( def deduplication_disabled(self) -> bool: """Skip deduplication when length of the key is 0 or if lag is applied.""" # disable deduplication if end value is set - state is not saved + if self.range_start == "open": + return True if self.end_value is not None: return True # disable deduplication if lag is applied - destination must deduplicate ranges @@ -191,10 +203,10 @@ def __call__( # Filter end value ranges exclusively, so in case of "max" function we remove values >= end_value if self.end_value is not None: try: - if ( - last_value_func((row_value, self.end_value)) != self.end_value - or last_value_func((row_value,)) == self.end_value - ): + if last_value_func((row_value, self.end_value)) != self.end_value: + return None, False, True + + if self.range_end == "open" and last_value_func((row_value,)) == self.end_value: return None, False, True except Exception as ex: raise IncrementalCursorInvalidCoercion( @@ -221,6 +233,9 @@ def __call__( ) from ex # new_value is "less" or equal to last_value (the actual max) if last_value == new_value: + if self.range_start == "open": + # We only want greater than last_value + return None, False, False # use func to compute row_value into last_value compatible processed_row_value = last_value_func((row_value,)) # skip the record that is not a start_value or new_value: that record was already processed @@ -258,6 +273,31 @@ def __call__( class ArrowIncremental(IncrementalTransform): _dlt_index = "_dlt_index" + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + if self.last_value_func is max: + self.compute = pa.compute.max + self.end_compare = ( + pa.compute.less if self.range_end == "open" else pa.compute.less_equal + ) + self.last_value_compare = ( + pa.compute.greater_equal if self.range_start == "closed" else pa.compute.greater + ) + self.new_value_compare = pa.compute.greater + elif self.last_value_func is min: + self.compute = pa.compute.min + self.end_compare = ( + pa.compute.greater if self.range_end == "open" else pa.compute.greater_equal + ) + self.last_value_compare = ( + pa.compute.less_equal if self.range_start == "closed" else pa.compute.less + ) + self.new_value_compare = pa.compute.less + else: + raise NotImplementedError( + "Only 
min or max last_value_func is supported for arrow tables" + ) + def compute_unique_values(self, item: "TAnyArrowItem", unique_columns: List[str]) -> List[str]: if not unique_columns: return [] @@ -312,28 +352,13 @@ def __call__( if not tbl: # row is None or empty arrow table return tbl, start_out_of_range, end_out_of_range - if self.last_value_func is max: - compute = pa.compute.max - end_compare = pa.compute.less - last_value_compare = pa.compute.greater_equal - new_value_compare = pa.compute.greater - elif self.last_value_func is min: - compute = pa.compute.min - end_compare = pa.compute.greater - last_value_compare = pa.compute.less_equal - new_value_compare = pa.compute.less - else: - raise NotImplementedError( - "Only min or max last_value_func is supported for arrow tables" - ) - # TODO: Json path support. For now assume the cursor_path is a column name cursor_path = self.cursor_path # The new max/min value try: # NOTE: datetimes are always pendulum in UTC - row_value = from_arrow_scalar(compute(tbl[cursor_path])) + row_value = from_arrow_scalar(self.compute(tbl[cursor_path])) cursor_data_type = tbl.schema.field(cursor_path).type row_value_scalar = to_arrow_scalar(row_value, cursor_data_type) except KeyError as e: @@ -364,10 +389,10 @@ def __call__( cursor_data_type, str(ex), ) from ex - tbl = tbl.filter(end_compare(tbl[cursor_path], end_value_scalar)) + tbl = tbl.filter(self.end_compare(tbl[cursor_path], end_value_scalar)) # Is max row value higher than end value? # NOTE: pyarrow bool *always* evaluates to python True. `as_py()` is necessary - end_out_of_range = not end_compare(row_value_scalar, end_value_scalar).as_py() + end_out_of_range = not self.end_compare(row_value_scalar, end_value_scalar).as_py() if self.start_value is not None: try: @@ -383,7 +408,7 @@ def __call__( str(ex), ) from ex # Remove rows lower or equal than the last start value - keep_filter = last_value_compare(tbl[cursor_path], start_value_scalar) + keep_filter = self.last_value_compare(tbl[cursor_path], start_value_scalar) start_out_of_range = bool(pa.compute.any(pa.compute.invert(keep_filter)).as_py()) tbl = tbl.filter(keep_filter) if not self.deduplication_disabled: @@ -407,7 +432,7 @@ def __call__( if ( self.last_value is None - or new_value_compare( + or self.new_value_compare( row_value_scalar, to_arrow_scalar(self.last_value, cursor_data_type) ).as_py() ): # Last value has changed diff --git a/dlt/sources/sql_database/helpers.py b/dlt/sources/sql_database/helpers.py index a8be2a6427..ee38c7dd98 100644 --- a/dlt/sources/sql_database/helpers.py +++ b/dlt/sources/sql_database/helpers.py @@ -94,12 +94,16 @@ def __init__( self.end_value = incremental.end_value self.row_order: TSortOrder = self.incremental.row_order self.on_cursor_value_missing = self.incremental.on_cursor_value_missing + self.range_start = self.incremental.range_start + self.range_end = self.incremental.range_end else: self.cursor_column = None self.last_value = None self.end_value = None self.row_order = None self.on_cursor_value_missing = None + self.range_start = None + self.range_end = None def _make_query(self) -> SelectAny: table = self.table @@ -110,11 +114,11 @@ def _make_query(self) -> SelectAny: # generate where if last_value_func is max: # Query ordered and filtered according to last_value function - filter_op = operator.ge - filter_op_end = operator.lt + filter_op = operator.ge if self.range_start == "closed" else operator.gt + filter_op_end = operator.lt if self.range_end == "open" else operator.le elif last_value_func is min: - 
filter_op = operator.le - filter_op_end = operator.gt + filter_op = operator.le if self.range_start == "closed" else operator.lt + filter_op_end = operator.gt if self.range_end == "open" else operator.ge else: # Custom last_value, load everything and let incremental handle filtering return query # type: ignore[no-any-return] diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/sql_database/advanced.md b/docs/website/docs/dlt-ecosystem/verified-sources/sql_database/advanced.md index 6ff3a267d2..c532f6d357 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/sql_database/advanced.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/sql_database/advanced.md @@ -16,7 +16,7 @@ Efficient data management often requires loading only new or updated data from y Incremental loading uses a cursor column (e.g., timestamp or auto-incrementing ID) to load only data newer than a specified initial value, enhancing efficiency by reducing processing time and resource use. Read [here](../../../walkthroughs/sql-incremental-configuration) for more details on incremental loading with `dlt`. -#### How to configure +### How to configure 1. **Choose a cursor column**: Identify a column in your SQL table that can serve as a reliable indicator of new or updated rows. Common choices include timestamp columns or auto-incrementing IDs. 1. **Set an initial value**: Choose a starting value for the cursor to begin loading data. This could be a specific timestamp or ID from which you wish to start loading data. 1. **Deduplication**: When using incremental loading, the system automatically handles the deduplication of rows based on the primary key (if available) or row hash for tables without a primary key. @@ -27,7 +27,7 @@ Incremental loading uses a cursor column (e.g., timestamp or auto-incrementing I If your cursor column name contains special characters (e.g., `$`) you need to escape it when passing it to the `incremental` function. For example, if your cursor column is `example_$column`, you should pass it as `"'example_$column'"` or `'"example_$column"'` to the `incremental` function: `incremental("'example_$column'", initial_value=...)`. ::: -#### Examples +### Examples 1. **Incremental loading with the resource `sql_table`**. @@ -52,7 +52,7 @@ If your cursor column name contains special characters (e.g., `$`) you need to e print(extract_info) ``` - Behind the scene, the loader generates a SQL query filtering rows with `last_modified` values greater than the incremental value. In the first run, this is the initial value (midnight (00:00:00) January 1, 2024). + Behind the scenes, the loader generates a SQL query filtering rows with `last_modified` values greater than or equal to the incremental value. In the first run, this is the initial value (midnight (00:00:00) January 1, 2024). In subsequent runs, it is the latest value of `last_modified` that `dlt` stores in [state](../../../general-usage/state). 2. **Incremental loading with the source `sql_database`**. @@ -78,6 +78,49 @@ If your cursor column name contains special characters (e.g., `$`) you need to e * `apply_hints` is a powerful method that enables schema modifications after resource creation, like adjusting write disposition and primary keys. You can choose from various tables and use `apply_hints` multiple times to create pipelines with merged, appended, or replaced resources. 
::: +### Inclusive and exclusive filtering + +By default, the incremental filtering is inclusive on the start value side, so that +rows with a cursor equal to the last run's cursor are fetched again from the database. + +The SQL query generated looks something like this (assuming `last_value_func` is `max`): + +```sql +SELECT * FROM family +WHERE last_modified >= :start_value +ORDER BY last_modified ASC +``` + +That means some rows overlapping with the previous load are fetched from the database. +Duplicates are then filtered out by dlt using either the primary key or a hash of the row's contents. + +This ensures there are no gaps in the extracted sequence, but it does come with some performance overhead, +both from the deduplication processing and from fetching redundant records from the database. + +This is not always needed. If you know that your data does not contain overlapping cursor values, you +can optimize extraction by passing `range_start="open"` to `incremental`. + +This both disables the deduplication process and changes the operator used in the SQL `WHERE` clause from `>=` (greater-or-equal) to `>` (greater than), so that no overlapping rows are fetched. + +For example: + +```py +table = sql_table( + table='family', + incremental=dlt.sources.incremental( + 'last_modified', # Cursor column name + initial_value=pendulum.DateTime(2024, 1, 1, 0, 0, 0), # Initial cursor value + range_start="open", # exclude the start value + ) +) +``` + +It's a good option if: + +* The cursor is an auto-incrementing ID +* The cursor is a high-precision timestamp and two records are never created at exactly the same time +* Your pipeline runs are timed in such a way that new data is not generated during the load + ## Parallelized extraction You can extract each table in a separate thread (no multiprocessing at this point). This will decrease loading time if your queries take time to execute or your network latency/speed is low. To enable this, declare your sources/resources as follows: diff --git a/docs/website/docs/general-usage/incremental-loading.md b/docs/website/docs/general-usage/incremental-loading.md index 3f452f0d16..5008795ed4 100644 --- a/docs/website/docs/general-usage/incremental-loading.md +++ b/docs/website/docs/general-usage/incremental-loading.md @@ -693,7 +693,7 @@ august_issues = repo_issues( ... ``` -Note that dlt's incremental filtering considers the ranges half-closed. `initial_value` is inclusive, `end_value` is exclusive, so chaining ranges like above works without overlaps. +Note that dlt's incremental filtering considers the ranges half-closed. `initial_value` is inclusive, `end_value` is exclusive, so chaining ranges like above works without overlaps. This behaviour can be changed with the `range_start` (default `"closed"`) and `range_end` (default `"open"`) arguments. ### Declare row order to not request unnecessary data @@ -793,6 +793,9 @@ def some_data(last_timestamp=dlt.sources.incremental("item.ts", primary_key=())) yield {"delta": i, "item": {"ts": pendulum.now().timestamp()}} ``` +This deduplication process is always enabled when `range_start` is set to `"closed"` (default). +When you pass `range_start="open"`, no deduplication is done, since rows carrying the previous cursor value are already excluded from the load. This can be a useful optimization to avoid the performance overhead of deduplication if the cursor field is guaranteed to be unique, as the sketch below shows.
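+
+To make this concrete, here is a minimal sketch of a plain resource that relies on the optimization. The resource and its integer `updated_at` cursor are illustrative assumptions, not part of dlt's API:
+
+```py
+import dlt
+
+@dlt.resource
+def ticket_events(
+    updated_at=dlt.sources.incremental(
+        "updated_at",  # cursor field, assumed to hold unique integers
+        initial_value=0,
+        range_start="open",  # rows equal to the stored last_value are skipped
+    ),
+):
+    # with an open start range, dlt never computes or stores unique row hashes,
+    # because rows carrying the previous cursor value are filtered out up front
+    yield [{"id": i, "updated_at": i} for i in range(100)]
+```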
+ ### Using `dlt.sources.incremental` with dynamically created resources When resources are [created dynamically](source.md#create-resources-dynamically), it is possible to use the `dlt.sources.incremental` definition as well. diff --git a/tests/extract/test_incremental.py b/tests/extract/test_incremental.py index 725872b621..3ebc9d1201 100644 --- a/tests/extract/test_incremental.py +++ b/tests/extract/test_incremental.py @@ -5,7 +5,7 @@ from datetime import datetime, date # noqa: I251 from itertools import chain, count from time import sleep -from typing import Any, Optional, Literal, Sequence, Dict +from typing import Any, Optional, Literal, Sequence, Dict, Iterable from unittest import mock import duckdb @@ -1522,6 +1522,7 @@ def some_data(last_timestamp=dlt.sources.incremental("ts", primary_key=())): @pytest.mark.parametrize("item_type", ALL_TEST_DATA_ITEM_FORMATS) def test_apply_hints_incremental(item_type: TestDataItemFormat) -> None: + os.environ["COMPLETED_PROB"] = "1.0" # make it complete immediately p = dlt.pipeline(pipeline_name=uniq_id(), destination="dummy") data = [{"created_at": 1}, {"created_at": 2}, {"created_at": 3}] source_items = data_to_item_format(item_type, data) @@ -3851,3 +3852,111 @@ def some_data(): for col in table_schema["columns"].values(): assert "incremental" not in col + + +@pytest.mark.parametrize("item_type", ALL_TEST_DATA_ITEM_FORMATS) +@pytest.mark.parametrize("last_value_func", [min, max]) +def test_start_range_open(item_type: TestDataItemFormat, last_value_func: Any) -> None: + data_range: Iterable[int] = range(1, 12) + if last_value_func == max: + initial_value = 5 + # Only items higher than the initial value are extracted + expected_items = list(range(6, 12)) + order_dir = "ASC" + elif last_value_func == min: + data_range = reversed(data_range) # type: ignore[call-overload] + initial_value = 5 + # Only items lower than the initial value are extracted + expected_items = list(reversed(range(1, 5))) + order_dir = "DESC" + + @dlt.resource + def some_data( + updated_at: dlt.sources.incremental[int] = dlt.sources.incremental( + "updated_at", + initial_value=initial_value, + range_start="open", + last_value_func=last_value_func, + ), + ) -> Any: + data = [{"updated_at": i} for i in data_range] + yield data_to_item_format(item_type, data) + + pipeline = dlt.pipeline(pipeline_name=uniq_id(), destination="duckdb") + pipeline.run(some_data()) + + with pipeline.sql_client() as client: + items = [ + row[0] + for row in client.execute_sql( + f"SELECT updated_at FROM some_data ORDER BY updated_at {order_dir}" + ) + ] + + assert items == expected_items + + +@pytest.mark.parametrize("item_type", ALL_TEST_DATA_ITEM_FORMATS) +def test_start_range_open_no_deduplication(item_type: TestDataItemFormat) -> None: + @dlt.source + def dummy(): + @dlt.resource + def some_data( + updated_at: dlt.sources.incremental[int] = dlt.sources.incremental( + "updated_at", + range_start="open", + ) + ): + yield [{"updated_at": i} for i in range(3)] + + yield some_data + + pipeline = dlt.pipeline(pipeline_name=uniq_id()) + pipeline.extract(dummy()) + + state = pipeline.state["sources"]["dummy"]["resources"]["some_data"]["incremental"][ + "updated_at" + ] + + # No unique values should be computed + assert state["unique_hashes"] == [] + + +@pytest.mark.parametrize("item_type", ALL_TEST_DATA_ITEM_FORMATS) +@pytest.mark.parametrize("last_value_func", [min, max]) +def test_end_range_closed(item_type: TestDataItemFormat, last_value_func: Any) -> None: + values = [5, 10] + expected_items = list(range(5, 11)) + if
last_value_func == max: + order_dir = "ASC" + elif last_value_func == min: + values = list(reversed(values)) + expected_items = list(reversed(expected_items)) + order_dir = "DESC" + + @dlt.resource + def some_data( + updated_at: dlt.sources.incremental[int] = dlt.sources.incremental( + "updated_at", + initial_value=values[0], + end_value=values[1], + range_end="closed", + last_value_func=last_value_func, + ), + ) -> Any: + data = [{"updated_at": i} for i in range(1, 12)] + yield data_to_item_format(item_type, data) + + pipeline = dlt.pipeline(pipeline_name=uniq_id(), destination="duckdb") + pipeline.run(some_data()) + + with pipeline.sql_client() as client: + items = [ + row[0] + for row in client.execute_sql( + f"SELECT updated_at FROM some_data ORDER BY updated_at {order_dir}" + ) + ] + + # Includes values 5-10 inclusive + assert items == expected_items diff --git a/tests/load/sources/sql_database/test_helpers.py b/tests/load/sources/sql_database/test_helpers.py index def5430146..43da9c955f 100644 --- a/tests/load/sources/sql_database/test_helpers.py +++ b/tests/load/sources/sql_database/test_helpers.py @@ -1,3 +1,6 @@ +from typing import Callable, Any, TYPE_CHECKING +from dataclasses import dataclass + import pytest import dlt @@ -14,6 +17,18 @@ pytest.skip("Tests require sql alchemy", allow_module_level=True) +@dataclass +class MockIncremental: + last_value: Any + last_value_func: Callable[[Any], Any] + cursor_path: str + row_order: str = None + end_value: Any = None + on_cursor_value_missing: str = "raise" + range_start: str = "closed" + range_end: str = "open" + + @pytest.mark.parametrize("backend", ["sqlalchemy", "pyarrow", "pandas", "connectorx"]) def test_cursor_or_unique_column_not_in_table( sql_source_db: SQLAlchemySourceDB, backend: TableBackend @@ -36,13 +51,12 @@ def test_make_query_incremental_max( ) -> None: """Verify query is generated according to incremental settings""" - class MockIncremental: - last_value = dlt.common.pendulum.now() - last_value_func = max - cursor_path = "created_at" - row_order = "asc" - end_value = None - on_cursor_value_missing = "raise" + incremental = MockIncremental( + last_value=dlt.common.pendulum.now(), + last_value_func=max, + cursor_path="created_at", + row_order="asc", + ) table = sql_source_db.get_table("chat_message") loader = TableLoader( @@ -50,14 +64,14 @@ class MockIncremental: backend, table, table_to_columns(table), - incremental=MockIncremental(), # type: ignore[arg-type] + incremental=incremental, # type: ignore[arg-type] ) query = loader.make_query() expected = ( table.select() .order_by(table.c.created_at.asc()) - .where(table.c.created_at >= MockIncremental.last_value) + .where(table.c.created_at >= incremental.last_value) ) assert query.compare(expected) @@ -67,13 +81,14 @@ class MockIncremental: def test_make_query_incremental_min( sql_source_db: SQLAlchemySourceDB, backend: TableBackend ) -> None: - class MockIncremental: - last_value = dlt.common.pendulum.now() - last_value_func = min - cursor_path = "created_at" - row_order = "desc" - end_value = None - on_cursor_value_missing = "raise" + incremental = MockIncremental( + last_value=dlt.common.pendulum.now(), + last_value_func=min, + cursor_path="created_at", + row_order="desc", + end_value=None, + on_cursor_value_missing="raise", + ) table = sql_source_db.get_table("chat_message") loader = TableLoader( @@ -81,14 +96,14 @@ class MockIncremental: backend, table, table_to_columns(table), - incremental=MockIncremental(), # type: ignore[arg-type] + incremental=incremental, # 
type: ignore[arg-type] ) query = loader.make_query() expected = ( table.select() .order_by(table.c.created_at.asc()) # `min` func swaps order - .where(table.c.created_at <= MockIncremental.last_value) + .where(table.c.created_at <= incremental.last_value) ) assert query.compare(expected) @@ -103,13 +118,14 @@ def test_make_query_incremental_on_cursor_value_missing_set( with_end_value: bool, cursor_value_missing: str, ) -> None: - class MockIncremental: - last_value = dlt.common.pendulum.now() - last_value_func = max - cursor_path = "created_at" - row_order = "asc" - end_value = None if not with_end_value else dlt.common.pendulum.now().add(hours=1) - on_cursor_value_missing = cursor_value_missing + incremental = MockIncremental( + last_value=dlt.common.pendulum.now(), + last_value_func=max, + cursor_path="created_at", + row_order="asc", + end_value=None if not with_end_value else dlt.common.pendulum.now().add(hours=1), + on_cursor_value_missing=cursor_value_missing, + ) table = sql_source_db.get_table("chat_message") loader = TableLoader( @@ -117,7 +133,7 @@ class MockIncremental: backend, table, table_to_columns(table), - incremental=MockIncremental(), # type: ignore[arg-type] + incremental=incremental, # type: ignore[arg-type] ) query = loader.make_query() @@ -131,14 +147,14 @@ class MockIncremental: if with_end_value: where_clause = operator( sa.and_( - table.c.created_at >= MockIncremental.last_value, - table.c.created_at < MockIncremental.end_value, + table.c.created_at >= incremental.last_value, + table.c.created_at < incremental.end_value, ), missing_cond, ) else: where_clause = operator( - table.c.created_at >= MockIncremental.last_value, + table.c.created_at >= incremental.last_value, missing_cond, ) expected = table.select().order_by(table.c.created_at.asc()).where(where_clause) @@ -152,13 +168,14 @@ def test_make_query_incremental_on_cursor_value_missing_no_last_value( backend: TableBackend, cursor_value_missing: str, ) -> None: - class MockIncremental: - last_value = None - last_value_func = max - cursor_path = "created_at" - row_order = "asc" - end_value = None - on_cursor_value_missing = cursor_value_missing + incremental = MockIncremental( + last_value=None, + last_value_func=max, + cursor_path="created_at", + row_order="asc", + end_value=None, + on_cursor_value_missing=cursor_value_missing, + ) table = sql_source_db.get_table("chat_message") loader = TableLoader( @@ -166,7 +183,7 @@ class MockIncremental: backend, table, table_to_columns(table), - incremental=MockIncremental(), # type: ignore[arg-type] + incremental=incremental, # type: ignore[arg-type] ) query = loader.make_query() @@ -189,13 +206,14 @@ def test_make_query_incremental_end_value( ) -> None: now = dlt.common.pendulum.now() - class MockIncremental: - last_value = now - last_value_func = min - cursor_path = "created_at" - end_value = now.add(hours=1) - row_order = None - on_cursor_value_missing = "raise" + incremental = MockIncremental( + last_value=now, + last_value_func=min, + cursor_path="created_at", + end_value=now.add(hours=1), + row_order=None, + on_cursor_value_missing="raise", + ) table = sql_source_db.get_table("chat_message") loader = TableLoader( @@ -203,14 +221,14 @@ class MockIncremental: backend, table, table_to_columns(table), - incremental=MockIncremental(), # type: ignore[arg-type] + incremental=incremental, # type: ignore[arg-type] ) query = loader.make_query() expected = table.select().where( sa.and_( - table.c.created_at <= MockIncremental.last_value, - table.c.created_at > 
MockIncremental.end_value, + table.c.created_at <= incremental.last_value, + table.c.created_at > incremental.end_value, ) ) @@ -221,13 +239,14 @@ class MockIncremental: def test_make_query_incremental_any_fun( sql_source_db: SQLAlchemySourceDB, backend: TableBackend ) -> None: - class MockIncremental: - last_value = dlt.common.pendulum.now() - last_value_func = lambda x: x[-1] - cursor_path = "created_at" - row_order = "asc" - end_value = dlt.common.pendulum.now() - on_cursor_value_missing = "raise" + incremental = MockIncremental( + last_value=dlt.common.pendulum.now(), + last_value_func=lambda x: x[-1], + cursor_path="created_at", + row_order="asc", + end_value=dlt.common.pendulum.now(), + on_cursor_value_missing="raise", + ) table = sql_source_db.get_table("chat_message") loader = TableLoader( @@ -235,7 +254,7 @@ class MockIncremental: backend, table, table_to_columns(table), - incremental=MockIncremental(), # type: ignore[arg-type] + incremental=incremental, # type: ignore[arg-type] ) query = loader.make_query() @@ -256,12 +275,11 @@ def test_cursor_path_field_name_with_a_special_chars( if special_field_name not in table.c: table.append_column(sa.Column(special_field_name, sa.String)) - class MockIncremental: - cursor_path = "'id$field'" - last_value = None - end_value = None - row_order = None - on_cursor_value_missing = None + incremental = MockIncremental( + cursor_path="'id$field'", + last_value=None, + last_value_func=max, + ) # Should not raise any exception loader = TableLoader( @@ -269,7 +287,7 @@ class MockIncremental: backend, table, table_to_columns(table), - incremental=MockIncremental(), # type: ignore[arg-type] + incremental=incremental, # type: ignore[arg-type] ) assert loader.cursor_column == table.c[special_field_name] @@ -281,12 +299,11 @@ def test_cursor_path_multiple_fields( """Test that a cursor_path with multiple fields raises a ValueError.""" table = sql_source_db.get_table("chat_message") - class MockIncremental: - cursor_path = "created_at,updated_at" - last_value = None - end_value = None - row_order = None - on_cursor_value_missing = None + incremental = MockIncremental( + cursor_path="created_at,updated_at", + last_value=None, + last_value_func=max, + ) with pytest.raises(ValueError) as excinfo: TableLoader( @@ -294,7 +311,7 @@ class MockIncremental: backend, table, table_to_columns(table), - incremental=MockIncremental(), # type: ignore[arg-type] + incremental=incremental, # type: ignore[arg-type] ) assert "must be a simple column name" in str(excinfo.value) @@ -306,12 +323,11 @@ def test_cursor_path_complex_expression( """Test that a complex JSONPath expression in cursor_path raises a ValueError.""" table = sql_source_db.get_table("chat_message") - class MockIncremental: - cursor_path = "$.users[0].id" - last_value = None - end_value = None - row_order = None - on_cursor_value_missing = None + incremental = MockIncremental( + cursor_path="$.users[0].id", + last_value=None, + last_value_func=max, + ) with pytest.raises(ValueError) as excinfo: TableLoader( @@ -319,11 +335,80 @@ class MockIncremental: backend, table, table_to_columns(table), - incremental=MockIncremental(), # type: ignore[arg-type] + incremental=incremental, # type: ignore[arg-type] ) assert "must be a simple column name" in str(excinfo.value) +@pytest.mark.parametrize("backend", ["sqlalchemy", "pyarrow", "pandas", "connectorx"]) +@pytest.mark.parametrize("last_value_func", [min, max]) +def test_make_query_incremental_range_start_open( + sql_source_db: SQLAlchemySourceDB, backend: 
TableBackend, last_value_func: Callable[[Any], Any] +) -> None: + incremental = MockIncremental( + last_value=dlt.common.pendulum.now(), + last_value_func=last_value_func, + cursor_path="created_at", + end_value=None, + on_cursor_value_missing="raise", + range_start="open", + ) + + table = sql_source_db.get_table("chat_message") + + loader = TableLoader( + sql_source_db.engine, + backend, + table, + table_to_columns(table), + incremental=incremental, # type: ignore[arg-type] + ) + + query = loader.make_query() + expected = table.select() + + if last_value_func == min: + expected = expected.where(table.c.created_at < incremental.last_value) + else: + expected = expected.where(table.c.created_at > incremental.last_value) + + assert query.compare(expected) + + +@pytest.mark.parametrize("backend", ["sqlalchemy", "pyarrow", "pandas", "connectorx"]) +@pytest.mark.parametrize("last_value_func", [min, max]) +def test_make_query_incremental_range_end_closed( + sql_source_db: SQLAlchemySourceDB, backend: TableBackend, last_value_func: Callable[[Any], Any] +) -> None: + incremental = MockIncremental( + last_value=dlt.common.pendulum.now(), + last_value_func=last_value_func, + cursor_path="created_at", + end_value=None, + on_cursor_value_missing="raise", + range_end="closed", + ) + + table = sql_source_db.get_table("chat_message") + loader = TableLoader( + sql_source_db.engine, + backend, + table, + table_to_columns(table), + incremental=incremental, # type: ignore[arg-type] + ) + + query = loader.make_query() + expected = table.select() + + if last_value_func == min: + expected = expected.where(table.c.created_at <= incremental.last_value) + else: + expected = expected.where(table.c.created_at >= incremental.last_value) + + assert query.compare(expected) + + def mock_json_column(field: str) -> TDataItem: """""" import pyarrow as pa diff --git a/tests/load/sources/sql_database/test_sql_database_source.py b/tests/load/sources/sql_database/test_sql_database_source.py index 9079638586..00257471e0 100644 --- a/tests/load/sources/sql_database/test_sql_database_source.py +++ b/tests/load/sources/sql_database/test_sql_database_source.py @@ -13,6 +13,7 @@ from dlt.common.utils import uniq_id from dlt.extract.exceptions import ResourceExtractionError +from dlt.extract.incremental.transform import JsonIncremental, ArrowIncremental from dlt.sources import DltResource from tests.pipeline.utils import ( @@ -831,8 +832,12 @@ def _assert_incremental(item): else: assert _r.incremental.primary_key == ["id"] assert _r.incremental._incremental.primary_key == ["id"] - assert _r.incremental._incremental._transformers["json"].primary_key == ["id"] - assert _r.incremental._incremental._transformers["arrow"].primary_key == ["id"] + assert _r.incremental._incremental._make_or_get_transformer( + JsonIncremental + ).primary_key == ["id"] + assert _r.incremental._incremental._make_or_get_transformer( + ArrowIncremental + ).primary_key == ["id"] return item pipeline = make_pipeline("duckdb") @@ -841,8 +846,12 @@ def _assert_incremental(item): assert resource.incremental.primary_key == ["id"] assert resource.incremental._incremental.primary_key == ["id"] - assert resource.incremental._incremental._transformers["json"].primary_key == ["id"] - assert resource.incremental._incremental._transformers["arrow"].primary_key == ["id"] + assert resource.incremental._incremental._make_or_get_transformer( + JsonIncremental + ).primary_key == ["id"] + assert resource.incremental._incremental._make_or_get_transformer( + ArrowIncremental + 
).primary_key == ["id"] @pytest.mark.parametrize("backend", ["sqlalchemy", "pyarrow", "pandas", "connectorx"]) From 80ef80401b97646901b48e15dade262ef5c3fd52 Mon Sep 17 00:00:00 2001 From: David Scharf Date: Tue, 10 Dec 2024 23:44:01 +0100 Subject: [PATCH 3/3] bump semver to minimum version 3.0.0 (#2132) --- poetry.lock | 104 ++++++++++++++++++++++++++++++++++++++++++++++++- pyproject.toml | 2 +- 2 files changed, 103 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index 732ba0e219..6232b383c8 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. [[package]] name = "about-time" @@ -3900,6 +3900,106 @@ files = [ {file = "google_re2-1.1-4-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1f4d4f0823e8b2f6952a145295b1ff25245ce9bb136aff6fe86452e507d4c1dd"}, {file = "google_re2-1.1-4-cp39-cp39-win32.whl", hash = "sha256:1afae56b2a07bb48cfcfefaa15ed85bae26a68f5dc7f9e128e6e6ea36914e847"}, {file = "google_re2-1.1-4-cp39-cp39-win_amd64.whl", hash = "sha256:aa7d6d05911ab9c8adbf3c225a7a120ab50fd2784ac48f2f0d140c0b7afc2b55"}, + {file = "google_re2-1.1-5-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:222fc2ee0e40522de0b21ad3bc90ab8983be3bf3cec3d349c80d76c8bb1a4beb"}, + {file = "google_re2-1.1-5-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:d4763b0b9195b72132a4e7de8e5a9bf1f05542f442a9115aa27cfc2a8004f581"}, + {file = "google_re2-1.1-5-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:209649da10c9d4a93d8a4d100ecbf9cc3b0252169426bec3e8b4ad7e57d600cf"}, + {file = "google_re2-1.1-5-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:68813aa333c1604a2df4a495b2a6ed065d7c8aebf26cc7e7abb5a6835d08353c"}, + {file = "google_re2-1.1-5-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:370a23ec775ad14e9d1e71474d56f381224dcf3e72b15d8ca7b4ad7dd9cd5853"}, + {file = "google_re2-1.1-5-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:14664a66a3ddf6bc9e56f401bf029db2d169982c53eff3f5876399104df0e9a6"}, + {file = "google_re2-1.1-5-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3ea3722cc4932cbcebd553b69dce1b4a73572823cff4e6a244f1c855da21d511"}, + {file = "google_re2-1.1-5-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e14bb264c40fd7c627ef5678e295370cd6ba95ca71d835798b6e37502fc4c690"}, + {file = "google_re2-1.1-5-cp310-cp310-win32.whl", hash = "sha256:39512cd0151ea4b3969c992579c79b423018b464624ae955be685fc07d94556c"}, + {file = "google_re2-1.1-5-cp310-cp310-win_amd64.whl", hash = "sha256:ac66537aa3bc5504320d922b73156909e3c2b6da19739c866502f7827b3f9fdf"}, + {file = "google_re2-1.1-5-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:5b5ea68d54890c9edb1b930dcb2658819354e5d3f2201f811798bbc0a142c2b4"}, + {file = "google_re2-1.1-5-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:33443511b6b83c35242370908efe2e8e1e7cae749c766b2b247bf30e8616066c"}, + {file = "google_re2-1.1-5-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:413d77bdd5ba0bfcada428b4c146e87707452ec50a4091ec8e8ba1413d7e0619"}, + {file = "google_re2-1.1-5-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:5171686e43304996a34baa2abcee6f28b169806d0e583c16d55e5656b092a414"}, + {file = "google_re2-1.1-5-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:3b284db130283771558e31a02d8eb8fb756156ab98ce80035ae2e9e3a5f307c4"}, + {file = 
"google_re2-1.1-5-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:296e6aed0b169648dc4b870ff47bd34c702a32600adb9926154569ef51033f47"}, + {file = "google_re2-1.1-5-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:38d50e68ead374160b1e656bbb5d101f0b95fb4cc57f4a5c12100155001480c5"}, + {file = "google_re2-1.1-5-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2a0416a35921e5041758948bcb882456916f22845f66a93bc25070ef7262b72a"}, + {file = "google_re2-1.1-5-cp311-cp311-win32.whl", hash = "sha256:a1d59568bbb5de5dd56dd6cdc79907db26cce63eb4429260300c65f43469e3e7"}, + {file = "google_re2-1.1-5-cp311-cp311-win_amd64.whl", hash = "sha256:72f5a2f179648b8358737b2b493549370debd7d389884a54d331619b285514e3"}, + {file = "google_re2-1.1-5-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:cbc72c45937b1dc5acac3560eb1720007dccca7c9879138ff874c7f6baf96005"}, + {file = "google_re2-1.1-5-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:5fadd1417fbef7235fa9453dba4eb102e6e7d94b1e4c99d5fa3dd4e288d0d2ae"}, + {file = "google_re2-1.1-5-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:040f85c63cc02696485b59b187a5ef044abe2f99b92b4fb399de40b7d2904ccc"}, + {file = "google_re2-1.1-5-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:64e3b975ee6d9bbb2420494e41f929c1a0de4bcc16d86619ab7a87f6ea80d6bd"}, + {file = "google_re2-1.1-5-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:8ee370413e00f4d828eaed0e83b8af84d7a72e8ee4f4bd5d3078bc741dfc430a"}, + {file = "google_re2-1.1-5-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:5b89383001079323f693ba592d7aad789d7a02e75adb5d3368d92b300f5963fd"}, + {file = "google_re2-1.1-5-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:63cb4fdfbbda16ae31b41a6388ea621510db82feb8217a74bf36552ecfcd50ad"}, + {file = "google_re2-1.1-5-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9ebedd84ae8be10b7a71a16162376fd67a2386fe6361ef88c622dcf7fd679daf"}, + {file = "google_re2-1.1-5-cp312-cp312-win32.whl", hash = "sha256:c8e22d1692bc2c81173330c721aff53e47ffd3c4403ff0cd9d91adfd255dd150"}, + {file = "google_re2-1.1-5-cp312-cp312-win_amd64.whl", hash = "sha256:5197a6af438bb8c4abda0bbe9c4fbd6c27c159855b211098b29d51b73e4cbcf6"}, + {file = "google_re2-1.1-5-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:b6727e0b98417e114b92688ad2aa256102ece51f29b743db3d831df53faf1ce3"}, + {file = "google_re2-1.1-5-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:711e2b6417eb579c61a4951029d844f6b95b9b373b213232efd413659889a363"}, + {file = "google_re2-1.1-5-cp38-cp38-macosx_13_0_arm64.whl", hash = "sha256:71ae8b3df22c5c154c8af0f0e99d234a450ef1644393bc2d7f53fc8c0a1e111c"}, + {file = "google_re2-1.1-5-cp38-cp38-macosx_13_0_x86_64.whl", hash = "sha256:94a04e214bc521a3807c217d50cf099bbdd0c0a80d2d996c0741dbb995b5f49f"}, + {file = "google_re2-1.1-5-cp38-cp38-macosx_14_0_arm64.whl", hash = "sha256:a770f75358508a9110c81a1257721f70c15d9bb592a2fb5c25ecbd13566e52a5"}, + {file = "google_re2-1.1-5-cp38-cp38-macosx_14_0_x86_64.whl", hash = "sha256:07c9133357f7e0b17c6694d5dcb82e0371f695d7c25faef2ff8117ef375343ff"}, + {file = "google_re2-1.1-5-cp38-cp38-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:204ca6b1cf2021548f4a9c29ac015e0a4ab0a7b6582bf2183d838132b60c8fda"}, + {file = "google_re2-1.1-5-cp38-cp38-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f0b95857c2c654f419ca684ec38c9c3325c24e6ba7d11910a5110775a557bb18"}, + {file = "google_re2-1.1-5-cp38-cp38-win32.whl", hash = 
"sha256:347ac770e091a0364e822220f8d26ab53e6fdcdeaec635052000845c5a3fb869"}, + {file = "google_re2-1.1-5-cp38-cp38-win_amd64.whl", hash = "sha256:ec32bb6de7ffb112a07d210cf9f797b7600645c2d5910703fa07f456dd2150e0"}, + {file = "google_re2-1.1-5-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:eb5adf89060f81c5ff26c28e261e6b4997530a923a6093c9726b8dec02a9a326"}, + {file = "google_re2-1.1-5-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:a22630c9dd9ceb41ca4316bccba2643a8b1d5c198f21c00ed5b50a94313aaf10"}, + {file = "google_re2-1.1-5-cp39-cp39-macosx_13_0_arm64.whl", hash = "sha256:544dc17fcc2d43ec05f317366375796351dec44058e1164e03c3f7d050284d58"}, + {file = "google_re2-1.1-5-cp39-cp39-macosx_13_0_x86_64.whl", hash = "sha256:19710af5ea88751c7768575b23765ce0dfef7324d2539de576f75cdc319d6654"}, + {file = "google_re2-1.1-5-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:f82995a205e08ad896f4bd5ce4847c834fab877e1772a44e5f262a647d8a1dec"}, + {file = "google_re2-1.1-5-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:63533c4d58da9dc4bc040250f1f52b089911699f0368e0e6e15f996387a984ed"}, + {file = "google_re2-1.1-5-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:79e00fcf0cb04ea35a22b9014712d448725ce4ddc9f08cc818322566176ca4b0"}, + {file = "google_re2-1.1-5-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bc41afcefee2da6c4ed883a93d7f527c4b960cd1d26bbb0020a7b8c2d341a60a"}, + {file = "google_re2-1.1-5-cp39-cp39-win32.whl", hash = "sha256:486730b5e1f1c31b0abc6d80abe174ce4f1188fe17d1b50698f2bf79dc6e44be"}, + {file = "google_re2-1.1-5-cp39-cp39-win_amd64.whl", hash = "sha256:4de637ca328f1d23209e80967d1b987d6b352cd01b3a52a84b4d742c69c3da6c"}, + {file = "google_re2-1.1-6-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:621e9c199d1ff0fdb2a068ad450111a84b3bf14f96dfe5a8a7a0deae5f3f4cce"}, + {file = "google_re2-1.1-6-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:220acd31e7dde95373f97c3d1f3b3bd2532b38936af28b1917ee265d25bebbf4"}, + {file = "google_re2-1.1-6-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:db34e1098d164f76251a6ece30e8f0ddfd65bb658619f48613ce71acb3f9cbdb"}, + {file = "google_re2-1.1-6-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:5152bac41d8073977582f06257219541d0fc46ad99b0bbf30e8f60198a43b08c"}, + {file = "google_re2-1.1-6-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:6191294799e373ee1735af91f55abd23b786bdfd270768a690d9d55af9ea1b0d"}, + {file = "google_re2-1.1-6-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:070cbafbb4fecbb02e98feb28a1eb292fb880f434d531f38cc33ee314b521f1f"}, + {file = "google_re2-1.1-6-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8437d078b405a59a576cbed544490fe041140f64411f2d91012e8ec05ab8bf86"}, + {file = "google_re2-1.1-6-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f00f9a9af8896040e37896d9b9fc409ad4979f1ddd85bb188694a7d95ddd1164"}, + {file = "google_re2-1.1-6-cp310-cp310-win32.whl", hash = "sha256:df26345f229a898b4fd3cafd5f82259869388cee6268fc35af16a8e2293dd4e5"}, + {file = "google_re2-1.1-6-cp310-cp310-win_amd64.whl", hash = "sha256:3665d08262c57c9b28a5bdeb88632ad792c4e5f417e5645901695ab2624f5059"}, + {file = "google_re2-1.1-6-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:b26b869d8aa1d8fe67c42836bf3416bb72f444528ee2431cfb59c0d3e02c6ce3"}, + {file = "google_re2-1.1-6-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:41fd4486c57dea4f222a6bb7f1ff79accf76676a73bdb8da0fcbd5ba73f8da71"}, + {file = 
"google_re2-1.1-6-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:0ee378e2e74e25960070c338c28192377c4dd41e7f4608f2688064bd2badc41e"}, + {file = "google_re2-1.1-6-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:a00cdbf662693367b36d075b29feb649fd7ee1b617cf84f85f2deebeda25fc64"}, + {file = "google_re2-1.1-6-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:4c09455014217a41499432b8c8f792f25f3df0ea2982203c3a8c8ca0e7895e69"}, + {file = "google_re2-1.1-6-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:6501717909185327935c7945e23bb5aa8fc7b6f237b45fe3647fa36148662158"}, + {file = "google_re2-1.1-6-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3510b04790355f199e7861c29234081900e1e1cbf2d1484da48aa0ba6d7356ab"}, + {file = "google_re2-1.1-6-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8c0e64c187ca406764f9e9ad6e750d62e69ed8f75bf2e865d0bfbc03b642361c"}, + {file = "google_re2-1.1-6-cp311-cp311-win32.whl", hash = "sha256:2a199132350542b0de0f31acbb3ca87c3a90895d1d6e5235f7792bb0af02e523"}, + {file = "google_re2-1.1-6-cp311-cp311-win_amd64.whl", hash = "sha256:83bdac8ceaece8a6db082ea3a8ba6a99a2a1ee7e9f01a9d6d50f79c6f251a01d"}, + {file = "google_re2-1.1-6-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:81985ff894cd45ab5a73025922ac28c0707759db8171dd2f2cc7a0e856b6b5ad"}, + {file = "google_re2-1.1-6-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:5635af26065e6b45456ccbea08674ae2ab62494008d9202df628df3b267bc095"}, + {file = "google_re2-1.1-6-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:813b6f04de79f4a8fdfe05e2cb33e0ccb40fe75d30ba441d519168f9d958bd54"}, + {file = "google_re2-1.1-6-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:5ec2f5332ad4fd232c3f2d6748c2c7845ccb66156a87df73abcc07f895d62ead"}, + {file = "google_re2-1.1-6-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:5a687b3b32a6cbb731647393b7c4e3fde244aa557f647df124ff83fb9b93e170"}, + {file = "google_re2-1.1-6-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:39a62f9b3db5d3021a09a47f5b91708b64a0580193e5352751eb0c689e4ad3d7"}, + {file = "google_re2-1.1-6-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ca0f0b45d4a1709cbf5d21f355e5809ac238f1ee594625a1e5ffa9ff7a09eb2b"}, + {file = "google_re2-1.1-6-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a64b3796a7a616c7861247bd061c9a836b5caf0d5963e5ea8022125601cf7b09"}, + {file = "google_re2-1.1-6-cp312-cp312-win32.whl", hash = "sha256:32783b9cb88469ba4cd9472d459fe4865280a6b1acdad4480a7b5081144c4eb7"}, + {file = "google_re2-1.1-6-cp312-cp312-win_amd64.whl", hash = "sha256:259ff3fd2d39035b9cbcbf375995f83fa5d9e6a0c5b94406ff1cc168ed41d6c6"}, + {file = "google_re2-1.1-6-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:e4711bcffe190acd29104d8ecfea0c0e42b754837de3fb8aad96e6cc3c613cdc"}, + {file = "google_re2-1.1-6-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:4d081cce43f39c2e813fe5990e1e378cbdb579d3f66ded5bade96130269ffd75"}, + {file = "google_re2-1.1-6-cp38-cp38-macosx_13_0_arm64.whl", hash = "sha256:4f123b54d48450d2d6b14d8fad38e930fb65b5b84f1b022c10f2913bd956f5b5"}, + {file = "google_re2-1.1-6-cp38-cp38-macosx_13_0_x86_64.whl", hash = "sha256:e1928b304a2b591a28eb3175f9db7f17c40c12cf2d4ec2a85fdf1cc9c073ff91"}, + {file = "google_re2-1.1-6-cp38-cp38-macosx_14_0_arm64.whl", hash = "sha256:3a69f76146166aec1173003c1f547931bdf288c6b135fda0020468492ac4149f"}, + {file = "google_re2-1.1-6-cp38-cp38-macosx_14_0_x86_64.whl", hash = 
"sha256:fc08c388f4ebbbca345e84a0c56362180d33d11cbe9ccfae663e4db88e13751e"}, + {file = "google_re2-1.1-6-cp38-cp38-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b057adf38ce4e616486922f2f47fc7d19c827ba0a7f69d540a3664eba2269325"}, + {file = "google_re2-1.1-6-cp38-cp38-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4138c0b933ab099e96f5d8defce4486f7dfd480ecaf7f221f2409f28022ccbc5"}, + {file = "google_re2-1.1-6-cp38-cp38-win32.whl", hash = "sha256:9693e45b37b504634b1abbf1ee979471ac6a70a0035954592af616306ab05dd6"}, + {file = "google_re2-1.1-6-cp38-cp38-win_amd64.whl", hash = "sha256:5674d437baba0ea287a5a7f8f81f24265d6ae8f8c09384e2ef7b6f84b40a7826"}, + {file = "google_re2-1.1-6-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:7783137cb2e04f458a530c6d0ee9ef114815c1d48b9102f023998c371a3b060e"}, + {file = "google_re2-1.1-6-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:a49b7153935e7a303675f4deb5f5d02ab1305adefc436071348706d147c889e0"}, + {file = "google_re2-1.1-6-cp39-cp39-macosx_13_0_arm64.whl", hash = "sha256:a96a8bb309182090704593c60bdb369a2756b38fe358bbf0d40ddeb99c71769f"}, + {file = "google_re2-1.1-6-cp39-cp39-macosx_13_0_x86_64.whl", hash = "sha256:dff3d4be9f27ef8ec3705eed54f19ef4ab096f5876c15fe011628c69ba3b561c"}, + {file = "google_re2-1.1-6-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:40f818b0b39e26811fa677978112a8108269977fdab2ba0453ac4363c35d9e66"}, + {file = "google_re2-1.1-6-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:8a7e53538cdb40ef4296017acfbb05cab0c19998be7552db1cfb85ba40b171b9"}, + {file = "google_re2-1.1-6-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6ee18e7569fb714e5bb8c42809bf8160738637a5e71ed5a4797757a1fb4dc4de"}, + {file = "google_re2-1.1-6-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1cda4f6d1a7d5b43ea92bc395f23853fba0caf8b1e1efa6e8c48685f912fcb89"}, + {file = "google_re2-1.1-6-cp39-cp39-win32.whl", hash = "sha256:6a9cdbdc36a2bf24f897be6a6c85125876dc26fea9eb4247234aec0decbdccfd"}, + {file = "google_re2-1.1-6-cp39-cp39-win_amd64.whl", hash = "sha256:73f646cecfad7cc5b4330b4192c25f2e29730a3b8408e089ffd2078094208196"}, ] [[package]] @@ -10518,4 +10618,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<3.13" -content-hash = "c0607d05ab37a1a6addf3ae7264bf5972cb6ce6e46df1dcdc2da3cff72e5008e" +content-hash = "1bf3deccd929c083b880c1a82be0983430ab49f7ade247b1c5573bb8c70d9ff5" diff --git a/pyproject.toml b/pyproject.toml index 7377b03fde..f736fc65ad 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,7 @@ requests = ">=2.26.0" pendulum = ">=2.1.2" simplejson = ">=3.17.5" PyYAML = ">=5.4.1" -semver = ">=2.13.0" +semver = ">=3.0.0" hexbytes = ">=0.2.2" tzdata = ">=2022.1" tomlkit = ">=0.11.3"