From e6223001085da566449db904896f6af9ceb44e8d Mon Sep 17 00:00:00 2001
From: Ilya Gurov
Date: Wed, 13 Mar 2024 01:48:17 +0400
Subject: [PATCH] feat(core): drop default value for write disposition (#1057)

* feat(core): drop default value for write disposition

* don't use default value in apply_hints

* applies default write disposition in empty apply hints

---------

Co-authored-by: Marcin Rudolf
---
 dlt/common/schema/schema.py   |  4 +++-
 dlt/extract/hints.py          |  5 ++++-
 tests/extract/test_extract.py |  8 ++++++++
 tests/extract/test_sources.py | 15 +++++++++++++--
 4 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/dlt/common/schema/schema.py b/dlt/common/schema/schema.py
index 4c81c8af72..302ac54148 100644
--- a/dlt/common/schema/schema.py
+++ b/dlt/common/schema/schema.py
@@ -82,7 +82,9 @@ class Schema:
     _imported_version_hash: str  # version hash of recently imported schema
     _schema_description: str  # optional schema description
     _schema_tables: TSchemaTables
-    _settings: TSchemaSettings  # schema settings to hold default hints, preferred types and other settings
+    _settings: (
+        TSchemaSettings  # schema settings to hold default hints, preferred types and other settings
+    )
 
     # list of preferred types: map regex on columns into types
     _compiled_preferred_types: List[Tuple[REPattern, TDataType]]
diff --git a/dlt/extract/hints.py b/dlt/extract/hints.py
index f298e414a1..54ce00a806 100644
--- a/dlt/extract/hints.py
+++ b/dlt/extract/hints.py
@@ -82,6 +82,8 @@ def make_hints(
     )
     if not table_name:
         new_template.pop("name")
+    if not write_disposition and "write_disposition" in new_template:
+        new_template.pop("write_disposition")
     # remember original columns
     if columns is not None:
         new_template["original_columns"] = columns
@@ -197,10 +199,11 @@ def apply_hints(
         """
         if not self._hints:
             # if there is no template yet, create and set a new one.
+            default_wd = None if parent_table_name else DEFAULT_WRITE_DISPOSITION
             t = make_hints(
                 table_name,
                 parent_table_name,
-                write_disposition,
+                write_disposition or default_wd,
                 columns,
                 primary_key,
                 merge_key,
diff --git a/tests/extract/test_extract.py b/tests/extract/test_extract.py
index 28b08c3648..b86e198988 100644
--- a/tests/extract/test_extract.py
+++ b/tests/extract/test_extract.py
@@ -89,6 +89,14 @@ def table_name_with_lambda(_range):
     assert "table_name_with_lambda" not in schema.tables
 
 
+def test_make_hints_default() -> None:
+    hints = make_hints()
+    assert hints == {"columns": {}}
+
+    hints = make_hints(write_disposition=None)
+    assert hints == {"columns": {}}
+
+
 def test_extract_hints_mark(extract_step: Extract) -> None:
     @dlt.resource
     def with_table_hints():
diff --git a/tests/extract/test_sources.py b/tests/extract/test_sources.py
index 5895c3b658..d9c73dfb20 100644
--- a/tests/extract/test_sources.py
+++ b/tests/extract/test_sources.py
@@ -1326,9 +1326,11 @@ def empty_gen():
     empty_r = empty()
 
     with pytest.raises(InconsistentTableTemplate):
-        empty_r.apply_hints(parent_table_name=lambda ev: ev["p"])
+        empty_r.apply_hints(parent_table_name=lambda ev: ev["p"], write_disposition=None)
 
-    empty_r.apply_hints(table_name=lambda ev: ev["t"], parent_table_name=lambda ev: ev["p"])
+    empty_r.apply_hints(
+        table_name=lambda ev: ev["t"], parent_table_name=lambda ev: ev["p"], write_disposition=None
+    )
     assert empty_r._table_name_hint_fun is not None
     assert empty_r._table_has_other_dynamic_hints is True
 
@@ -1360,6 +1362,15 @@ def empty_gen():
     assert table["columns"]["tags"] == {"name": "tags"}
 
 
+def test_resource_no_template() -> None:
+    empty = DltResource.from_data([1, 2, 3], name="table")
+    assert empty.write_disposition == "append"
+    assert empty.compute_table_schema()["write_disposition"] == "append"
+    empty.apply_hints()
+    assert empty.write_disposition == "append"
+    assert empty.compute_table_schema()["write_disposition"] == "append"
+
+
 def test_selected_pipes_with_duplicates():
     def input_gen():
         yield from [1, 2, 3]