From 59ac70f63c74bd37720933910c986d20a67970eb Mon Sep 17 00:00:00 2001 From: Tsung-Ju Lii Date: Tue, 20 Aug 2024 01:03:13 +0800 Subject: [PATCH] [sc-28135] Redact literal values in CASE clauses (#955) --- metaphor/common/docs/process_query.md | 2 ++ metaphor/common/sql/process_query/config.py | 5 ++++ .../common/sql/process_query/process_query.py | 11 ++++++++ pyproject.toml | 2 +- .../sql/process_query/test_process_query.py | 27 +++++++++++++++++++ 5 files changed, 46 insertions(+), 1 deletion(-) diff --git a/metaphor/common/docs/process_query.md b/metaphor/common/docs/process_query.md index 21fcde16..c91cf78c 100644 --- a/metaphor/common/docs/process_query.md +++ b/metaphor/common/docs/process_query.md @@ -7,6 +7,8 @@ process_query: redact_literals: where_clauses: # Whether to redact all literal values in WHERE clauses. Default is `false`. + case_clauses: # Whether to redact all literal values in CASE clauses. Default is `false`. + when_not_matched_insert_clauses: # Whether to redact literal values in WHEN NOT MATCHED INSERT clauses. If set to `True`, all literal values will be redacted to a predefined string value. Default is `false`. placeholder_literal: # The redacted values will be replaced by this placeholder string. Default is ''. diff --git a/metaphor/common/sql/process_query/config.py b/metaphor/common/sql/process_query/config.py index 596dc5a1..07f2ded3 100644 --- a/metaphor/common/sql/process_query/config.py +++ b/metaphor/common/sql/process_query/config.py @@ -16,6 +16,11 @@ class RedactPIILiteralsConfig: Whether to redact literal values in WHERE clauses. If set to `True`, all literal values will be redacted to a predefined string value. """ + case_clauses: bool = False + """ + Whether to redact literal values in CASE clauses. If set to `True`, all literal values will be redacted to a predefined string value. + """ + when_not_matched_insert_clauses: bool = False """ Whether to redact literal values in WHEN NOT MATCHED INSERT clauses. 
If set to `True`, all literal values will be redacted to a predefined string value. diff --git a/metaphor/common/sql/process_query/process_query.py b/metaphor/common/sql/process_query/process_query.py index e1f6becb..fa9da539 100644 --- a/metaphor/common/sql/process_query/process_query.py +++ b/metaphor/common/sql/process_query/process_query.py @@ -21,6 +21,14 @@ def _redact_literal_values_in_where_clauses( lit.args["this"] = config.redact_literals.placeholder_literal +def _redact_literal_values_in_case_clauses( + expression: Expression, config: ProcessQueryConfig +) -> None: + for case in expression.find_all(exp.Case): + for lit in case.find_all(exp.Literal): + lit.args["this"] = config.redact_literals.placeholder_literal + + def _redact_literal_values_in_when_not_matched_insert_clauses( expression: Expression, config: ProcessQueryConfig, @@ -90,6 +98,9 @@ def process_query( if config.redact_literals.where_clauses: _redact_literal_values_in_where_clauses(expression, config) + if config.redact_literals.case_clauses: + _redact_literal_values_in_case_clauses(expression, config) + if config.redact_literals.when_not_matched_insert_clauses: _redact_literal_values_in_when_not_matched_insert_clauses(expression, config) diff --git a/pyproject.toml b/pyproject.toml index 8205bbdc..f23336b2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "metaphor-connectors" -version = "0.14.79" +version = "0.14.80" license = "Apache-2.0" description = "A collection of Python-based 'connectors' that extract metadata from various sources to ingest into the Metaphor app." 
authors = ["Metaphor "] diff --git a/tests/common/sql/process_query/test_process_query.py b/tests/common/sql/process_query/test_process_query.py index 7b930425..5f8a414c 100644 --- a/tests/common/sql/process_query/test_process_query.py +++ b/tests/common/sql/process_query/test_process_query.py @@ -8,6 +8,7 @@ config = ProcessQueryConfig( redact_literals=RedactPIILiteralsConfig( where_clauses=True, + case_clauses=True, when_not_matched_insert_clauses=True, ), ignore_insert_values_into=True, @@ -163,3 +164,29 @@ def test_merge_insert_when_not_matched(): processed == "MERGE INTO TargetProducts AS Target USING SourceProducts AS Source ON Source.ProductID = Target.ProductID WHEN NOT MATCHED THEN INSERT (ProductID, ProductName, Price) VALUES (, '', ) WHEN MATCHED THEN UPDATE SET Target.ProductName = Source.ProductName, Target.Price = Source.Price" ) + + +def test_redact_case_clauses(): + sql = """ +INSERT INTO target_table (first_name, last_name, email, status) +SELECT + first_name, + last_name, + email, + CASE + WHEN age < 18 THEN 'Minor' + WHEN age >= 18 AND age < 65 THEN 'Adult' + ELSE 'Senior' + END as status +FROM source_table +WHERE email IS NOT NULL; + """ + processed = process_query( + sql, + DataPlatform.SNOWFLAKE, + config, + ) + assert ( + processed + == "INSERT INTO target_table (first_name, last_name, email, status) SELECT first_name, last_name, email, CASE WHEN age < THEN '' WHEN age >= AND age < THEN '' ELSE '' END AS status FROM source_table WHERE NOT email IS NULL" + )