Skip to content

Commit

Permalink
[sc-28135] Redact literal values in CASE clauses (#955)
Browse files Browse the repository at this point in the history
  • Loading branch information
usefulalgorithm authored Aug 19, 2024
1 parent e4d25b4 commit 59ac70f
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 1 deletion.
2 changes: 2 additions & 0 deletions metaphor/common/docs/process_query.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ process_query:
redact_literals:
where_clauses: <true | false> # Whether to redact all literal values in WHERE clauses. Default is `false`.

case_clauses: <true | false> # Whether to redact all literal values in CASE clauses. Default is `false`.

when_not_matched_insert_clauses: <true | false> # Whether to redact all literal values in WHEN NOT MATCHED INSERT clauses. If set to `true`, each literal value is replaced with the placeholder string. Default is `false`.

placeholder_literal: <placeholder literal> # The redacted values will be replaced by this placeholder string. Default is '<REDACTED>'.
Expand Down
5 changes: 5 additions & 0 deletions metaphor/common/sql/process_query/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ class RedactPIILiteralsConfig:
Whether to redact literal values in WHERE clauses. If set to `True`, all literal values will be redacted to a predefined string value.
"""

case_clauses: bool = False
"""
Whether to redact literal values in CASE clauses. If set to `True`, all literal values will be redacted to a predefined string value.
"""

when_not_matched_insert_clauses: bool = False
"""
Whether to redact literal values in WHEN NOT MATCHED INSERT clauses. If set to `True`, all literal values will be redacted to a predefined string value.
Expand Down
11 changes: 11 additions & 0 deletions metaphor/common/sql/process_query/process_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,14 @@ def _redact_literal_values_in_where_clauses(
lit.args["this"] = config.redact_literals.placeholder_literal


def _redact_literal_values_in_case_clauses(
    expression: Expression, config: ProcessQueryConfig
) -> None:
    """
    Overwrite every literal found under a CASE expression with the
    configured placeholder.

    The parsed tree is modified in place and nothing is returned. Only the
    `this` argument of each literal node is replaced; the node itself stays
    where it is in the tree.
    """
    placeholder = config.redact_literals.placeholder_literal
    for case_node in expression.find_all(exp.Case):
        for literal in case_node.find_all(exp.Literal):
            literal.args["this"] = placeholder


def _redact_literal_values_in_when_not_matched_insert_clauses(
expression: Expression,
config: ProcessQueryConfig,
Expand Down Expand Up @@ -90,6 +98,9 @@ def process_query(
if config.redact_literals.where_clauses:
_redact_literal_values_in_where_clauses(expression, config)

if config.redact_literals.case_clauses:
_redact_literal_values_in_case_clauses(expression, config)

if config.redact_literals.when_not_matched_insert_clauses:
_redact_literal_values_in_when_not_matched_insert_clauses(expression, config)

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "metaphor-connectors"
version = "0.14.79"
version = "0.14.80"
license = "Apache-2.0"
description = "A collection of Python-based 'connectors' that extract metadata from various sources to ingest into the Metaphor app."
authors = ["Metaphor <[email protected]>"]
Expand Down
27 changes: 27 additions & 0 deletions tests/common/sql/process_query/test_process_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
config = ProcessQueryConfig(
redact_literals=RedactPIILiteralsConfig(
where_clauses=True,
case_clauses=True,
when_not_matched_insert_clauses=True,
),
ignore_insert_values_into=True,
Expand Down Expand Up @@ -163,3 +164,29 @@ def test_merge_insert_when_not_matched():
processed
== "MERGE INTO TargetProducts AS Target USING SourceProducts AS Source ON Source.ProductID = Target.ProductID WHEN NOT MATCHED THEN INSERT (ProductID, ProductName, Price) VALUES (<REDACTED>, '<REDACTED>', <REDACTED>) WHEN MATCHED THEN UPDATE SET Target.ProductName = Source.ProductName, Target.Price = Source.Price"
)


def test_redact_where_clauses():
    """
    Literals inside a CASE expression are replaced by the placeholder.

    NOTE(review): despite the name, the WHERE clause in this query contains
    no literal values, so the assertion below only covers redaction inside
    the CASE expression. Consider renaming in a follow-up.
    """
    sql = """
INSERT INTO target_table (first_name, last_name, email, status)
SELECT
first_name,
last_name,
email,
CASE
WHEN age < 18 THEN 'Minor'
WHEN age >= 18 AND age < 65 THEN 'Adult'
ELSE 'Senior'
END as status
FROM source_table
WHERE email IS NOT NULL;
"""
    # Both the numeric comparison operands and the string results of the
    # CASE branches are expected to be redacted.
    expected = "INSERT INTO target_table (first_name, last_name, email, status) SELECT first_name, last_name, email, CASE WHEN age < <REDACTED> THEN '<REDACTED>' WHEN age >= <REDACTED> AND age < <REDACTED> THEN '<REDACTED>' ELSE '<REDACTED>' END AS status FROM source_table WHERE NOT email IS NULL"
    assert process_query(sql, DataPlatform.SNOWFLAKE, config) == expected

0 comments on commit 59ac70f

Please sign in to comment.