Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[sc-28135] Redact literal values in CASE clauses #955

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions metaphor/common/docs/process_query.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ process_query:
redact_literals:
where_clauses: <true | false> # Whether to redact all literal values in WHERE clauses. Default is `false`.

case_clauses: <true | false> # Whether to redact all literal values in CASE clauses. Default is `false`.

when_not_matched_insert_clauses: <true | false> # Whether to redact literal values in WHEN NOT MATCHED INSERT clauses. If set to `true`, all literal values will be redacted to a predefined string value. Default is `false`.

placeholder_literal: <placeholder literal> # The redacted values will be replaced by this placeholder string. Default is '<REDACTED>'.
Expand Down
5 changes: 5 additions & 0 deletions metaphor/common/sql/process_query/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ class RedactPIILiteralsConfig:
Whether to redact literal values in WHERE clauses. If set to `True`, all literal values will be redacted to a predefined string value.
"""

case_clauses: bool = False
"""
Whether to redact literal values in CASE clauses. If set to `True`, all literal values will be redacted to a predefined string value.
"""

when_not_matched_insert_clauses: bool = False
"""
Whether to redact literal values in WHEN NOT MATCHED INSERT clauses. If set to `True`, all literal values will be redacted to a predefined string value.
Expand Down
11 changes: 11 additions & 0 deletions metaphor/common/sql/process_query/process_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,14 @@ def _redact_literal_values_in_where_clauses(
lit.args["this"] = config.redact_literals.placeholder_literal


def _redact_literal_values_in_case_clauses(
    expression: Expression, config: ProcessQueryConfig
) -> None:
    """
    Overwrite, in place, every literal found under a CASE expression.

    Each `exp.Literal` yielded by `find_all` on each `exp.Case` node of
    `expression` has its `this` argument replaced with
    `config.redact_literals.placeholder_literal`. Nothing is returned; the
    expression tree passed in is mutated.
    """
    # Lazy on purpose: each literal is rewritten before the next one is pulled
    # from the traversal, exactly as a pair of nested loops would do.
    case_literals = (
        literal
        for case_node in expression.find_all(exp.Case)
        for literal in case_node.find_all(exp.Literal)
    )
    for literal in case_literals:
        literal.args["this"] = config.redact_literals.placeholder_literal


def _redact_literal_values_in_when_not_matched_insert_clauses(
expression: Expression,
config: ProcessQueryConfig,
Expand Down Expand Up @@ -90,6 +98,9 @@ def process_query(
if config.redact_literals.where_clauses:
_redact_literal_values_in_where_clauses(expression, config)

if config.redact_literals.case_clauses:
_redact_literal_values_in_case_clauses(expression, config)

if config.redact_literals.when_not_matched_insert_clauses:
_redact_literal_values_in_when_not_matched_insert_clauses(expression, config)

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "metaphor-connectors"
version = "0.14.79"
version = "0.14.80"
license = "Apache-2.0"
description = "A collection of Python-based 'connectors' that extract metadata from various sources to ingest into the Metaphor app."
authors = ["Metaphor <[email protected]>"]
Expand Down
27 changes: 27 additions & 0 deletions tests/common/sql/process_query/test_process_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
config = ProcessQueryConfig(
redact_literals=RedactPIILiteralsConfig(
where_clauses=True,
case_clauses=True,
when_not_matched_insert_clauses=True,
),
ignore_insert_values_into=True,
Expand Down Expand Up @@ -163,3 +164,29 @@ def test_merge_insert_when_not_matched():
processed
== "MERGE INTO TargetProducts AS Target USING SourceProducts AS Source ON Source.ProductID = Target.ProductID WHEN NOT MATCHED THEN INSERT (ProductID, ProductName, Price) VALUES (<REDACTED>, '<REDACTED>', <REDACTED>) WHEN MATCHED THEN UPDATE SET Target.ProductName = Source.ProductName, Target.Price = Source.Price"
)


def test_redact_where_clauses():
    """
    Literals inside a CASE expression of an INSERT ... SELECT are redacted.

    NOTE(review): the function name mentions WHERE clauses, but every redacted
    value in the expected output comes from the CASE expression. Consider
    renaming, and confirm no other test in this module already uses this name
    (a duplicate name would shadow the earlier test).
    """
    query = """
INSERT INTO target_table (first_name, last_name, email, status)
SELECT
first_name,
last_name,
email,
CASE
WHEN age < 18 THEN 'Minor'
WHEN age >= 18 AND age < 65 THEN 'Adult'
ELSE 'Senior'
END as status
FROM source_table
WHERE email IS NOT NULL;
"""
    expected = "INSERT INTO target_table (first_name, last_name, email, status) SELECT first_name, last_name, email, CASE WHEN age < <REDACTED> THEN '<REDACTED>' WHEN age >= <REDACTED> AND age < <REDACTED> THEN '<REDACTED>' ELSE '<REDACTED>' END AS status FROM source_table WHERE NOT email IS NULL"

    result = process_query(query, DataPlatform.SNOWFLAKE, config)

    assert result == expected
Loading