Skip to content

Commit

Permalink
poc for secret string obfuscation in validate_dict
Browse files Browse the repository at this point in the history
  • Loading branch information
sh-rp committed Nov 5, 2024
1 parent b732eb2 commit db97219
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 7 deletions.
8 changes: 7 additions & 1 deletion dlt/common/exceptions.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from typing import Any, AnyStr, Dict, List, Sequence, Optional, Iterable, Type, TypedDict

from dlt.common.typing import StrAny


class ExceptionTrace(TypedDict, total=False):
"""Exception trace. NOTE: we intend to change it with an extended line by line trace with code snippets"""
Expand Down Expand Up @@ -97,18 +99,22 @@ class DictValidationException(DltException):
def __init__(
self,
msg: str,
doc: StrAny,
path: str,
expected_type: Type[Any] = None,
field: str = None,
value: Any = None,
nested_exceptions: List["DictValidationException"] = None,
) -> None:
from dlt.common.utils import obfuscate_values_in_string

self.doc = doc
self.path = path
self.expected_type = expected_type
self.field = field
self.value = value
self.nested_exceptions = nested_exceptions
self.msg = msg
self.msg = obfuscate_values_in_string(doc, msg)
super().__init__(msg)

def __str__(self) -> str:
Expand Down
8 changes: 6 additions & 2 deletions dlt/common/schema/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,7 @@ def simple_regex_validator(path: str, pk: str, pv: Any, t: Any) -> bool:
if not isinstance(pv, str):
raise DictValidationException(
f"field {pk} value {pv} has invalid type {type(pv).__name__} while str is expected",
{},
path,
t,
pk,
Expand All @@ -255,6 +256,7 @@ def simple_regex_validator(path: str, pk: str, pv: Any, t: Any) -> bool:
except Exception as e:
raise DictValidationException(
f"field {pk} value {pv[3:]} does not compile as regex: {str(e)}",
{},
path,
t,
pk,
Expand All @@ -264,6 +266,7 @@ def simple_regex_validator(path: str, pk: str, pv: Any, t: Any) -> bool:
if RE_NON_ALPHANUMERIC_UNDERSCORE.match(pv):
raise DictValidationException(
f"field {pk} value {pv} looks like a regex, please prefix with re:",
{},
path,
t,
pk,
Expand All @@ -283,6 +286,7 @@ def validator(path: str, pk: str, pv: Any, t: Any) -> bool:
raise DictValidationException(
f"field {pk} value {pv} has invalid type {type(pv).__name__} while"
" str is expected",
{},
path,
t,
pk,
Expand All @@ -291,11 +295,11 @@ def validator(path: str, pk: str, pv: Any, t: Any) -> bool:
try:
if naming.normalize_path(pv) != pv:
raise DictValidationException(
f"field {pk}: {pv} is not a valid column name", path, t, pk, pv
f"field {pk}: {pv} is not a valid column name", {}, path, t, pk, pv
)
except ValueError:
raise DictValidationException(
f"field {pk}: {pv} is not a valid column name", path, t, pk, pv
f"field {pk}: {pv} is not a valid column name", {}, path, t, pk, pv
)
return True
else:
Expand Down
39 changes: 39 additions & 0 deletions dlt/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -635,3 +635,42 @@ def __getattribute__(self, name: str) -> Any:
raise RuntimeError("This instance has been dropped and cannot be used anymore.")

return DefunctClass


def obfuscate_string(s: str) -> str:
"""Obfuscates string by replacing some or all characters with asterisks"""
if len(s) < 6:
return "*" * len(s)
return s[0] + "*" * (len(s) - 2) + s[-1]


def all_strings(d: StrAny) -> List[str]:
"""Returns all string values found in the given object"""
strings = []

if isinstance(d, str):
return [d]
if isinstance(d, dict):
for v in d.values():
if isinstance(v, str):
strings.append(v)
elif isinstance(v, dict):
strings.extend(all_strings(v))
elif isinstance(d, list):
for v in d:
strings.extend(all_strings(v))
else:
return []
return strings


def obfuscate_values_in_string(d: StrAny, msg: str) -> str:
"""Obfuscates all string values found in the dictionary and its nested dictionaries in the message"""

# create mapping of obfuscated strings
obfuscated_strings = {s: obfuscate_string(s) for s in all_strings(d)}

for s in obfuscated_strings:
msg = msg.replace(s, obfuscated_strings[s])

return msg
16 changes: 12 additions & 4 deletions dlt/common/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,11 @@ def validate_dict(
# check missing props
missing = set(required_props.keys()).difference(props.keys())
if len(missing):
raise DictValidationException(f"following required fields are missing {missing}", path)
raise DictValidationException(f"following required fields are missing {missing}", doc, path)
# check unknown props
unexpected = set(props.keys()).difference(allowed_props.keys())
if len(unexpected):
raise DictValidationException(f"following fields are unexpected {unexpected}", path)
raise DictValidationException(f"following fields are unexpected {unexpected}", doc, path)

def verify_prop(pk: str, pv: Any, t: Any) -> None:
# covers none in optional and union types
Expand Down Expand Up @@ -108,6 +108,7 @@ def verify_prop(pk: str, pv: Any, t: Any) -> None:
msg += f"For {get_type_name(failed.expected_type)}: " + str(failed) + "\n"
raise DictValidationException(
msg,
doc,
path,
t,
pk,
Expand All @@ -118,13 +119,14 @@ def verify_prop(pk: str, pv: Any, t: Any) -> None:
a_l = get_literal_args(t)
if pv not in a_l:
raise DictValidationException(
f"field '{pk}' with value {pv} is not one of: {a_l}", path, t, pk, pv
f"field '{pk}' with value {pv} is not one of: {a_l}", doc, path, t, pk, pv
)
elif t in [int, bool, str, float]:
if not isinstance(pv, t):
raise DictValidationException(
f"field '{pk}' with value {pv} has invalid type '{type(pv).__name__}' while"
f" '{t.__name__}' is expected",
doc,
path,
t,
pk,
Expand All @@ -135,6 +137,7 @@ def verify_prop(pk: str, pv: Any, t: Any) -> None:
raise DictValidationException(
f"field '{pk}' with value {pv} has invalid type '{type(pv).__name__}' while"
f" '{get_type_name(t)}' is expected",
doc,
path,
t,
pk,
Expand All @@ -146,6 +149,7 @@ def verify_prop(pk: str, pv: Any, t: Any) -> None:
raise DictValidationException(
f"field '{pk}' with value {pv} has invalid type '{type(pv).__name__}' while"
" 'list' is expected",
doc,
path,
t,
pk,
Expand All @@ -160,6 +164,7 @@ def verify_prop(pk: str, pv: Any, t: Any) -> None:
raise DictValidationException(
f"field '{pk}' with value {pv} has invalid type '{type(pv).__name__}' while"
" 'dict' is expected",
doc,
path,
t,
pk,
Expand All @@ -170,7 +175,7 @@ def verify_prop(pk: str, pv: Any, t: Any) -> None:
for d_k, d_v in pv.items():
if not isinstance(d_k, str):
raise DictValidationException(
f"field '{pk}' with key {d_k} must be a string", path, t, pk, d_k
f"field '{pk}' with key {d_k} must be a string", doc, path, t, pk, d_k
)
verify_prop(f"{pk}[{d_k}]", d_v, d_v_t)
elif t is Any:
Expand All @@ -188,6 +193,7 @@ def verify_prop(pk: str, pv: Any, t: Any) -> None:
raise DictValidationException(
f"field '{pk}' expects callable (function or class instance) but got "
f" '{pv}'. Mind that signatures are not validated",
doc,
path,
t,
pk,
Expand All @@ -203,6 +209,7 @@ def verify_prop(pk: str, pv: Any, t: Any) -> None:
raise DictValidationException(
f"field '{pk}' expects class '{type_name}' but got instance of"
f" '{pv_type_name}'",
doc,
path,
t,
pk,
Expand All @@ -212,6 +219,7 @@ def verify_prop(pk: str, pv: Any, t: Any) -> None:
type_name = get_type_name(t)
raise DictValidationException(
f"field '{pk}' has expected type '{type_name}' which lacks validator",
doc,
path,
t,
pk,
Expand Down
12 changes: 12 additions & 0 deletions tests/common/test_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
Optional,
Union,
)
from dlt.common.configuration.specs import BaseConfiguration

from dlt.common import Decimal, jsonpath
from dlt.common.exceptions import DictValidationException
Expand Down Expand Up @@ -406,6 +407,17 @@ class TTestRecordClassUnion(TypedDict):
validate_dict(TTestRecordClassUnion, test_item_2, path=".")


def test_secrets_obfuscation() -> None:
class Config(TypedDict):
a: str
b: int

with pytest.raises(DictValidationException) as e:
validate_dict(Config, {"a": "123456", "b": {"c": "inner_value"}}, ".")
assert "inner_value" not in e.value.msg
assert "{'c': 'i*********e'}" in e.value.msg


# def test_union_merge() -> None:
# """Overriding fields is simply illegal in TypedDict"""
# class EndpointResource(TypedDict, total=False):
Expand Down

0 comments on commit db97219

Please sign in to comment.