From 63365a34dc44ea43881be155d6702145fb90699b Mon Sep 17 00:00:00 2001 From: Stephen Rosen Date: Fri, 17 Jul 2020 00:28:10 +0000 Subject: [PATCH] Make propagate_unknown respect any explicit value propagate_unknown will still traverse any series of nested documents, meaning that once you set propagate_unknown=True, it is true for the whole schema structure. However, this introduces tracking for whether or not `unknown` was set explicitly. If `unknown=RAISE` is set because no value was specified, we will set a new flag on the schema, `auto_unknown=True`. propagate_unknown now has the following behavior: - if the nested schema has auto_unknown=False, use the current value for `unknown` in the nested `load` call - if a nested field has its `unknown` attribute set, use that in place of any value sent via `propagate_unknown` Effectively, this means that if you set `unknown` explicitly anywhere in a nested schema structure, it will propagate downwards from that point. Combined with the fact that propagate_unknown=True propagates downwards across all schema barriers, including if `propagate_unknown=False` is set explicitly somewhere, this could be confusing. However, because the idea is for `propagate_unknown=True` to eventually be the only supported behavior for marshmallow, this is acceptable as a limitation. auto_unknown is an attribute of schema opts and of schema instances, with the same kind of inheritance behavior as other fields. --- src/marshmallow/fields.py | 9 ++++++ src/marshmallow/schema.py | 12 +++++++- tests/test_schema.py | 63 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 83 insertions(+), 1 deletion(-) diff --git a/src/marshmallow/fields.py b/src/marshmallow/fields.py index 02e40d61a..4926fecf0 100644 --- a/src/marshmallow/fields.py +++ b/src/marshmallow/fields.py @@ -608,6 +608,15 @@ def _deserialize( Add ``partial`` parameter. 
""" self._test_collection(value) + # check if self.unknown or self.schema.unknown is set + # however, we should only respect `self.schema.unknown` if + # `auto_unknown` is False, meaning that it was set explicitly on the + # schema class or instance + explicit_unknown = self.unknown or ( + self.schema.unknown if not self.schema.auto_unknown else None + ) + if explicit_unknown: + unknown = explicit_unknown return self._load( value, data, diff --git a/src/marshmallow/schema.py b/src/marshmallow/schema.py index c52aadc9a..690ded829 100644 --- a/src/marshmallow/schema.py +++ b/src/marshmallow/schema.py @@ -227,7 +227,14 @@ def __init__(self, meta, ordered: bool = False): self.include = getattr(meta, "include", {}) self.load_only = getattr(meta, "load_only", ()) self.dump_only = getattr(meta, "dump_only", ()) - self.unknown = getattr(meta, "unknown", RAISE) + # self.unknown defaults to "RAISE", but note whether it was explicit or + # not, so that when we're handling propagate_unknown we can decide + # whether or not to propagate based on whether or not it was set + # explicitly + self.unknown = getattr(meta, "unknown", None) + self.auto_unknown = self.unknown is None + if self.auto_unknown: + self.unknown = RAISE self.propagate_unknown = getattr(meta, "propagate_unknown", False) self.register = getattr(meta, "register", True) @@ -391,6 +398,9 @@ def __init__( self.dump_only = set(dump_only) or set(self.opts.dump_only) self.partial = partial self.unknown = unknown or self.opts.unknown + # if unknown was not set explicitly AND self.opts.auto_unknown is true, + # then the value should be considered "automatic" + self.auto_unknown = (not unknown) and self.opts.auto_unknown self.propagate_unknown = propagate_unknown or self.opts.propagate_unknown self.context = context or {} self._normalize_nested_options() diff --git a/tests/test_schema.py b/tests/test_schema.py index cc06200e8..c93d16ce4 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -2891,3 +2891,66 @@ 
class DefinitelyUniqueSchema(Schema): SchemaClass = class_registry.get_class(DefinitelyUniqueSchema.__name__) assert SchemaClass is DefinitelyUniqueSchema + + +def test_propagate_unknown_stops_at_explicit_value_for_nested(): + # propagate_unknown=True should traverse any "auto_unknown" values and + # replace them with the "unknown" value from the parent context (schema or + # load arguments) + # this test makes sure that it stops when a nested field or schema has + # "unknown" set explicitly (so auto_unknown=False) + + class Bottom(Schema): + x = fields.Str() + + class Middle(Schema): + x = fields.Str() + # set unknown explicitly on a nested field, so auto_unknown will be + # false going into Bottom + child = fields.Nested(Bottom, unknown=EXCLUDE) + + class Top(Schema): + x = fields.Str() + child = fields.Nested(Middle) + + data = { + "x": "hi", + "y": "bye", + "child": {"x": "hi", "y": "bye", "child": {"x": "hi", "y": "bye"}}, + } + result = Top(unknown=INCLUDE, propagate_unknown=True).load(data) + assert result == { + "x": "hi", + "y": "bye", + "child": {"x": "hi", "y": "bye", "child": {"x": "hi"}}, + } + + +def test_propagate_unknown_stops_at_explicit_value_for_meta(): + # this is the same as the above test of propagate_unknown stopping where + # auto_unknown=False, but it checks that this applies when `unknown` is set + # by means of `Meta` + + class Bottom(Schema): + x = fields.Str() + + class Middle(Schema): + x = fields.Str() + child = fields.Nested(Bottom) + + # set unknown explicitly on a nested field, so auto_unknown will be + # false going into Bottom + class Meta: + unknown = EXCLUDE + + class Top(Schema): + x = fields.Str() + child = fields.Nested(Middle) + + data = { + "x": "hi", + "y": "bye", + "child": {"x": "hi", "y": "bye", "child": {"x": "hi", "y": "bye"}}, + } + result = Top(unknown=INCLUDE, propagate_unknown=True).load(data) + assert result == {"x": "hi", "y": "bye", "child": {"x": "hi", "child": {"x": "hi"}}}