Skip to content

Commit

Permalink
optimise dumping to reduce unnecessary overhead
Browse files Browse the repository at this point in the history
When dumping many objects, marshmallow is calling the same field methods
over and over again, which return the same values. Parts of this process
can be called only once per dump, which reduces python method call
overhead significantly.

`Field.get_serializer` returns the optimized serializer for the current
dump operation, avoiding the expensive lookups for properties which will
not change during a single dump (such as `data_key`, `default`, etc)

Also, the default `Schema.get_attribute` is also not used because all it
does is calling `utils._get_value_for_key(s)`.
  • Loading branch information
dsimidzija committed Mar 7, 2021
1 parent 19f1cad commit e55eb9e
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 14 deletions.
1 change: 1 addition & 0 deletions AUTHORS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -159,3 +159,4 @@ Contributors (chronological)
- Stephen Rosen `@sirosen <https://github.com/sirosen>`_
- Vladimir Mikhaylov `@vemikhaylov <https://github.com/vemikhaylov>`_
- Stephen Eaton `@madeinoz67 <https://github.com/madeinoz67>`_
- Dusko Simidzija `@dsimidzija <https://github.com/dsimidzija>`_
46 changes: 46 additions & 0 deletions src/marshmallow/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
missing as missing_,
resolve_field_instance,
is_aware,
_get_value_for_key,
_get_value_for_keys,
)
from marshmallow.exceptions import (
ValidationError,
Expand Down Expand Up @@ -304,6 +306,50 @@ def _validate_missing(self, value):
if hasattr(self, "allow_none") and self.allow_none is not True:
raise self.make_error("null")

def get_serializer(
self,
attr: str,
accessor: typing.Optional[
typing.Callable[[typing.Any, str, typing.Any], typing.Any]
] = None,
**kwargs
) -> typing.Callable[[typing.Any], typing.Any]:
"""Return an optimized serializer for this Field object.
:param str attr: The attribute or key on the object to be serialized.
:param dict kwargs: Field-specific keyword arguments.
:return: Serializer function.
"""
if not self._CHECK_ATTRIBUTE:
return lambda obj: self._serialize(None, attr, obj, **kwargs)

attribute = getattr(self, "attribute", None)
check_key = attr if attribute is None else attribute
default = None
callable_default = False
has_default = hasattr(self, "default")
if has_default:
default = self.default
callable_default = callable(default)
if accessor:
accessor_func = accessor
else:
if not isinstance(check_key, int) and "." in check_key:
accessor_func = _get_value_for_keys
check_key = check_key.split(".")
else:
accessor_func = _get_value_for_key

def _serializer(obj):
value = accessor_func(obj, check_key, missing_)
if value is missing_ and has_default:
value = default() if callable_default else default
if value is missing_:
return value
return self._serialize(value, attr, obj, **kwargs)

return _serializer

def serialize(
self,
attr: str,
Expand Down
54 changes: 40 additions & 14 deletions src/marshmallow/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,7 @@ def __init__(
self.fields = {} # type: typing.Dict[str, ma_fields.Field]
self.load_fields = {} # type: typing.Dict[str, ma_fields.Field]
self.dump_fields = {} # type: typing.Dict[str, ma_fields.Field]
self.dump_serializers = {} # type: typing.Dict[str, typing.Callable]
self._init_fields()
messages = {}
messages.update(self._default_error_messages)
Expand Down Expand Up @@ -469,7 +470,7 @@ def handle_error(
"""
pass

def get_attribute(self, obj: typing.Any, attr: str, default: typing.Any):
def default_get_attribute(self, obj: typing.Any, attr: str, default: typing.Any):
"""Defines how to pull values from an object to serialize.
.. versionadded:: 2.0.0
Expand All @@ -479,6 +480,8 @@ def get_attribute(self, obj: typing.Any, attr: str, default: typing.Any):
"""
return get_value(obj, attr, default)

get_attribute = default_get_attribute

##### Serialization/Deserialization API #####

@staticmethod
Expand Down Expand Up @@ -513,19 +516,41 @@ def _serialize(
.. versionchanged:: 1.0.0
Renamed from ``marshal``.
"""
if many and obj is not None:
return [
self._serialize(d, many=False)
for d in typing.cast(typing.Iterable[_T], obj)
]
ret = self.dict_class()
for attr_name, field_obj in self.dump_fields.items():
value = field_obj.serialize(attr_name, obj, accessor=self.get_attribute)
if value is missing:
continue
key = field_obj.data_key if field_obj.data_key is not None else attr_name
ret[key] = value
return ret
if not self.dump_serializers:
accessor = (
None
) # type: typing.Optional[typing.Callable[[typing.Any, str, typing.Any], typing.Any]]
if self.get_attribute != self.default_get_attribute:
accessor = self.get_attribute

for field_name, field_obj in self.dump_fields.items():
key = (
field_obj.data_key if field_obj.data_key is not None else field_name
)
self.dump_serializers[key] = field_obj.get_serializer(
field_name, accessor
)

source_obj = [None] # typing: typing.List[typing.Any]

if not many:
source_obj = [typing.cast(typing.Any, obj)]
elif many and obj is not None:
source_obj = typing.cast(typing.List[typing.Any], obj)

output = []
for current_obj in source_obj:
ret = self.dict_class()
for key, serializer in self.dump_serializers.items():
value = serializer(current_obj)
if value is missing:
continue
ret[key] = value
output.append(ret)

if not many:
return output[0]
return output

def dump(self, obj: typing.Any, *, many: typing.Optional[bool] = None):
"""Serialize an object to native Python data types according to this
Expand Down Expand Up @@ -1018,6 +1043,7 @@ def _init_fields(self) -> None:
self.fields = fields_dict
self.dump_fields = dump_fields
self.load_fields = load_fields
self.dump_serializers = {}

def on_bind_field(self, field_name: str, field_obj: ma_fields.Field) -> None:
"""Hook to modify a field when it is bound to the `Schema`.
Expand Down

0 comments on commit e55eb9e

Please sign in to comment.