Skip to content

Commit

Permalink
index converters by type when no converter is found
Browse files Browse the repository at this point in the history
This changes how converters with class paths/names
as types are handled by asdf.

Prior to this commit, when converting a class instance
(foo) of class (Foo) the class path of the instance
was inspected with ``get_class_name`` and the
``_converters_by_type`` index was searched using
the class name. This index contained both classes
and class paths (as converters could use both/either)
with classes being preferred. This creates issues
when a module has a different class path from the
typical 'public' path for the class as the module
might move the private implementation without changing
the 'public' path with the expectation that this will
not break downstream code. For converters that use
class paths these types of moves will break the converter.

This commit changes the handling of class paths used
for converters so that the 'public' path can be used.
If a converter is not found for a class in
``_converters_by_type`` (which now only contains classes
as keys) then ``_converters_by_class_path`` is indexed
by checking what classes referred to by the paths
are already imported (to avoid importing every class
supported by every extension). If the class is imported
it is added to ``_converters_by_type`` and removed from
``_converters_by_class_path``.
  • Loading branch information
braingram committed Oct 16, 2023
1 parent 959a83d commit 9cc868b
Showing 1 changed file with 82 additions and 15 deletions.
97 changes: 82 additions & 15 deletions asdf/extension/_manager.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import sys
from functools import lru_cache

from asdf.tagged import Tagged
Expand All @@ -23,8 +24,35 @@ def __init__(self, extensions):

self._tag_defs_by_tag = {}
self._converters_by_tag = {}
# This dict has both str and type keys:

# To optimize performance converters can be registered using either:
# - the class/type they convert
# - the name/path (string) of the class/type they convert
# This allows the registration to continue without importing
# every module for every extension (which would be needed to turn
# the class paths into proper classes). Using class paths can be
# complicated by packages that have private implementations of
# classes that are exposed at a different 'public' location.
# These private classes are may change between minor versions
# and would break converters that are registered using the private
# class path. However, often libraries to not modify the module
# of the 'public' class (so inspecting the class path returns
# the private class path). One example of this in asdf is
# Converter (exposed as ``asdf.extension.Converter`` but with
# a class path of ``asdf.extension._converter.Converter``).
# To allow converters to be registered with the public location
# we will need to attempt to import the public class path
# and then register the private class path after the class is
# imported. We don't want to do this unnecessarily and since
# class instances do not contain the public class path
# we adopt a strategy of checking class paths and only
# registering those that have already been imported. Thiss
# is ok because asdf will only use the converter type
# when attempting to serialize an object in memory (so the
# public class path will already be imported at the time
# the converter is needed).
self._converters_by_type = {}
self._converters_by_class_path = {}

validators = set()

Expand All @@ -38,13 +66,16 @@ def __init__(self, extensions):
self._converters_by_tag[tag] = converter
for typ in converter.types:
if isinstance(typ, str):
if typ not in self._converters_by_type:
self._converters_by_type[typ] = converter
if typ not in self._converters_by_class_path:
self._converters_by_class_path[typ] = converter
else:
type_class_name = get_class_name(typ, instance=False)
if typ not in self._converters_by_type and type_class_name not in self._converters_by_type:
if (
typ not in self._converters_by_type
and type_class_name not in self._converters_by_class_path
):
self._converters_by_type[typ] = converter
self._converters_by_type[type_class_name] = converter
self._converters_by_class_path[type_class_name] = converter

validators.update(extension.validators)

Expand Down Expand Up @@ -90,7 +121,10 @@ def handles_type(self, typ):
-------
bool
"""
return typ in self._converters_by_type or get_class_name(typ, instance=False) in self._converters_by_type
if typ in self._converters_by_type:
return True
self._index_converters()
return typ in self._converters_by_type

def handles_tag_definition(self, tag):
"""
Expand Down Expand Up @@ -172,18 +206,51 @@ def get_converter_for_type(self, typ):
KeyError
Unrecognized type.
"""
if typ not in self._converters_by_type:
self._index_converters()
try:
return self._converters_by_type[typ]
except KeyError:
class_name = get_class_name(typ, instance=False)
try:
return self._converters_by_type[class_name]
except KeyError:
msg = (
f"No support available for Python type '{get_class_name(typ, instance=False)}'. "
"You may need to install or enable an extension."
)
raise KeyError(msg) from None
msg = (
f"No support available for Python type '{get_class_name(typ, instance=False)}'. "
"You may need to install or enable an extension."
)
raise KeyError(msg) from None

def _index_converters(self):
"""
Search _converters_by_class_path for paths (strings) that
refer to classes that are currently imported. For imported
classes, add them to _converters_by_class (if the class
doesn't already have a converter).
"""
# search class paths to find ones that are imported
for class_path in list(self._converters_by_class_path):
class_ = None
if "." not in class_path:
# this class path does not appear to include a module
if class_path in globals():
class_ = globals()[class_path]
elif class_path in sys.modules:
class_ = sys.modules[class_path]
else:
continue
else:
# this class is part of a module
module_name, class_name = class_path.rsplit(".", maxsplit=1)
# if the module is not imported, don't index it
if module_name not in sys.modules:
continue
module = sys.modules[module_name]
if not hasattr(module, class_name):
# the imported module does not have this class, perhaps
# it is dynamically created so do not index it yet
continue
class_ = getattr(module, class_name)
if class_ is not None:
if class_ not in self._converters_by_type:
self._converters_by_type[class_] = self._converters_by_class_path[class_path]
del self._converters_by_class_path[class_path]

@property
def validator_manager(self):
Expand Down

0 comments on commit 9cc868b

Please sign in to comment.