diff --git a/asdf/extension/_manager.py b/asdf/extension/_manager.py index 8094ef701..7acc843c6 100644 --- a/asdf/extension/_manager.py +++ b/asdf/extension/_manager.py @@ -1,3 +1,4 @@ +import sys from functools import lru_cache from asdf.tagged import Tagged @@ -23,8 +24,35 @@ def __init__(self, extensions): self._tag_defs_by_tag = {} self._converters_by_tag = {} - # This dict has both str and type keys: + + # To optimize performance converters can be registered using either: + # - the class/type they convert + # - the name/path (string) of the class/type they convert + # This allows the registration to continue without importing + # every module for every extension (which would be needed to turn + # the class paths into proper classes). Using class paths can be + # complicated by packages that have private implementations of + # classes that are exposed at a different 'public' location. + # These private classes are may change between minor versions + # and would break converters that are registered using the private + # class path. However, often libraries to not modify the module + # of the 'public' class (so inspecting the class path returns + # the private class path). One example of this in asdf is + # Converter (exposed as ``asdf.extension.Converter`` but with + # a class path of ``asdf.extension._converter.Converter``). + # To allow converters to be registered with the public location + # we will need to attempt to import the public class path + # and then register the private class path after the class is + # imported. We don't want to do this unnecessarily and since + # class instances do not contain the public class path + # we adopt a strategy of checking class paths and only + # registering those that have already been imported. Thiss + # is ok because asdf will only use the converter type + # when attempting to serialize an object in memory (so the + # public class path will already be imported at the time + # the converter is needed). self._converters_by_type = {} + self._converters_by_class_path = {} validators = set() @@ -38,13 +66,16 @@ def __init__(self, extensions): self._converters_by_tag[tag] = converter for typ in converter.types: if isinstance(typ, str): - if typ not in self._converters_by_type: - self._converters_by_type[typ] = converter + if typ not in self._converters_by_class_path: + self._converters_by_class_path[typ] = converter else: type_class_name = get_class_name(typ, instance=False) - if typ not in self._converters_by_type and type_class_name not in self._converters_by_type: + if ( + typ not in self._converters_by_type + and type_class_name not in self._converters_by_class_path + ): self._converters_by_type[typ] = converter - self._converters_by_type[type_class_name] = converter + self._converters_by_class_path[type_class_name] = converter validators.update(extension.validators) @@ -90,7 +121,10 @@ def handles_type(self, typ): ------- bool """ - return typ in self._converters_by_type or get_class_name(typ, instance=False) in self._converters_by_type + if typ in self._converters_by_type: + return True + self._index_converters() + return typ in self._converters_by_type def handles_tag_definition(self, tag): """ @@ -172,18 +206,51 @@ def get_converter_for_type(self, typ): KeyError Unrecognized type. """ + if typ not in self._converters_by_type: + self._index_converters() try: return self._converters_by_type[typ] except KeyError: - class_name = get_class_name(typ, instance=False) - try: - return self._converters_by_type[class_name] - except KeyError: - msg = ( - f"No support available for Python type '{get_class_name(typ, instance=False)}'. " - "You may need to install or enable an extension." - ) - raise KeyError(msg) from None + msg = ( + f"No support available for Python type '{get_class_name(typ, instance=False)}'. " + "You may need to install or enable an extension." + ) + raise KeyError(msg) from None + + def _index_converters(self): + """ + Search _converters_by_class_path for paths (strings) that + refer to classes that are currently imported. For imported + classes, add them to _converters_by_class (if the class + doesn't already have a converter). + """ + # search class paths to find ones that are imported + for class_path in list(self._converters_by_class_path): + class_ = None + if "." not in class_path: + # this class path does not appear to include a module + if class_path in globals(): + class_ = globals()[class_path] + elif class_path in sys.modules: + class_ = sys.modules[class_path] + else: + continue + else: + # this class is part of a module + module_name, class_name = class_path.rsplit(".", maxsplit=1) + # if the module is not imported, don't index it + if module_name not in sys.modules: + continue + module = sys.modules[module_name] + if not hasattr(module, class_name): + # the imported module does not have this class, perhaps + # it is dynamically created so do not index it yet + continue + class_ = getattr(module, class_name) + if class_ is not None: + if class_ not in self._converters_by_type: + self._converters_by_type[class_] = self._converters_by_class_path[class_path] + del self._converters_by_class_path[class_path] @property def validator_manager(self):