From c49388807734ce903162df3b652c6e8d4d58619c Mon Sep 17 00:00:00 2001 From: Brett Date: Thu, 28 Sep 2023 10:54:38 -0400 Subject: [PATCH] index converters by type when no converter is found This changes how converters with class paths/names as types are handled by asdf. Prior to this commit, when converting a class instance (foo) of class (Foo) the class path of the instance was inspected with ``get_class_name`` and the ``_converters_by_type`` index was searched using the class name. This index contained both classes and class paths (as converters could use both/either) with classes being preferred. This creates issues when a module has a different class path from the typical 'public' path for the class as the module might move the private implementation without changing the 'public' path with the expectation that this will not break downstream code. For converters that use class paths these types of moves will break the converter. This commit changes the handling of class paths used for converters so that the 'public' path can be used. If a converter is not found for a class in ``_converters_by_type`` (which now only contains classes as keys) then ``_converters_by_class_path`` is indexed by checking what classes referred to by the paths are already imported (to avoid importing every class supported by every extension). If the class is imported it is added to ``_converters_by_type`` and removed from ``_converters_by_class_path``. --- asdf/extension/_manager.py | 97 ++++++++++++++++++++++++++++++++------ 1 file changed, 82 insertions(+), 15 deletions(-) diff --git a/asdf/extension/_manager.py b/asdf/extension/_manager.py index 8094ef701..7acc843c6 100644 --- a/asdf/extension/_manager.py +++ b/asdf/extension/_manager.py @@ -1,3 +1,4 @@ +import sys from functools import lru_cache from asdf.tagged import Tagged @@ -23,8 +24,35 @@ def __init__(self, extensions): self._tag_defs_by_tag = {} self._converters_by_tag = {} - # This dict has both str and type keys: + + # To optimize performance converters can be registered using either: + # - the class/type they convert + # - the name/path (string) of the class/type they convert + # This allows the registration to continue without importing + # every module for every extension (which would be needed to turn + # the class paths into proper classes). Using class paths can be + # complicated by packages that have private implementations of + # classes that are exposed at a different 'public' location. + # These private classes are may change between minor versions + # and would break converters that are registered using the private + # class path. However, often libraries to not modify the module + # of the 'public' class (so inspecting the class path returns + # the private class path). One example of this in asdf is + # Converter (exposed as ``asdf.extension.Converter`` but with + # a class path of ``asdf.extension._converter.Converter``). + # To allow converters to be registered with the public location + # we will need to attempt to import the public class path + # and then register the private class path after the class is + # imported. We don't want to do this unnecessarily and since + # class instances do not contain the public class path + # we adopt a strategy of checking class paths and only + # registering those that have already been imported. Thiss + # is ok because asdf will only use the converter type + # when attempting to serialize an object in memory (so the + # public class path will already be imported at the time + # the converter is needed). self._converters_by_type = {} + self._converters_by_class_path = {} validators = set() @@ -38,13 +66,16 @@ def __init__(self, extensions): self._converters_by_tag[tag] = converter for typ in converter.types: if isinstance(typ, str): - if typ not in self._converters_by_type: - self._converters_by_type[typ] = converter + if typ not in self._converters_by_class_path: + self._converters_by_class_path[typ] = converter else: type_class_name = get_class_name(typ, instance=False) - if typ not in self._converters_by_type and type_class_name not in self._converters_by_type: + if ( + typ not in self._converters_by_type + and type_class_name not in self._converters_by_class_path + ): self._converters_by_type[typ] = converter - self._converters_by_type[type_class_name] = converter + self._converters_by_class_path[type_class_name] = converter validators.update(extension.validators) @@ -90,7 +121,10 @@ def handles_type(self, typ): ------- bool """ - return typ in self._converters_by_type or get_class_name(typ, instance=False) in self._converters_by_type + if typ in self._converters_by_type: + return True + self._index_converters() + return typ in self._converters_by_type def handles_tag_definition(self, tag): """ @@ -172,18 +206,51 @@ def get_converter_for_type(self, typ): KeyError Unrecognized type. """ + if typ not in self._converters_by_type: + self._index_converters() try: return self._converters_by_type[typ] except KeyError: - class_name = get_class_name(typ, instance=False) - try: - return self._converters_by_type[class_name] - except KeyError: - msg = ( - f"No support available for Python type '{get_class_name(typ, instance=False)}'. " - "You may need to install or enable an extension." - ) - raise KeyError(msg) from None + msg = ( + f"No support available for Python type '{get_class_name(typ, instance=False)}'. " + "You may need to install or enable an extension." + ) + raise KeyError(msg) from None + + def _index_converters(self): + """ + Search _converters_by_class_path for paths (strings) that + refer to classes that are currently imported. For imported + classes, add them to _converters_by_class (if the class + doesn't already have a converter). + """ + # search class paths to find ones that are imported + for class_path in list(self._converters_by_class_path): + class_ = None + if "." not in class_path: + # this class path does not appear to include a module + if class_path in globals(): + class_ = globals()[class_path] + elif class_path in sys.modules: + class_ = sys.modules[class_path] + else: + continue + else: + # this class is part of a module + module_name, class_name = class_path.rsplit(".", maxsplit=1) + # if the module is not imported, don't index it + if module_name not in sys.modules: + continue + module = sys.modules[module_name] + if not hasattr(module, class_name): + # the imported module does not have this class, perhaps + # it is dynamically created so do not index it yet + continue + class_ = getattr(module, class_name) + if class_ is not None: + if class_ not in self._converters_by_type: + self._converters_by_type[class_] = self._converters_by_class_path[class_path] + del self._converters_by_class_path[class_path] @property def validator_manager(self):