From 4dff15afa5241aa24a3fe6274be54b191c9bb7fc Mon Sep 17 00:00:00 2001 From: Kevin Lloyd Bernal Date: Fri, 25 Mar 2022 21:26:17 +0800 Subject: [PATCH 1/2] create new export() method in PageObjectRegistry --- docs/intro/pop.rst | 76 ++++++++++++++----- tests/test_pop.py | 60 +++++++++++++++ tests_pop/__init__.py | 0 tests_pop/base_A_package/__init__.py | 3 + tests_pop/base_A_package/base.py | 2 + tests_pop/base_A_package/site_1.py | 8 ++ tests_pop/base_A_package/site_2.py | 8 ++ tests_pop/base_B_package/__init__.py | 3 + tests_pop/base_B_package/base.py | 2 + tests_pop/base_B_package/site_2.py | 8 ++ tests_pop/base_B_package/site_3.py | 8 ++ tests_pop/combine_A_B_package/__init__.py | 9 +++ tests_pop/combine_A_B_package/base_A_package | 1 + tests_pop/combine_A_B_package/base_B_package | 1 + .../combine_A_B_subset_package/__init__.py | 16 ++++ .../combine_A_B_subset_package/base_B_package | 1 + .../improved_A_package | 1 + tests_pop/improved_A_package/__init__.py | 3 + tests_pop/improved_A_package/base_A_package | 1 + tests_pop/improved_A_package/site_1.py | 9 +++ web_poet/overrides.py | 22 ++++++ 21 files changed, 224 insertions(+), 18 deletions(-) create mode 100644 tests/test_pop.py create mode 100644 tests_pop/__init__.py create mode 100644 tests_pop/base_A_package/__init__.py create mode 100644 tests_pop/base_A_package/base.py create mode 100644 tests_pop/base_A_package/site_1.py create mode 100644 tests_pop/base_A_package/site_2.py create mode 100644 tests_pop/base_B_package/__init__.py create mode 100644 tests_pop/base_B_package/base.py create mode 100644 tests_pop/base_B_package/site_2.py create mode 100644 tests_pop/base_B_package/site_3.py create mode 100644 tests_pop/combine_A_B_package/__init__.py create mode 120000 tests_pop/combine_A_B_package/base_A_package create mode 120000 tests_pop/combine_A_B_package/base_B_package create mode 100644 tests_pop/combine_A_B_subset_package/__init__.py create mode 120000 tests_pop/combine_A_B_subset_package/base_B_package create mode 120000 tests_pop/combine_A_B_subset_package/improved_A_package create mode 100644 tests_pop/improved_A_package/__init__.py create mode 120000 tests_pop/improved_A_package/base_A_package create mode 100644 tests_pop/improved_A_package/site_1.py diff --git a/docs/intro/pop.rst b/docs/intro/pop.rst index 1353844f..58930fff 100644 --- a/docs/intro/pop.rst +++ b/docs/intro/pop.rst @@ -118,6 +118,8 @@ by simply importing them: page = FurnitureProductPage(response) item = page.to_item() +.. _`pop-recommended-requirements`: + Recommended Requirements ~~~~~~~~~~~~~~~~~~~~~~~~ @@ -141,45 +143,83 @@ all :class:`~.OverrideRule` by writing the following code inside of .. code-block:: python - from web_poet import consume_modules + from web_poet import default_registry + + REGISTRY = default_registry.export(__package__) - # This allows all of the OverrideRules declared inside the package - # using @handle_urls to be properly discovered and loaded. - consume_modules(__package__) +This does two things: -.. note:: + 1. The :meth:`~.PageObjectRegistry.export` method returns a new instance of + :class:`~.PageObjectRegistry` which contains only the :class:`~.OverrideRule` + from the given package. This means that if there are other **POPs** using the + recommended ``default_registry``, any :class:`~.OverrideRule` that are not part + of the package are not included. - Remember, code in Python like annotations are only read and executed + 2. Remember that code in Python like annotations are only read and executed when the module it belongs to is imported. Thus, in order for all the ``@handle_urls`` annotation to properly reflect its data, they need to - be imported recursively via :func:`~.consume_modules`. + be imported recursively via :func:`~.consume_modules`. Fortunately, + :meth:`~.PageObjectRegistry.export` already consumes the ``__package__`` + for us. This allows developers to properly access all of the :class:`~.OverrideRule` -declared using the ``@handle_urls`` annotation inside the **POP**. In turn, -this also allows **POPs** which use ``web_poet.default_registry`` to have all -their rules discovered if they are adhering to using Convention **#3** -(see :ref:`best-practices`). +declared using the ``@handle_urls`` annotation inside the **POP**: -In other words, importing the ``ecommerce_page_objects`` **POP** to a -project immediately loads all of the rules in **web-poet's** -``default_registry``: +.. code-block:: python + + import ecommerce_page_objects + + ecommerce_rules = ecommerce_page_objects.get_overrides() + +At the same time, this also allows **POPs** which use ``web_poet.default_registry`` +to have all their rules discovered if they are adhering to using Convention **#3** +(see :ref:`conventions-and-best-practices`). In other words, importing the +``ecommerce_page_objects`` **POP** to a project immediately loads all of the rules +in **web-poet's** ``default_registry``: .. code-block:: python from web_poet import default_registry + ecommerce_rules = ecommerce_page_objects.get_overrides() + import ecommerce_page_objects - # All the rules are now available. - rules = default_registry.get_overrides() + # All the rules are also available once ecommerce_page_objects is imported. + all_rules = default_registry.get_overrides() If this recommended requirement is followed properly, there's no need to call ``consume_modules("ecommerce_page_objects")`` before performing the :meth:`~.PageObjectRegistry.get_overrides`, since all the :class:`~.OverrideRule` -were already discovered upon **POP** importation. +were already discovered upon **POP** importation. + +Lastly, when trying to repackage multiple **POPs** into a single unifying **POP** +which contains all of the :class:`~.OverrideRule`, it can easily be packaged +as: + +.. code-block:: python + + from web_poet import PageObjectRegistry + + import base_A_package + import base_B_package + + # If on Python 3.9+ + combined_reg = base_A_package.REGISTRY | base_B_package.REGISTRY + + # If on lower Python versions + combined_reg = {**base_A_package.REGISTRY, **base_B_package.REGISTRY} + + REGISTRY = PageObjectRegistry(combined_reg) -.. _`best-practices`: +Note that you can also opt to use only a subset of the :class:`~.OverrideRule` +by selecting the specific ones in ``combined_reg`` before creating a new +:class:`~.PageObjectRegistry` instance. An **inclusion** rule is preferred than +an **exclusion** rule (see **Tip #4** in the :ref:`conventions-and-best-practices`). +You can use :meth:`~.PageObjectRegistry.search_overrides` when selecting the +rules. +.. _`conventions-and-best-practices`: Conventions and Best Practices ------------------------------ diff --git a/tests/test_pop.py b/tests/test_pop.py new file mode 100644 index 00000000..4882accb --- /dev/null +++ b/tests/test_pop.py @@ -0,0 +1,60 @@ +"""This tests the expected behavior of importing multiple different POPs such +that they don't leak the OverrideRules from other POPs that use the same +``default_registry``. + +Packacking and exporting a given POP should be resilient from such cases. + +In particular, this tests the :meth:`PageObjectRegistry.export` functionality. +""" + +def test_base_A(): + from tests_pop import base_A_package + + reg = base_A_package.REGISTRY + + assert len(reg) == 2 + assert base_A_package.site_1.A_Site1 in reg + assert base_A_package.site_2.A_Site2 in reg + + +def test_base_B(): + from tests_pop import base_B_package + + reg = base_B_package.REGISTRY + + assert len(reg) == 2 + assert base_B_package.site_2.B_Site2 in reg + assert base_B_package.site_3.B_Site3 in reg + + +def test_improved_A(): + from tests_pop import improved_A_package, base_A_package + + reg = improved_A_package.REGISTRY + + assert len(reg) == 3 + assert improved_A_package.site_1.A_Improved_Site1 in reg + assert improved_A_package.base_A_package.site_1.A_Site1 in reg + assert improved_A_package.base_A_package.site_2.A_Site2 in reg + + +def test_combine_A_B(): + from tests_pop import combine_A_B_package, base_A_package, base_B_package + + reg = combine_A_B_package.REGISTRY + + assert len(reg) == 4 + assert combine_A_B_package.base_A_package.site_1.A_Site1 in reg + assert combine_A_B_package.base_A_package.site_2.A_Site2 in reg + assert combine_A_B_package.base_B_package.site_2.B_Site2 in reg + assert combine_A_B_package.base_B_package.site_3.B_Site3 in reg + + +def test_combine_A_B_subset(): + from tests_pop import combine_A_B_subset_package, improved_A_package, base_B_package + + reg = combine_A_B_subset_package.REGISTRY + + assert len(reg) == 2 + assert combine_A_B_subset_package.improved_A_package.site_1.A_Improved_Site1 in reg + assert combine_A_B_subset_package.base_B_package.site_3.B_Site3 in reg diff --git a/tests_pop/__init__.py b/tests_pop/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests_pop/base_A_package/__init__.py b/tests_pop/base_A_package/__init__.py new file mode 100644 index 00000000..4b0957a4 --- /dev/null +++ b/tests_pop/base_A_package/__init__.py @@ -0,0 +1,3 @@ +from web_poet import default_registry + +REGISTRY = default_registry.export(__package__) diff --git a/tests_pop/base_A_package/base.py b/tests_pop/base_A_package/base.py new file mode 100644 index 00000000..aba39e7b --- /dev/null +++ b/tests_pop/base_A_package/base.py @@ -0,0 +1,2 @@ +class BasePage: + ... diff --git a/tests_pop/base_A_package/site_1.py b/tests_pop/base_A_package/site_1.py new file mode 100644 index 00000000..03c38984 --- /dev/null +++ b/tests_pop/base_A_package/site_1.py @@ -0,0 +1,8 @@ +from web_poet import handle_urls + +from .base import BasePage + + +@handle_urls("site_1.com", overrides=BasePage) +class A_Site1: + ... diff --git a/tests_pop/base_A_package/site_2.py b/tests_pop/base_A_package/site_2.py new file mode 100644 index 00000000..362d476b --- /dev/null +++ b/tests_pop/base_A_package/site_2.py @@ -0,0 +1,8 @@ +from web_poet import handle_urls + +from .base import BasePage + + +@handle_urls("site_2.com", overrides=BasePage) +class A_Site2: + ... diff --git a/tests_pop/base_B_package/__init__.py b/tests_pop/base_B_package/__init__.py new file mode 100644 index 00000000..4b0957a4 --- /dev/null +++ b/tests_pop/base_B_package/__init__.py @@ -0,0 +1,3 @@ +from web_poet import default_registry + +REGISTRY = default_registry.export(__package__) diff --git a/tests_pop/base_B_package/base.py b/tests_pop/base_B_package/base.py new file mode 100644 index 00000000..aba39e7b --- /dev/null +++ b/tests_pop/base_B_package/base.py @@ -0,0 +1,2 @@ +class BasePage: + ... diff --git a/tests_pop/base_B_package/site_2.py b/tests_pop/base_B_package/site_2.py new file mode 100644 index 00000000..d48f04d1 --- /dev/null +++ b/tests_pop/base_B_package/site_2.py @@ -0,0 +1,8 @@ +from web_poet import handle_urls + +from .base import BasePage + + +@handle_urls("site_2.com", overrides=BasePage) +class B_Site2: + ... diff --git a/tests_pop/base_B_package/site_3.py b/tests_pop/base_B_package/site_3.py new file mode 100644 index 00000000..39c69007 --- /dev/null +++ b/tests_pop/base_B_package/site_3.py @@ -0,0 +1,8 @@ +from web_poet import handle_urls + +from .base import BasePage + + +@handle_urls("site_3.com", overrides=BasePage) +class B_Site3: + ... diff --git a/tests_pop/combine_A_B_package/__init__.py b/tests_pop/combine_A_B_package/__init__.py new file mode 100644 index 00000000..3b256de9 --- /dev/null +++ b/tests_pop/combine_A_B_package/__init__.py @@ -0,0 +1,9 @@ +"""This POP simply wants to repackage POP "A" and "B" into one unifying package.""" + +from web_poet import PageObjectRegistry + +from . import base_A_package +from . import base_B_package + +combined = {**base_A_package.REGISTRY, **base_B_package.REGISTRY} +REGISTRY = PageObjectRegistry(combined) diff --git a/tests_pop/combine_A_B_package/base_A_package b/tests_pop/combine_A_B_package/base_A_package new file mode 120000 index 00000000..dda6d78a --- /dev/null +++ b/tests_pop/combine_A_B_package/base_A_package @@ -0,0 +1 @@ +../base_A_package \ No newline at end of file diff --git a/tests_pop/combine_A_B_package/base_B_package b/tests_pop/combine_A_B_package/base_B_package new file mode 120000 index 00000000..6ac73e06 --- /dev/null +++ b/tests_pop/combine_A_B_package/base_B_package @@ -0,0 +1 @@ +../base_B_package \ No newline at end of file diff --git a/tests_pop/combine_A_B_subset_package/__init__.py b/tests_pop/combine_A_B_subset_package/__init__.py new file mode 100644 index 00000000..3ca16123 --- /dev/null +++ b/tests_pop/combine_A_B_subset_package/__init__.py @@ -0,0 +1,16 @@ +"""This POP simply wants to repackage POP "A" and "B" into one unifying package.""" + +from web_poet import PageObjectRegistry + +from . import improved_A_package +from . import base_B_package + +rules_A_improved = improved_A_package.REGISTRY.search_overrides( + use=improved_A_package.site_1.A_Improved_Site1 # type:ignore +) +rules_B = base_B_package.REGISTRY.search_overrides( + use=base_B_package.site_3.B_Site3 # type: ignore +) + +combined_rules = rules_A_improved + rules_B +REGISTRY = PageObjectRegistry.from_override_rules(combined_rules) diff --git a/tests_pop/combine_A_B_subset_package/base_B_package b/tests_pop/combine_A_B_subset_package/base_B_package new file mode 120000 index 00000000..6ac73e06 --- /dev/null +++ b/tests_pop/combine_A_B_subset_package/base_B_package @@ -0,0 +1 @@ +../base_B_package \ No newline at end of file diff --git a/tests_pop/combine_A_B_subset_package/improved_A_package b/tests_pop/combine_A_B_subset_package/improved_A_package new file mode 120000 index 00000000..4c534b9f --- /dev/null +++ b/tests_pop/combine_A_B_subset_package/improved_A_package @@ -0,0 +1 @@ +../improved_A_package \ No newline at end of file diff --git a/tests_pop/improved_A_package/__init__.py b/tests_pop/improved_A_package/__init__.py new file mode 100644 index 00000000..4b0957a4 --- /dev/null +++ b/tests_pop/improved_A_package/__init__.py @@ -0,0 +1,3 @@ +from web_poet import default_registry + +REGISTRY = default_registry.export(__package__) diff --git a/tests_pop/improved_A_package/base_A_package b/tests_pop/improved_A_package/base_A_package new file mode 120000 index 00000000..dda6d78a --- /dev/null +++ b/tests_pop/improved_A_package/base_A_package @@ -0,0 +1 @@ +../base_A_package \ No newline at end of file diff --git a/tests_pop/improved_A_package/site_1.py b/tests_pop/improved_A_package/site_1.py new file mode 100644 index 00000000..96691770 --- /dev/null +++ b/tests_pop/improved_A_package/site_1.py @@ -0,0 +1,9 @@ +from web_poet import handle_urls + +from .base_A_package.base import BasePage +from .base_A_package.site_1 import A_Site1 + + +@handle_urls("site_1.com", overrides=BasePage) +class A_Improved_Site1(A_Site1): + ... # some improvements here after subclassing the original one. diff --git a/web_poet/overrides.py b/web_poet/overrides.py index 62f6f543..3fbcb1e7 100644 --- a/web_poet/overrides.py +++ b/web_poet/overrides.py @@ -4,6 +4,7 @@ import importlib.util import warnings import pkgutil +from copy import deepcopy from collections import deque from dataclasses import dataclass, field from operator import attrgetter @@ -121,6 +122,27 @@ def from_override_rules( """ return cls({rule.use: rule for rule in rules}) + def export(self, package: str) -> PageObjectRegistry: + """Returns a new :class:`~.PageObjectRegistry` instance containing + :class:`~.OverrideRule` which are only found in the provided **package**. + + This is used in cases wherein all of the :class:`~.OverrideRule` in a + given **Page Object Project (POP)** should be placed inside a dedicated + registry for packaging. See :ref:`POP Recommended Requirements + ` for more info about this. + + Note that the :func:`~.consume_modules` will be called on the said + **package** which adds any undiscovered :class:`~.OverrideRule` to the + original :class:`~.PageObjectRegistry` instance. There's no need to worry + about unrelated rules from being added since it wouldn't happen if the + given registry's ``@handle_urls`` annotation wasn't the one used. + """ + backup_state = deepcopy(self) + self.clear() + rules = self.get_overrides(consume=package) + self = backup_state + return self.from_override_rules(rules) + def handle_urls( self, include: Strings, From f882d8ff0a05fece49cbe9497b46e8a8890b9ea2 Mon Sep 17 00:00:00 2001 From: Kevin Lloyd Bernal Date: Tue, 29 Mar 2022 20:23:32 +0800 Subject: [PATCH 2/2] fix the POP example --- docs/intro/pop.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/intro/pop.rst b/docs/intro/pop.rst index 58930fff..0bcdd8c1 100644 --- a/docs/intro/pop.rst +++ b/docs/intro/pop.rst @@ -179,11 +179,11 @@ in **web-poet's** ``default_registry``: .. code-block:: python - from web_poet import default_registry + import ecommerce_page_objects ecommerce_rules = ecommerce_page_objects.get_overrides() - import ecommerce_page_objects + from web_poet import default_registry # All the rules are also available once ecommerce_page_objects is imported. all_rules = default_registry.get_overrides()