-
Notifications
You must be signed in to change notification settings - Fork 28
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Handle item overrides #164
Changes from 7 commits
0c49c5b
ac082a2
88cdc5e
d148db2
5e93669
12c375b
d902b45
6bcfcf0
24eebc1
c00451a
92d1f1e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -151,11 +151,20 @@ def build_plan(self, request: Request) -> andi.Plan: | |
) | ||
|
||
@inlineCallbacks | ||
def build_instances(self, request: Request, response: Response, plan: andi.Plan): | ||
def build_instances( | ||
self, | ||
request: Request, | ||
response: Response, | ||
plan: andi.Plan, | ||
prev_instances: Optional[Dict] = None, | ||
): | ||
"""Build the instances dict from a plan including external dependencies.""" | ||
# First we build the external dependencies using the providers | ||
instances = yield from self.build_instances_from_providers( | ||
request, response, plan | ||
request, | ||
response, | ||
plan, | ||
prev_instances, | ||
) | ||
# All the remaining dependencies are internal so they can be built just | ||
# following the andi plan. | ||
|
@@ -169,10 +178,14 @@ def build_instances(self, request: Request, response: Response, plan: andi.Plan) | |
|
||
@inlineCallbacks | ||
def build_instances_from_providers( | ||
self, request: Request, response: Response, plan: andi.Plan | ||
self, | ||
request: Request, | ||
response: Response, | ||
plan: andi.Plan, | ||
prev_instances: Optional[Dict] = None, | ||
): | ||
"""Build dependencies handled by registered providers""" | ||
instances: Dict[Callable, Any] = {} | ||
instances: Dict[Callable, Any] = prev_instances or {} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Passing the
@attrs.define
class ProductPage(WebPage[Product]):
    product: Product
In the above example, before the |
||
scrapy_provided_dependencies = self.available_dependencies_for_providers( | ||
request, response | ||
) | ||
|
@@ -182,10 +195,22 @@ def build_instances_from_providers( | |
provided_classes = { | ||
cls for cls in dependencies_set if provider.is_provided(cls) | ||
} | ||
provided_classes -= instances.keys() # ignore already provided types | ||
|
||
# ignore already provided types if provider doesn't need to use them | ||
if not provider.allow_prev_instances: | ||
provided_classes -= instances.keys() | ||
|
||
if not provided_classes: | ||
continue | ||
|
||
# If dependency instances were already made by previously invoked | ||
# providers, don't try to build them again since it may result in | ||
# incorrect values (e.g. PO modifying an item > 2 times). | ||
required_deps = set(plan.dependencies[-1][1].values()) | ||
built_deps = set(instances.keys()) | ||
if required_deps and required_deps == built_deps: | ||
continue | ||
|
||
objs, fingerprint = [], None | ||
cache_hit = False | ||
if self.cache: | ||
|
@@ -221,6 +246,8 @@ def build_instances_from_providers( | |
externally_provided=scrapy_provided_dependencies, | ||
full_final_kwargs=False, | ||
).final_kwargs(scrapy_provided_dependencies) | ||
if provider.allow_prev_instances: | ||
kwargs.update({"prev_instances": instances}) | ||
try: | ||
# Invoke the provider to get the data | ||
objs = yield maybeDeferred_coro( | ||
|
@@ -414,6 +441,7 @@ class MySpider(Spider): | |
spider = MySpider() | ||
spider.settings = settings | ||
crawler.spider = spider | ||
crawler.stats = load_object(crawler.settings["STATS_CLASS"])(crawler) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Scrapy 2.11 changes broke this since There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ouch, I've already fixed it in master before looking here. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. But it's good to see we did it in (kinda) the same way! |
||
if not registry: | ||
registry = create_registry_instance(RulesRegistry, crawler) | ||
return Injector(crawler, registry=registry) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Increasing this value allows `ZyteApiProvider` (as well as any other item provider in the future) to run before scrapy-poet's `ItemProvider`. This ensures that any item dependencies in page objects are available.