-
Notifications
You must be signed in to change notification settings - Fork 1.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
RFC: Much simpler AssetExecutionContext #16417
Changes from all commits
175ba7b
019000c
635bd19
6c5b1f6
0eb5817
b376ea7
6fce334
557a398
6087098
683edcc
90b2f65
2a95562
73b5c18
d098622
db166a6
61b720d
62bf34b
6c24726
718d21c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
from abc import ABC, abstractmethod | ||
from abc import ABC, ABCMeta, abstractmethod | ||
from typing import ( | ||
AbstractSet, | ||
Any, | ||
|
@@ -12,8 +12,6 @@ | |
cast, | ||
) | ||
|
||
from typing_extensions import TypeAlias | ||
|
||
import dagster._check as check | ||
from dagster._annotations import deprecated, experimental, public | ||
from dagster._core.definitions.asset_check_spec import AssetCheckSpec | ||
|
@@ -46,11 +44,17 @@ | |
from dagster._core.log_manager import DagsterLogManager | ||
from dagster._core.storage.dagster_run import DagsterRun | ||
from dagster._utils.forked_pdb import ForkedPdb | ||
from dagster._utils.warnings import deprecation_warning | ||
|
||
from .system import StepExecutionContext | ||
|
||
|
||
class AbstractComputeExecutionContext(ABC): | ||
# This metaclass has to exist for OpExecutionContext to have a metaclass | ||
class AbstractComputeMetaclass(ABCMeta): | ||
pass | ||
|
||
|
||
class AbstractComputeExecutionContext(ABC, metaclass=AbstractComputeMetaclass): | ||
"""Base class for op context implemented by OpExecutionContext and DagstermillExecutionContext.""" | ||
|
||
@abstractmethod | ||
|
@@ -97,7 +101,18 @@ def op_config(self) -> Any: | |
"""The parsed config specific to this op.""" | ||
|
||
|
||
class OpExecutionContext(AbstractComputeExecutionContext): | ||
class OpExecutionContextMetaClass(AbstractComputeMetaclass): | ||
def __instancecheck__(cls, instance) -> bool: | ||
# This makes isinstance(context, OpExecutionContext) return True when | ||
# the context is an AssetExecutionContext. This makes the new | ||
# AssetExecutionContext backwards compatible with the old | ||
# OpExecutionContext codepaths. | ||
if isinstance(instance, AssetExecutionContext): | ||
return True | ||
return super().__instancecheck__(instance) | ||
|
||
|
||
class OpExecutionContext(AbstractComputeExecutionContext, metaclass=OpExecutionContextMetaClass): | ||
"""The ``context`` object that can be made available as the first argument to the function | ||
used for computing an op or asset. | ||
|
||
|
@@ -688,8 +703,191 @@ def asset_check_spec(self) -> AssetCheckSpec: | |
return asset_checks_def.spec | ||
|
||
|
||
# actually forking the object type for assets is tricky for users in the cases of: | ||
# * manually constructing ops to make AssetsDefinitions | ||
# * having ops in a graph that form a graph backed asset | ||
# so we have a single type that users can call by their preferred name where appropriate | ||
AssetExecutionContext: TypeAlias = OpExecutionContext | ||
OP_EXECUTION_CONTEXT_ONLY_METHODS = set( | ||
[ | ||
"describe_op", | ||
"file_manager", | ||
"has_assets_def", | ||
"get_mapping_key", | ||
# "get_step_execution_context", # used by internals | ||
"job_def", | ||
"job_name", | ||
"node_handle", | ||
"op", | ||
"op_config", | ||
# "op_def", # used by internals | ||
"op_handle", | ||
"retry_number", | ||
"step_launcher", | ||
# "has_events", # used by internals | ||
"consumer_events", | ||
] | ||
) | ||
|
||
|
||
PARTITION_KEY_RANGE_AS_ALT = "use partition_key_range or partition_key_range_for_asset instead" | ||
INPUT_OUTPUT_ALT = "not use input or output names and instead use asset keys directly" | ||
OUTPUT_METADATA_ALT = "return MaterializationResult from the asset instead" | ||
|
||
DEPRECATED_IO_MANAGER_CENTRIC_CONTEXT_METHODS = { | ||
"add_output_metadata": OUTPUT_METADATA_ALT, | ||
"asset_key_for_input": INPUT_OUTPUT_ALT, | ||
"asset_key_for_output": INPUT_OUTPUT_ALT, | ||
"asset_partition_key_for_input": PARTITION_KEY_RANGE_AS_ALT, | ||
"asset_partition_key_for_output": PARTITION_KEY_RANGE_AS_ALT, | ||
"asset_partition_key_range_for_input": PARTITION_KEY_RANGE_AS_ALT, | ||
"asset_partition_key_range_for_output": PARTITION_KEY_RANGE_AS_ALT, | ||
"asset_partition_keys_for_input": PARTITION_KEY_RANGE_AS_ALT, | ||
"asset_partition_keys_for_output": PARTITION_KEY_RANGE_AS_ALT, | ||
"asset_partitions_time_window_for_input": PARTITION_KEY_RANGE_AS_ALT, | ||
"asset_partitions_time_window_for_output": PARTITION_KEY_RANGE_AS_ALT, | ||
"asset_partitions_def_for_input": PARTITION_KEY_RANGE_AS_ALT, | ||
"asset_partitions_def_for_output": PARTITION_KEY_RANGE_AS_ALT, | ||
"get_output_metadata": "use op_execution_context.op_def.get_output(...).metadata", | ||
"merge_output_metadata": OUTPUT_METADATA_ALT, | ||
"output_for_asset_key": INPUT_OUTPUT_ALT, | ||
"selected_output_names": INPUT_OUTPUT_ALT, | ||
} | ||
|
||
ALTERNATE_AVAILABLE_METHODS = { | ||
"has_tag": "use dagster_run.has_tag instead", | ||
"get_tag": "use dagster_run.get_tag instead", | ||
"run_tags": "use dagster_run.tags instead", | ||
"set_data_version": "use MaterializationResult instead", | ||
} | ||
|
||
|
||
class AssetExecutionContext: | ||
def __init__(self, op_execution_context: OpExecutionContext) -> None: | ||
self._op_execution_context = check.inst_param( | ||
op_execution_context, "op_execution_context", OpExecutionContext | ||
) | ||
|
||
def __getattr__(self, attr) -> Any: | ||
check.str_param(attr, "attr") | ||
|
||
if attr in self.__dict__: | ||
return getattr(self, attr) | ||
|
||
if attr in OP_EXECUTION_CONTEXT_ONLY_METHODS: | ||
deprecation_warning( | ||
subject=f"AssetExecutionContext.{attr}", | ||
additional_warn_text=( | ||
f"You have called the deprecated method {attr} on AssetExecutionContext. Use" | ||
" the underlying OpExecutionContext instead by calling" | ||
f" op_execution_context.{attr}." | ||
), | ||
breaking_version="1.7", | ||
stacklevel=1, | ||
) | ||
|
||
if attr in DEPRECATED_IO_MANAGER_CENTRIC_CONTEXT_METHODS: | ||
alt = DEPRECATED_IO_MANAGER_CENTRIC_CONTEXT_METHODS[attr] | ||
|
||
# warnings.warn( | ||
deprecation_warning( | ||
subject=f"AssetExecutionContext.{attr}", | ||
additional_warn_text=( | ||
f"You have called method {attr} on AssetExecutionContext that is oriented" | ||
                    f" around I/O managers. If you are not using I/O managers we suggest you {alt}. If" | ||
" you are using I/O managers the method still exists at" | ||
f" op_execution_context.{attr}." | ||
), | ||
breaking_version="1.7", | ||
stacklevel=1, | ||
) | ||
|
||
if attr in ALTERNATE_AVAILABLE_METHODS: | ||
deprecation_warning( | ||
subject=f"AssetExecutionContext.{attr}", | ||
additional_warn_text=f"Instead {ALTERNATE_AVAILABLE_METHODS[attr]}.", | ||
breaking_version="1.7", | ||
stacklevel=1, | ||
) | ||
|
||
return getattr(self._op_execution_context, attr) | ||
|
||
# include all supported methods below | ||
|
||
@public | ||
@property | ||
def op_execution_context(self) -> OpExecutionContext: | ||
return self._op_execution_context | ||
|
||
@public | ||
@property | ||
def run_id(self) -> str: | ||
return self._op_execution_context.run_id | ||
|
||
@public | ||
@property | ||
def dagster_run(self) -> DagsterRun: | ||
"""PipelineRun: The current pipeline run.""" | ||
return self._step_execution_context.dagster_run | ||
|
||
@public | ||
@property | ||
def asset_key(self) -> AssetKey: | ||
return self._op_execution_context.asset_key | ||
|
||
@public | ||
@property | ||
def pdb(self) -> ForkedPdb: | ||
return self._op_execution_context.pdb | ||
|
||
@public | ||
@property | ||
def log(self) -> DagsterLogManager: | ||
"""DagsterLogManager: The log manager available in the execution context.""" | ||
return self._op_execution_context.log | ||
|
||
@public | ||
# renaming to avoid ambiguity in single run and multi-partition case | ||
@property | ||
def is_partitioned_execution(self) -> bool: | ||
return self._op_execution_context.has_partition_key | ||
|
||
@public | ||
def log_event(self, event: UserEvent) -> None: | ||
return self._op_execution_context.log_event(event) | ||
|
||
@public | ||
@property | ||
def assets_def(self) -> AssetsDefinition: | ||
return self._op_execution_context.assets_def | ||
|
||
@public | ||
# TODO confirm semantics in the presence of asset subsetting | ||
# seems like there should be both "asset_keys" and "selected_asset_keys" | ||
@property | ||
def selected_asset_keys(self) -> AbstractSet[AssetKey]: | ||
return self._op_execution_context.selected_asset_keys | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we also likely will be adding selected_asset_checks here. This refactor might be an opportunity to condense them to some selection object |
||
|
||
@public | ||
@experimental | ||
def get_asset_provenance(self, asset_key: AssetKey) -> Optional[DataProvenance]: | ||
return self._op_execution_context.get_asset_provenance(asset_key) | ||
|
||
@property | ||
def asset_check_spec(self) -> AssetCheckSpec: | ||
return self._op_execution_context.asset_check_spec | ||
|
||
@public | ||
@property | ||
def partition_key_range(self) -> PartitionKeyRange: | ||
return self._op_execution_context.asset_partition_key_range | ||
|
||
@public | ||
def partition_key_range_for_asset_key(self, asset_key: AssetKey) -> PartitionKeyRange: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should this be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. just leaving a note that making the partition methods agnostic to "input" or "output" lead to some pretty complex errors. Some of the methods (like the partition_key_range one) have different code paths for "inputs" and "outputs" because of partition mapping. Not worth trying to do all of that implementation detail in this RFC, but for a final implementation, we should make sure this code path is really thoroughly tested |
||
subset = self._op_execution_context.get_step_execution_context().asset_partitions_subset_for_asset_key( | ||
asset_key | ||
) | ||
partition_key_ranges = subset.get_partition_key_ranges( | ||
dynamic_partitions_store=self._op_execution_context.instance | ||
) | ||
if len(partition_key_ranges) != 1: | ||
check.failed( | ||
"Tried to access asset partition key range, but there are " | ||
f"({len(partition_key_ranges)}) key ranges associated with this asset key.", | ||
) | ||
return partition_key_ranges[0] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thread for figuring out why AssetExecutionContext was not made a subclass of OpExecutionContext to begin with.

Background:
- AssetExecutionContext began as a type alias to align on naming and get a quick docs/examples improvement (here).
- AssetExecutionContext was made a subclass of OpExecutionContext here.
- AssetExecutionContext was reverted back to a type alias here.

In the revert PR, the reasoning for reverting was:
There is also a slack thread mentioning this where alex says
Based on this, my interpretation is that the issue wasn't a technical one (Python limitation, inability to pass the correct context through, etc.), but more a design issue: "What is the correct context for an @op in a @graph_backed_asset to receive?"
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok got it. Thanks for digging that up.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we can figure out a reasonable solution for the graph-backed asset case. We could alter what instance the user gets based on their type hint. We could also make an asset_execution_context property on OpExecutionContext so you can do the reverse in that case.