-
Notifications
You must be signed in to change notification settings - Fork 116
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
SNOW-1663726 make session config updates thread safe #2302
Changes from 57 commits
56fb566
66003d1
0e58205
e75dde1
68a8c1c
31a5734
b4dadda
b8c6496
f39837e
31a196f
723bdf7
37c0419
8a2d433
a083989
947d384
fd51720
3077853
65c3186
94412cf
638dd09
7ae2c33
1689ebf
5e8a2d2
1c83ef2
a649761
f03d618
5f398d5
3807087
4eef3e9
df3263c
6769c54
af86f67
a737f33
8ca2730
81417a3
e340567
30952bb
03f25b5
6deb402
8e1dfe0
10bfeb4
879940a
5aad2d9
669eb91
a85a144
a79ffb4
4420350
5f1eaa6
9d62017
b58aa8b
db37033
dddd15f
57ee9e8
809a86e
6021ab8
43986f6
0430e92
095b04e
32707f9
3bf678d
3eade1a
1850d5d
1fa6ad2
7c85432
f994842
4621836
e1c68f3
e5b48dd
496e2be
980d3b7
54a6b5d
67609e8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
# | ||
# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. | ||
# | ||
|
||
from typing import Any | ||
|
||
|
||
class ConfigContext: | ||
"""Class to help reading a snapshot of configuration attributes from a session object. | ||
|
||
On instantiation, this object stores the configuration from the session object | ||
and returns the stored configuration attributes when requested. | ||
""" | ||
|
||
def __init__(self, session) -> None: | ||
self.session = session | ||
self.configs = { | ||
"_query_compilation_stage_enabled", | ||
"cte_optimization_enabled", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. instead of doing this, it might be more clear to provide a property interface in the session that teaks a snapshot. so it is the interface is at a closer place with the properties. Given the fact that we are adding more and more configs, i am thinking it might worth to do a refactoring to centralize all configs, and wrap all of them into a class to reduce the possiblility for developer make mistakes. |
||
"eliminate_numeric_sql_value_cast_enabled", | ||
"large_query_breakdown_complexity_bounds", | ||
"large_query_breakdown_enabled", | ||
} | ||
self._create_snapshot() | ||
|
||
def __getattr__(self, name: str) -> Any: | ||
if name in self.configs: | ||
return getattr(self.session, name) | ||
raise AttributeError(f"ConfigContext has no attribute {name}") | ||
|
||
def _create_snapshot(self) -> "ConfigContext": | ||
"""Reads the configuration attributes from the session object and stores them | ||
in the context object. | ||
""" | ||
for name in self.configs: | ||
setattr(self, name, getattr(self.session, name)) | ||
return self |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,6 +23,7 @@ | |
Union, | ||
) | ||
|
||
from snowflake.snowpark._internal.analyzer.config_context import ConfigContext | ||
from snowflake.snowpark._internal.analyzer.query_plan_analysis_utils import ( | ||
PlanNodeCategory, | ||
sum_node_complexities, | ||
|
@@ -312,14 +313,16 @@ def children_plan_nodes(self) -> List[Union["Selectable", "SnowflakePlan"]]: | |
else: | ||
return [] | ||
|
||
def replace_repeated_subquery_with_cte(self) -> "SnowflakePlan": | ||
def replace_repeated_subquery_with_cte( | ||
self, config_context: ConfigContext | ||
) -> "SnowflakePlan": | ||
# parameter protection | ||
# the common subquery elimination will be applied if cte_optimization is not enabled | ||
# and the new compilation stage is not enabled. When new compilation stage is enabled, | ||
# the common subquery elimination will be done through the new plan transformation. | ||
if ( | ||
not self.session._cte_optimization_enabled | ||
or self.session._query_compilation_stage_enabled | ||
not config_context.cte_optimization_enabled | ||
or config_context._query_compilation_stage_enabled | ||
): | ||
return self | ||
|
||
|
@@ -355,8 +358,16 @@ def replace_repeated_subquery_with_cte(self) -> "SnowflakePlan": | |
# create CTE query | ||
final_query = create_cte_query(self, duplicate_plan_set) | ||
|
||
with self.session._lock: | ||
# copy depends on the cte_optimization_enabled value. We should keep it | ||
# consistent with the current context. | ||
original_cte_optimization = self.session.cte_optimization_enabled | ||
self.session.cte_optimization_enabled = ( | ||
config_context.cte_optimization_enabled | ||
) | ||
plan = copy.copy(self) | ||
self.session.cte_optimization_enabled = original_cte_optimization | ||
# all other parts of query are unchanged, but just replace the original query | ||
plan = copy.copy(self) | ||
plan.queries[-1].sql = final_query | ||
return plan | ||
|
||
|
@@ -531,6 +542,10 @@ def __init__( | |
# on the optimized plan. During the final query generation, no schema query is needed, | ||
# this helps reduces un-necessary overhead for the describing call. | ||
self._skip_schema_query = skip_schema_query | ||
self._config_context: Optional[ConfigContext] = None | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The config context here should probably been done in the same way as what we have done for analyze and resolve as parameter for each builder. |
||
|
||
def set_config_context(self, config_context: ConfigContext) -> None: | ||
self._config_context = config_context | ||
|
||
@SnowflakePlan.Decorator.wrap_exception | ||
def build( | ||
|
@@ -564,9 +579,10 @@ def build( | |
), "No schema query is available in child SnowflakePlan" | ||
new_schema_query = schema_query or sql_generator(child.schema_query) | ||
|
||
config_context = self._config_context or ConfigContext(self.session) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. each build function seems can take a new snapshot, but multiple build can be called during the whole resolve, and they are suppose to get the same snapshot, it is safer and cleaner to simply extend each interface to take the config context. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I intend to remove |
||
placeholder_query = ( | ||
sql_generator(select_child._id) | ||
if self.session._cte_optimization_enabled and select_child._id is not None | ||
if config_context.cte_optimization_enabled and select_child._id is not None | ||
else None | ||
) | ||
|
||
|
@@ -603,9 +619,10 @@ def build_binary( | |
right_schema_query = schema_value_statement(select_right.attributes) | ||
schema_query = sql_generator(left_schema_query, right_schema_query) | ||
|
||
config_context = self._config_context or ConfigContext(self.session) | ||
placeholder_query = ( | ||
sql_generator(select_left._id, select_right._id) | ||
if self.session._cte_optimization_enabled | ||
if config_context.cte_optimization_enabled | ||
and select_left._id is not None | ||
and select_right._id is not None | ||
else None | ||
|
@@ -637,8 +654,8 @@ def build_binary( | |
|
||
referenced_ctes: Set[str] = set() | ||
if ( | ||
self.session.cte_optimization_enabled | ||
and self.session._query_compilation_stage_enabled | ||
config_context.cte_optimization_enabled | ||
and config_context._query_compilation_stage_enabled | ||
): | ||
# When the cte optimization and the new compilation stage is enabled, | ||
# the referred cte tables are propagated from left and right can have | ||
|
@@ -928,7 +945,9 @@ def save_as_table( | |
column_definition_with_hidden_columns, | ||
) | ||
|
||
child = child.replace_repeated_subquery_with_cte() | ||
child = child.replace_repeated_subquery_with_cte( | ||
self._config_context or ConfigContext(self.session) | ||
) | ||
|
||
def get_create_table_as_select_plan(child: SnowflakePlan, replace, error): | ||
return self.build( | ||
|
@@ -1116,7 +1135,9 @@ def create_or_replace_view( | |
if not is_sql_select_statement(child.queries[0].sql.lower().strip()): | ||
raise SnowparkClientExceptionMessages.PLAN_CREATE_VIEWS_FROM_SELECT_ONLY() | ||
|
||
child = child.replace_repeated_subquery_with_cte() | ||
child = child.replace_repeated_subquery_with_cte( | ||
self._config_context or ConfigContext(self.session) | ||
) | ||
return self.build( | ||
lambda x: create_or_replace_view_statement(name, x, is_temp, comment), | ||
child, | ||
|
@@ -1159,7 +1180,9 @@ def create_or_replace_dynamic_table( | |
# should never reach here | ||
raise ValueError(f"Unknown create mode: {create_mode}") # pragma: no cover | ||
|
||
child = child.replace_repeated_subquery_with_cte() | ||
child = child.replace_repeated_subquery_with_cte( | ||
self._config_context or ConfigContext(self.session) | ||
) | ||
return self.build( | ||
lambda x: create_or_replace_dynamic_table_statement( | ||
name=name, | ||
|
@@ -1462,7 +1485,9 @@ def copy_into_location( | |
header: bool = False, | ||
**copy_options: Optional[Any], | ||
) -> SnowflakePlan: | ||
query = query.replace_repeated_subquery_with_cte() | ||
query = query.replace_repeated_subquery_with_cte( | ||
self._config_context or ConfigContext(self.session) | ||
) | ||
return self.build( | ||
lambda x: copy_into_location( | ||
query=x, | ||
|
@@ -1489,7 +1514,9 @@ def update( | |
source_plan: Optional[LogicalPlan], | ||
) -> SnowflakePlan: | ||
if source_data: | ||
source_data = source_data.replace_repeated_subquery_with_cte() | ||
source_data = source_data.replace_repeated_subquery_with_cte( | ||
self._config_context or ConfigContext(self.session) | ||
) | ||
return self.build( | ||
lambda x: update_statement( | ||
table_name, | ||
|
@@ -1520,7 +1547,9 @@ def delete( | |
source_plan: Optional[LogicalPlan], | ||
) -> SnowflakePlan: | ||
if source_data: | ||
source_data = source_data.replace_repeated_subquery_with_cte() | ||
source_data = source_data.replace_repeated_subquery_with_cte( | ||
self._config_context or ConfigContext(self.session) | ||
) | ||
return self.build( | ||
lambda x: delete_statement( | ||
table_name, | ||
|
@@ -1549,7 +1578,9 @@ def merge( | |
clauses: List[str], | ||
source_plan: Optional[LogicalPlan], | ||
) -> SnowflakePlan: | ||
source_data = source_data.replace_repeated_subquery_with_cte() | ||
source_data = source_data.replace_repeated_subquery_with_cte( | ||
self._config_context or ConfigContext(self.session) | ||
) | ||
return self.build( | ||
lambda x: merge_statement(table_name, x, join_expr, clauses), | ||
source_data, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,6 +6,7 @@ | |
import time | ||
from typing import Dict, List | ||
|
||
from snowflake.snowpark._internal.analyzer.config_context import ConfigContext | ||
from snowflake.snowpark._internal.analyzer.query_plan_analysis_utils import ( | ||
get_complexity_score, | ||
) | ||
|
@@ -46,6 +47,7 @@ class PlanCompiler: | |
|
||
def __init__(self, plan: SnowflakePlan) -> None: | ||
self._plan = plan | ||
self._config_context = ConfigContext(self._plan.session) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That is good, we are only using one snapshot during the whole compilation stage |
||
|
||
def should_start_query_compilation(self) -> bool: | ||
""" | ||
|
@@ -65,10 +67,10 @@ def should_start_query_compilation(self) -> bool: | |
return ( | ||
not isinstance(current_session._conn, MockServerConnection) | ||
and (self._plan.source_plan is not None) | ||
and current_session._query_compilation_stage_enabled | ||
and self._config_context._query_compilation_stage_enabled | ||
and ( | ||
current_session.cte_optimization_enabled | ||
or current_session.large_query_breakdown_enabled | ||
self._config_context.cte_optimization_enabled | ||
or self._config_context.large_query_breakdown_enabled | ||
) | ||
) | ||
|
||
|
@@ -84,12 +86,12 @@ def compile(self) -> Dict[PlanQueryType, List[Query]]: | |
deep_copy_end_time = time.time() | ||
|
||
# 2. create a code generator with the original plan | ||
query_generator = create_query_generator(self._plan) | ||
query_generator = create_query_generator(self._plan, self._config_context) | ||
|
||
# 3. apply each optimizations if needed | ||
# CTE optimization | ||
cte_start_time = time.time() | ||
if self._plan.session.cte_optimization_enabled: | ||
if self._config_context.cte_optimization_enabled: | ||
repeated_subquery_eliminator = RepeatedSubqueryElimination( | ||
logical_plans, query_generator | ||
) | ||
|
@@ -102,9 +104,12 @@ def compile(self) -> Dict[PlanQueryType, List[Query]]: | |
] | ||
|
||
# Large query breakdown | ||
if self._plan.session.large_query_breakdown_enabled: | ||
if self._config_context.large_query_breakdown_enabled: | ||
large_query_breakdown = LargeQueryBreakdown( | ||
self._plan.session, query_generator, logical_plans | ||
self._plan.session, | ||
query_generator, | ||
logical_plans, | ||
self._config_context.large_query_breakdown_complexity_bounds, | ||
) | ||
logical_plans = large_query_breakdown.apply() | ||
|
||
|
@@ -124,9 +129,9 @@ def compile(self) -> Dict[PlanQueryType, List[Query]]: | |
total_time = time.time() - start_time | ||
session = self._plan.session | ||
summary_value = { | ||
TelemetryField.CTE_OPTIMIZATION_ENABLED.value: session.cte_optimization_enabled, | ||
TelemetryField.LARGE_QUERY_BREAKDOWN_ENABLED.value: session.large_query_breakdown_enabled, | ||
CompilationStageTelemetryField.COMPLEXITY_SCORE_BOUNDS.value: session.large_query_breakdown_complexity_bounds, | ||
TelemetryField.CTE_OPTIMIZATION_ENABLED.value: self._config_context.cte_optimization_enabled, | ||
TelemetryField.LARGE_QUERY_BREAKDOWN_ENABLED.value: self._config_context.large_query_breakdown_enabled, | ||
CompilationStageTelemetryField.COMPLEXITY_SCORE_BOUNDS.value: self._config_context.large_query_breakdown_complexity_bounds, | ||
CompilationStageTelemetryField.TIME_TAKEN_FOR_COMPILATION.value: total_time, | ||
CompilationStageTelemetryField.TIME_TAKEN_FOR_DEEP_COPY_PLAN.value: deep_copy_time, | ||
CompilationStageTelemetryField.TIME_TAKEN_FOR_CTE_OPTIMIZATION.value: cte_time, | ||
|
@@ -143,8 +148,9 @@ def compile(self) -> Dict[PlanQueryType, List[Query]]: | |
return queries | ||
else: | ||
final_plan = self._plan | ||
if self._plan.session.cte_optimization_enabled: | ||
final_plan = final_plan.replace_repeated_subquery_with_cte() | ||
final_plan = final_plan.replace_repeated_subquery_with_cte( | ||
self._config_context | ||
) | ||
return { | ||
PlanQueryType.QUERIES: final_plan.queries, | ||
PlanQueryType.POST_ACTIONS: final_plan.post_actions, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
with this, i think for the one you mentioned before for the access sql_simplifier_enabled, we should also use this configContext if it is accessing the sql_simplifier_enabled multiple times in the function call