Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore(server/deps): Upgrade to pydantic ^2.4.2 [TCTC-7241] #1891

Merged
merged 23 commits into from
Dec 15, 2023
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
c9d040a
chore: bump pydantic
lukapeschke Sep 12, 2023
2a607f8
chore: run bump-pydantic
lukapeschke Sep 12, 2023
8bfc83a
fix remaining warnings
lukapeschke Sep 12, 2023
b68e22f
fix tests
lukapeschke Sep 12, 2023
fdd5af8
Stop using deprecated pydantic methods
lukapeschke Sep 12, 2023
f48d92e
fix lockfile
lukapeschke Sep 12, 2023
a5fac15
update connectors
lukapeschke Sep 15, 2023
9f370be
refactor: Annotate PipelineOrDomainName to support steps as models
lukapeschke Sep 15, 2023
85e3560
Merge branch 'master' into upgrade-pydantic
lukapeschke Sep 28, 2023
0908533
bump pydantic ^2.4.2
lukapeschke Sep 29, 2023
9e0875b
refactor: force RelativeDate.date to be a datetime instance
lukapeschke Sep 29, 2023
9a2ac9d
Merge branch 'master' into upgrade-pydantic
lukapeschke Nov 22, 2023
3210283
updated connectors branch
lukapeschke Nov 23, 2023
1b1f9f0
fix: Add missing python-dateutil dependency
lukapeschke Nov 23, 2023
186a0bb
Merge branch 'master' into upgrade-pydantic
lukapeschke Dec 4, 2023
6a62bf1
fix: forbid PipelineStepWithRefs in PipelineWithVariables
lukapeschke Dec 6, 2023
c0665f5
feat(server): drop Python 3.10 support
lukapeschke Dec 7, 2023
2582ed4
refactor(server/combination): Allow PipelineOrDomainName to be a list…
lukapeschke Dec 7, 2023
c358026
refactor(server): replace update_forward_refs() with model_rebuild()
lukapeschke Dec 7, 2023
052b713
refactor(server): Stronger typing on pipelineOrDomainName
lukapeschke Dec 7, 2023
31a799b
fix(server): temporary fix for the discriminated union bug
lukapeschke Dec 7, 2023
69e8d9e
fix PipelineWithRefs typing
lukapeschke Dec 7, 2023
afe7a05
bump connectors
lukapeschke Dec 15, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 11 additions & 12 deletions server/playground.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,7 @@
from weaverbird.backends.pypika_translator.translate import (
translate_pipeline as pypika_translate_pipeline,
)
from weaverbird.pipeline.pipeline import Pipeline
from weaverbird.pipeline.references import PipelineWithRefs
from weaverbird.pipeline.pipeline import Pipeline, PipelineWithRefs
from weaverbird.pipeline.steps import DomainStep
from weaverbird.pipeline.steps.utils.combination import Reference

Expand Down Expand Up @@ -179,7 +178,7 @@ async def execute_pipeline(pipeline: Pipeline, **kwargs) -> str:
limit=output.pop("limit"),
total_rows=output.pop("total"),
retrieved_rows=len(output["data"]),
).dict()
).model_dump()

return json.dumps(
{
Expand All @@ -206,7 +205,7 @@ async def handle_pandas_backend_request():
return (
jsonify(
{
"step": e.step.dict(),
"step": e.step.model_dump(),
"index": e.index,
"message": e.message,
}
Expand Down Expand Up @@ -346,7 +345,7 @@ def execute_mongo_aggregation_query(collection, query, limit, offset):


async def dummy_reference_resolver(r: Reference) -> list[dict]:
    """Resolve a pipeline reference into a single-step pipeline.

    Playground-only stub: the reference's ``uid`` is used directly as the
    domain name of a ``DomainStep``, serialized with pydantic v2's
    ``model_dump()`` (``.dict()`` is deprecated in v2).
    """
    return [DomainStep(domain=r.uid).model_dump()]


@app.route("/mongo", methods=["GET", "POST"])
Expand Down Expand Up @@ -379,7 +378,7 @@ async def handle_mongo_backend_request():

return jsonify(
{
"pagination_info": pagination_info.dict(),
"pagination_info": pagination_info.model_dump(),
"data": results["data"],
"types": results["types"],
"query": mongo_query, # provided for inspection purposes
Expand All @@ -406,7 +405,7 @@ async def handle_mongo_translated_backend_request():

return jsonify(
{
"pagination_info": pagination_info.dict(),
"pagination_info": pagination_info.model_dump(),
"data": results["data"],
"types": results["types"],
"query": req_params["query"], # provided for inspection purposes
Expand Down Expand Up @@ -519,7 +518,7 @@ async def handle_snowflake_backend_request():
limit=limit,
retrieved_rows=len(df_results),
total_rows=total_count,
).dict(),
).model_dump(),
"schema": build_table_schema(df_results, index=False),
"data": json.loads(df_results.to_json(orient="records")),
"query": query, # provided for inspection purposes
Expand Down Expand Up @@ -637,7 +636,7 @@ async def handle_postgres_backend_request():
limit=limit,
retrieved_rows=len(query_results_page),
total_rows=query_total_count,
).dict(),
).model_dump(),
"results": {
"headers": query_results_columns,
"data": query_results_page,
Expand Down Expand Up @@ -699,7 +698,7 @@ async def handle_athena_post_request():
return {
"pagination_info": build_pagination_info(
offset=offset, limit=limit, retrieved_rows=len(result), total_rows=None
).dict(),
).model_dump(),
"results": {
"headers": result.columns.to_list(),
"data": json.loads(result.to_json(orient="records")),
Expand Down Expand Up @@ -767,7 +766,7 @@ async def hangle_bigquery_post_request():
return {
"pagination_info": build_pagination_info(
offset=offset, limit=limit, retrieved_rows=len(result), total_rows=None
).dict(),
).model_dump(),
"results": {
"headers": result.columns.to_list(),
"data": json.loads(result.to_json(orient="records")),
Expand Down Expand Up @@ -834,7 +833,7 @@ async def handle_mysql_post_request():
return {
"pagination_info": build_pagination_info(
offset=offset, limit=limit, retrieved_rows=len(result), total_rows=None
).dict(),
).model_dump(),
"results": {
"headers": result.columns.to_list(),
"data": json.loads(result[offset : offset + limit].to_json(orient="records")),
Expand Down
244 changes: 173 additions & 71 deletions server/poetry.lock

Large diffs are not rendered by default.

9 changes: 6 additions & 3 deletions server/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ readme = "README.md"

[tool.poetry.dependencies]
python = ">=3.10, <3.12"
pydantic = "^1.9.1"
pydantic = "^2.3.0"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

on peut mettre 2.4.1 ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


# Dependencies for extras
## Pandas
Expand All @@ -27,14 +27,17 @@ Quart-CORS = {version = ">=0.5,<0.8", optional = true}
hypercorn = {version = ">=0.13,<0.15", optional = true}
pymongo = {version = ">=4.2.0", optional = true, extras = ["srv", "tls"]}
psycopg = {optional = true, version = "^3.0.15"}
toucan-connectors = {version = "^4.5.1", optional = true, extras = ["google_big_query", "mongo", "Redshift", "snowflake", "awsathena", "mysql"]}
# toucan-connectors = {version = "^4.5.1", optional = true, extras = ["google_big_query", "mongo", "Redshift", "snowflake", "awsathena", "mysql"]}
toucan-connectors = {git = "https://github.com/ToucanToco/toucan-connectors", branch = "upgrade-pydantic", optional = true, extras = ["google_big_query", "mongo", "Redshift", "snowflake", "awsathena", "mysql"]}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pourquoi c'est une double dep ? (prod et dev)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

En prod c'est pour le playground, uniquement via l'extra playground. En dev c'est pour tirer les dépendances de connecteurs nativesql et pour avoir accès à nosql_apply_parameters_to_query

Copy link
Member

@PrettyWood PrettyWood Sep 29, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

right j'avais pas vu le playground merci

pytest-asyncio = "^0.21.0"

[tool.poetry.group.dev.dependencies]
pytest-cov = "^4.1.0"
pytest-mock = "^3.11.1"
pytest-asyncio = "^0.21.0"
# required so we can use nosql_apply_parameters_to_query during tests
toucan-connectors = { version = "^4.5.1", extras = ["google_big_query", "mongo", "Redshift", "snowflake", "awsathena"] }
# toucan-connectors = { version = "^4.5.1", extras = ["google_big_query", "mongo", "Redshift", "snowflake", "awsathena"] }
toucan-connectors = {git = "https://github.com/ToucanToco/toucan-connectors", branch = "upgrade-pydantic", extras = ["google_big_query", "mongo", "Redshift", "snowflake", "awsathena", "mysql"]}
pytest-benchmark = "^4.0.0"
snowflake-sqlalchemy = "^1.5.0"
types-python-dateutil = "^2.8.19"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def execute_pipeline(
"type": "pandas",
"index": index + 1,
"name": step.name,
"details": step.dict(),
"details": step.model_dump(),
"elapsed_time": stopwatch.interval * 1000,
"sizes": {
"memory_used": convert_size(df.memory_usage(deep=True).sum()),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def get_total_for_dimension(
]
aggregations = []
for aggregation in step.aggregations:
agg = aggregation.copy()
agg = aggregation.model_copy(deep=True)
agg.columns = agg.new_columns
aggregations.append(agg)

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from datetime import datetime
from datetime import datetime, timezone

from numpy.ma import logical_and, logical_or
from pandas import DataFrame, Series, Timestamp
Expand All @@ -23,12 +23,12 @@ def _date_bound_condition_to_tz_aware_timestamp(value: str | RelativeDate | date
if isinstance(value, RelativeDate):
value = evaluate_relative_date(value)
if isinstance(value, datetime):
tz = value.tzinfo or "UTC"
tz = value.tzinfo or timezone.utc
# Cannot pass a tz-aware datetime object with tz arg
return Timestamp(value.replace(tzinfo=None), tz=tz)
else: # str
ts = Timestamp(value)
return ts if ts.tzinfo else ts.replace(tz="UTC")
return ts if ts.tzinfo else ts.replace(tzinfo=timezone.utc)


def apply_condition(condition: Condition, df: DataFrame) -> Series:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import operator
from datetime import datetime

from dateutil.parser import parse as parse_dt
from dateutil.relativedelta import relativedelta

from weaverbird.pipeline.dates import RelativeDate
Expand All @@ -24,7 +25,9 @@ def evaluate_relative_date(relative_date: RelativeDate) -> datetime:
quantity = relative_date.quantity
duration = relative_date.duration + "s"

return operation(
relative_date.date,
relativedelta(**{duration: quantity}), # type: ignore
as_dt = (
relative_date.date
if isinstance(relative_date.date, datetime)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pq ca marchait avant ? Ca coercait de base un str en datetime vu qu'on avait datetime | str alors que mtt non car la v2 est plus stricte ?
Si oui on voudrait pas plutôt changer dans RelativeDate et mettre que le type datetime avec un validator ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes c'est ça. Si ça ferait plus de sens mais je voulais être le plus conservateur possible dans le comportement. Mais tu as raison, quitte a faire un breaking change, autant faire la modif maintenant

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

else parse_dt(relative_date.date)
)
return operation(as_dt, relativedelta(**{duration: quantity}))
2 changes: 1 addition & 1 deletion server/src/weaverbird/backends/pandas_executor/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ class StepExecutionReport(BaseModel):


class PipelineExecutionReport(BaseModel):
    """Aggregated report for the execution of a whole pipeline."""

    # pydantic v2 renamed Field(min_items=...) to Field(min_length=...)
    steps_reports: list[StepExecutionReport] = Field(min_length=0)


DomainRetriever = Callable[[str | Reference], DataFrame]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ def _merge_first_steps(
and isinstance(second_step, TopStep)
and self._source_rows_subset < second_step.limit
):
second_step = second_step.copy(update={"limit": self._source_rows_subset})
second_step = second_step.model_copy(update={"limit": self._source_rows_subset})

ctx = step_method(step=second_step, prev_step_table=table, builder=None, columns=columns)

Expand Down
9 changes: 4 additions & 5 deletions server/src/weaverbird/pipeline/conditions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from datetime import datetime
from typing import Annotated, Any, Literal

from pydantic import BaseConfig, BaseModel, Field
from pydantic import BaseModel, ConfigDict, Field

from weaverbird.pipeline.dates import RelativeDate
from weaverbird.pipeline.types import ColumnName
Expand Down Expand Up @@ -32,7 +32,7 @@ class NullCondition(BaseCondition):
class MatchCondition(BaseCondition):
    """Condition matching (or not matching) a column against a value."""

    column: ColumnName
    operator: Literal["matches", "notmatches"]
    # Widened from plain str: numeric values are accepted as well
    value: str | int | float


class DateBoundCondition(BaseModel):
Expand All @@ -48,11 +48,10 @@ class DateBoundCondition(BaseModel):


class BaseConditionCombo(BaseCondition, ABC):
    """Base class for conditions combining several sub-conditions."""

    # pydantic v2: ConfigDict(populate_by_name=True) replaces the v1
    # Config.allow_population_by_field_name setting
    model_config = ConfigDict(populate_by_name=True)

    def to_dict(self):
        # by_alias=True so that serialized keys use the declared field aliases
        return self.model_dump(by_alias=True)


class ConditionComboAnd(BaseConditionCombo):
Expand Down
2 changes: 1 addition & 1 deletion server/src/weaverbird/pipeline/dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@


class RelativeDate(BaseModel):
    """A date expressed relative to an anchor date (e.g. "3 months before <date>")."""

    # Explicit default required: pydantic v2 no longer treats `X | None`
    # annotations as implicitly optional
    date: datetime | str | None = None
    operator: Literal["from", "until", "before", "after"]
    quantity: int
    duration: Literal["year", "quarter", "month", "week", "day"]
2 changes: 1 addition & 1 deletion server/src/weaverbird/pipeline/formula_ast/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class ColumnName(BaseModel):


# Dataclasses do not support recursive types for now
class Operation(BaseModel, smart_union=True):
class Operation(BaseModel):
left: Expression
right: Expression
operator: Operator
Expand Down
53 changes: 51 additions & 2 deletions server/src/weaverbird/pipeline/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,11 @@
InclusionCondition,
MatchCondition,
)
from weaverbird.pipeline.steps.append import AppendStepWithRefs
from weaverbird.pipeline.steps.domain import DomainStepWithRef
from weaverbird.pipeline.steps.hierarchy import HierarchyStep
from weaverbird.pipeline.steps.join import JoinStepWithRef
from weaverbird.pipeline.steps.utils.combination import ReferenceResolver

from .steps import (
AbsoluteValueStep,
Expand Down Expand Up @@ -155,8 +159,11 @@
class Pipeline(BaseModel):
    """A pipeline, i.e. an ordered list of transformation steps."""

    steps: list[PipelineStep]

    def model_dump(self, *, exclude_none: bool = True, **kwargs) -> dict:
        # Unlike pydantic's default, None values are excluded unless the
        # caller explicitly asks for them
        return super().model_dump(exclude_none=exclude_none, **kwargs)

    def dict(self, *, exclude_none: bool = True, **kwargs) -> dict:
        # Kept for pydantic v1 API compatibility; delegates to model_dump()
        # and forwards exclude_none instead of hard-coding True
        return self.model_dump(exclude_none=exclude_none, **kwargs)


PipelineStepWithVariables = Annotated[
Expand Down Expand Up @@ -384,7 +391,7 @@ def remove_void_conditions_from_mongo_steps(

# TODO move to a dedicated variables module
class PipelineWithVariables(BaseModel):
steps: list[PipelineStepWithVariables | PipelineStep]
steps: list[PipelineStepWithVariables | PipelineStep | "PipelineStepWithRefs"]

def render(self, variables: dict[str, Any], renderer) -> Pipeline:
# TODO it must be more efficient to render the full pipeline once
Expand All @@ -395,4 +402,46 @@ def render(self, variables: dict[str, Any], renderer) -> Pipeline:
return Pipeline(steps=steps_rendered)


# Discriminated union of the steps that may reference another pipeline.
# The step's "name" field acts as the discriminator.
PipelineStepWithRefs = Annotated[
    AppendStepWithRefs | DomainStepWithRef | JoinStepWithRef,
    Field(discriminator="name"),
]


class PipelineWithRefs(BaseModel):
    """
    Represents a pipeline in which some steps can reference some other pipelines using the syntax
    `{"type": "ref", "uid": "..."}`
    """

    steps: list[PipelineStepWithRefs | PipelineStep | PipelineStepWithVariables]

    async def resolve_references(
        self, reference_resolver: ReferenceResolver
    ) -> PipelineWithVariables | None:
        """
        Walk the pipeline steps and replace any reference by its corresponding pipeline.
        The sub-pipelines added should also be handled, so that there will be no references anymore in the result.
        """
        resolved_steps: list[PipelineStepWithRefs | PipelineStepWithVariables | PipelineStep] = []
        for step in self.steps:
            # Steps without a resolve_references method cannot contain
            # references and are kept as-is
            resolved_step = (
                await step.resolve_references(reference_resolver)
                if hasattr(step, "resolve_references")
                else step
            )
            if isinstance(resolved_step, PipelineWithVariables):
                # A resolved reference expands into a whole sub-pipeline:
                # inline its steps rather than nesting it
                resolved_steps.extend(resolved_step.steps)
            elif resolved_step is not None:  # None means the step should be skipped
                resolved_steps.append(resolved_step)

        return PipelineWithVariables(steps=resolved_steps)


PipelineWithVariables.update_forward_refs()


class ReferenceUnresolved(Exception):
    """Raised when a mandatory reference is not resolved."""
48 changes: 0 additions & 48 deletions server/src/weaverbird/pipeline/references.py

This file was deleted.

Loading
Loading