From 20664b2917d3d6b11c3572c3109f13335fb8d416 Mon Sep 17 00:00:00 2001 From: Ilya Gurov Date: Thu, 4 Apr 2024 15:14:30 +0400 Subject: [PATCH] feat(bigquery): add streaming inserts support (#1123) * feat(bigquery): add streaming inserts support * move jobs into job_impl.py add streaming arg into bigquery_adapter support parquet format * complete merge * erase excess imports * improve tests * move tests into load * add nested data test * query_job fix * add docs example * still allows bigquery 2.x client --------- Co-authored-by: Marcin Rudolf --- dlt/destinations/impl/bigquery/bigquery.py | 92 +++++++++++-- .../impl/bigquery/bigquery_adapter.py | 18 ++- .../impl/bigquery/configuration.py | 1 + dlt/destinations/impl/bigquery/sql_client.py | 4 +- .../impl/destination/destination.py | 127 ++---------------- dlt/destinations/job_impl.py | 120 ++++++++++++++++- .../dlt-ecosystem/destinations/bigquery.md | 15 +++ poetry.lock | 105 ++++++--------- .../test_bigquery_streaming_insert.py | 69 ++++++++++ 9 files changed, 354 insertions(+), 197 deletions(-) create mode 100644 tests/load/bigquery/test_bigquery_streaming_insert.py diff --git a/dlt/destinations/impl/bigquery/bigquery.py b/dlt/destinations/impl/bigquery/bigquery.py index d4261a1636..279917d3a0 100644 --- a/dlt/destinations/impl/bigquery/bigquery.py +++ b/dlt/destinations/impl/bigquery/bigquery.py @@ -1,10 +1,13 @@ +import functools import os from pathlib import Path -from typing import ClassVar, Optional, Sequence, Tuple, List, cast, Dict +from typing import Any, ClassVar, Dict, List, Optional, Sequence, Tuple, Type, cast import google.cloud.bigquery as bigquery # noqa: I250 from google.api_core import exceptions as api_core_exceptions from google.cloud import exceptions as gcp_exceptions +from google.api_core import retry +from google.cloud.bigquery.retry import _DEFAULT_RETRY_DEADLINE, _RETRYABLE_REASONS from dlt.common import json, logger from dlt.common.destination import DestinationCapabilitiesContext @@ -22,8 +25,11 @@ from dlt.common.schema.utils import table_schema_has_type from dlt.common.storages.file_storage import FileStorage from dlt.common.typing import DictStrAny +from dlt.destinations.job_impl import DestinationJsonlLoadJob, DestinationParquetLoadJob +from dlt.destinations.sql_client import SqlClientBase from dlt.destinations.exceptions import ( DestinationSchemaWillNotUpdate, + DestinationTerminalException, DestinationTransientException, LoadJobNotExistsException, LoadJobTerminalException, @@ -43,6 +49,7 @@ from dlt.destinations.job_impl import NewReferenceJob from dlt.destinations.sql_jobs import SqlMergeJob from dlt.destinations.type_mapping import TypeMapper +from dlt.pipeline.current import destination_state class BigQueryTypeMapper(TypeMapper): @@ -220,13 +227,41 @@ def start_file_load(self, table: TTableSchema, file_path: str, load_id: str) -> job = super().start_file_load(table, file_path, load_id) if not job: + insert_api = table.get("x-insert-api", "default") try: - job = BigQueryLoadJob( - FileStorage.get_file_name_from_file_path(file_path), - self._create_load_job(table, file_path), - self.config.http_timeout, - self.config.retry_deadline, - ) + if insert_api == "streaming": + if table["write_disposition"] != "append": + raise DestinationTerminalException( + ( + "BigQuery streaming insert can only be used with `append` write_disposition, while " + f'the given resource has `{table["write_disposition"]}`.' + ) + ) + if file_path.endswith(".jsonl"): + job_cls = DestinationJsonlLoadJob + elif file_path.endswith(".parquet"): + job_cls = DestinationParquetLoadJob # type: ignore + else: + raise ValueError( + f"Unsupported file type for BigQuery streaming inserts: {file_path}" + ) + + job = job_cls( + table, + file_path, + self.config, # type: ignore + self.schema, + destination_state(), + functools.partial(_streaming_load, self.sql_client), + [], + ) + else: + job = BigQueryLoadJob( + FileStorage.get_file_name_from_file_path(file_path), + self._create_load_job(table, file_path), + self.config.http_timeout, + self.config.retry_deadline, + ) except api_core_exceptions.GoogleAPICallError as gace: reason = BigQuerySqlClient._get_reason_from_errors(gace) if reason == "notFound": @@ -243,6 +278,7 @@ def start_file_load(self, table: TTableSchema, file_path: str, load_id: str) -> ) from gace else: raise DestinationTransientException(gace) from gace + return job def _get_table_update_sql( @@ -328,9 +364,7 @@ def prepare_load_table( def _get_column_def_sql(self, column: TColumnSchema, table_format: TTableFormat = None) -> str: name = self.capabilities.escape_identifier(column["name"]) - column_def_sql = ( - f"{name} {self.type_mapper.to_db_type(column, table_format)} {self._gen_not_null(column.get('nullable', True))}" - ) + column_def_sql = f"{name} {self.type_mapper.to_db_type(column, table_format)} {self._gen_not_null(column.get('nullable', True))}" if column.get(ROUND_HALF_EVEN_HINT, False): column_def_sql += " OPTIONS (rounding_mode='ROUND_HALF_EVEN')" if column.get(ROUND_HALF_AWAY_FROM_ZERO_HINT, False): @@ -425,3 +459,41 @@ def _from_db_type( self, bq_t: str, precision: Optional[int], scale: Optional[int] ) -> TColumnType: return self.type_mapper.from_db_type(bq_t, precision, scale) + + +def _streaming_load( + sql_client: SqlClientBase[BigQueryClient], items: List[Dict[Any, Any]], table: Dict[str, Any] +) -> None: + """ + Upload the given items into BigQuery table, using streaming API. + Streaming API is used for small amounts of data, with optimal + batch size equal to 500 rows. + + Args: + sql_client (dlt.destinations.impl.bigquery.bigquery.BigQueryClient): + BigQuery client. + items (List[Dict[Any, Any]]): List of rows to upload. + table (Dict[Any, Any]): Table schema. + """ + + def _should_retry(exc: api_core_exceptions.GoogleAPICallError) -> bool: + """Predicate to decide if we need to retry the exception. + + Args: + exc (google.api_core.exceptions.GoogleAPICallError): + Exception raised by the client. + + Returns: + bool: True if the exception is retryable, False otherwise. + """ + reason = exc.errors[0]["reason"] + return reason in _RETRYABLE_REASONS + + full_name = sql_client.make_qualified_table_name(table["name"], escape=False) + + bq_client = sql_client._client + bq_client.insert_rows_json( + full_name, + items, + retry=retry.Retry(predicate=_should_retry, deadline=_DEFAULT_RETRY_DEADLINE), + ) diff --git a/dlt/destinations/impl/bigquery/bigquery_adapter.py b/dlt/destinations/impl/bigquery/bigquery_adapter.py index 1d630e9802..8943b0da79 100644 --- a/dlt/destinations/impl/bigquery/bigquery_adapter.py +++ b/dlt/destinations/impl/bigquery/bigquery_adapter.py @@ -30,6 +30,7 @@ def bigquery_adapter( round_half_even: TColumnNames = None, table_description: Optional[str] = None, table_expiration_datetime: Optional[str] = None, + insert_api: Optional[Literal["streaming", "default"]] = None, ) -> DltResource: """ Prepares data for loading into BigQuery. @@ -56,6 +57,11 @@ def bigquery_adapter( table_description (str, optional): A description for the BigQuery table. table_expiration_datetime (str, optional): String representing the datetime when the BigQuery table expires. This is always interpreted as UTC, BigQuery's default. + insert_api (Optional[Literal["streaming", "default"]]): The API to use for inserting data into BigQuery. + If "default" is chosen, the original SQL query mechanism is used. + If "streaming" is chosen, the streaming API (https://cloud.google.com/bigquery/docs/streaming-data-into-bigquery) + is used. + NOTE: due to BigQuery features, streaming insert is only available for `append` write_disposition. Returns: A `DltResource` object that is ready to be loaded into BigQuery. @@ -134,7 +140,7 @@ def bigquery_adapter( if not isinstance(table_expiration_datetime, str): raise ValueError( "`table_expiration_datetime` must be string representing the datetime when the" - " BigQuery table." + " BigQuery table will be deleted." ) try: parsed_table_expiration_datetime = parser.parse(table_expiration_datetime).replace( @@ -144,6 +150,16 @@ def bigquery_adapter( except ValueError as e: raise ValueError(f"{table_expiration_datetime} could not be parsed!") from e + if insert_api is not None: + if insert_api == "streaming" and data.write_disposition != "append": + raise ValueError( + ( + "BigQuery streaming insert can only be used with `append` write_disposition, while " + f"the given resource has `{data.write_disposition}`." + ) + ) + additional_table_hints |= {"x-insert-api": insert_api} # type: ignore[operator] + if column_hints or additional_table_hints: resource.apply_hints(columns=column_hints, additional_table_hints=additional_table_hints) else: diff --git a/dlt/destinations/impl/bigquery/configuration.py b/dlt/destinations/impl/bigquery/configuration.py index a6686c3f2d..f69e85ca3d 100644 --- a/dlt/destinations/impl/bigquery/configuration.py +++ b/dlt/destinations/impl/bigquery/configuration.py @@ -20,6 +20,7 @@ class BigQueryClientConfiguration(DestinationClientDwhWithStagingConfiguration): retry_deadline: float = ( 60.0 # how long to retry the operation in case of error, the backoff 60 s. ) + batch_size: int = 500 __config_gen_annotations__: ClassVar[List[str]] = ["location"] diff --git a/dlt/destinations/impl/bigquery/sql_client.py b/dlt/destinations/impl/bigquery/sql_client.py index 95cb7ea73b..21086a4db6 100644 --- a/dlt/destinations/impl/bigquery/sql_client.py +++ b/dlt/destinations/impl/bigquery/sql_client.py @@ -48,7 +48,9 @@ class BigQueryDBApiCursorImpl(DBApiCursorImpl): def df(self, chunk_size: int = None, **kwargs: Any) -> DataFrame: if chunk_size is not None: return super().df(chunk_size=chunk_size) - query_job: bigquery.QueryJob = self.native_cursor._query_job + query_job: bigquery.QueryJob = getattr( + self.native_cursor, "_query_job", self.native_cursor.query_job + ) try: return query_job.to_dataframe(**kwargs) diff --git a/dlt/destinations/impl/destination/destination.py b/dlt/destinations/impl/destination/destination.py index 4a3cabde34..4d0f081aa6 100644 --- a/dlt/destinations/impl/destination/destination.py +++ b/dlt/destinations/impl/destination/destination.py @@ -1,140 +1,29 @@ -from abc import ABC, abstractmethod -from types import TracebackType -from typing import ClassVar, Dict, Optional, Type, Iterable, Iterable, cast, Dict, List from copy import deepcopy +from types import TracebackType +from typing import ClassVar, Optional, Type, Iterable, cast, List from dlt.common.destination.reference import LoadJob from dlt.destinations.job_impl import EmptyLoadJob -from dlt.common.typing import TDataItems, AnyFun -from dlt.common import json -from dlt.pipeline.current import ( - destination_state, - commit_load_package_state, -) +from dlt.common.typing import AnyFun +from dlt.pipeline.current import destination_state from dlt.common.configuration import create_resolved_partial from dlt.common.schema import Schema, TTableSchema, TSchemaTables -from dlt.common.schema.typing import TTableSchema -from dlt.common.storages import FileStorage from dlt.common.destination import DestinationCapabilitiesContext from dlt.common.destination.reference import ( - TLoadJobState, LoadJob, DoNothingJob, JobClientBase, ) from dlt.destinations.impl.destination import capabilities -from dlt.destinations.impl.destination.configuration import ( - CustomDestinationClientConfiguration, - TDestinationCallable, +from dlt.destinations.impl.destination.configuration import CustomDestinationClientConfiguration +from dlt.destinations.job_impl import ( + DestinationJsonlLoadJob, + DestinationParquetLoadJob, ) -class DestinationLoadJob(LoadJob, ABC): - def __init__( - self, - table: TTableSchema, - file_path: str, - config: CustomDestinationClientConfiguration, - schema: Schema, - destination_state: Dict[str, int], - destination_callable: TDestinationCallable, - skipped_columns: List[str], - ) -> None: - super().__init__(FileStorage.get_file_name_from_file_path(file_path)) - self._file_path = file_path - self._config = config - self._table = table - self._schema = schema - # we create pre_resolved callable here - self._callable = destination_callable - self._state: TLoadJobState = "running" - self._storage_id = f"{self._parsed_file_name.table_name}.{self._parsed_file_name.file_id}" - self.skipped_columns = skipped_columns - try: - if self._config.batch_size == 0: - # on batch size zero we only call the callable with the filename - self.call_callable_with_items(self._file_path) - else: - current_index = destination_state.get(self._storage_id, 0) - for batch in self.run(current_index): - self.call_callable_with_items(batch) - current_index += len(batch) - destination_state[self._storage_id] = current_index - - self._state = "completed" - except Exception as e: - self._state = "retry" - raise e - finally: - # save progress - commit_load_package_state() - - @abstractmethod - def run(self, start_index: int) -> Iterable[TDataItems]: - pass - - def call_callable_with_items(self, items: TDataItems) -> None: - if not items: - return - # call callable - self._callable(items, self._table) - - def state(self) -> TLoadJobState: - return self._state - - def exception(self) -> str: - raise NotImplementedError() - - -class DestinationParquetLoadJob(DestinationLoadJob): - def run(self, start_index: int) -> Iterable[TDataItems]: - # stream items - from dlt.common.libs.pyarrow import pyarrow - - # guard against changed batch size after restart of loadjob - assert ( - start_index % self._config.batch_size - ) == 0, "Batch size was changed during processing of one load package" - - # on record batches we cannot drop columns, we need to - # select the ones we want to keep - keep_columns = list(self._table["columns"].keys()) - start_batch = start_index / self._config.batch_size - with pyarrow.parquet.ParquetFile(self._file_path) as reader: - for record_batch in reader.iter_batches( - batch_size=self._config.batch_size, columns=keep_columns - ): - if start_batch > 0: - start_batch -= 1 - continue - yield record_batch - - -class DestinationJsonlLoadJob(DestinationLoadJob): - def run(self, start_index: int) -> Iterable[TDataItems]: - current_batch: TDataItems = [] - - # stream items - with FileStorage.open_zipsafe_ro(self._file_path) as f: - encoded_json = json.typed_loads(f.read()) - - for item in encoded_json: - # find correct start position - if start_index > 0: - start_index -= 1 - continue - # skip internal columns - for column in self.skipped_columns: - item.pop(column, None) - current_batch.append(item) - if len(current_batch) == self._config.batch_size: - yield current_batch - current_batch = [] - yield current_batch - - class DestinationClient(JobClientBase): """Sink Client""" diff --git a/dlt/destinations/job_impl.py b/dlt/destinations/job_impl.py index 7a6b98544c..8e017fc791 100644 --- a/dlt/destinations/job_impl.py +++ b/dlt/destinations/job_impl.py @@ -1,10 +1,20 @@ +from abc import ABC, abstractmethod import os import tempfile # noqa: 251 +from typing import Dict, Iterable, List +from dlt.common import json +from dlt.common.destination.reference import NewLoadJob, FollowupJob, TLoadJobState, LoadJob +from dlt.common.schema import Schema, TTableSchema from dlt.common.storages import FileStorage +from dlt.common.typing import TDataItems -from dlt.common.destination.reference import NewLoadJob, FollowupJob, TLoadJobState, LoadJob -from dlt.common.storages.load_storage import ParsedLoadJobFileName +from dlt.destinations.impl.destination.configuration import ( + CustomDestinationClientConfiguration, + TDestinationCallable, +) + +from dlt.pipeline.current import commit_load_package_state class EmptyLoadJobWithoutFollowup(LoadJob): @@ -60,3 +70,109 @@ def resolve_reference(file_path: str) -> str: with open(file_path, "r+", encoding="utf-8") as f: # Reading from a file return f.read() + + +class DestinationLoadJob(LoadJob, ABC): + def __init__( + self, + table: TTableSchema, + file_path: str, + config: CustomDestinationClientConfiguration, + schema: Schema, + destination_state: Dict[str, int], + destination_callable: TDestinationCallable, + skipped_columns: List[str], + ) -> None: + super().__init__(FileStorage.get_file_name_from_file_path(file_path)) + self._file_path = file_path + self._config = config + self._table = table + self._schema = schema + # we create pre_resolved callable here + self._callable = destination_callable + self._state: TLoadJobState = "running" + self._storage_id = f"{self._parsed_file_name.table_name}.{self._parsed_file_name.file_id}" + self.skipped_columns = skipped_columns + try: + if self._config.batch_size == 0: + # on batch size zero we only call the callable with the filename + self.call_callable_with_items(self._file_path) + else: + current_index = destination_state.get(self._storage_id, 0) + for batch in self.run(current_index): + self.call_callable_with_items(batch) + current_index += len(batch) + destination_state[self._storage_id] = current_index + + self._state = "completed" + except Exception as e: + self._state = "retry" + raise e + finally: + # save progress + commit_load_package_state() + + @abstractmethod + def run(self, start_index: int) -> Iterable[TDataItems]: + pass + + def call_callable_with_items(self, items: TDataItems) -> None: + if not items: + return + # call callable + self._callable(items, self._table) + + def state(self) -> TLoadJobState: + return self._state + + def exception(self) -> str: + raise NotImplementedError() + + +class DestinationParquetLoadJob(DestinationLoadJob): + def run(self, start_index: int) -> Iterable[TDataItems]: + # stream items + from dlt.common.libs.pyarrow import pyarrow + + # guard against changed batch size after restart of loadjob + assert ( + start_index % self._config.batch_size + ) == 0, "Batch size was changed during processing of one load package" + + # on record batches we cannot drop columns, we need to + # select the ones we want to keep + keep_columns = list(self._table["columns"].keys()) + start_batch = start_index / self._config.batch_size + with pyarrow.parquet.ParquetFile(self._file_path) as reader: + for record_batch in reader.iter_batches( + batch_size=self._config.batch_size, columns=keep_columns + ): + if start_batch > 0: + start_batch -= 1 + continue + yield record_batch + + +class DestinationJsonlLoadJob(DestinationLoadJob): + def run(self, start_index: int) -> Iterable[TDataItems]: + current_batch: TDataItems = [] + + # stream items + with FileStorage.open_zipsafe_ro(self._file_path) as f: + encoded_json = json.typed_loads(f.read()) + if isinstance(encoded_json, dict): + encoded_json = [encoded_json] + + for item in encoded_json: + # find correct start position + if start_index > 0: + start_index -= 1 + continue + # skip internal columns + for column in self.skipped_columns: + item.pop(column, None) + current_batch.append(item) + if len(current_batch) == self._config.batch_size: + yield current_batch + current_batch = [] + yield current_batch diff --git a/docs/website/docs/dlt-ecosystem/destinations/bigquery.md b/docs/website/docs/dlt-ecosystem/destinations/bigquery.md index 6203ab0866..1e80146a7a 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/bigquery.md +++ b/docs/website/docs/dlt-ecosystem/destinations/bigquery.md @@ -125,6 +125,17 @@ recreated with a [clone command](https://cloud.google.com/bigquery/docs/table-cl The loader follows [Google recommendations](https://cloud.google.com/bigquery/docs/error-messages) when retrying and terminating jobs. The Google BigQuery client implements an elaborate retry mechanism and timeouts for queries and file uploads, which may be configured in destination options. +BigQuery destination also supports [streaming insert](https://cloud.google.com/bigquery/docs/streaming-data-into-bigquery). The mode provides better performance with small (<500 records) batches, but it buffers the data, preventing any update/delete operations on it. Due to this, streaming inserts are only available with `write_disposition="append"`, and the inserted data is blocked for editing for up to 90 min (reading, however, is available immediately). [See more](https://cloud.google.com/bigquery/quotas#streaming_inserts). + +To switch the resource into streaming insert mode, use hints: +```py +@dlt.resource(write_disposition="append") +def streamed_resource(): + yield {"field1": 1, "field2": 2} + +streamed_resource.apply_hints(additional_table_hints={"x-insert-api": "streaming"}) +``` + ## Supported File Formats You can configure the following file formats to load data to BigQuery: @@ -244,6 +255,10 @@ bigquery_adapter( # Apply table level options. bigquery_adapter(event_data, table_description="Dummy event data.") + +# Load data in "streaming insert" mode (only available with +# write_disposition="append"). +bigquery_adapter(event_data, insert_api="streaming") ``` In the example above, the adapter specifies that `event_date` should be used for partitioning and both `event_date` and `user_id` should be used for clustering (in the given order) when the table is created. diff --git a/poetry.lock b/poetry.lock index a7c3979625..458614b792 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "about-time" @@ -3133,37 +3133,33 @@ tool = ["click (>=6.0.0)"] [[package]] name = "google-cloud-bigquery" -version = "3.11.4" +version = "3.19.0" description = "Google BigQuery API client library" optional = true python-versions = ">=3.7" files = [ - {file = "google-cloud-bigquery-3.11.4.tar.gz", hash = "sha256:697df117241a2283bcbb93b21e10badc14e51c9a90800d2a7e1a3e1c7d842974"}, - {file = "google_cloud_bigquery-3.11.4-py2.py3-none-any.whl", hash = "sha256:5fa7897743a0ed949ade25a0942fc9e7557d8fce307c6f8a76d1b604cf27f1b1"}, + {file = "google-cloud-bigquery-3.19.0.tar.gz", hash = "sha256:8e311dae49768e1501fcdc5e916bff4b7e169471e5707919f4a6f78a02b3b5a6"}, + {file = "google_cloud_bigquery-3.19.0-py2.py3-none-any.whl", hash = "sha256:c6b8850247a4b132066e49f6e45f850c22824482838688d744a4398eea1120ed"}, ] [package.dependencies] -google-api-core = {version = ">=1.31.5,<2.0.dev0 || >2.3.0,<3.0.0dev", extras = ["grpc"]} +google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} +google-auth = ">=2.14.1,<3.0.0dev" google-cloud-core = ">=1.6.0,<3.0.0dev" google-resumable-media = ">=0.6.0,<3.0dev" -grpcio = [ - {version = ">=1.47.0,<2.0dev", markers = "python_version < \"3.11\""}, - {version = ">=1.49.1,<2.0dev", markers = "python_version >= \"3.11\""}, -] packaging = ">=20.0.0" -proto-plus = ">=1.15.0,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" python-dateutil = ">=2.7.2,<3.0dev" requests = ">=2.21.0,<3.0.0dev" [package.extras] -all = ["Shapely (>=1.8.4,<2.0dev)", "db-dtypes (>=0.3.0,<2.0.0dev)", "geopandas (>=0.9.0,<1.0dev)", "google-cloud-bigquery-storage (>=2.6.0,<3.0.0dev)", "grpcio (>=1.47.0,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "ipykernel (>=6.0.0)", "ipython (>=7.23.1,!=8.1.0)", "ipywidgets (>=7.7.0)", "opentelemetry-api (>=1.1.0)", "opentelemetry-instrumentation (>=0.20b0)", "opentelemetry-sdk (>=1.1.0)", "pandas (>=1.1.0)", "pyarrow (>=3.0.0)", "tqdm (>=4.7.4,<5.0.0dev)"] +all = ["Shapely (>=1.8.4,<3.0.0dev)", "db-dtypes (>=0.3.0,<2.0.0dev)", "geopandas (>=0.9.0,<1.0dev)", "google-cloud-bigquery-storage (>=2.6.0,<3.0.0dev)", "grpcio (>=1.47.0,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "importlib-metadata (>=1.0.0)", "ipykernel (>=6.0.0)", "ipython (>=7.23.1,!=8.1.0)", "ipywidgets (>=7.7.0)", "opentelemetry-api (>=1.1.0)", "opentelemetry-instrumentation (>=0.20b0)", "opentelemetry-sdk (>=1.1.0)", "pandas (>=1.1.0)", "proto-plus (>=1.15.0,<2.0.0dev)", "protobuf (>=3.19.5,!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev)", "pyarrow (>=3.0.0)", "tqdm (>=4.7.4,<5.0.0dev)"] +bigquery-v2 = ["proto-plus (>=1.15.0,<2.0.0dev)", "protobuf (>=3.19.5,!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev)"] bqstorage = ["google-cloud-bigquery-storage (>=2.6.0,<3.0.0dev)", "grpcio (>=1.47.0,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "pyarrow (>=3.0.0)"] -geopandas = ["Shapely (>=1.8.4,<2.0dev)", "geopandas (>=0.9.0,<1.0dev)"] +geopandas = ["Shapely (>=1.8.4,<3.0.0dev)", "geopandas (>=0.9.0,<1.0dev)"] ipython = ["ipykernel (>=6.0.0)", "ipython (>=7.23.1,!=8.1.0)"] ipywidgets = ["ipykernel (>=6.0.0)", "ipywidgets (>=7.7.0)"] opentelemetry = ["opentelemetry-api (>=1.1.0)", "opentelemetry-instrumentation (>=0.20b0)", "opentelemetry-sdk (>=1.1.0)"] -pandas = ["db-dtypes (>=0.3.0,<2.0.0dev)", "pandas (>=1.1.0)", "pyarrow (>=3.0.0)"] +pandas = ["db-dtypes (>=0.3.0,<2.0.0dev)", "importlib-metadata (>=1.0.0)", "pandas (>=1.1.0)", "pyarrow (>=3.0.0)"] tqdm = ["tqdm (>=4.7.4,<5.0.0dev)"] [[package]] @@ -3519,56 +3515,6 @@ files = [ {file = "google_re2-1.1-4-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1f4d4f0823e8b2f6952a145295b1ff25245ce9bb136aff6fe86452e507d4c1dd"}, {file = "google_re2-1.1-4-cp39-cp39-win32.whl", hash = "sha256:1afae56b2a07bb48cfcfefaa15ed85bae26a68f5dc7f9e128e6e6ea36914e847"}, {file = "google_re2-1.1-4-cp39-cp39-win_amd64.whl", hash = "sha256:aa7d6d05911ab9c8adbf3c225a7a120ab50fd2784ac48f2f0d140c0b7afc2b55"}, - {file = "google_re2-1.1-5-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:222fc2ee0e40522de0b21ad3bc90ab8983be3bf3cec3d349c80d76c8bb1a4beb"}, - {file = "google_re2-1.1-5-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:d4763b0b9195b72132a4e7de8e5a9bf1f05542f442a9115aa27cfc2a8004f581"}, - {file = "google_re2-1.1-5-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:209649da10c9d4a93d8a4d100ecbf9cc3b0252169426bec3e8b4ad7e57d600cf"}, - {file = "google_re2-1.1-5-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:68813aa333c1604a2df4a495b2a6ed065d7c8aebf26cc7e7abb5a6835d08353c"}, - {file = "google_re2-1.1-5-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:370a23ec775ad14e9d1e71474d56f381224dcf3e72b15d8ca7b4ad7dd9cd5853"}, - {file = "google_re2-1.1-5-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:14664a66a3ddf6bc9e56f401bf029db2d169982c53eff3f5876399104df0e9a6"}, - {file = "google_re2-1.1-5-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3ea3722cc4932cbcebd553b69dce1b4a73572823cff4e6a244f1c855da21d511"}, - {file = "google_re2-1.1-5-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e14bb264c40fd7c627ef5678e295370cd6ba95ca71d835798b6e37502fc4c690"}, - {file = "google_re2-1.1-5-cp310-cp310-win32.whl", hash = "sha256:39512cd0151ea4b3969c992579c79b423018b464624ae955be685fc07d94556c"}, - {file = "google_re2-1.1-5-cp310-cp310-win_amd64.whl", hash = "sha256:ac66537aa3bc5504320d922b73156909e3c2b6da19739c866502f7827b3f9fdf"}, - {file = "google_re2-1.1-5-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:5b5ea68d54890c9edb1b930dcb2658819354e5d3f2201f811798bbc0a142c2b4"}, - {file = "google_re2-1.1-5-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:33443511b6b83c35242370908efe2e8e1e7cae749c766b2b247bf30e8616066c"}, - {file = "google_re2-1.1-5-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:413d77bdd5ba0bfcada428b4c146e87707452ec50a4091ec8e8ba1413d7e0619"}, - {file = "google_re2-1.1-5-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:5171686e43304996a34baa2abcee6f28b169806d0e583c16d55e5656b092a414"}, - {file = "google_re2-1.1-5-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:3b284db130283771558e31a02d8eb8fb756156ab98ce80035ae2e9e3a5f307c4"}, - {file = "google_re2-1.1-5-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:296e6aed0b169648dc4b870ff47bd34c702a32600adb9926154569ef51033f47"}, - {file = "google_re2-1.1-5-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:38d50e68ead374160b1e656bbb5d101f0b95fb4cc57f4a5c12100155001480c5"}, - {file = "google_re2-1.1-5-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2a0416a35921e5041758948bcb882456916f22845f66a93bc25070ef7262b72a"}, - {file = "google_re2-1.1-5-cp311-cp311-win32.whl", hash = "sha256:a1d59568bbb5de5dd56dd6cdc79907db26cce63eb4429260300c65f43469e3e7"}, - {file = "google_re2-1.1-5-cp311-cp311-win_amd64.whl", hash = "sha256:72f5a2f179648b8358737b2b493549370debd7d389884a54d331619b285514e3"}, - {file = "google_re2-1.1-5-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:cbc72c45937b1dc5acac3560eb1720007dccca7c9879138ff874c7f6baf96005"}, - {file = "google_re2-1.1-5-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:5fadd1417fbef7235fa9453dba4eb102e6e7d94b1e4c99d5fa3dd4e288d0d2ae"}, - {file = "google_re2-1.1-5-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:040f85c63cc02696485b59b187a5ef044abe2f99b92b4fb399de40b7d2904ccc"}, - {file = "google_re2-1.1-5-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:64e3b975ee6d9bbb2420494e41f929c1a0de4bcc16d86619ab7a87f6ea80d6bd"}, - {file = "google_re2-1.1-5-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:8ee370413e00f4d828eaed0e83b8af84d7a72e8ee4f4bd5d3078bc741dfc430a"}, - {file = "google_re2-1.1-5-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:5b89383001079323f693ba592d7aad789d7a02e75adb5d3368d92b300f5963fd"}, - {file = "google_re2-1.1-5-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:63cb4fdfbbda16ae31b41a6388ea621510db82feb8217a74bf36552ecfcd50ad"}, - {file = "google_re2-1.1-5-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9ebedd84ae8be10b7a71a16162376fd67a2386fe6361ef88c622dcf7fd679daf"}, - {file = "google_re2-1.1-5-cp312-cp312-win32.whl", hash = "sha256:c8e22d1692bc2c81173330c721aff53e47ffd3c4403ff0cd9d91adfd255dd150"}, - {file = "google_re2-1.1-5-cp312-cp312-win_amd64.whl", hash = "sha256:5197a6af438bb8c4abda0bbe9c4fbd6c27c159855b211098b29d51b73e4cbcf6"}, - {file = "google_re2-1.1-5-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:b6727e0b98417e114b92688ad2aa256102ece51f29b743db3d831df53faf1ce3"}, - {file = "google_re2-1.1-5-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:711e2b6417eb579c61a4951029d844f6b95b9b373b213232efd413659889a363"}, - {file = "google_re2-1.1-5-cp38-cp38-macosx_13_0_arm64.whl", hash = "sha256:71ae8b3df22c5c154c8af0f0e99d234a450ef1644393bc2d7f53fc8c0a1e111c"}, - {file = "google_re2-1.1-5-cp38-cp38-macosx_13_0_x86_64.whl", hash = "sha256:94a04e214bc521a3807c217d50cf099bbdd0c0a80d2d996c0741dbb995b5f49f"}, - {file = "google_re2-1.1-5-cp38-cp38-macosx_14_0_arm64.whl", hash = "sha256:a770f75358508a9110c81a1257721f70c15d9bb592a2fb5c25ecbd13566e52a5"}, - {file = "google_re2-1.1-5-cp38-cp38-macosx_14_0_x86_64.whl", hash = "sha256:07c9133357f7e0b17c6694d5dcb82e0371f695d7c25faef2ff8117ef375343ff"}, - {file = "google_re2-1.1-5-cp38-cp38-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:204ca6b1cf2021548f4a9c29ac015e0a4ab0a7b6582bf2183d838132b60c8fda"}, - {file = "google_re2-1.1-5-cp38-cp38-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f0b95857c2c654f419ca684ec38c9c3325c24e6ba7d11910a5110775a557bb18"}, - {file = "google_re2-1.1-5-cp38-cp38-win32.whl", hash = "sha256:347ac770e091a0364e822220f8d26ab53e6fdcdeaec635052000845c5a3fb869"}, - {file = "google_re2-1.1-5-cp38-cp38-win_amd64.whl", hash = "sha256:ec32bb6de7ffb112a07d210cf9f797b7600645c2d5910703fa07f456dd2150e0"}, - {file = "google_re2-1.1-5-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:eb5adf89060f81c5ff26c28e261e6b4997530a923a6093c9726b8dec02a9a326"}, - {file = "google_re2-1.1-5-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:a22630c9dd9ceb41ca4316bccba2643a8b1d5c198f21c00ed5b50a94313aaf10"}, - {file = "google_re2-1.1-5-cp39-cp39-macosx_13_0_arm64.whl", hash = "sha256:544dc17fcc2d43ec05f317366375796351dec44058e1164e03c3f7d050284d58"}, - {file = "google_re2-1.1-5-cp39-cp39-macosx_13_0_x86_64.whl", hash = "sha256:19710af5ea88751c7768575b23765ce0dfef7324d2539de576f75cdc319d6654"}, - {file = "google_re2-1.1-5-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:f82995a205e08ad896f4bd5ce4847c834fab877e1772a44e5f262a647d8a1dec"}, - {file = "google_re2-1.1-5-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:63533c4d58da9dc4bc040250f1f52b089911699f0368e0e6e15f996387a984ed"}, - {file = "google_re2-1.1-5-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:79e00fcf0cb04ea35a22b9014712d448725ce4ddc9f08cc818322566176ca4b0"}, - {file = "google_re2-1.1-5-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bc41afcefee2da6c4ed883a93d7f527c4b960cd1d26bbb0020a7b8c2d341a60a"}, - {file = "google_re2-1.1-5-cp39-cp39-win32.whl", hash = "sha256:486730b5e1f1c31b0abc6d80abe174ce4f1188fe17d1b50698f2bf79dc6e44be"}, - {file = "google_re2-1.1-5-cp39-cp39-win_amd64.whl", hash = "sha256:4de637ca328f1d23209e80967d1b987d6b352cd01b3a52a84b4d742c69c3da6c"}, ] [[package]] @@ -4681,6 +4627,16 @@ files = [ {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, @@ -7071,6 +7027,7 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -7078,8 +7035,16 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, + {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, + {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, + {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -7096,6 +7061,7 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -7103,6 +7069,7 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -7990,6 +7957,7 @@ files = [ {file = "SQLAlchemy-1.4.49-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:03db81b89fe7ef3857b4a00b63dedd632d6183d4ea5a31c5d8a92e000a41fc71"}, {file = "SQLAlchemy-1.4.49-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:95b9df9afd680b7a3b13b38adf6e3a38995da5e162cc7524ef08e3be4e5ed3e1"}, {file = "SQLAlchemy-1.4.49-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a63e43bf3f668c11bb0444ce6e809c1227b8f067ca1068898f3008a273f52b09"}, + {file = "SQLAlchemy-1.4.49-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca46de16650d143a928d10842939dab208e8d8c3a9a8757600cae9b7c579c5cd"}, {file = "SQLAlchemy-1.4.49-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f835c050ebaa4e48b18403bed2c0fda986525896efd76c245bdd4db995e51a4c"}, {file = "SQLAlchemy-1.4.49-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c21b172dfb22e0db303ff6419451f0cac891d2e911bb9fbf8003d717f1bcf91"}, {file = "SQLAlchemy-1.4.49-cp310-cp310-win32.whl", hash = "sha256:5fb1ebdfc8373b5a291485757bd6431de8d7ed42c27439f543c81f6c8febd729"}, @@ -7999,26 +7967,35 @@ files = [ {file = "SQLAlchemy-1.4.49-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5debe7d49b8acf1f3035317e63d9ec8d5e4d904c6e75a2a9246a119f5f2fdf3d"}, {file = "SQLAlchemy-1.4.49-cp311-cp311-win32.whl", hash = "sha256:82b08e82da3756765c2e75f327b9bf6b0f043c9c3925fb95fb51e1567fa4ee87"}, {file = "SQLAlchemy-1.4.49-cp311-cp311-win_amd64.whl", hash = "sha256:171e04eeb5d1c0d96a544caf982621a1711d078dbc5c96f11d6469169bd003f1"}, + {file = "SQLAlchemy-1.4.49-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f23755c384c2969ca2f7667a83f7c5648fcf8b62a3f2bbd883d805454964a800"}, + {file = "SQLAlchemy-1.4.49-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8396e896e08e37032e87e7fbf4a15f431aa878c286dc7f79e616c2feacdb366c"}, + {file = "SQLAlchemy-1.4.49-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66da9627cfcc43bbdebd47bfe0145bb662041472393c03b7802253993b6b7c90"}, + {file = "SQLAlchemy-1.4.49-cp312-cp312-win32.whl", hash = "sha256:9a06e046ffeb8a484279e54bda0a5abfd9675f594a2e38ef3133d7e4d75b6214"}, + {file = "SQLAlchemy-1.4.49-cp312-cp312-win_amd64.whl", hash = "sha256:7cf8b90ad84ad3a45098b1c9f56f2b161601e4670827d6b892ea0e884569bd1d"}, {file = "SQLAlchemy-1.4.49-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:36e58f8c4fe43984384e3fbe6341ac99b6b4e083de2fe838f0fdb91cebe9e9cb"}, {file = "SQLAlchemy-1.4.49-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b31e67ff419013f99ad6f8fc73ee19ea31585e1e9fe773744c0f3ce58c039c30"}, + {file = "SQLAlchemy-1.4.49-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ebc22807a7e161c0d8f3da34018ab7c97ef6223578fcdd99b1d3e7ed1100a5db"}, {file = "SQLAlchemy-1.4.49-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:c14b29d9e1529f99efd550cd04dbb6db6ba5d690abb96d52de2bff4ed518bc95"}, {file = "SQLAlchemy-1.4.49-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c40f3470e084d31247aea228aa1c39bbc0904c2b9ccbf5d3cfa2ea2dac06f26d"}, {file = "SQLAlchemy-1.4.49-cp36-cp36m-win32.whl", hash = "sha256:706bfa02157b97c136547c406f263e4c6274a7b061b3eb9742915dd774bbc264"}, {file = "SQLAlchemy-1.4.49-cp36-cp36m-win_amd64.whl", hash = "sha256:a7f7b5c07ae5c0cfd24c2db86071fb2a3d947da7bd487e359cc91e67ac1c6d2e"}, {file = "SQLAlchemy-1.4.49-cp37-cp37m-macosx_11_0_x86_64.whl", hash = "sha256:4afbbf5ef41ac18e02c8dc1f86c04b22b7a2125f2a030e25bbb4aff31abb224b"}, {file = "SQLAlchemy-1.4.49-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:24e300c0c2147484a002b175f4e1361f102e82c345bf263242f0449672a4bccf"}, + {file = "SQLAlchemy-1.4.49-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:393cd06c3b00b57f5421e2133e088df9cabcececcea180327e43b937b5a7caa5"}, {file = "SQLAlchemy-1.4.49-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:201de072b818f8ad55c80d18d1a788729cccf9be6d9dc3b9d8613b053cd4836d"}, {file = "SQLAlchemy-1.4.49-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7653ed6817c710d0c95558232aba799307d14ae084cc9b1f4c389157ec50df5c"}, {file = "SQLAlchemy-1.4.49-cp37-cp37m-win32.whl", hash = "sha256:647e0b309cb4512b1f1b78471fdaf72921b6fa6e750b9f891e09c6e2f0e5326f"}, {file = "SQLAlchemy-1.4.49-cp37-cp37m-win_amd64.whl", hash = "sha256:ab73ed1a05ff539afc4a7f8cf371764cdf79768ecb7d2ec691e3ff89abbc541e"}, {file = "SQLAlchemy-1.4.49-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:37ce517c011560d68f1ffb28af65d7e06f873f191eb3a73af5671e9c3fada08a"}, {file = "SQLAlchemy-1.4.49-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1878ce508edea4a879015ab5215546c444233881301e97ca16fe251e89f1c55"}, + {file = "SQLAlchemy-1.4.49-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95ab792ca493891d7a45a077e35b418f68435efb3e1706cb8155e20e86a9013c"}, {file = "SQLAlchemy-1.4.49-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:0e8e608983e6f85d0852ca61f97e521b62e67969e6e640fe6c6b575d4db68557"}, {file = "SQLAlchemy-1.4.49-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ccf956da45290df6e809ea12c54c02ace7f8ff4d765d6d3dfb3655ee876ce58d"}, {file = "SQLAlchemy-1.4.49-cp38-cp38-win32.whl", hash = "sha256:f167c8175ab908ce48bd6550679cc6ea20ae169379e73c7720a28f89e53aa532"}, {file = "SQLAlchemy-1.4.49-cp38-cp38-win_amd64.whl", hash = "sha256:45806315aae81a0c202752558f0df52b42d11dd7ba0097bf71e253b4215f34f4"}, {file = "SQLAlchemy-1.4.49-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:b6d0c4b15d65087738a6e22e0ff461b407533ff65a73b818089efc8eb2b3e1de"}, {file = "SQLAlchemy-1.4.49-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a843e34abfd4c797018fd8d00ffffa99fd5184c421f190b6ca99def4087689bd"}, + {file = "SQLAlchemy-1.4.49-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:738d7321212941ab19ba2acf02a68b8ee64987b248ffa2101630e8fccb549e0d"}, {file = "SQLAlchemy-1.4.49-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:1c890421651b45a681181301b3497e4d57c0d01dc001e10438a40e9a9c25ee77"}, {file = "SQLAlchemy-1.4.49-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d26f280b8f0a8f497bc10573849ad6dc62e671d2468826e5c748d04ed9e670d5"}, {file = "SQLAlchemy-1.4.49-cp39-cp39-win32.whl", hash = "sha256:ec2268de67f73b43320383947e74700e95c6770d0c68c4e615e9897e46296294"}, diff --git a/tests/load/bigquery/test_bigquery_streaming_insert.py b/tests/load/bigquery/test_bigquery_streaming_insert.py new file mode 100644 index 0000000000..c80f6ed65a --- /dev/null +++ b/tests/load/bigquery/test_bigquery_streaming_insert.py @@ -0,0 +1,69 @@ +import pytest + +import dlt +from dlt.destinations.impl.bigquery.bigquery_adapter import bigquery_adapter +from tests.pipeline.utils import assert_load_info + + +def test_bigquery_adapter_streaming_insert(): + @dlt.resource + def test_resource(): + yield {"field1": 1, "field2": 2} + + bigquery_adapter(test_resource, insert_api="streaming") + + pipe = dlt.pipeline(pipeline_name="insert_test", destination="bigquery", full_refresh=True) + pack = pipe.run(test_resource, table_name="test_streaming_items44") + + assert_load_info(pack) + + with pipe.sql_client() as client: + with client.execute_query("SELECT * FROM test_streaming_items44;") as cursor: + res = cursor.fetchall() + assert tuple(res[0])[:2] == (1, 2) + + +def test_bigquery_adapter_streaming_wrong_disposition(): + @dlt.resource(write_disposition="merge") + def test_resource(): + yield {"field1": 1, "field2": 2} + + with pytest.raises(ValueError): + bigquery_adapter(test_resource, insert_api="streaming") + + +def test_bigquery_streaming_wrong_disposition(): + @dlt.resource(write_disposition="merge") + def test_resource(): + yield {"field1": 1, "field2": 2} + + test_resource.apply_hints(additional_table_hints={"x-insert-api": "streaming"}) + + pipe = dlt.pipeline(pipeline_name="insert_test", destination="bigquery") + info = pipe.run(test_resource) + assert ( + """BigQuery streaming insert can only be used with `append`""" + """ write_disposition, while the given resource has `merge`.""" + ) in info.asdict()["load_packages"][0]["jobs"][0]["failed_message"] + + +def test_bigquery_streaming_nested_data(): + @dlt.resource + def test_resource(): + yield {"field1": {"nested_field": 1}, "field2": [{"nested_field": 2}]} + + bigquery_adapter(test_resource, insert_api="streaming") + + pipe = dlt.pipeline(pipeline_name="insert_test", destination="bigquery", full_refresh=True) + pack = pipe.run(test_resource, table_name="test_streaming_items") + + assert_load_info(pack) + + with pipe.sql_client() as client: + with client.execute_query("SELECT * FROM test_streaming_items;") as cursor: + res = cursor.fetchall() + assert res[0]["field1__nested_field"] == 1 # type: ignore + + with client.execute_query("SELECT * FROM test_streaming_items__field2;") as cursor: + res = cursor.fetchall() + assert res[0]["nested_field"] == 2 # type: ignore