From 06bd3bb6f9c4b1bcc1acee49f8b6b2332a796c30 Mon Sep 17 00:00:00 2001 From: Alberto Asuero Date: Tue, 14 Jun 2022 22:07:51 +0200 Subject: [PATCH 1/4] BigQuery scripts to import/export GeoParquet files --- scripts/README.md | 32 ++ scripts/bigquery_to_parquet.py | 90 +++++ scripts/encoder.py | 144 ++++++++ scripts/parquet_to_bigquery.py | 109 ++++++ scripts/poetry.lock | 524 ++++++++++++++++++++++++++- scripts/pyproject.toml | 4 +- scripts/write_nz_building_outline.py | 132 +------ 7 files changed, 899 insertions(+), 136 deletions(-) create mode 100644 scripts/bigquery_to_parquet.py create mode 100644 scripts/encoder.py create mode 100644 scripts/parquet_to_bigquery.py diff --git a/scripts/README.md b/scripts/README.md index f0c3c7d..41f027d 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -44,3 +44,35 @@ virtualenv: ``` poetry run pip install -U --force-reinstall pygeos --no-binary pygeos ``` + +### BigQuery + +Convert a SQL query to parquet: + +```bash +poetry run python bigquery_to_parquet.py \ + --input-query "SELECT * FROM carto-do-public-data.carto.geography_usa_blockgroup_2019" \ + --primary-column geom \ + --partition-size 100 \ + --output geography_usa_blockgroup_2019 +``` + +Upload a parquet file or folder to BigQuery: +```bash +poetry run python parquet_to_bigquery.py \ + --input geography_usa_blockgroup_2019 \ + --output "cartodb-gcp-backend-data-team.alasarr.geography_usa_blockgroup_2019" +``` + +Instead of using folders, you can also work with a single file, but it might hit [bigquery limits](https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-parquet) when you upload it to BigQuery. For large parquet files you might get `UDF out of memory` errors. + +Convert a SQL query to single parquet file: + +```bash +poetry run python bigquery_to_parquet.py \ + --input-query "SELECT * FROM carto-do-public-data.carto.geography_usa_blockgroup_2019" \ + --primary-column geom \ + --partition-size 100 \ + --mode file \ + --output geography_usa_blockgroup_2019.parquet +``` diff --git a/scripts/bigquery_to_parquet.py b/scripts/bigquery_to_parquet.py new file mode 100644 index 0000000..4925fab --- /dev/null +++ b/scripts/bigquery_to_parquet.py @@ -0,0 +1,90 @@ + + +import click +import sys +import pyarrow.parquet as pq +import geopandas as gpd + +from encoder import AVAILABLE_COMPRESSIONS, Edges, PathType, geopandas_to_arrow +from pathlib import Path +from google.cloud import bigquery + +MODES = ["FILE", "FOLDER"] + +def read_gdf(input_query: str, primary_column: str): + client = bigquery.Client() + df = client.query(input_query).to_dataframe() + df[primary_column] = gpd.GeoSeries.from_wkt(df[primary_column]) + return gpd.GeoDataFrame(df, geometry=primary_column, crs="EPSG:4326") + +@click.command() +@click.option( + "-q", + "--input-query", + type=str, + help="SQL query of the data to export", + required=True, +) +@click.option( + "--primary-column", + type=str, + help="The primary column name with geometry data", + required=True, +) +@click.option( + "-o", + "--output", + type=PathType(file_okay=True, dir_okay=True, writable=True), + help="Path to output", + required=True, +) +@click.option( + "-m", + "--mode", + type=click.Choice(MODES, case_sensitive=False), + help="Mode to use FILE or FOLDER", + default="FOLDER", + show_default=True +) +@click.option( + "--compression", + type=click.Choice(AVAILABLE_COMPRESSIONS, case_sensitive=False), + default="SNAPPY", + help="Compression codec to use when writing to Parquet.", + show_default=True, +) +@click.option( + 
"--partition-size", + type=int, + default=5000, + help="Number of records per partition. Ignored if --single-file is provided.", + show_default=True, +) +def main(input_query: str, primary_column: str, output: Path, mode: str, compression: str , partition_size: int): + print("Reading data from BigQuery", file=sys.stderr) + + if mode.upper() == 'FOLDER': + gdf = ( + read_gdf(input_query, primary_column) + .assign(__partition__= lambda x: x.index // partition_size) + ) + else: + gdf = read_gdf(input_query, primary_column) + + print("Finished reading", file=sys.stderr) + print("Starting conversion to Arrow", file=sys.stderr) + arrow_table = geopandas_to_arrow(gdf, Edges.SPHERICAL) + print("Finished conversion to Arrow", file=sys.stderr) + + print("Starting write to Parquet", file=sys.stderr) + + if mode.upper() == 'FOLDER': + # We need to export to multiple files, because a single file might hit bigquery limits (UDF out of memory). https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-parquet + pq.write_to_dataset(arrow_table, root_path=output, partition_cols=['__partition__'], compression=compression) + else: + pq.write_table(arrow_table, output, compression=compression) + + print("Finished write to Parquet", file=sys.stderr) + +if __name__ == "__main__": + main() diff --git a/scripts/encoder.py b/scripts/encoder.py new file mode 100644 index 0000000..4db7dab --- /dev/null +++ b/scripts/encoder.py @@ -0,0 +1,144 @@ +import json +from enum import Enum +from pathlib import Path +from typing import Any, Dict, List + +import click +import geopandas as gpd +import numpy as np +import pandas as pd +import pyarrow as pa +import pyarrow.parquet as pq +import pygeos +from numpy.typing import NDArray + +GEOPARQUET_VERSION = "0.4.0" +AVAILABLE_COMPRESSIONS = ["NONE", "SNAPPY", "GZIP", "BROTLI", "LZ4", "ZSTD"] + +PygeosGeometryArray = NDArray[pygeos.Geometry] + +class Edges(Enum): + PLANAR = 'planar' + SPHERICAL = 'spherical' + +class PathType(click.Path): + """A Click path argument that returns a pathlib Path, not a string""" + + def convert(self, value, param, ctx): + return Path(super().convert(value, param, ctx)) + +class GeometryType(int, Enum): + """Pygeos (GEOS) geometry type mapping + From https://pygeos.readthedocs.io/en/latest/geometry.html?highlight=type#pygeos.geometry.get_type_id + """ + + Missing = -1 + Point = 0 + LineString = 1 + LinearRing = 2 + Polygon = 3 + MultiPoint = 4 + MultiLinestring = 5 + MultiPolygon = 6 + GeometryCollection = 7 + + +def _parse_to_pygeos(df: gpd.GeoDataFrame) -> Dict[str, PygeosGeometryArray]: + """Parse to pygeos geometry array + + This is split out from _create_metadata so that we don't have to create the pygeos + array twice: once for converting to wkb and another time for metadata handling. + """ + geometry_columns: Dict[str, PygeosGeometryArray] = {} + for col in df.columns[df.dtypes == "geometry"]: + geometry_columns[col] = df[col].array.data + + return geometry_columns + + +def _create_metadata( + df: gpd.GeoDataFrame, geometry_columns: Dict[str, PygeosGeometryArray], edges: Edges +) -> Dict[str, Any]: + """Create and encode geo metadata dict. 
+ + Parameters + ---------- + df : GeoDataFrame + + Returns + ------- + dict + """ + + # Construct metadata for each geometry + column_metadata = {} + for col, geometry_array in geometry_columns.items(): + geometry_type = _get_geometry_type(geometry_array) + bbox = list(pygeos.total_bounds(geometry_array)) + + series = df[col] + column_metadata[col] = { + "encoding": "WKB", + "geometry_type": geometry_type, + "crs": series.crs.to_json_dict() if series.crs else None, + # We don't specify orientation for now + # "orientation" + "edges": edges.value, + "bbox": bbox, + # I don't know how to get the epoch from a pyproj CRS, and if it's relevant + # here + # "epoch": + } + + return { + "version": GEOPARQUET_VERSION, + "primary_column": df._geometry_column_name, + "columns": column_metadata, + # "creator": {"library": "geopandas", "version": geopandas.__version__}, + } + + +def _get_geometry_type(pygeos_geoms: PygeosGeometryArray) -> List[str]: + type_ids = pygeos.get_type_id(pygeos_geoms) + unique_type_ids = set(type_ids) + + geom_type_names: List[str] = [] + for type_id in unique_type_ids: + geom_type_names.append(GeometryType(type_id).name) + + return geom_type_names + + +def _encode_metadata(metadata: Dict) -> bytes: + """Encode metadata dict to UTF-8 JSON string + + Parameters + ---------- + metadata : dict + + Returns + ------- + UTF-8 encoded JSON string + """ + # Remove unnecessary whitespace in JSON metadata + # https://stackoverflow.com/a/33233406 + return json.dumps(metadata, separators=(',', ':')).encode("utf-8") + + +def geopandas_to_arrow(df: gpd.GeoDataFrame, edges:Edges = Edges.PLANAR) -> pa.Table: + geometry_columns = _parse_to_pygeos(df) + geo_metadata = _create_metadata(df, geometry_columns, edges) + + df = pd.DataFrame(df) + for col, geometry_array in geometry_columns.items(): + df[col] = pygeos.to_wkb(geometry_array) + + table = pa.Table.from_pandas(df, preserve_index=False) + + metadata = table.schema.metadata + metadata.update({b"geo": _encode_metadata(geo_metadata)}) + return table.replace_schema_metadata(metadata) + + + + diff --git a/scripts/parquet_to_bigquery.py b/scripts/parquet_to_bigquery.py new file mode 100644 index 0000000..39ffbfc --- /dev/null +++ b/scripts/parquet_to_bigquery.py @@ -0,0 +1,109 @@ +import click +import json +import glob +import pyarrow.parquet as pq + +from encoder import PathType +from pathlib import Path +from google.cloud import bigquery +from encoder import Edges + +def upload_parquet_file(client: bigquery.Client, file: Path, write_disposition: str, dst: str): + """Upload a parquet file to BigQuery""" + job_config = bigquery.LoadJobConfig( + source_format = bigquery.SourceFormat.PARQUET, + write_disposition = write_disposition, + ) + + with open(file, "rb") as source_file: + print(f"Uploading file {file}") + job = client.load_table_from_file(source_file, dst, job_config=job_config) + + job.result() # Waits for the job to complete. 
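+
+# The GeoParquet metadata travels in the Parquet schema metadata under the
+# b"geo" key. validate_metadata below only accepts geometry columns that are
+# WKB-encoded with spherical edges, because main() later converts them to
+# GEOGRAPHY with ST_GEOGFROMWKB.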
+ +def validate_metadata(metadata): + """Validate metadata""" + if metadata is None or b"geo" not in metadata: + raise ValueError("Missing geo metadata") + + geo = json.loads(metadata[b"geo"]) + + if (geo["primary_column"] not in geo["columns"]): + raise ValueError("Primary column not found") + + for column_name, column_meta in geo["columns"].items(): + encoding = column_meta["encoding"] + edges = column_meta["edges"] + if encoding != 'WKB': + raise ValueError(f"Not supported encoding {encoding} for column {column_name}") + if edges != Edges.SPHERICAL.value: + raise ValueError(f"Only spherical edges are supported") + +@click.command() +@click.option( + "-i", + "--input", + type=PathType(exists=True, readable=True), + help="Path to a parquet file or a folder with multiple parquet files inside (it requires extension *.parquet).", + required=True, +) +@click.option( + "-o", + "--output", + type=str, + help="FQN of the destination table (project.dataset.table).", + required=True, +) +def main(input: Path, output: str): + primary_column = None + tmp_output = f"{output}_tmp" + metadata = None + client = bigquery.Client() + + if input.is_dir(): + # A folder is detected + first_file = True + + for file in glob.glob(f"{input}/**/*.parquet",recursive=True): + + if first_file: + # First file determines the schema and truncates the table + metadata = pq.read_schema(file).metadata + validate_metadata(metadata) + write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE + first_file = False + else: + # other files will append + write_disposition = bigquery.WriteDisposition.WRITE_APPEND + + upload_parquet_file(client, file, write_disposition, tmp_output) + else: + # Single file mode + metadata = pq.read_schema(input).metadata + validate_metadata(metadata) + write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE + upload_parquet_file(client, input, write_disposition, tmp_output) + + metadata_geo = json.loads(metadata[b"geo"]) + primary_column = metadata_geo["primary_column"] + geo_columns = list(metadata_geo["columns"].keys()) + wkb_columns_expression = map(lambda c: f"ST_GEOGFROMWKB({c}) as {c}", geo_columns) + + ## Convert to geography the file(s) imported + sql = f""" + DROP TABLE IF EXISTS {output}; + CREATE TABLE {output} CLUSTER BY {primary_column} + AS SELECT * EXCEPT({", ".join(geo_columns)}), + {", ".join(wkb_columns_expression)} + FROM {tmp_output}; + DROP TABLE IF EXISTS {tmp_output}; + """ + + query_job = client.query(sql) + query_job.result() # Waits for job to complete. + + table = client.get_table(output) + print(f"Loaded {table.num_rows} rows and {len(table.schema)} columns to {output}") + +if __name__ == "__main__": + main() diff --git a/scripts/poetry.lock b/scripts/poetry.lock index 318b044..43e536a 100644 --- a/scripts/poetry.lock +++ b/scripts/poetry.lock @@ -64,6 +64,14 @@ d = ["aiohttp (>=3.7.4)"] jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] uvloop = ["uvloop (>=0.15.2)"] +[[package]] +name = "cachetools" +version = "5.2.0" +description = "Extensible memoizing collections and decorators" +category = "main" +optional = false +python-versions = "~=3.7" + [[package]] name = "certifi" version = "2021.10.8" @@ -83,6 +91,17 @@ python-versions = "*" [package.dependencies] pycparser = "*" +[[package]] +name = "charset-normalizer" +version = "2.0.12" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
+category = "main" +optional = false +python-versions = ">=3.5.0" + +[package.extras] +unicode_backport = ["unicodedata2"] + [[package]] name = "click" version = "8.1.2" @@ -130,6 +149,20 @@ category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +[[package]] +name = "db-dtypes" +version = "1.0.2" +description = "Pandas Data Types for SQL systems (BigQuery, Spanner)" +category = "main" +optional = false +python-versions = ">=3.6, <3.11" + +[package.dependencies] +numpy = ">=1.16.6,<2.0dev" +packaging = ">=17.0" +pandas = ">=0.24.2,<2.0dev" +pyarrow = ">=3.0.0,<9.0dev" + [[package]] name = "debugpy" version = "1.6.0" @@ -199,6 +232,185 @@ pandas = ">=0.25.0" pyproj = ">=2.2.0" shapely = ">=1.6" +[[package]] +name = "google-api-core" +version = "2.8.1" +description = "Google API client core library" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +google-auth = ">=1.25.0,<3.0dev" +googleapis-common-protos = ">=1.56.2,<2.0dev" +grpcio = {version = ">=1.33.2,<2.0dev", optional = true, markers = "extra == \"grpc\""} +grpcio-status = {version = ">=1.33.2,<2.0dev", optional = true, markers = "extra == \"grpc\""} +protobuf = ">=3.15.0,<4.0.0dev" +requests = ">=2.18.0,<3.0.0dev" + +[package.extras] +grpc = ["grpcio (>=1.33.2,<2.0dev)", "grpcio-status (>=1.33.2,<2.0dev)"] +grpcgcp = ["grpcio-gcp (>=0.2.2,<1.0dev)"] +grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0dev)"] + +[[package]] +name = "google-auth" +version = "2.7.0" +description = "Google Authentication Library" +category = "main" +optional = false +python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*" + +[package.dependencies] +cachetools = ">=2.0.0,<6.0" +pyasn1-modules = ">=0.2.1" +rsa = {version = ">=3.1.4,<5", markers = "python_version >= \"3.6\""} +six = ">=1.9.0" + +[package.extras] +aiohttp = ["requests (>=2.20.0,<3.0.0dev)", "aiohttp (>=3.6.2,<4.0.0dev)"] +enterprise_cert = ["cryptography (==36.0.2)", "pyopenssl (==22.0.0)"] +pyopenssl = ["pyopenssl (>=20.0.0)"] +reauth = ["pyu2f (>=0.1.5)"] + +[[package]] +name = "google-cloud-bigquery" +version = "3.2.0" +description = "Google BigQuery API client library" +category = "main" +optional = false +python-versions = ">=3.6, <3.11" + +[package.dependencies] +google-api-core = {version = ">=1.31.5,<2.0.0 || >2.3.0,<3.0.0dev", extras = ["grpc"]} +google-cloud-bigquery-storage = ">=2.0.0,<3.0.0dev" +google-cloud-core = ">=1.4.1,<3.0.0dev" +google-resumable-media = ">=0.6.0,<3.0dev" +grpcio = ">=1.38.1,<2.0dev" +packaging = ">=14.3,<22.0.0dev" +proto-plus = ">=1.15.0,<2.0.0dev" +protobuf = ">=3.12.0,<4.0.0dev" +pyarrow = ">=3.0.0,<9.0dev" +python-dateutil = ">=2.7.2,<3.0dev" +requests = ">=2.18.0,<3.0.0dev" + +[package.extras] +all = ["pandas (>=1.0.0)", "db-dtypes (>=0.3.0,<2.0.0dev)", "geopandas (>=0.9.0,<1.0dev)", "Shapely (>=1.6.0,<2.0dev)", "ipython (>=7.0.1,!=8.1.0)", "tqdm (>=4.7.4,<5.0.0dev)", "opentelemetry-api (>=1.1.0)", "opentelemetry-sdk (>=1.1.0)", "opentelemetry-instrumentation (>=0.20b0)"] +geopandas = ["geopandas (>=0.9.0,<1.0dev)", "Shapely (>=1.6.0,<2.0dev)"] +ipython = ["ipython (>=7.0.1,!=8.1.0)"] +opentelemetry = ["opentelemetry-api (>=1.1.0)", "opentelemetry-sdk (>=1.1.0)", "opentelemetry-instrumentation (>=0.20b0)"] +pandas = ["pandas (>=1.0.0)", "db-dtypes (>=0.3.0,<2.0.0dev)"] +tqdm = ["tqdm (>=4.7.4,<5.0.0dev)"] + +[[package]] +name = "google-cloud-bigquery-storage" +version = "2.13.2" +description = "BigQuery Storage API API client library" +category = "main" 
+optional = false +python-versions = ">=3.6" + +[package.dependencies] +google-api-core = {version = ">=1.31.5,<2.0.0 || >2.3.0,<3.0.0dev", extras = ["grpc"]} +proto-plus = ">=1.18.0,<2.0.0dev" +protobuf = ">=3.19.0,<4.0.0dev" + +[package.extras] +fastavro = ["fastavro (>=0.21.2)"] +pandas = ["pandas (>=0.21.1)"] +pyarrow = ["pyarrow (>=0.15.0)"] +tests = ["freezegun"] + +[[package]] +name = "google-cloud-core" +version = "2.3.1" +description = "Google Cloud API client core library" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +google-api-core = ">=1.31.5,<2.0.0 || >2.3.0,<3.0.0dev" +google-auth = ">=1.25.0,<3.0dev" + +[package.extras] +grpc = ["grpcio (>=1.8.2,<2.0dev)"] + +[[package]] +name = "google-crc32c" +version = "1.3.0" +description = "A python wrapper of the C library 'Google CRC32C'" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.extras] +testing = ["pytest"] + +[[package]] +name = "google-resumable-media" +version = "2.3.3" +description = "Utilities for Google Media Downloads and Resumable Uploads" +category = "main" +optional = false +python-versions = ">= 3.6" + +[package.dependencies] +google-crc32c = ">=1.0,<2.0dev" + +[package.extras] +aiohttp = ["aiohttp (>=3.6.2,<4.0.0dev)"] +requests = ["requests (>=2.18.0,<3.0.0dev)"] + +[[package]] +name = "googleapis-common-protos" +version = "1.56.2" +description = "Common protobufs used in Google APIs" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +protobuf = ">=3.15.0,<4.0.0dev" + +[package.extras] +grpc = ["grpcio (>=1.0.0,<2.0.0dev)"] + +[[package]] +name = "grpcio" +version = "1.46.3" +description = "HTTP/2-based RPC framework" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +six = ">=1.5.2" + +[package.extras] +protobuf = ["grpcio-tools (>=1.46.3)"] + +[[package]] +name = "grpcio-status" +version = "1.46.3" +description = "Status proto mapping for gRPC" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +googleapis-common-protos = ">=1.5.5" +grpcio = ">=1.46.3" +protobuf = ">=3.12.0" + +[[package]] +name = "idna" +version = "3.3" +description = "Internationalized Domain Names in Applications (IDNA)" +category = "main" +optional = false +python-versions = ">=3.5" + [[package]] name = "ipykernel" version = "6.13.0" @@ -376,7 +588,7 @@ python-versions = ">=3.8" name = "packaging" version = "21.3" description = "Core utilities for Python packages" -category = "dev" +category = "main" optional = false python-versions = ">=3.6" @@ -466,6 +678,28 @@ python-versions = ">=3.6.2" [package.dependencies] wcwidth = "*" +[[package]] +name = "proto-plus" +version = "1.20.5" +description = "Beautiful, Pythonic protocol buffers." 
+category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +protobuf = ">=3.19.0,<4.0.0dev" + +[package.extras] +testing = ["google-api-core[grpc] (>=1.31.5)"] + +[[package]] +name = "protobuf" +version = "3.20.1" +description = "Protocol Buffers" +category = "main" +optional = false +python-versions = ">=3.7" + [[package]] name = "psutil" version = "5.9.0" @@ -515,6 +749,25 @@ python-versions = ">=3.7" [package.dependencies] numpy = ">=1.16.6" +[[package]] +name = "pyasn1" +version = "0.4.8" +description = "ASN.1 types and codecs" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "pyasn1-modules" +version = "0.2.8" +description = "A collection of ASN.1-based protocols modules." +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +pyasn1 = ">=0.4.6,<0.5.0" + [[package]] name = "pycparser" version = "2.21" @@ -550,7 +803,7 @@ python-versions = ">=3.6" name = "pyparsing" version = "3.0.8" description = "pyparsing module - Classes and methods to define and execute parsing grammars" -category = "dev" +category = "main" optional = false python-versions = ">=3.6.8" @@ -607,6 +860,35 @@ python-versions = ">=3.6" cffi = {version = "*", markers = "implementation_name == \"pypy\""} py = {version = "*", markers = "implementation_name == \"pypy\""} +[[package]] +name = "requests" +version = "2.28.0" +description = "Python HTTP for Humans." +category = "main" +optional = false +python-versions = ">=3.7, <4" + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2.0.0,<2.1.0" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<1.27" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use_chardet_on_py3 = ["chardet (>=3.0.2,<5)"] + +[[package]] +name = "rsa" +version = "4.8" +description = "Pure-Python RSA implementation" +category = "main" +optional = false +python-versions = ">=3.6,<4" + +[package.dependencies] +pyasn1 = ">=0.1.3" + [[package]] name = "shapely" version = "1.8.1.post1" @@ -679,6 +961,19 @@ category = "dev" optional = false python-versions = ">=3.7" +[[package]] +name = "urllib3" +version = "1.26.9" +description = "HTTP library with thread-safe connection pooling, file post, and more." 
+category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4" + +[package.extras] +brotli = ["brotlicffi (>=0.8.0)", "brotli (>=1.0.9)", "brotlipy (>=0.6.0)"] +secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "ipaddress"] +socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] + [[package]] name = "wcwidth" version = "0.2.5" @@ -689,8 +984,8 @@ python-versions = "*" [metadata] lock-version = "1.1" -python-versions = "^3.8" -content-hash = "f689d4401867dfd7dfff14ee8b613834a66c8329f19793d6724571046409cb09" +python-versions = ">=3.8,<3.11" +content-hash = "7b794b4e07c81ea6a2ed116e82fcd6db5084e50989555c57ae66b1c0ae3270a4" [metadata.files] appnope = [ @@ -734,6 +1029,10 @@ black = [ {file = "black-22.3.0-py3-none-any.whl", hash = "sha256:bc58025940a896d7e5356952228b68f793cf5fcb342be703c3a2669a1488cb72"}, {file = "black-22.3.0.tar.gz", hash = "sha256:35020b8886c022ced9282b51b5a875b6d1ab0c387b31a065b84db7c33085ca79"}, ] +cachetools = [ + {file = "cachetools-5.2.0-py3-none-any.whl", hash = "sha256:f9f17d2aec496a9aa6b76f53e3b614c965223c061982d434d160f930c698a9db"}, + {file = "cachetools-5.2.0.tar.gz", hash = "sha256:6a94c6402995a99c3970cc7e4884bb60b4a8639938157eeed436098bf9831757"}, +] certifi = [ {file = "certifi-2021.10.8-py2.py3-none-any.whl", hash = "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569"}, {file = "certifi-2021.10.8.tar.gz", hash = "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872"}, @@ -790,6 +1089,10 @@ cffi = [ {file = "cffi-1.15.0-cp39-cp39-win_amd64.whl", hash = "sha256:3773c4d81e6e818df2efbc7dd77325ca0dcb688116050fb2b3011218eda36139"}, {file = "cffi-1.15.0.tar.gz", hash = "sha256:920f0d66a896c2d99f0adbb391f990a84091179542c205fa53ce5787aff87954"}, ] +charset-normalizer = [ + {file = "charset-normalizer-2.0.12.tar.gz", hash = "sha256:2857e29ff0d34db842cd7ca3230549d1a697f96ee6d3fb071cfa6c7393832597"}, + {file = "charset_normalizer-2.0.12-py3-none-any.whl", hash = "sha256:6881edbebdb17b39b4eaaa821b438bf6eddffb4468cf344f09f89def34a8b1df"}, +] click = [ {file = "click-8.1.2-py3-none-any.whl", hash = "sha256:24e1a4a9ec5bf6299411369b208c1df2188d9eb8d916302fe6bf03faed227f1e"}, {file = "click-8.1.2.tar.gz", hash = "sha256:479707fe14d9ec9a0757618b7a100a0ae4c4e236fac5b7f80ca68028141a1a72"}, @@ -806,6 +1109,10 @@ colorama = [ {file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"}, {file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"}, ] +db-dtypes = [ + {file = "db-dtypes-1.0.2.tar.gz", hash = "sha256:7f1b5d9a75309e22d24b85914383f819636c3d487238c0ad4fa304879e04303e"}, + {file = "db_dtypes-1.0.2-py2.py3-none-any.whl", hash = "sha256:1016122f37ee077f5d984dd00b4b25c72b39b579997739d8deb437225c48b5d7"}, +] debugpy = [ {file = "debugpy-1.6.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:eb1946efac0c0c3d411cea0b5ac772fbde744109fd9520fb0c5a51979faf05ad"}, {file = "debugpy-1.6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:e3513399177dd37af4c1332df52da5da1d0c387e5927dc4c0709e26ee7302e8f"}, @@ -855,6 +1162,143 @@ geopandas = [ {file = "geopandas-0.10.2-py2.py3-none-any.whl", hash = "sha256:1722853464441b603d9be3d35baf8bde43831424a891e82a8545eb8997b65d6c"}, {file = "geopandas-0.10.2.tar.gz", hash = 
"sha256:efbf47e70732e25c3727222019c92b39b2e0a66ebe4fe379fbe1aa43a2a871db"}, ] +google-api-core = [ + {file = "google-api-core-2.8.1.tar.gz", hash = "sha256:958024c6aa3460b08f35741231076a4dd9a4c819a6a39d44da9627febe8b28f0"}, + {file = "google_api_core-2.8.1-py3-none-any.whl", hash = "sha256:ce1daa49644b50398093d2a9ad886501aa845e2602af70c3001b9f402a9d7359"}, +] +google-auth = [ + {file = "google-auth-2.7.0.tar.gz", hash = "sha256:8a954960f852d5f19e6af14dd8e75c20159609e85d8db37e4013cc8c3824a7e1"}, + {file = "google_auth-2.7.0-py2.py3-none-any.whl", hash = "sha256:df549a1433108801b11bdcc0e312eaf0d5f0500db42f0523e4d65c78722e8475"}, +] +google-cloud-bigquery = [ + {file = "google-cloud-bigquery-3.2.0.tar.gz", hash = "sha256:97fb4306e324cb86909ce32a9944e81d21fbcb28a780c5cdf01b6e4d47d4c322"}, + {file = "google_cloud_bigquery-3.2.0-py2.py3-none-any.whl", hash = "sha256:5ab006a2c422df5feb6a3ccdec2feb01066a33a12c92752f92efa9abf47acf35"}, +] +google-cloud-bigquery-storage = [ + {file = "google-cloud-bigquery-storage-2.13.2.tar.gz", hash = "sha256:ac4ce734e805aca7e73a69617b14d31e690a113446136e92292a74e1d8b3e3b2"}, + {file = "google_cloud_bigquery_storage-2.13.2-py2.py3-none-any.whl", hash = "sha256:b3f72c66e6c92afdb8ba76b81cceba1969d217a205c35acb4eb1a5257b7fd21d"}, +] +google-cloud-core = [ + {file = "google-cloud-core-2.3.1.tar.gz", hash = "sha256:34334359cb04187bdc80ddcf613e462dfd7a3aabbc3fe4d118517ab4b9303d53"}, + {file = "google_cloud_core-2.3.1-py2.py3-none-any.whl", hash = "sha256:113ba4f492467d5bd442c8d724c1a25ad7384045c3178369038840ecdd19346c"}, +] +google-crc32c = [ + {file = "google-crc32c-1.3.0.tar.gz", hash = "sha256:276de6273eb074a35bc598f8efbc00c7869c5cf2e29c90748fccc8c898c244df"}, + {file = "google_crc32c-1.3.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cb6994fff247987c66a8a4e550ef374671c2b82e3c0d2115e689d21e511a652d"}, + {file = "google_crc32c-1.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c9da0a39b53d2fab3e5467329ed50e951eb91386e9d0d5b12daf593973c3b168"}, + {file = "google_crc32c-1.3.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:eb0b14523758e37802f27b7f8cd973f5f3d33be7613952c0df904b68c4842f0e"}, + {file = "google_crc32c-1.3.0-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:95c68a4b9b7828ba0428f8f7e3109c5d476ca44996ed9a5f8aac6269296e2d59"}, + {file = "google_crc32c-1.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c3cf890c3c0ecfe1510a452a165431b5831e24160c5fcf2071f0f85ca5a47cd"}, + {file = "google_crc32c-1.3.0-cp310-cp310-win32.whl", hash = "sha256:3bbce1be3687bbfebe29abdb7631b83e6b25da3f4e1856a1611eb21854b689ea"}, + {file = "google_crc32c-1.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:c124b8c8779bf2d35d9b721e52d4adb41c9bfbde45e6a3f25f0820caa9aba73f"}, + {file = "google_crc32c-1.3.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:42ae4781333e331a1743445931b08ebdad73e188fd554259e772556fc4937c48"}, + {file = "google_crc32c-1.3.0-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:ff71073ebf0e42258a42a0b34f2c09ec384977e7f6808999102eedd5b49920e3"}, + {file = "google_crc32c-1.3.0-cp36-cp36m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:fe31de3002e7b08eb20823b3735b97c86c5926dd0581c7710a680b418a8709d4"}, + {file = "google_crc32c-1.3.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dd7760a88a8d3d705ff562aa93f8445ead54f58fd482e4f9e2bafb7e177375d4"}, + {file = 
"google_crc32c-1.3.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a0b9e622c3b2b8d0ce32f77eba617ab0d6768b82836391e4f8f9e2074582bf02"}, + {file = "google_crc32c-1.3.0-cp36-cp36m-win32.whl", hash = "sha256:779cbf1ce375b96111db98fca913c1f5ec11b1d870e529b1dc7354b2681a8c3a"}, + {file = "google_crc32c-1.3.0-cp36-cp36m-win_amd64.whl", hash = "sha256:04e7c220798a72fd0f08242bc8d7a05986b2a08a0573396187fd32c1dcdd58b3"}, + {file = "google_crc32c-1.3.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e7a539b9be7b9c00f11ef16b55486141bc2cdb0c54762f84e3c6fc091917436d"}, + {file = "google_crc32c-1.3.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:ca60076c388728d3b6ac3846842474f4250c91efbfe5afa872d3ffd69dd4b318"}, + {file = "google_crc32c-1.3.0-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:05340b60bf05b574159e9bd940152a47d38af3fb43803ffe71f11d704b7696a6"}, + {file = "google_crc32c-1.3.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:318f73f5484b5671f0c7f5f63741ab020a599504ed81d209b5c7129ee4667407"}, + {file = "google_crc32c-1.3.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:9f58099ad7affc0754ae42e6d87443299f15d739b0ce03c76f515153a5cda06c"}, + {file = "google_crc32c-1.3.0-cp37-cp37m-win32.whl", hash = "sha256:f52a4ad2568314ee713715b1e2d79ab55fab11e8b304fd1462ff5cccf4264b3e"}, + {file = "google_crc32c-1.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:bab4aebd525218bab4ee615786c4581952eadc16b1ff031813a2fd51f0cc7b08"}, + {file = "google_crc32c-1.3.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:dda4d8a3bb0b50f540f6ff4b6033f3a74e8bf0bd5320b70fab2c03e512a62812"}, + {file = "google_crc32c-1.3.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:fec221a051150eeddfdfcff162e6db92c65ecf46cb0f7bb1bf812a1520ec026b"}, + {file = "google_crc32c-1.3.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:226f2f9b8e128a6ca6a9af9b9e8384f7b53a801907425c9a292553a3a7218ce0"}, + {file = "google_crc32c-1.3.0-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a7f9cbea4245ee36190f85fe1814e2d7b1e5f2186381b082f5d59f99b7f11328"}, + {file = "google_crc32c-1.3.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6a4db36f9721fdf391646685ecffa404eb986cbe007a3289499020daf72e88a2"}, + {file = "google_crc32c-1.3.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:12674a4c3b56b706153a358eaa1018c4137a5a04635b92b4652440d3d7386206"}, + {file = "google_crc32c-1.3.0-cp38-cp38-win32.whl", hash = "sha256:650e2917660e696041ab3dcd7abac160b4121cd9a484c08406f24c5964099829"}, + {file = "google_crc32c-1.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:58be56ae0529c664cc04a9c76e68bb92b091e0194d6e3c50bea7e0f266f73713"}, + {file = "google_crc32c-1.3.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:96a8918a78d5d64e07c8ea4ed2bc44354e3f93f46a4866a40e8db934e4c0d74b"}, + {file = "google_crc32c-1.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:13af315c3a0eec8bb8b8d80b8b128cb3fcd17d7e4edafc39647846345a3f003a"}, + {file = "google_crc32c-1.3.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:6311853aa2bba4064d0c28ca54e7b50c4d48e3de04f6770f6c60ebda1e975267"}, + {file = "google_crc32c-1.3.0-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ed447680ff21c14aaceb6a9f99a5f639f583ccfe4ce1a5e1d48eb41c3d6b3217"}, + {file = 
"google_crc32c-1.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d1c1d6236feab51200272d79b3d3e0f12cf2cbb12b208c835b175a21efdb0a73"}, + {file = "google_crc32c-1.3.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:e0f1ff55dde0ebcfbef027edc21f71c205845585fffe30d4ec4979416613e9b3"}, + {file = "google_crc32c-1.3.0-cp39-cp39-win32.whl", hash = "sha256:fbd60c6aaa07c31d7754edbc2334aef50601b7f1ada67a96eb1eb57c7c72378f"}, + {file = "google_crc32c-1.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:127f9cc3ac41b6a859bd9dc4321097b1a4f6aa7fdf71b4f9227b9e3ebffb4422"}, + {file = "google_crc32c-1.3.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:fc28e0db232c62ca0c3600884933178f0825c99be4474cdd645e378a10588125"}, + {file = "google_crc32c-1.3.0-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:1926fd8de0acb9d15ee757175ce7242e235482a783cd4ec711cc999fc103c24e"}, + {file = "google_crc32c-1.3.0-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:5da2c81575cc3ccf05d9830f9e8d3c70954819ca9a63828210498c0774fda1a3"}, + {file = "google_crc32c-1.3.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:891f712ce54e0d631370e1f4997b3f182f3368179198efc30d477c75d1f44942"}, + {file = "google_crc32c-1.3.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:7f6fe42536d9dcd3e2ffb9d3053f5d05221ae3bbcefbe472bdf2c71c793e3183"}, +] +google-resumable-media = [ + {file = "google-resumable-media-2.3.3.tar.gz", hash = "sha256:27c52620bd364d1c8116eaac4ea2afcbfb81ae9139fb3199652fcac1724bfb6c"}, + {file = "google_resumable_media-2.3.3-py2.py3-none-any.whl", hash = "sha256:5b52774ea7a829a8cdaa8bd2d4c3d4bc660c91b30857ab2668d0eb830f4ea8c5"}, +] +googleapis-common-protos = [ + {file = "googleapis-common-protos-1.56.2.tar.gz", hash = "sha256:b09b56f5463070c2153753ef123f07d2e49235e89148e9b2459ec8ed2f68d7d3"}, + {file = "googleapis_common_protos-1.56.2-py2.py3-none-any.whl", hash = "sha256:023eaea9d8c1cceccd9587c6af6c20f33eeeb05d4148670f2b0322dc1511700c"}, +] +grpcio = [ + {file = "grpcio-1.46.3-cp310-cp310-linux_armv7l.whl", hash = "sha256:4c05dbc164c2d3015109292ffeed68292807a6cb1225f9a36699bf2166634908"}, + {file = "grpcio-1.46.3-cp310-cp310-macosx_10_10_universal2.whl", hash = "sha256:c6a460b6aaf43428d48fececad864cc562458b944df80568e490d985d8576292"}, + {file = "grpcio-1.46.3-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:707b85fa0cf606a9ab02246bd3142c76e154f1c30f00f7346b2afa3d0b315d5a"}, + {file = "grpcio-1.46.3-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8c63e7c61c0b06f838e8f45ffd3a7c68a520c4c026b2e0e8b1ad29c456d0f859"}, + {file = "grpcio-1.46.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c6fe85e5873d9784ab82cf261d9fc07ed67a4459ba69fbe1187ef8b8e3d9e30e"}, + {file = "grpcio-1.46.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:df980c4901a92ca649e18036ff67c7c8cad239b2759c2472694f7ab0f0b4ffb9"}, + {file = "grpcio-1.46.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7b59982e405159385d5796aa1e0817ec83affb3eb4c2a5b7ca39413d17d7e332"}, + {file = "grpcio-1.46.3-cp310-cp310-win32.whl", hash = "sha256:6d51fa98bd40d4593f819a3fec8a078a192958d24f84c3daf15b5ad7705d4c48"}, + {file = "grpcio-1.46.3-cp310-cp310-win_amd64.whl", hash = "sha256:e9bba429eb743471715e6dadf006a70a77cb6afb065aa4a6eaa9efd76b09e336"}, + {file = "grpcio-1.46.3-cp36-cp36m-linux_armv7l.whl", hash = 
"sha256:a898b0f13bda2dfe786952cc1ea705762fa6c3ae799b4bb0525d7821605ae968"}, + {file = "grpcio-1.46.3-cp36-cp36m-macosx_10_10_x86_64.whl", hash = "sha256:9014aee70e29911008d2f388011cabf2c7fe4fe29918ce5f71513a660494069a"}, + {file = "grpcio-1.46.3-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:9c97106134de70f8323b12738ac0adf0615688b69253002910d0c5d42d202a77"}, + {file = "grpcio-1.46.3-cp36-cp36m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:d41ea8efb87b1ae4e576b13d94f2b470297a1495ae6b2c9d1047952731bf168f"}, + {file = "grpcio-1.46.3-cp36-cp36m-manylinux_2_17_aarch64.whl", hash = "sha256:ab18e85082003d7883a4d069065436e61cb27c2c2150e7965ce93658f17bc8da"}, + {file = "grpcio-1.46.3-cp36-cp36m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:307ff1d6237d5c383196660a12db021c20280227f9f4423d88d6b2ab20c8b1d0"}, + {file = "grpcio-1.46.3-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c9106ef35239767b3aa9dc1a79856ad499655f853fca9f92f9dd3182d646627"}, + {file = "grpcio-1.46.3-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:e0ae8e8523308bf7ab0b7d6aa686011de59b19fb06abb253f302d0b5da2a5905"}, + {file = "grpcio-1.46.3-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:4fd0aa30a938893060defd5f222604461db55f9a81a028b154479b91deac7074"}, + {file = "grpcio-1.46.3-cp36-cp36m-win32.whl", hash = "sha256:f7637b55662e56a64c07846bc0d2da6232a6e893b22c39790f2e41d03ac1a826"}, + {file = "grpcio-1.46.3-cp36-cp36m-win_amd64.whl", hash = "sha256:97801afa96a819f911d030b490dbea95b246de02433bac69c5acf150081686e4"}, + {file = "grpcio-1.46.3-cp37-cp37m-linux_armv7l.whl", hash = "sha256:3585a6fa3d97fc8f030bbf0e88185b5eb345a340f6732e165d5c22df54de5bc6"}, + {file = "grpcio-1.46.3-cp37-cp37m-macosx_10_10_x86_64.whl", hash = "sha256:dc6d15cbcceaebaacf2994280ed1c01d42b5772059b30afd8a76152e9d23daa4"}, + {file = "grpcio-1.46.3-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:e0486485d59d5865149010966ef3df99c5df97ab8b01f10e26f8759d6e10fafc"}, + {file = "grpcio-1.46.3-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:5210ec7a1638daa61da16487fbfafb3dbb7b8cd44382d9262316bbb58a5b1cf7"}, + {file = "grpcio-1.46.3-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:e278fa30d2b5652f7e43970c86ad34c639146443553678b746909aae204924dc"}, + {file = "grpcio-1.46.3-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7d4148f1f76516b01cccf2273b45bc706847f1560ccb55aa6e29df851e9ca8cc"}, + {file = "grpcio-1.46.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:01f3f7a6cdb111cf276ffff9c892fa32624e03999bac809d3f3d8321d98b6855"}, + {file = "grpcio-1.46.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:91aaccbe1c035ad2bcd1b8a25cebd11839070eb70fb6573e9d0197ddbca5d96b"}, + {file = "grpcio-1.46.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:26136c19d96e2138f04412101f3730d66f5f1515dc912ac0d415587c8208d826"}, + {file = "grpcio-1.46.3-cp37-cp37m-win32.whl", hash = "sha256:a8f40dafcdc3e0e378387953528eaf4e35758161f3b10d96199f12b11afbe2c2"}, + {file = "grpcio-1.46.3-cp37-cp37m-win_amd64.whl", hash = "sha256:a6bb52df85a4bd6d3bad16b4e7cc43efe95469b74a856c87a2c5bef496c9147f"}, + {file = "grpcio-1.46.3-cp38-cp38-linux_armv7l.whl", hash = "sha256:2334ceeab4084e80433693451452cba26afc1607a7974133af3b3635fc8aa935"}, + {file = "grpcio-1.46.3-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:2c96a6103caec84985bb2cffac2b261f8cac2641e7a70d4b43b7d08754a6cfe7"}, + {file = 
"grpcio-1.46.3-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:7a39d39da8855b03be2d7348387986bab6a322031fcc8b04fa5e72355e7b13a1"}, + {file = "grpcio-1.46.3-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:4caf87a06de88e3611a4610c57ef55b78801843d1f5a9e5fd6b75e887dad3340"}, + {file = "grpcio-1.46.3-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:ffbbb228e6fc6f85b34aac428eb76b4fc6591d771e487ce46eb16b4b7e18b91d"}, + {file = "grpcio-1.46.3-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0c89ae010c57333dd3c692e0892199a59df1ddfd467cdfea31f98331d0e8cf87"}, + {file = "grpcio-1.46.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:34b206cdf78dd1c14d93e10e7308750c36b4e6754d579895cba74341875e2fb5"}, + {file = "grpcio-1.46.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a19b3ecdb8ddf60e4b034def27636065e49ac1ee3c85854a16353cf52c2afd83"}, + {file = "grpcio-1.46.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:aac6e66acae82be5c99a0a40ab8f5733d7df76a04f242cf42ecc34cfb1e947bd"}, + {file = "grpcio-1.46.3-cp38-cp38-win32.whl", hash = "sha256:aff6d961d6bc5e34e12e148383671f8da5d17e47ed606ec15f483def3053b206"}, + {file = "grpcio-1.46.3-cp38-cp38-win_amd64.whl", hash = "sha256:71d46c2f3c0512bac3d658af3193e3d645c96123af56bd07a8416474c69df2cf"}, + {file = "grpcio-1.46.3-cp39-cp39-linux_armv7l.whl", hash = "sha256:5969f63f3cf92538f83f26949d393d9fc59de670f47cf7c2a0e1e0d30b770294"}, + {file = "grpcio-1.46.3-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:5f8134d4a7e76c8c6644bd3ce728b9894933575155d02c09922986d5d8d6e48c"}, + {file = "grpcio-1.46.3-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:53fff69fd4d315adddda226e7b71804d1f12adf3a4162126dc520725624a483a"}, + {file = "grpcio-1.46.3-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:3af2cc4e41f87d3b57f624b1b14321c1d0f030b191da60f9eeeda5448d83240c"}, + {file = "grpcio-1.46.3-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:5fb7779ae01c20c4fad5831e98003b3f036acfe6b77697d6a9baa0f9a7f14daf"}, + {file = "grpcio-1.46.3-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:56636ebf8db63ba50d272dfd73c92538950525120311676246f8f6a81b0aa144"}, + {file = "grpcio-1.46.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0a5012ba00cf8b7ce9e6ac2312ace0b0e16fe9502c18340c8c3ecb734a759831"}, + {file = "grpcio-1.46.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:be1679d814a292a701f45df324e25b060435dd13159e9b08a16e2a2396c4391c"}, + {file = "grpcio-1.46.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4faaba7db078a0001a8c1a4370d56dc454c03b4613b6acec01f14b90c8dd03cf"}, + {file = "grpcio-1.46.3-cp39-cp39-win32.whl", hash = "sha256:f5c6393fa645183ae858ebfbf72ab94e7ebafb5cd849dcf4ae8c53a83cce4e24"}, + {file = "grpcio-1.46.3-cp39-cp39-win_amd64.whl", hash = "sha256:158b90d4f1354f40e435f4c866057acc29a4364b214c31049c8b8c903646fbab"}, + {file = "grpcio-1.46.3.tar.gz", hash = "sha256:4b8fd8b1cd553635274b83cd984f0755e6779886eca53c1c71d48215962eb689"}, +] +grpcio-status = [ + {file = "grpcio-status-1.46.3.tar.gz", hash = "sha256:78442ac7d2813c56f9cc04f713efd7088596b10f88a4ddd09279211cc48402d5"}, + {file = "grpcio_status-1.46.3-py3-none-any.whl", hash = "sha256:5357dcd69e51ba3f7b86d57698bd44d2ef295528eb7219b237eb596183334d39"}, +] +idna = [ + {file = "idna-3.3-py3-none-any.whl", hash = "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff"}, + {file = "idna-3.3.tar.gz", hash 
= "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"}, +] ipykernel = [ {file = "ipykernel-6.13.0-py3-none-any.whl", hash = "sha256:2b0987af43c0d4b62cecb13c592755f599f96f29aafe36c01731aaa96df30d39"}, {file = "ipykernel-6.13.0.tar.gz", hash = "sha256:0e28273e290858393e86e152b104e5506a79c13d25b951ac6eca220051b4be60"}, @@ -968,6 +1412,36 @@ prompt-toolkit = [ {file = "prompt_toolkit-3.0.29-py3-none-any.whl", hash = "sha256:62291dad495e665fca0bda814e342c69952086afb0f4094d0893d357e5c78752"}, {file = "prompt_toolkit-3.0.29.tar.gz", hash = "sha256:bd640f60e8cecd74f0dc249713d433ace2ddc62b65ee07f96d358e0b152b6ea7"}, ] +proto-plus = [ + {file = "proto-plus-1.20.5.tar.gz", hash = "sha256:81794eb1be333c67986333948df70ebb8cdf538e039f8cfa92fd2a9d7176d405"}, + {file = "proto_plus-1.20.5-py3-none-any.whl", hash = "sha256:fa29fec8a91cf178bc1d8bf9263769421d2dba7787eae42b67235676e211c158"}, +] +protobuf = [ + {file = "protobuf-3.20.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3cc797c9d15d7689ed507b165cd05913acb992d78b379f6014e013f9ecb20996"}, + {file = "protobuf-3.20.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:ff8d8fa42675249bb456f5db06c00de6c2f4c27a065955917b28c4f15978b9c3"}, + {file = "protobuf-3.20.1-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:cd68be2559e2a3b84f517fb029ee611546f7812b1fdd0aa2ecc9bc6ec0e4fdde"}, + {file = "protobuf-3.20.1-cp310-cp310-win32.whl", hash = "sha256:9016d01c91e8e625141d24ec1b20fed584703e527d28512aa8c8707f105a683c"}, + {file = "protobuf-3.20.1-cp310-cp310-win_amd64.whl", hash = "sha256:32ca378605b41fd180dfe4e14d3226386d8d1b002ab31c969c366549e66a2bb7"}, + {file = "protobuf-3.20.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:9be73ad47579abc26c12024239d3540e6b765182a91dbc88e23658ab71767153"}, + {file = "protobuf-3.20.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:097c5d8a9808302fb0da7e20edf0b8d4703274d140fd25c5edabddcde43e081f"}, + {file = "protobuf-3.20.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e250a42f15bf9d5b09fe1b293bdba2801cd520a9f5ea2d7fb7536d4441811d20"}, + {file = "protobuf-3.20.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:cdee09140e1cd184ba9324ec1df410e7147242b94b5f8b0c64fc89e38a8ba531"}, + {file = "protobuf-3.20.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:af0ebadc74e281a517141daad9d0f2c5d93ab78e9d455113719a45a49da9db4e"}, + {file = "protobuf-3.20.1-cp37-cp37m-win32.whl", hash = "sha256:755f3aee41354ae395e104d62119cb223339a8f3276a0cd009ffabfcdd46bb0c"}, + {file = "protobuf-3.20.1-cp37-cp37m-win_amd64.whl", hash = "sha256:62f1b5c4cd6c5402b4e2d63804ba49a327e0c386c99b1675c8a0fefda23b2067"}, + {file = "protobuf-3.20.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:06059eb6953ff01e56a25cd02cca1a9649a75a7e65397b5b9b4e929ed71d10cf"}, + {file = "protobuf-3.20.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:cb29edb9eab15742d791e1025dd7b6a8f6fcb53802ad2f6e3adcb102051063ab"}, + {file = "protobuf-3.20.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:69ccfdf3657ba59569c64295b7d51325f91af586f8d5793b734260dfe2e94e2c"}, + {file = "protobuf-3.20.1-cp38-cp38-win32.whl", hash = "sha256:dd5789b2948ca702c17027c84c2accb552fc30f4622a98ab5c51fcfe8c50d3e7"}, + {file = "protobuf-3.20.1-cp38-cp38-win_amd64.whl", hash = "sha256:77053d28427a29987ca9caf7b72ccafee011257561259faba8dd308fda9a8739"}, + {file = "protobuf-3.20.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:6f50601512a3d23625d8a85b1638d914a0970f17920ff39cec63aaef80a93fb7"}, + {file = "protobuf-3.20.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:284f86a6207c897542d7e956eb243a36bb8f9564c1742b253462386e96c6b78f"}, + {file = "protobuf-3.20.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:7403941f6d0992d40161aa8bb23e12575637008a5a02283a930addc0508982f9"}, + {file = "protobuf-3.20.1-cp39-cp39-win32.whl", hash = "sha256:db977c4ca738dd9ce508557d4fce0f5aebd105e158c725beec86feb1f6bc20d8"}, + {file = "protobuf-3.20.1-cp39-cp39-win_amd64.whl", hash = "sha256:7e371f10abe57cee5021797126c93479f59fccc9693dafd6bd5633ab67808a91"}, + {file = "protobuf-3.20.1-py2.py3-none-any.whl", hash = "sha256:adfc6cf69c7f8c50fd24c793964eef18f0ac321315439d94945820612849c388"}, + {file = "protobuf-3.20.1.tar.gz", hash = "sha256:adc31566d027f45efe3f44eeb5b1f329da43891634d61c75a5944e9be6dd42c9"}, +] psutil = [ {file = "psutil-5.9.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:55ce319452e3d139e25d6c3f85a1acf12d1607ddedea5e35fb47a552c051161b"}, {file = "psutil-5.9.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:7336292a13a80eb93c21f36bde4328aa748a04b68c13d01dfddd67fc13fd0618"}, @@ -1046,6 +1520,36 @@ pyarrow = [ {file = "pyarrow-7.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:087769dac6e567d58d59b94c4f866b3356c00d3db5b261387ece47e7324c2150"}, {file = "pyarrow-7.0.0.tar.gz", hash = "sha256:da656cad3c23a2ebb6a307ab01d35fce22f7850059cffafcb90d12590f8f4f38"}, ] +pyasn1 = [ + {file = "pyasn1-0.4.8-py2.4.egg", hash = "sha256:fec3e9d8e36808a28efb59b489e4528c10ad0f480e57dcc32b4de5c9d8c9fdf3"}, + {file = "pyasn1-0.4.8-py2.5.egg", hash = "sha256:0458773cfe65b153891ac249bcf1b5f8f320b7c2ce462151f8fa74de8934becf"}, + {file = "pyasn1-0.4.8-py2.6.egg", hash = "sha256:5c9414dcfede6e441f7e8f81b43b34e834731003427e5b09e4e00e3172a10f00"}, + {file = "pyasn1-0.4.8-py2.7.egg", hash = "sha256:6e7545f1a61025a4e58bb336952c5061697da694db1cae97b116e9c46abcf7c8"}, + {file = "pyasn1-0.4.8-py2.py3-none-any.whl", hash = "sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d"}, + {file = "pyasn1-0.4.8-py3.1.egg", hash = "sha256:78fa6da68ed2727915c4767bb386ab32cdba863caa7dbe473eaae45f9959da86"}, + {file = "pyasn1-0.4.8-py3.2.egg", hash = "sha256:08c3c53b75eaa48d71cf8c710312316392ed40899cb34710d092e96745a358b7"}, + {file = "pyasn1-0.4.8-py3.3.egg", hash = "sha256:03840c999ba71680a131cfaee6fab142e1ed9bbd9c693e285cc6aca0d555e576"}, + {file = "pyasn1-0.4.8-py3.4.egg", hash = "sha256:7ab8a544af125fb704feadb008c99a88805126fb525280b2270bb25cc1d78a12"}, + {file = "pyasn1-0.4.8-py3.5.egg", hash = "sha256:e89bf84b5437b532b0803ba5c9a5e054d21fec423a89952a74f87fa2c9b7bce2"}, + {file = "pyasn1-0.4.8-py3.6.egg", hash = "sha256:014c0e9976956a08139dc0712ae195324a75e142284d5f87f1a87ee1b068a359"}, + {file = "pyasn1-0.4.8-py3.7.egg", hash = "sha256:99fcc3c8d804d1bc6d9a099921e39d827026409a58f2a720dcdb89374ea0c776"}, + {file = "pyasn1-0.4.8.tar.gz", hash = "sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba"}, +] +pyasn1-modules = [ + {file = "pyasn1-modules-0.2.8.tar.gz", hash = "sha256:905f84c712230b2c592c19470d3ca8d552de726050d1d1716282a1f6146be65e"}, + {file = "pyasn1_modules-0.2.8-py2.4.egg", hash = "sha256:0fe1b68d1e486a1ed5473f1302bd991c1611d319bba158e98b106ff86e1d7199"}, + {file = "pyasn1_modules-0.2.8-py2.5.egg", hash = "sha256:fe0644d9ab041506b62782e92b06b8c68cca799e1a9636ec398675459e031405"}, + {file = "pyasn1_modules-0.2.8-py2.6.egg", hash = 
"sha256:a99324196732f53093a84c4369c996713eb8c89d360a496b599fb1a9c47fc3eb"}, + {file = "pyasn1_modules-0.2.8-py2.7.egg", hash = "sha256:0845a5582f6a02bb3e1bde9ecfc4bfcae6ec3210dd270522fee602365430c3f8"}, + {file = "pyasn1_modules-0.2.8-py2.py3-none-any.whl", hash = "sha256:a50b808ffeb97cb3601dd25981f6b016cbb3d31fbf57a8b8a87428e6158d0c74"}, + {file = "pyasn1_modules-0.2.8-py3.1.egg", hash = "sha256:f39edd8c4ecaa4556e989147ebf219227e2cd2e8a43c7e7fcb1f1c18c5fd6a3d"}, + {file = "pyasn1_modules-0.2.8-py3.2.egg", hash = "sha256:b80486a6c77252ea3a3e9b1e360bc9cf28eaac41263d173c032581ad2f20fe45"}, + {file = "pyasn1_modules-0.2.8-py3.3.egg", hash = "sha256:65cebbaffc913f4fe9e4808735c95ea22d7a7775646ab690518c056784bc21b4"}, + {file = "pyasn1_modules-0.2.8-py3.4.egg", hash = "sha256:15b7c67fabc7fc240d87fb9aabf999cf82311a6d6fb2c70d00d3d0604878c811"}, + {file = "pyasn1_modules-0.2.8-py3.5.egg", hash = "sha256:426edb7a5e8879f1ec54a1864f16b882c2837bfd06eee62f2c982315ee2473ed"}, + {file = "pyasn1_modules-0.2.8-py3.6.egg", hash = "sha256:cbac4bc38d117f2a49aeedec4407d23e8866ea4ac27ff2cf7fb3e5b570df19e0"}, + {file = "pyasn1_modules-0.2.8-py3.7.egg", hash = "sha256:c29a5e5cc7a3f05926aff34e097e84f8589cd790ce0ed41b67aed6857b26aafd"}, +] pycparser = [ {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"}, {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"}, @@ -1190,6 +1694,14 @@ pyzmq = [ {file = "pyzmq-22.3.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:d6157793719de168b199194f6b6173f0ccd3bf3499e6870fac17086072e39115"}, {file = "pyzmq-22.3.0.tar.gz", hash = "sha256:8eddc033e716f8c91c6a2112f0a8ebc5e00532b4a6ae1eb0ccc48e027f9c671c"}, ] +requests = [ + {file = "requests-2.28.0-py3-none-any.whl", hash = "sha256:bc7861137fbce630f17b03d3ad02ad0bf978c844f3536d0edda6499dafce2b6f"}, + {file = "requests-2.28.0.tar.gz", hash = "sha256:d568723a7ebd25875d8d1eaf5dfa068cd2fc8194b2e483d7b1f7c81918dbec6b"}, +] +rsa = [ + {file = "rsa-4.8-py3-none-any.whl", hash = "sha256:95c5d300c4e879ee69708c428ba566c59478fd653cc3a22243eeb8ed846950bb"}, + {file = "rsa-4.8.tar.gz", hash = "sha256:5c6bd9dc7a543b7fe4304a631f8a8a3b674e2bbfc49c2ae96200cdbe55df6b17"}, +] shapely = [ {file = "Shapely-1.8.1.post1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0ca96a3314b7a38a3bb385531469de1fcf2b2c2979ec2aa4f37b4c70632cf1ad"}, {file = "Shapely-1.8.1.post1-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:493902923fdd135316161a4ece5294ba3ce81accaa54540d2af3b93f7231143a"}, @@ -1281,6 +1793,10 @@ typing-extensions = [ {file = "typing_extensions-4.2.0-py3-none-any.whl", hash = "sha256:6657594ee297170d19f67d55c05852a874e7eb634f4f753dbd667855e07c1708"}, {file = "typing_extensions-4.2.0.tar.gz", hash = "sha256:f1c24655a0da0d1b67f07e17a5e6b2a105894e6824b92096378bb3668ef02376"}, ] +urllib3 = [ + {file = "urllib3-1.26.9-py2.py3-none-any.whl", hash = "sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14"}, + {file = "urllib3-1.26.9.tar.gz", hash = "sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e"}, +] wcwidth = [ {file = "wcwidth-0.2.5-py2.py3-none-any.whl", hash = "sha256:beb4802a9cebb9144e99086eff703a642a13d6a0052920003a230f3294bbe784"}, {file = "wcwidth-0.2.5.tar.gz", hash = "sha256:c4d647b99872929fdb7bdcaa4fbe7f01413ed3d98077df798530e5b04f116c83"}, diff --git a/scripts/pyproject.toml b/scripts/pyproject.toml index a863f9a..faff0e0 100644 --- 
a/scripts/pyproject.toml +++ b/scripts/pyproject.toml @@ -6,12 +6,14 @@ authors = [] license = "MIT" [tool.poetry.dependencies] -python = "^3.8" +python = ">=3.8,<3.11" pyarrow = "^7.0.0" geopandas = "^0.10.2" pygeos = "^0.12.0" pandas = "^1.4.2" click = "^8.1.2" +google-cloud-bigquery = "^3.2.0" +db-dtypes = "^1.0.2" [tool.poetry.dev-dependencies] ipykernel = "^6.13.0" diff --git a/scripts/write_nz_building_outline.py b/scripts/write_nz_building_outline.py index 0ca1053..7fbcb1e 100644 --- a/scripts/write_nz_building_outline.py +++ b/scripts/write_nz_building_outline.py @@ -1,128 +1,13 @@ -import json import sys -from enum import Enum from pathlib import Path -from typing import Any, Dict, List import click import geopandas as gpd import numpy as np import pandas as pd -import pyarrow as pa import pyarrow.parquet as pq -import pygeos -from numpy.typing import NDArray - -GEOPARQUET_VERSION = "0.4.0" -AVAILABLE_COMPRESSIONS = ["NONE", "SNAPPY", "GZIP", "BROTLI", "LZ4", "ZSTD"] - -PygeosGeometryArray = NDArray[pygeos.Geometry] - - -class PathType(click.Path): - """A Click path argument that returns a pathlib Path, not a string""" - - def convert(self, value, param, ctx): - return Path(super().convert(value, param, ctx)) - - -class GeometryType(int, Enum): - """Pygeos (GEOS) geometry type mapping - From https://pygeos.readthedocs.io/en/latest/geometry.html?highlight=type#pygeos.geometry.get_type_id - """ - - Missing = -1 - Point = 0 - LineString = 1 - LinearRing = 2 - Polygon = 3 - MultiPoint = 4 - MultiLinestring = 5 - MultiPolygon = 6 - GeometryCollection = 7 - - -def parse_to_pygeos(df: gpd.GeoDataFrame) -> Dict[str, PygeosGeometryArray]: - """Parse to pygeos geometry array - - This is split out from _create_metadata so that we don't have to create the pygeos - array twice: once for converting to wkb and another time for metadata handling. - """ - geometry_columns: Dict[str, PygeosGeometryArray] = {} - for col in df.columns[df.dtypes == "geometry"]: - geometry_columns[col] = df[col].array.data - - return geometry_columns - - -def _create_metadata( - df: gpd.GeoDataFrame, geometry_columns: Dict[str, PygeosGeometryArray] -) -> Dict[str, Any]: - """Create and encode geo metadata dict. 
- - Parameters - ---------- - df : GeoDataFrame - - Returns - ------- - dict - """ - - # Construct metadata for each geometry - column_metadata = {} - for col, geometry_array in geometry_columns.items(): - geometry_type = get_geometry_type(geometry_array) - bbox = list(pygeos.total_bounds(geometry_array)) - - series = df[col] - column_metadata[col] = { - "encoding": "WKB", - "geometry_type": geometry_type, - "crs": series.crs.to_json_dict() if series.crs else None, - # We don't specify orientation for now - # "orientation" - "edges": "planar", - "bbox": bbox, - # I don't know how to get the epoch from a pyproj CRS, and if it's relevant - # here - # "epoch": - } - - return { - "version": GEOPARQUET_VERSION, - "primary_column": df._geometry_column_name, - "columns": column_metadata, - # "creator": {"library": "geopandas", "version": geopandas.__version__}, - } - - -def get_geometry_type(pygeos_geoms: PygeosGeometryArray) -> List[str]: - type_ids = pygeos.get_type_id(pygeos_geoms) - unique_type_ids = set(type_ids) - - geom_type_names: List[str] = [] - for type_id in unique_type_ids: - geom_type_names.append(GeometryType(type_id).name) - - return geom_type_names - - -def encode_metadata(metadata: Dict) -> bytes: - """Encode metadata dict to UTF-8 JSON string - - Parameters - ---------- - metadata : dict - - Returns - ------- - UTF-8 encoded JSON string - """ - # Remove unnecessary whitespace in JSON metadata - # https://stackoverflow.com/a/33233406 - return json.dumps(metadata, separators=(',', ':')).encode("utf-8") +from encoder import AVAILABLE_COMPRESSIONS, PathType, geopandas_to_arrow def cast_dtypes(df: gpd.GeoDataFrame) -> gpd.GeoDataFrame: """ @@ -145,21 +30,6 @@ def cast_dtypes(df: gpd.GeoDataFrame) -> gpd.GeoDataFrame: return df -def geopandas_to_arrow(df: gpd.GeoDataFrame) -> pa.Table: - geometry_columns = parse_to_pygeos(df) - geo_metadata = _create_metadata(df, geometry_columns) - - df = pd.DataFrame(df) - for col, geometry_array in geometry_columns.items(): - df[col] = pygeos.to_wkb(geometry_array) - - table = pa.Table.from_pandas(df, preserve_index=False) - - metadata = table.schema.metadata - metadata.update({b"geo": encode_metadata(geo_metadata)}) - return table.replace_schema_metadata(metadata) - - @click.command() @click.option( "-i", From c44aa9f7f261cb4a6ed258183b814cb32bebc7ca Mon Sep 17 00:00:00 2001 From: Alberto Asuero Date: Tue, 14 Jun 2022 22:17:32 +0200 Subject: [PATCH 2/4] Remove partition size --- scripts/README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/scripts/README.md b/scripts/README.md index 41f027d..afcad87 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -53,7 +53,6 @@ Convert a SQL query to parquet: poetry run python bigquery_to_parquet.py \ --input-query "SELECT * FROM carto-do-public-data.carto.geography_usa_blockgroup_2019" \ --primary-column geom \ - --partition-size 100 \ --output geography_usa_blockgroup_2019 ``` @@ -72,7 +71,6 @@ Convert a SQL query to single parquet file: poetry run python bigquery_to_parquet.py \ --input-query "SELECT * FROM carto-do-public-data.carto.geography_usa_blockgroup_2019" \ --primary-column geom \ - --partition-size 100 \ --mode file \ --output geography_usa_blockgroup_2019.parquet ``` From 8564d0b26ea810d702ef45da6b3b622f9d33cccb Mon Sep 17 00:00:00 2001 From: Alberto Asuero Date: Thu, 16 Jun 2022 00:10:28 +0200 Subject: [PATCH 3/4] Fix linter --- scripts/README.md | 6 +++--- scripts/bigquery_to_parquet.py | 2 +- scripts/encoder.py | 4 ---- scripts/parquet_to_bigquery.py | 14 +++++++------- 4 files 
changed, 11 insertions(+), 15 deletions(-) diff --git a/scripts/README.md b/scripts/README.md index afcad87..0e2f44d 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -45,7 +45,7 @@ virtualenv: poetry run pip install -U --force-reinstall pygeos --no-binary pygeos ``` -### BigQuery +### BigQuery Convert a SQL query to parquet: @@ -53,7 +53,7 @@ Convert a SQL query to parquet: poetry run python bigquery_to_parquet.py \ --input-query "SELECT * FROM carto-do-public-data.carto.geography_usa_blockgroup_2019" \ --primary-column geom \ - --output geography_usa_blockgroup_2019 + --output geography_usa_blockgroup_2019 ``` Upload a parquet file or folder to BigQuery: @@ -63,7 +63,7 @@ poetry run python parquet_to_bigquery.py \ --output "cartodb-gcp-backend-data-team.alasarr.geography_usa_blockgroup_2019" ``` -Instead of using folders, you can also work with a single file, but it might hit [bigquery limits](https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-parquet) when you upload it to BigQuery. For large parquet files you might get `UDF out of memory` errors. +Instead of using folders, you can also work with a single file, but it might hit [bigquery limits](https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-parquet) when you upload it to BigQuery. For large parquet files you might get `UDF out of memory` errors. Convert a SQL query to single parquet file: diff --git a/scripts/bigquery_to_parquet.py b/scripts/bigquery_to_parquet.py index 4925fab..21877ff 100644 --- a/scripts/bigquery_to_parquet.py +++ b/scripts/bigquery_to_parquet.py @@ -68,7 +68,7 @@ def main(input_query: str, primary_column: str, output: Path, mode: str, compres read_gdf(input_query, primary_column) .assign(__partition__= lambda x: x.index // partition_size) ) - else: + else: gdf = read_gdf(input_query, primary_column) print("Finished reading", file=sys.stderr) diff --git a/scripts/encoder.py b/scripts/encoder.py index 4db7dab..c708886 100644 --- a/scripts/encoder.py +++ b/scripts/encoder.py @@ -138,7 +138,3 @@ def geopandas_to_arrow(df: gpd.GeoDataFrame, edges:Edges = Edges.PLANAR) -> pa.T metadata = table.schema.metadata metadata.update({b"geo": _encode_metadata(geo_metadata)}) return table.replace_schema_metadata(metadata) - - - - diff --git a/scripts/parquet_to_bigquery.py b/scripts/parquet_to_bigquery.py index 39ffbfc..49f7003 100644 --- a/scripts/parquet_to_bigquery.py +++ b/scripts/parquet_to_bigquery.py @@ -19,18 +19,18 @@ def upload_parquet_file(client: bigquery.Client, file: Path, write_disposition: print(f"Uploading file {file}") job = client.load_table_from_file(source_file, dst, job_config=job_config) - job.result() # Waits for the job to complete. + job.result() # Waits for the job to complete. 
def validate_metadata(metadata): """Validate metadata""" if metadata is None or b"geo" not in metadata: - raise ValueError("Missing geo metadata") + raise ValueError("Missing geo metadata") geo = json.loads(metadata[b"geo"]) if (geo["primary_column"] not in geo["columns"]): raise ValueError("Primary column not found") - + for column_name, column_meta in geo["columns"].items(): encoding = column_meta["encoding"] edges = column_meta["edges"] @@ -70,7 +70,7 @@ def main(input: Path, output: str): # First file determines the schema and truncates the table metadata = pq.read_schema(file).metadata validate_metadata(metadata) - write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE + write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE first_file = False else: # other files will append @@ -81,7 +81,7 @@ def main(input: Path, output: str): # Single file mode metadata = pq.read_schema(input).metadata validate_metadata(metadata) - write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE + write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE upload_parquet_file(client, input, write_disposition, tmp_output) metadata_geo = json.loads(metadata[b"geo"]) @@ -93,7 +93,7 @@ def main(input: Path, output: str): sql = f""" DROP TABLE IF EXISTS {output}; CREATE TABLE {output} CLUSTER BY {primary_column} - AS SELECT * EXCEPT({", ".join(geo_columns)}), + AS SELECT * EXCEPT({", ".join(geo_columns)}), {", ".join(wkb_columns_expression)} FROM {tmp_output}; DROP TABLE IF EXISTS {tmp_output}; @@ -102,7 +102,7 @@ def main(input: Path, output: str): query_job = client.query(sql) query_job.result() # Waits for job to complete. - table = client.get_table(output) + table = client.get_table(output) print(f"Loaded {table.num_rows} rows and {len(table.schema)} columns to {output}") if __name__ == "__main__": From 9b6b815b73c577a658971afb4a6704fb0f5f6be1 Mon Sep 17 00:00:00 2001 From: Alberto Asuero Date: Thu, 16 Jun 2022 00:16:15 +0200 Subject: [PATCH 4/4] Remove blank lines --- scripts/bigquery_to_parquet.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/scripts/bigquery_to_parquet.py b/scripts/bigquery_to_parquet.py index 21877ff..71e90f0 100644 --- a/scripts/bigquery_to_parquet.py +++ b/scripts/bigquery_to_parquet.py @@ -1,5 +1,3 @@ - - import click import sys import pyarrow.parquet as pq
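As a quick sanity check on the files these scripts exchange — a minimal sketch, not part of the patch series, and the file name below is only a placeholder — the `geo` schema metadata that `validate_metadata` checks for can be read back directly with pyarrow:

```python
import json

import pyarrow.parquet as pq

# Hypothetical output of bigquery_to_parquet.py in FILE mode.
schema = pq.read_schema("geography_usa_blockgroup_2019.parquet")

# The writer stores the GeoParquet metadata as a JSON string under the b"geo" key;
# parquet_to_bigquery.py raises "Missing geo metadata" when this key is absent.
geo = json.loads(schema.metadata[b"geo"])

print("primary column:", geo["primary_column"])
for name, meta in geo["columns"].items():
    # validate_metadata expects a WKB encoding and reads the "edges" value per geometry column.
    print(name, meta["encoding"], meta["edges"])
```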