diff --git a/docker-compose.yaml b/docker-compose.yaml
index 28d2a22..00227c2 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -2,9 +2,6 @@ version: '3'
 services:
   database:
-    # at time of writing this, ARM64 is not supported so we make sure to use
-    # a supported platform: https://github.com/postgis/docker-postgis/issues/216
-    # Could possibly switch to https://github.com/vincentsarago/containers
     platform: linux/amd64
     image: postgis/postgis:15-3.4
     environment:
@@ -13,6 +10,23 @@ services:
       - POSTGRES_DB=postgis
     ports:
       - 5439:5432
-    command: postgres -N 500
+    command: >
+      postgres -N 500
+      -c checkpoint_timeout=30min
+      -c synchronous_commit=off
+      -c max_wal_senders=0
+      -c max_connections=8
+      -c shared_buffers=2GB
+      -c effective_cache_size=6GB
+      -c maintenance_work_mem=512MB
+      -c checkpoint_completion_target=0.9
+      -c wal_buffers=16MB
+      -c default_statistics_target=100
+      -c random_page_cost=1.1
+      -c effective_io_concurrency=200
+      -c work_mem=256MB
+      -c huge_pages=off
+      -c min_wal_size=1GB
+      -c max_wal_size=4GB
     volumes:
-      - ./.pgdata:/var/lib/postgresql/data
+      - ./.pgdata:/var/lib/postgresql/data
\ No newline at end of file
diff --git a/docs/acceptance/db.md b/docs/acceptance/db.md
index d05d2e5..51c9da4 100644
--- a/docs/acceptance/db.md
+++ b/docs/acceptance/db.md
@@ -54,32 +54,15 @@ You can use the CLI tool for data ingestion. First, ensure you have the required
 poetry install
 ```
 
-To download the Parquet file from S3 and load it into the database, run the following command:
+To load a Parquet file into the database, run the following command:
 
 ```bash
-poetry run space2stats-ingest download-and-load \
-    "s3:///space2stats.parquet" \
+poetry run space2stats-ingest load \
     "postgresql://username:password@localhost:5439/postgres" \
-    "/space2stats.json" \
-    --parquet-file "local.parquet"
+    "<stac_item_path>" \
+    "local.parquet"
 ```
 
-Alternatively, you can run the `download` and `load` commands separately:
-
-1. **Download the Parquet file**:
-   ```bash
-   poetry run space2stats-ingest download "s3:///space2stats.parquet" --local-path "local.parquet"
-   ```
-
-2. **Load the Parquet file into the database**:
-   ```bash
-   poetry run space2stats-ingest download-and-load \
-       "s3:///space2stats.parquet" \
-       "postgresql://username:password@localhost:5439/postgres" \
-       "/space2stats.json" \
-       --parquet-file "local.parquet"
-   ```
-
 ### Database Configuration
 
 Once connected to the database via `psql` or a PostgreSQL client (e.g., `pgAdmin`), execute the following SQL command to create an index on the `space2stats` table:
@@ -110,3 +93,28 @@ SELECT sum_pop_2020 FROM space2stats WHERE hex_id IN ('86beabd8fffffff', '86beab
 ### Conclusion
 
 Ensure all steps are followed to verify the ETL process, database setup, and data ingestion pipeline. Reach out to the development team for any further assistance or troubleshooting.
+
+
+#### Updating test data
+
+- Spin up database with docker:
+```
+docker-compose up
+```
+- Download initial dataset:
+```
+aws s3 cp s3://wbg-geography01/Space2Stats/parquet/GLOBAL/space2stats.parquet .
+download: s3://wbg-geography01/Space2Stats/parquet/GLOBAL/space2stats.parquet to ./space2stats.parquet
+```
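+- (Optional) Sanity-check the downloaded file before loading. This step is an editorial suggestion rather than part of the original workflow; it uses `pyarrow` (already a project dependency) to print the Parquet metadata, including the row and column counts:
+```
+python -c "import pyarrow.parquet as pq; print(pq.read_metadata('space2stats.parquet'))"
+```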
+- Upload initial dataset:
+```
+space2stats-ingest load "postgresql://username:password@localhost:5439/postgres" ./space2stats_ingest/METADATA/stac/space2stats/space2stats_population_2020/space2stats_population_2020.json space2stats.parquet
+```
+- Generate second dataset:
+```
+python space2stats_ingest/METADATA/generate_test_data.py
+```
+- Upload second dataset:
+```
+space2stats-ingest load "postgresql://username:password@localhost:5439/postgres" ./space2stats_ingest/METADATA/stac/space2stats/space2stats_population_2020/space2stats_reupload_test.json space2stats_test.parquet
+```
\ No newline at end of file
diff --git a/space2stats_api/src/poetry.lock b/space2stats_api/src/poetry.lock
index 5214ac9..659e5e3 100644
--- a/space2stats_api/src/poetry.lock
+++ b/space2stats_api/src/poetry.lock
@@ -558,17 +558,17 @@ css = ["tinycss2 (>=1.1.0,<1.5)"]
 [[package]]
 name = "boto3"
-version = "1.35.66"
+version = "1.35.67"
 description = "The AWS SDK for Python"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "boto3-1.35.66-py3-none-any.whl", hash = "sha256:09a610f8cf4d3c22d4ca69c1f89079e3a1c82805ce94fa0eb4ecdd4d2ba6c4bc"},
-    {file = "boto3-1.35.66.tar.gz", hash = "sha256:c392b9168b65e9c23483eaccb5b68d1f960232d7f967a1e00a045ba065ce050d"},
+    {file = "boto3-1.35.67-py3-none-any.whl", hash = "sha256:db4d8736ef9b0f1972740d464d77edbdf35cd9dcddf9291c645691920f8fa50d"},
+    {file = "boto3-1.35.67.tar.gz", hash = "sha256:4eb793c45123fbca1b2b152ce0d18272d19126cf89809cd6698bf2dfc66270fb"},
 ]
 
 [package.dependencies]
-botocore = ">=1.35.66,<1.36.0"
+botocore = ">=1.35.67,<1.36.0"
 jmespath = ">=0.7.1,<2.0.0"
 s3transfer = ">=0.10.0,<0.11.0"
 
 [package.extras]
 crt = ["botocore[crt] (>=1.21.0,<2.0a0)"]
 
 [[package]]
 name = "botocore"
-version = "1.35.66"
+version = "1.35.67"
 description = "Low-level, data-driven core of boto 3."
optional = false python-versions = ">=3.8" files = [ - {file = "botocore-1.35.66-py3-none-any.whl", hash = "sha256:d0683e9c18bb6852f768da268086c3749d925332a664db0dd1459cfa7e96e475"}, - {file = "botocore-1.35.66.tar.gz", hash = "sha256:51f43220315f384959f02ea3266740db4d421592dd87576c18824e424b349fdb"}, + {file = "botocore-1.35.67-py3-none-any.whl", hash = "sha256:c83983c196b4452dd7f298e68a9a224bc8fd58075b60133532848813826611af"}, + {file = "botocore-1.35.67.tar.gz", hash = "sha256:d782e02f2949889cf97a140a89cd5e9363d0e4b0153db51faf7fc16305c6e0e1"}, ] [package.dependencies] @@ -1081,37 +1081,37 @@ test-randomorder = ["pytest-randomly"] [[package]] name = "debugpy" -version = "1.8.8" +version = "1.8.9" description = "An implementation of the Debug Adapter Protocol for Python" optional = false python-versions = ">=3.8" files = [ - {file = "debugpy-1.8.8-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:e59b1607c51b71545cb3496876544f7186a7a27c00b436a62f285603cc68d1c6"}, - {file = "debugpy-1.8.8-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a6531d952b565b7cb2fbd1ef5df3d333cf160b44f37547a4e7cf73666aca5d8d"}, - {file = "debugpy-1.8.8-cp310-cp310-win32.whl", hash = "sha256:b01f4a5e5c5fb1d34f4ccba99a20ed01eabc45a4684f4948b5db17a319dfb23f"}, - {file = "debugpy-1.8.8-cp310-cp310-win_amd64.whl", hash = "sha256:535f4fb1c024ddca5913bb0eb17880c8f24ba28aa2c225059db145ee557035e9"}, - {file = "debugpy-1.8.8-cp311-cp311-macosx_14_0_universal2.whl", hash = "sha256:c399023146e40ae373753a58d1be0a98bf6397fadc737b97ad612886b53df318"}, - {file = "debugpy-1.8.8-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:09cc7b162586ea2171eea055985da2702b0723f6f907a423c9b2da5996ad67ba"}, - {file = "debugpy-1.8.8-cp311-cp311-win32.whl", hash = "sha256:eea8821d998ebeb02f0625dd0d76839ddde8cbf8152ebbe289dd7acf2cdc6b98"}, - {file = "debugpy-1.8.8-cp311-cp311-win_amd64.whl", hash = "sha256:d4483836da2a533f4b1454dffc9f668096ac0433de855f0c22cdce8c9f7e10c4"}, - {file = "debugpy-1.8.8-cp312-cp312-macosx_14_0_universal2.whl", hash = "sha256:0cc94186340be87b9ac5a707184ec8f36547fb66636d1029ff4f1cc020e53996"}, - {file = "debugpy-1.8.8-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64674e95916e53c2e9540a056e5f489e0ad4872645399d778f7c598eacb7b7f9"}, - {file = "debugpy-1.8.8-cp312-cp312-win32.whl", hash = "sha256:5c6e885dbf12015aed73770f29dec7023cb310d0dc2ba8bfbeb5c8e43f80edc9"}, - {file = "debugpy-1.8.8-cp312-cp312-win_amd64.whl", hash = "sha256:19ffbd84e757a6ca0113574d1bf5a2298b3947320a3e9d7d8dc3377f02d9f864"}, - {file = "debugpy-1.8.8-cp313-cp313-macosx_14_0_universal2.whl", hash = "sha256:705cd123a773d184860ed8dae99becd879dfec361098edbefb5fc0d3683eb804"}, - {file = "debugpy-1.8.8-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:890fd16803f50aa9cb1a9b9b25b5ec321656dd6b78157c74283de241993d086f"}, - {file = "debugpy-1.8.8-cp313-cp313-win32.whl", hash = "sha256:90244598214bbe704aa47556ec591d2f9869ff9e042e301a2859c57106649add"}, - {file = "debugpy-1.8.8-cp313-cp313-win_amd64.whl", hash = "sha256:4b93e4832fd4a759a0c465c967214ed0c8a6e8914bced63a28ddb0dd8c5f078b"}, - {file = "debugpy-1.8.8-cp38-cp38-macosx_14_0_x86_64.whl", hash = "sha256:143ef07940aeb8e7316de48f5ed9447644da5203726fca378f3a6952a50a9eae"}, - {file = 
"debugpy-1.8.8-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f95651bdcbfd3b27a408869a53fbefcc2bcae13b694daee5f1365b1b83a00113"}, - {file = "debugpy-1.8.8-cp38-cp38-win32.whl", hash = "sha256:26b461123a030e82602a750fb24d7801776aa81cd78404e54ab60e8b5fecdad5"}, - {file = "debugpy-1.8.8-cp38-cp38-win_amd64.whl", hash = "sha256:f3cbf1833e644a3100eadb6120f25be8a532035e8245584c4f7532937edc652a"}, - {file = "debugpy-1.8.8-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:53709d4ec586b525724819dc6af1a7703502f7e06f34ded7157f7b1f963bb854"}, - {file = "debugpy-1.8.8-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a9c013077a3a0000e83d97cf9cc9328d2b0bbb31f56b0e99ea3662d29d7a6a2"}, - {file = "debugpy-1.8.8-cp39-cp39-win32.whl", hash = "sha256:ffe94dd5e9a6739a75f0b85316dc185560db3e97afa6b215628d1b6a17561cb2"}, - {file = "debugpy-1.8.8-cp39-cp39-win_amd64.whl", hash = "sha256:5c0e5a38c7f9b481bf31277d2f74d2109292179081f11108e668195ef926c0f9"}, - {file = "debugpy-1.8.8-py2.py3-none-any.whl", hash = "sha256:ec684553aba5b4066d4de510859922419febc710df7bba04fe9e7ef3de15d34f"}, - {file = "debugpy-1.8.8.zip", hash = "sha256:e6355385db85cbd666be703a96ab7351bc9e6c61d694893206f8001e22aee091"}, + {file = "debugpy-1.8.9-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:cfe1e6c6ad7178265f74981edf1154ffce97b69005212fbc90ca22ddfe3d017e"}, + {file = "debugpy-1.8.9-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ada7fb65102a4d2c9ab62e8908e9e9f12aed9d76ef44880367bc9308ebe49a0f"}, + {file = "debugpy-1.8.9-cp310-cp310-win32.whl", hash = "sha256:c36856343cbaa448171cba62a721531e10e7ffb0abff838004701454149bc037"}, + {file = "debugpy-1.8.9-cp310-cp310-win_amd64.whl", hash = "sha256:17c5e0297678442511cf00a745c9709e928ea4ca263d764e90d233208889a19e"}, + {file = "debugpy-1.8.9-cp311-cp311-macosx_14_0_universal2.whl", hash = "sha256:b74a49753e21e33e7cf030883a92fa607bddc4ede1aa4145172debc637780040"}, + {file = "debugpy-1.8.9-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:62d22dacdb0e296966d7d74a7141aaab4bec123fa43d1a35ddcb39bf9fd29d70"}, + {file = "debugpy-1.8.9-cp311-cp311-win32.whl", hash = "sha256:8138efff315cd09b8dcd14226a21afda4ca582284bf4215126d87342bba1cc66"}, + {file = "debugpy-1.8.9-cp311-cp311-win_amd64.whl", hash = "sha256:ff54ef77ad9f5c425398efb150239f6fe8e20c53ae2f68367eba7ece1e96226d"}, + {file = "debugpy-1.8.9-cp312-cp312-macosx_14_0_universal2.whl", hash = "sha256:957363d9a7a6612a37458d9a15e72d03a635047f946e5fceee74b50d52a9c8e2"}, + {file = "debugpy-1.8.9-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e565fc54b680292b418bb809f1386f17081d1346dca9a871bf69a8ac4071afe"}, + {file = "debugpy-1.8.9-cp312-cp312-win32.whl", hash = "sha256:3e59842d6c4569c65ceb3751075ff8d7e6a6ada209ceca6308c9bde932bcef11"}, + {file = "debugpy-1.8.9-cp312-cp312-win_amd64.whl", hash = "sha256:66eeae42f3137eb428ea3a86d4a55f28da9bd5a4a3d369ba95ecc3a92c1bba53"}, + {file = "debugpy-1.8.9-cp313-cp313-macosx_14_0_universal2.whl", hash = "sha256:957ecffff80d47cafa9b6545de9e016ae8c9547c98a538ee96ab5947115fb3dd"}, + {file = "debugpy-1.8.9-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1efbb3ff61487e2c16b3e033bc8595aea578222c08aaf3c4bf0f93fadbd662ee"}, + {file 
= "debugpy-1.8.9-cp313-cp313-win32.whl", hash = "sha256:7c4d65d03bee875bcb211c76c1d8f10f600c305dbd734beaed4077e902606fee"}, + {file = "debugpy-1.8.9-cp313-cp313-win_amd64.whl", hash = "sha256:e46b420dc1bea64e5bbedd678148be512442bc589b0111bd799367cde051e71a"}, + {file = "debugpy-1.8.9-cp38-cp38-macosx_14_0_x86_64.whl", hash = "sha256:472a3994999fe6c0756945ffa359e9e7e2d690fb55d251639d07208dbc37caea"}, + {file = "debugpy-1.8.9-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:365e556a4772d7d0d151d7eb0e77ec4db03bcd95f26b67b15742b88cacff88e9"}, + {file = "debugpy-1.8.9-cp38-cp38-win32.whl", hash = "sha256:54a7e6d3014c408eb37b0b06021366ee985f1539e12fe49ca2ee0d392d9ceca5"}, + {file = "debugpy-1.8.9-cp38-cp38-win_amd64.whl", hash = "sha256:8e99c0b1cc7bf86d83fb95d5ccdc4ad0586d4432d489d1f54e4055bcc795f693"}, + {file = "debugpy-1.8.9-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:7e8b079323a56f719977fde9d8115590cb5e7a1cba2fcee0986ef8817116e7c1"}, + {file = "debugpy-1.8.9-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6953b335b804a41f16a192fa2e7851bdcfd92173cbb2f9f777bb934f49baab65"}, + {file = "debugpy-1.8.9-cp39-cp39-win32.whl", hash = "sha256:7e646e62d4602bb8956db88b1e72fe63172148c1e25c041e03b103a25f36673c"}, + {file = "debugpy-1.8.9-cp39-cp39-win_amd64.whl", hash = "sha256:3d9755e77a2d680ce3d2c5394a444cf42be4a592caaf246dbfbdd100ffcf7ae5"}, + {file = "debugpy-1.8.9-py2.py3-none-any.whl", hash = "sha256:cc37a6c9987ad743d9c3a14fa1b1a14b7e4e6041f9dd0c8abf8895fe7a97b899"}, + {file = "debugpy-1.8.9.zip", hash = "sha256:1339e14c7d980407248f09824d1b25ff5c5616651689f1e0f0e51bdead3ea13e"}, ] [[package]] @@ -2899,18 +2899,18 @@ files = [ [[package]] name = "pydantic" -version = "2.10.0" +version = "2.10.1" description = "Data validation using Python type hints" optional = false python-versions = ">=3.8" files = [ - {file = "pydantic-2.10.0-py3-none-any.whl", hash = "sha256:5e7807ba9201bdf61b1b58aa6eb690916c40a47acfb114b1b4fef3e7fd5b30fc"}, - {file = "pydantic-2.10.0.tar.gz", hash = "sha256:0aca0f045ff6e2f097f1fe89521115335f15049eeb8a7bef3dafe4b19a74e289"}, + {file = "pydantic-2.10.1-py3-none-any.whl", hash = "sha256:a8d20db84de64cf4a7d59e899c2caf0fe9d660c7cfc482528e7020d7dd189a7e"}, + {file = "pydantic-2.10.1.tar.gz", hash = "sha256:a4daca2dc0aa429555e0656d6bf94873a7dc5f54ee42b1f5873d666fb3f35560"}, ] [package.dependencies] annotated-types = ">=0.6.0" -pydantic-core = "2.27.0" +pydantic-core = "2.27.1" typing-extensions = ">=4.12.2" [package.extras] @@ -2919,111 +2919,111 @@ timezone = ["tzdata"] [[package]] name = "pydantic-core" -version = "2.27.0" +version = "2.27.1" description = "Core functionality for Pydantic validation and serialization" optional = false python-versions = ">=3.8" files = [ - {file = "pydantic_core-2.27.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:cd2ac6b919f7fed71b17fe0b4603c092a4c9b5bae414817c9c81d3c22d1e1bcc"}, - {file = "pydantic_core-2.27.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e015833384ca3e1a0565a79f5d953b0629d9138021c27ad37c92a9fa1af7623c"}, - {file = "pydantic_core-2.27.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db72e40628967f6dc572020d04b5f800d71264e0531c6da35097e73bdf38b003"}, - {file = "pydantic_core-2.27.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:df45c4073bed486ea2f18757057953afed8dd77add7276ff01bccb79982cf46c"}, - {file = 
"pydantic_core-2.27.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:836a4bfe0cc6d36dc9a9cc1a7b391265bf6ce9d1eb1eac62ac5139f5d8d9a6fa"}, - {file = "pydantic_core-2.27.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4bf1340ae507f6da6360b24179c2083857c8ca7644aab65807023cf35404ea8d"}, - {file = "pydantic_core-2.27.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ab325fc86fbc077284c8d7f996d904d30e97904a87d6fb303dce6b3de7ebba9"}, - {file = "pydantic_core-2.27.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1da0c98a85a6c6ed702d5556db3b09c91f9b0b78de37b7593e2de8d03238807a"}, - {file = "pydantic_core-2.27.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:7b0202ebf2268954090209a84f9897345719e46a57c5f2c9b7b250ca0a9d3e63"}, - {file = "pydantic_core-2.27.0-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:35380671c3c921fe8adf31ad349dc6f7588b7e928dbe44e1093789734f607399"}, - {file = "pydantic_core-2.27.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6b4c19525c3538fbc0bbda6229f9682fb8199ce9ac37395880e6952798e00373"}, - {file = "pydantic_core-2.27.0-cp310-none-win32.whl", hash = "sha256:333c840a1303d1474f491e7be0b718226c730a39ead0f7dab2c7e6a2f3855555"}, - {file = "pydantic_core-2.27.0-cp310-none-win_amd64.whl", hash = "sha256:99b2863c1365f43f74199c980a3d40f18a218fbe683dd64e470199db426c4d6a"}, - {file = "pydantic_core-2.27.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:4523c4009c3f39d948e01962223c9f5538602e7087a628479b723c939fab262d"}, - {file = "pydantic_core-2.27.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:84af1cf7bfdcbc6fcf5a5f70cc9896205e0350306e4dd73d54b6a18894f79386"}, - {file = "pydantic_core-2.27.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e65466b31be1070b4a5b7dbfbd14b247884cb8e8b79c64fb0f36b472912dbaea"}, - {file = "pydantic_core-2.27.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a5c022bb0d453192426221605efc865373dde43b17822a264671c53b068ac20c"}, - {file = "pydantic_core-2.27.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6bb69bf3b6500f195c3deb69c1205ba8fc3cb21d1915f1f158a10d6b1ef29b6a"}, - {file = "pydantic_core-2.27.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0aa4d1b2eba9a325897308b3124014a142cdccb9f3e016f31d3ebee6b5ea5e75"}, - {file = "pydantic_core-2.27.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8e96ca781e0c01e32115912ebdf7b3fb0780ce748b80d7d28a0802fa9fbaf44e"}, - {file = "pydantic_core-2.27.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b872c86d8d71827235c7077461c502feb2db3f87d9d6d5a9daa64287d75e4fa0"}, - {file = "pydantic_core-2.27.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:82e1ad4ca170e8af4c928b67cff731b6296e6a0a0981b97b2eb7c275cc4e15bd"}, - {file = "pydantic_core-2.27.0-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:eb40f828bc2f73f777d1eb8fee2e86cd9692a4518b63b6b5aa8af915dfd3207b"}, - {file = "pydantic_core-2.27.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9a8fbf506fde1529a1e3698198fe64bfbe2e0c09557bc6a7dcf872e7c01fec40"}, - {file = "pydantic_core-2.27.0-cp311-none-win32.whl", hash = "sha256:24f984fc7762ed5f806d9e8c4c77ea69fdb2afd987b4fd319ef06c87595a8c55"}, - {file = "pydantic_core-2.27.0-cp311-none-win_amd64.whl", hash = "sha256:68950bc08f9735306322bfc16a18391fcaac99ded2509e1cc41d03ccb6013cfe"}, - {file = 
"pydantic_core-2.27.0-cp311-none-win_arm64.whl", hash = "sha256:3eb8849445c26b41c5a474061032c53e14fe92a11a5db969f722a2716cd12206"}, - {file = "pydantic_core-2.27.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:8117839a9bdbba86e7f9df57018fe3b96cec934c3940b591b0fd3fbfb485864a"}, - {file = "pydantic_core-2.27.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a291d0b4243a259c8ea7e2b84eb9ccb76370e569298875a7c5e3e71baf49057a"}, - {file = "pydantic_core-2.27.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:84e35afd9e10b2698e6f2f32256678cb23ca6c1568d02628033a837638b3ed12"}, - {file = "pydantic_core-2.27.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:58ab0d979c969983cdb97374698d847a4acffb217d543e172838864636ef10d9"}, - {file = "pydantic_core-2.27.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0d06b667e53320332be2bf6f9461f4a9b78092a079b8ce8634c9afaa7e10cd9f"}, - {file = "pydantic_core-2.27.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:78f841523729e43e3928a364ec46e2e3f80e6625a4f62aca5c345f3f626c6e8a"}, - {file = "pydantic_core-2.27.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:400bf470e4327e920883b51e255617dfe4496d4e80c3fea0b5a5d0bf2c404dd4"}, - {file = "pydantic_core-2.27.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:951e71da6c89d354572098bada5ba5b5dc3a9390c933af8a614e37755d3d1840"}, - {file = "pydantic_core-2.27.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:2a51ce96224eadd1845150b204389623c8e129fde5a67a84b972bd83a85c6c40"}, - {file = "pydantic_core-2.27.0-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:483c2213a609e7db2c592bbc015da58b6c75af7360ca3c981f178110d9787bcf"}, - {file = "pydantic_core-2.27.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:359e7951f04ad35111b5ddce184db3391442345d0ab073aa63a95eb8af25a5ef"}, - {file = "pydantic_core-2.27.0-cp312-none-win32.whl", hash = "sha256:ee7d9d5537daf6d5c74a83b38a638cc001b648096c1cae8ef695b0c919d9d379"}, - {file = "pydantic_core-2.27.0-cp312-none-win_amd64.whl", hash = "sha256:2be0ad541bb9f059954ccf8877a49ed73877f862529575ff3d54bf4223e4dd61"}, - {file = "pydantic_core-2.27.0-cp312-none-win_arm64.whl", hash = "sha256:6e19401742ed7b69e51d8e4df3c03ad5ec65a83b36244479fd70edde2828a5d9"}, - {file = "pydantic_core-2.27.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:5f2b19b8d6fca432cb3acf48cf5243a7bf512988029b6e6fd27e9e8c0a204d85"}, - {file = "pydantic_core-2.27.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c86679f443e7085ea55a7376462553996c688395d18ef3f0d3dbad7838f857a2"}, - {file = "pydantic_core-2.27.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:510b11e9c3b1a852876d1ccd8d5903684336d635214148637ceb27366c75a467"}, - {file = "pydantic_core-2.27.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:eb704155e73b833801c247f39d562229c0303f54770ca14fb1c053acb376cf10"}, - {file = "pydantic_core-2.27.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9ce048deb1e033e7a865ca384770bccc11d44179cf09e5193a535c4c2f497bdc"}, - {file = "pydantic_core-2.27.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:58560828ee0951bb125c6f2862fbc37f039996d19ceb6d8ff1905abf7da0bf3d"}, - {file = "pydantic_core-2.27.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:abb4785894936d7682635726613c44578c420a096729f1978cd061a7e72d5275"}, - {file = "pydantic_core-2.27.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2883b260f7a93235488699d39cbbd94fa7b175d3a8063fbfddd3e81ad9988cb2"}, - {file = "pydantic_core-2.27.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:c6fcb3fa3855d583aa57b94cf146f7781d5d5bc06cb95cb3afece33d31aac39b"}, - {file = "pydantic_core-2.27.0-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:e851a051f7260e6d688267eb039c81f05f23a19431bd7dfa4bf5e3cb34c108cd"}, - {file = "pydantic_core-2.27.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:edb1bfd45227dec8d50bc7c7d86463cd8728bcc574f9b07de7369880de4626a3"}, - {file = "pydantic_core-2.27.0-cp313-none-win32.whl", hash = "sha256:678f66462058dd978702db17eb6a3633d634f7aa0deaea61e0a674152766d3fc"}, - {file = "pydantic_core-2.27.0-cp313-none-win_amd64.whl", hash = "sha256:d28ca7066d6cdd347a50d8b725dc10d9a1d6a1cce09836cf071ea6a2d4908be0"}, - {file = "pydantic_core-2.27.0-cp313-none-win_arm64.whl", hash = "sha256:6f4a53af9e81d757756508b57cae1cf28293f0f31b9fa2bfcb416cc7fb230f9d"}, - {file = "pydantic_core-2.27.0-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:e9f9feee7f334b72ceae46313333d002b56f325b5f04271b4ae2aadd9e993ae4"}, - {file = "pydantic_core-2.27.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:225bfff5d425c34e1fd562cef52d673579d59b967d9de06178850c4802af9039"}, - {file = "pydantic_core-2.27.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c921ad596ff1a82f9c692b0758c944355abc9f0de97a4c13ca60ffc6d8dc15d4"}, - {file = "pydantic_core-2.27.0-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6354e18a9be37bfa124d6b288a87fb30c673745806c92956f1a25e3ae6e76b96"}, - {file = "pydantic_core-2.27.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8ee4c2a75af9fe21269a4a0898c5425afb01af1f5d276063f57e2ae1bc64e191"}, - {file = "pydantic_core-2.27.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c91e3c04f5191fd3fb68764bddeaf02025492d5d9f23343b283870f6ace69708"}, - {file = "pydantic_core-2.27.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a6ebfac28fd51890a61df36ef202adbd77d00ee5aca4a3dadb3d9ed49cfb929"}, - {file = "pydantic_core-2.27.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:36aa167f69d8807ba7e341d67ea93e50fcaaf6bc433bb04939430fa3dab06f31"}, - {file = "pydantic_core-2.27.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:3e8d89c276234579cd3d095d5fa2a44eb10db9a218664a17b56363cddf226ff3"}, - {file = "pydantic_core-2.27.0-cp38-cp38-musllinux_1_1_armv7l.whl", hash = "sha256:5cc822ab90a70ea3a91e6aed3afac570b276b1278c6909b1d384f745bd09c714"}, - {file = "pydantic_core-2.27.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:e15315691fe2253eb447503153acef4d7223dfe7e7702f9ed66539fcd0c43801"}, - {file = "pydantic_core-2.27.0-cp38-none-win32.whl", hash = "sha256:dfa5f5c0a4c8fced1422dc2ca7eefd872d5d13eb33cf324361dbf1dbfba0a9fe"}, - {file = "pydantic_core-2.27.0-cp38-none-win_amd64.whl", hash = "sha256:513cb14c0cc31a4dfd849a4674b20c46d87b364f997bbcb02282306f5e187abf"}, - {file = "pydantic_core-2.27.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:4148dc9184ab79e356dc00a4199dc0ee8647973332cb385fc29a7cced49b9f9c"}, - {file = "pydantic_core-2.27.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5fc72fbfebbf42c0856a824b8b0dc2b5cd2e4a896050281a21cfa6fed8879cb1"}, - {file = 
"pydantic_core-2.27.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:185ef205256cd8b38431205698531026979db89a79587725c1e55c59101d64e9"}, - {file = "pydantic_core-2.27.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:395e3e1148fa7809016231f8065f30bb0dc285a97b4dc4360cd86e17bab58af7"}, - {file = "pydantic_core-2.27.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:33d14369739c5d07e2e7102cdb0081a1fa46ed03215e07f097b34e020b83b1ae"}, - {file = "pydantic_core-2.27.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e7820bb0d65e3ce1e3e70b6708c2f66143f55912fa02f4b618d0f08b61575f12"}, - {file = "pydantic_core-2.27.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:43b61989068de9ce62296cde02beffabcadb65672207fc51e7af76dca75e6636"}, - {file = "pydantic_core-2.27.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:15e350efb67b855cd014c218716feea4986a149ed1f42a539edd271ee074a196"}, - {file = "pydantic_core-2.27.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:433689845288f9a1ee5714444e65957be26d30915f7745091ede4a83cfb2d7bb"}, - {file = "pydantic_core-2.27.0-cp39-cp39-musllinux_1_1_armv7l.whl", hash = "sha256:3fd8bc2690e7c39eecdf9071b6a889ce7b22b72073863940edc2a0a23750ca90"}, - {file = "pydantic_core-2.27.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:884f1806609c2c66564082540cffc96868c5571c7c3cf3a783f63f2fb49bd3cd"}, - {file = "pydantic_core-2.27.0-cp39-none-win32.whl", hash = "sha256:bf37b72834e7239cf84d4a0b2c050e7f9e48bced97bad9bdf98d26b8eb72e846"}, - {file = "pydantic_core-2.27.0-cp39-none-win_amd64.whl", hash = "sha256:31a2cae5f059329f9cfe3d8d266d3da1543b60b60130d186d9b6a3c20a346361"}, - {file = "pydantic_core-2.27.0-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:4fb49cfdb53af5041aba909be00cccfb2c0d0a2e09281bf542371c5fd36ad04c"}, - {file = "pydantic_core-2.27.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:49633583eb7dc5cba61aaf7cdb2e9e662323ad394e543ee77af265736bcd3eaa"}, - {file = "pydantic_core-2.27.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:153017e3d6cd3ce979de06d84343ca424bb6092727375eba1968c8b4693c6ecb"}, - {file = "pydantic_core-2.27.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ff63a92f6e249514ef35bc795de10745be0226eaea06eb48b4bbeaa0c8850a4a"}, - {file = "pydantic_core-2.27.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5982048129f40b082c2654de10c0f37c67a14f5ff9d37cf35be028ae982f26df"}, - {file = "pydantic_core-2.27.0-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:91bc66f878557313c2a6bcf396e7befcffe5ab4354cfe4427318968af31143c3"}, - {file = "pydantic_core-2.27.0-pp310-pypy310_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:68ef5377eb582fa4343c9d0b57a5b094046d447b4c73dd9fbd9ffb216f829e7d"}, - {file = "pydantic_core-2.27.0-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:c5726eec789ee38f2c53b10b1821457b82274f81f4f746bb1e666d8741fcfadb"}, - {file = "pydantic_core-2.27.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:c0c431e4be5c1a0c6654e0c31c661cd89e0ca956ef65305c3c3fd96f4e72ca39"}, - {file = "pydantic_core-2.27.0-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:8e21d927469d04b39386255bf00d0feedead16f6253dcc85e9e10ddebc334084"}, - {file = "pydantic_core-2.27.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = 
"sha256:4b51f964fcbb02949fc546022e56cdb16cda457af485e9a3e8b78ac2ecf5d77e"}, - {file = "pydantic_core-2.27.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25a7fd4de38f7ff99a37e18fa0098c3140286451bc823d1746ba80cec5b433a1"}, - {file = "pydantic_core-2.27.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6fda87808429c520a002a85d6e7cdadbf58231d60e96260976c5b8f9a12a8e13"}, - {file = "pydantic_core-2.27.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8a150392102c402c538190730fda06f3bce654fc498865579a9f2c1d2b425833"}, - {file = "pydantic_core-2.27.0-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:c9ed88b398ba7e3bad7bd64d66cc01dcde9cfcb7ec629a6fd78a82fa0b559d78"}, - {file = "pydantic_core-2.27.0-pp39-pypy39_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:9fe94d9d2a2b4edd7a4b22adcd45814b1b59b03feb00e56deb2e89747aec7bfe"}, - {file = "pydantic_core-2.27.0-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:d8b5ee4ae9170e2775d495b81f414cc20268041c42571530513496ba61e94ba3"}, - {file = "pydantic_core-2.27.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:d29e235ce13c91902ef3efc3d883a677655b3908b1cbc73dee816e5e1f8f7739"}, - {file = "pydantic_core-2.27.0.tar.gz", hash = "sha256:f57783fbaf648205ac50ae7d646f27582fc706be3977e87c3c124e7a92407b10"}, + {file = "pydantic_core-2.27.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:71a5e35c75c021aaf400ac048dacc855f000bdfed91614b4a726f7432f1f3d6a"}, + {file = "pydantic_core-2.27.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f82d068a2d6ecfc6e054726080af69a6764a10015467d7d7b9f66d6ed5afa23b"}, + {file = "pydantic_core-2.27.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:121ceb0e822f79163dd4699e4c54f5ad38b157084d97b34de8b232bcaad70278"}, + {file = "pydantic_core-2.27.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4603137322c18eaf2e06a4495f426aa8d8388940f3c457e7548145011bb68e05"}, + {file = "pydantic_core-2.27.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a33cd6ad9017bbeaa9ed78a2e0752c5e250eafb9534f308e7a5f7849b0b1bfb4"}, + {file = "pydantic_core-2.27.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:15cc53a3179ba0fcefe1e3ae50beb2784dede4003ad2dfd24f81bba4b23a454f"}, + {file = "pydantic_core-2.27.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45d9c5eb9273aa50999ad6adc6be5e0ecea7e09dbd0d31bd0c65a55a2592ca08"}, + {file = "pydantic_core-2.27.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8bf7b66ce12a2ac52d16f776b31d16d91033150266eb796967a7e4621707e4f6"}, + {file = "pydantic_core-2.27.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:655d7dd86f26cb15ce8a431036f66ce0318648f8853d709b4167786ec2fa4807"}, + {file = "pydantic_core-2.27.1-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:5556470f1a2157031e676f776c2bc20acd34c1990ca5f7e56f1ebf938b9ab57c"}, + {file = "pydantic_core-2.27.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f69ed81ab24d5a3bd93861c8c4436f54afdf8e8cc421562b0c7504cf3be58206"}, + {file = "pydantic_core-2.27.1-cp310-none-win32.whl", hash = "sha256:f5a823165e6d04ccea61a9f0576f345f8ce40ed533013580e087bd4d7442b52c"}, + {file = "pydantic_core-2.27.1-cp310-none-win_amd64.whl", hash = "sha256:57866a76e0b3823e0b56692d1a0bf722bffb324839bb5b7226a7dbd6c9a40b17"}, + {file = "pydantic_core-2.27.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = 
"sha256:ac3b20653bdbe160febbea8aa6c079d3df19310d50ac314911ed8cc4eb7f8cb8"}, + {file = "pydantic_core-2.27.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a5a8e19d7c707c4cadb8c18f5f60c843052ae83c20fa7d44f41594c644a1d330"}, + {file = "pydantic_core-2.27.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7f7059ca8d64fea7f238994c97d91f75965216bcbe5f695bb44f354893f11d52"}, + {file = "pydantic_core-2.27.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bed0f8a0eeea9fb72937ba118f9db0cb7e90773462af7962d382445f3005e5a4"}, + {file = "pydantic_core-2.27.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a3cb37038123447cf0f3ea4c74751f6a9d7afef0eb71aa07bf5f652b5e6a132c"}, + {file = "pydantic_core-2.27.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:84286494f6c5d05243456e04223d5a9417d7f443c3b76065e75001beb26f88de"}, + {file = "pydantic_core-2.27.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:acc07b2cfc5b835444b44a9956846b578d27beeacd4b52e45489e93276241025"}, + {file = "pydantic_core-2.27.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4fefee876e07a6e9aad7a8c8c9f85b0cdbe7df52b8a9552307b09050f7512c7e"}, + {file = "pydantic_core-2.27.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:258c57abf1188926c774a4c94dd29237e77eda19462e5bb901d88adcab6af919"}, + {file = "pydantic_core-2.27.1-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:35c14ac45fcfdf7167ca76cc80b2001205a8d5d16d80524e13508371fb8cdd9c"}, + {file = "pydantic_core-2.27.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d1b26e1dff225c31897696cab7d4f0a315d4c0d9e8666dbffdb28216f3b17fdc"}, + {file = "pydantic_core-2.27.1-cp311-none-win32.whl", hash = "sha256:2cdf7d86886bc6982354862204ae3b2f7f96f21a3eb0ba5ca0ac42c7b38598b9"}, + {file = "pydantic_core-2.27.1-cp311-none-win_amd64.whl", hash = "sha256:3af385b0cee8df3746c3f406f38bcbfdc9041b5c2d5ce3e5fc6637256e60bbc5"}, + {file = "pydantic_core-2.27.1-cp311-none-win_arm64.whl", hash = "sha256:81f2ec23ddc1b476ff96563f2e8d723830b06dceae348ce02914a37cb4e74b89"}, + {file = "pydantic_core-2.27.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:9cbd94fc661d2bab2bc702cddd2d3370bbdcc4cd0f8f57488a81bcce90c7a54f"}, + {file = "pydantic_core-2.27.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5f8c4718cd44ec1580e180cb739713ecda2bdee1341084c1467802a417fe0f02"}, + {file = "pydantic_core-2.27.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:15aae984e46de8d376df515f00450d1522077254ef6b7ce189b38ecee7c9677c"}, + {file = "pydantic_core-2.27.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1ba5e3963344ff25fc8c40da90f44b0afca8cfd89d12964feb79ac1411a260ac"}, + {file = "pydantic_core-2.27.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:992cea5f4f3b29d6b4f7f1726ed8ee46c8331c6b4eed6db5b40134c6fe1768bb"}, + {file = "pydantic_core-2.27.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0325336f348dbee6550d129b1627cb8f5351a9dc91aad141ffb96d4937bd9529"}, + {file = "pydantic_core-2.27.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7597c07fbd11515f654d6ece3d0e4e5093edc30a436c63142d9a4b8e22f19c35"}, + {file = "pydantic_core-2.27.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:3bbd5d8cc692616d5ef6fbbbd50dbec142c7e6ad9beb66b78a96e9c16729b089"}, + {file = 
"pydantic_core-2.27.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:dc61505e73298a84a2f317255fcc72b710b72980f3a1f670447a21efc88f8381"}, + {file = "pydantic_core-2.27.1-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:e1f735dc43da318cad19b4173dd1ffce1d84aafd6c9b782b3abc04a0d5a6f5bb"}, + {file = "pydantic_core-2.27.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:f4e5658dbffe8843a0f12366a4c2d1c316dbe09bb4dfbdc9d2d9cd6031de8aae"}, + {file = "pydantic_core-2.27.1-cp312-none-win32.whl", hash = "sha256:672ebbe820bb37988c4d136eca2652ee114992d5d41c7e4858cdd90ea94ffe5c"}, + {file = "pydantic_core-2.27.1-cp312-none-win_amd64.whl", hash = "sha256:66ff044fd0bb1768688aecbe28b6190f6e799349221fb0de0e6f4048eca14c16"}, + {file = "pydantic_core-2.27.1-cp312-none-win_arm64.whl", hash = "sha256:9a3b0793b1bbfd4146304e23d90045f2a9b5fd5823aa682665fbdaf2a6c28f3e"}, + {file = "pydantic_core-2.27.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:f216dbce0e60e4d03e0c4353c7023b202d95cbaeff12e5fd2e82ea0a66905073"}, + {file = "pydantic_core-2.27.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a2e02889071850bbfd36b56fd6bc98945e23670773bc7a76657e90e6b6603c08"}, + {file = "pydantic_core-2.27.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42b0e23f119b2b456d07ca91b307ae167cc3f6c846a7b169fca5326e32fdc6cf"}, + {file = "pydantic_core-2.27.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:764be71193f87d460a03f1f7385a82e226639732214b402f9aa61f0d025f0737"}, + {file = "pydantic_core-2.27.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1c00666a3bd2f84920a4e94434f5974d7bbc57e461318d6bb34ce9cdbbc1f6b2"}, + {file = "pydantic_core-2.27.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3ccaa88b24eebc0f849ce0a4d09e8a408ec5a94afff395eb69baf868f5183107"}, + {file = "pydantic_core-2.27.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c65af9088ac534313e1963443d0ec360bb2b9cba6c2909478d22c2e363d98a51"}, + {file = "pydantic_core-2.27.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:206b5cf6f0c513baffaeae7bd817717140770c74528f3e4c3e1cec7871ddd61a"}, + {file = "pydantic_core-2.27.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:062f60e512fc7fff8b8a9d680ff0ddaaef0193dba9fa83e679c0c5f5fbd018bc"}, + {file = "pydantic_core-2.27.1-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:a0697803ed7d4af5e4c1adf1670af078f8fcab7a86350e969f454daf598c4960"}, + {file = "pydantic_core-2.27.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:58ca98a950171f3151c603aeea9303ef6c235f692fe555e883591103da709b23"}, + {file = "pydantic_core-2.27.1-cp313-none-win32.whl", hash = "sha256:8065914ff79f7eab1599bd80406681f0ad08f8e47c880f17b416c9f8f7a26d05"}, + {file = "pydantic_core-2.27.1-cp313-none-win_amd64.whl", hash = "sha256:ba630d5e3db74c79300d9a5bdaaf6200172b107f263c98a0539eeecb857b2337"}, + {file = "pydantic_core-2.27.1-cp313-none-win_arm64.whl", hash = "sha256:45cf8588c066860b623cd11c4ba687f8d7175d5f7ef65f7129df8a394c502de5"}, + {file = "pydantic_core-2.27.1-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:5897bec80a09b4084aee23f9b73a9477a46c3304ad1d2d07acca19723fb1de62"}, + {file = "pydantic_core-2.27.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d0165ab2914379bd56908c02294ed8405c252250668ebcb438a55494c69f44ab"}, + {file = "pydantic_core-2.27.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:6b9af86e1d8e4cfc82c2022bfaa6f459381a50b94a29e95dcdda8442d6d83864"}, + {file = "pydantic_core-2.27.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5f6c8a66741c5f5447e047ab0ba7a1c61d1e95580d64bce852e3df1f895c4067"}, + {file = "pydantic_core-2.27.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9a42d6a8156ff78981f8aa56eb6394114e0dedb217cf8b729f438f643608cbcd"}, + {file = "pydantic_core-2.27.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:64c65f40b4cd8b0e049a8edde07e38b476da7e3aaebe63287c899d2cff253fa5"}, + {file = "pydantic_core-2.27.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fdcf339322a3fae5cbd504edcefddd5a50d9ee00d968696846f089b4432cf78"}, + {file = "pydantic_core-2.27.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:bf99c8404f008750c846cb4ac4667b798a9f7de673ff719d705d9b2d6de49c5f"}, + {file = "pydantic_core-2.27.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:8f1edcea27918d748c7e5e4d917297b2a0ab80cad10f86631e488b7cddf76a36"}, + {file = "pydantic_core-2.27.1-cp38-cp38-musllinux_1_1_armv7l.whl", hash = "sha256:159cac0a3d096f79ab6a44d77a961917219707e2a130739c64d4dd46281f5c2a"}, + {file = "pydantic_core-2.27.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:029d9757eb621cc6e1848fa0b0310310de7301057f623985698ed7ebb014391b"}, + {file = "pydantic_core-2.27.1-cp38-none-win32.whl", hash = "sha256:a28af0695a45f7060e6f9b7092558a928a28553366519f64083c63a44f70e618"}, + {file = "pydantic_core-2.27.1-cp38-none-win_amd64.whl", hash = "sha256:2d4567c850905d5eaaed2f7a404e61012a51caf288292e016360aa2b96ff38d4"}, + {file = "pydantic_core-2.27.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:e9386266798d64eeb19dd3677051f5705bf873e98e15897ddb7d76f477131967"}, + {file = "pydantic_core-2.27.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4228b5b646caa73f119b1ae756216b59cc6e2267201c27d3912b592c5e323b60"}, + {file = "pydantic_core-2.27.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b3dfe500de26c52abe0477dde16192ac39c98f05bf2d80e76102d394bd13854"}, + {file = "pydantic_core-2.27.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:aee66be87825cdf72ac64cb03ad4c15ffef4143dbf5c113f64a5ff4f81477bf9"}, + {file = "pydantic_core-2.27.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3b748c44bb9f53031c8cbc99a8a061bc181c1000c60a30f55393b6e9c45cc5bd"}, + {file = "pydantic_core-2.27.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ca038c7f6a0afd0b2448941b6ef9d5e1949e999f9e5517692eb6da58e9d44be"}, + {file = "pydantic_core-2.27.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e0bd57539da59a3e4671b90a502da9a28c72322a4f17866ba3ac63a82c4498e"}, + {file = "pydantic_core-2.27.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ac6c2c45c847bbf8f91930d88716a0fb924b51e0c6dad329b793d670ec5db792"}, + {file = "pydantic_core-2.27.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b94d4ba43739bbe8b0ce4262bcc3b7b9f31459ad120fb595627eaeb7f9b9ca01"}, + {file = "pydantic_core-2.27.1-cp39-cp39-musllinux_1_1_armv7l.whl", hash = "sha256:00e6424f4b26fe82d44577b4c842d7df97c20be6439e8e685d0d715feceb9fb9"}, + {file = "pydantic_core-2.27.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:38de0a70160dd97540335b7ad3a74571b24f1dc3ed33f815f0880682e6880131"}, + {file = "pydantic_core-2.27.1-cp39-none-win32.whl", hash = 
"sha256:7ccebf51efc61634f6c2344da73e366c75e735960b5654b63d7e6f69a5885fa3"}, + {file = "pydantic_core-2.27.1-cp39-none-win_amd64.whl", hash = "sha256:a57847b090d7892f123726202b7daa20df6694cbd583b67a592e856bff603d6c"}, + {file = "pydantic_core-2.27.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3fa80ac2bd5856580e242dbc202db873c60a01b20309c8319b5c5986fbe53ce6"}, + {file = "pydantic_core-2.27.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:d950caa237bb1954f1b8c9227b5065ba6875ac9771bb8ec790d956a699b78676"}, + {file = "pydantic_core-2.27.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0e4216e64d203e39c62df627aa882f02a2438d18a5f21d7f721621f7a5d3611d"}, + {file = "pydantic_core-2.27.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:02a3d637bd387c41d46b002f0e49c52642281edacd2740e5a42f7017feea3f2c"}, + {file = "pydantic_core-2.27.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:161c27ccce13b6b0c8689418da3885d3220ed2eae2ea5e9b2f7f3d48f1d52c27"}, + {file = "pydantic_core-2.27.1-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:19910754e4cc9c63bc1c7f6d73aa1cfee82f42007e407c0f413695c2f7ed777f"}, + {file = "pydantic_core-2.27.1-pp310-pypy310_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:e173486019cc283dc9778315fa29a363579372fe67045e971e89b6365cc035ed"}, + {file = "pydantic_core-2.27.1-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:af52d26579b308921b73b956153066481f064875140ccd1dfd4e77db89dbb12f"}, + {file = "pydantic_core-2.27.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:981fb88516bd1ae8b0cbbd2034678a39dedc98752f264ac9bc5839d3923fa04c"}, + {file = "pydantic_core-2.27.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5fde892e6c697ce3e30c61b239330fc5d569a71fefd4eb6512fc6caec9dd9e2f"}, + {file = "pydantic_core-2.27.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:816f5aa087094099fff7edabb5e01cc370eb21aa1a1d44fe2d2aefdfb5599b31"}, + {file = "pydantic_core-2.27.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c10c309e18e443ddb108f0ef64e8729363adbfd92d6d57beec680f6261556f3"}, + {file = "pydantic_core-2.27.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98476c98b02c8e9b2eec76ac4156fd006628b1b2d0ef27e548ffa978393fd154"}, + {file = "pydantic_core-2.27.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c3027001c28434e7ca5a6e1e527487051136aa81803ac812be51802150d880dd"}, + {file = "pydantic_core-2.27.1-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:7699b1df36a48169cdebda7ab5a2bac265204003f153b4bd17276153d997670a"}, + {file = "pydantic_core-2.27.1-pp39-pypy39_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:1c39b07d90be6b48968ddc8c19e7585052088fd7ec8d568bb31ff64c70ae3c97"}, + {file = "pydantic_core-2.27.1-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:46ccfe3032b3915586e469d4972973f893c0a2bb65669194a5bdea9bacc088c2"}, + {file = "pydantic_core-2.27.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:62ba45e21cf6571d7f716d903b5b7b6d2617e2d5d67c0923dc47b9d41369f840"}, + {file = "pydantic_core-2.27.1.tar.gz", hash = "sha256:62a763352879b84aa31058fc931884055fd75089cccbd9d58bb6afd01141b235"}, ] [package.dependencies] @@ -4001,22 +4001,22 @@ files = [ [[package]] name = "tornado" -version = "6.4.1" +version = "6.4.2" description = "Tornado is a Python web framework and asynchronous networking library, originally developed at 
FriendFeed." optional = false python-versions = ">=3.8" files = [ - {file = "tornado-6.4.1-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:163b0aafc8e23d8cdc3c9dfb24c5368af84a81e3364745ccb4427669bf84aec8"}, - {file = "tornado-6.4.1-cp38-abi3-macosx_10_9_x86_64.whl", hash = "sha256:6d5ce3437e18a2b66fbadb183c1d3364fb03f2be71299e7d10dbeeb69f4b2a14"}, - {file = "tornado-6.4.1-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e2e20b9113cd7293f164dc46fffb13535266e713cdb87bd2d15ddb336e96cfc4"}, - {file = "tornado-6.4.1-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8ae50a504a740365267b2a8d1a90c9fbc86b780a39170feca9bcc1787ff80842"}, - {file = "tornado-6.4.1-cp38-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:613bf4ddf5c7a95509218b149b555621497a6cc0d46ac341b30bd9ec19eac7f3"}, - {file = "tornado-6.4.1-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:25486eb223babe3eed4b8aecbac33b37e3dd6d776bc730ca14e1bf93888b979f"}, - {file = "tornado-6.4.1-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:454db8a7ecfcf2ff6042dde58404164d969b6f5d58b926da15e6b23817950fc4"}, - {file = "tornado-6.4.1-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a02a08cc7a9314b006f653ce40483b9b3c12cda222d6a46d4ac63bb6c9057698"}, - {file = "tornado-6.4.1-cp38-abi3-win32.whl", hash = "sha256:d9a566c40b89757c9aa8e6f032bcdb8ca8795d7c1a9762910c722b1635c9de4d"}, - {file = "tornado-6.4.1-cp38-abi3-win_amd64.whl", hash = "sha256:b24b8982ed444378d7f21d563f4180a2de31ced9d8d84443907a0a64da2072e7"}, - {file = "tornado-6.4.1.tar.gz", hash = "sha256:92d3ab53183d8c50f8204a51e6f91d18a15d5ef261e84d452800d4ff6fc504e9"}, + {file = "tornado-6.4.2-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:e828cce1123e9e44ae2a50a9de3055497ab1d0aeb440c5ac23064d9e44880da1"}, + {file = "tornado-6.4.2-cp38-abi3-macosx_10_9_x86_64.whl", hash = "sha256:072ce12ada169c5b00b7d92a99ba089447ccc993ea2143c9ede887e0937aa803"}, + {file = "tornado-6.4.2-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a017d239bd1bb0919f72af256a970624241f070496635784d9bf0db640d3fec"}, + {file = "tornado-6.4.2-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c36e62ce8f63409301537222faffcef7dfc5284f27eec227389f2ad11b09d946"}, + {file = "tornado-6.4.2-cp38-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bca9eb02196e789c9cb5c3c7c0f04fb447dc2adffd95265b2c7223a8a615ccbf"}, + {file = "tornado-6.4.2-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:304463bd0772442ff4d0f5149c6f1c2135a1fae045adf070821c6cdc76980634"}, + {file = "tornado-6.4.2-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:c82c46813ba483a385ab2a99caeaedf92585a1f90defb5693351fa7e4ea0bf73"}, + {file = "tornado-6.4.2-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:932d195ca9015956fa502c6b56af9eb06106140d844a335590c1ec7f5277d10c"}, + {file = "tornado-6.4.2-cp38-abi3-win32.whl", hash = "sha256:2876cef82e6c5978fde1e0d5b1f919d756968d5b4282418f3146b79b58556482"}, + {file = "tornado-6.4.2-cp38-abi3-win_amd64.whl", hash = "sha256:908b71bf3ff37d81073356a5fadcc660eb10c1476ee6e2725588626ce7e5ca38"}, + {file = "tornado-6.4.2.tar.gz", hash = "sha256:92bad5b4746e9879fd7bf1eb21dce4e3fc5128d71601f80005afa39237ad620b"}, ] [[package]] diff --git a/space2stats_api/src/space2stats_ingest/METADATA/generate_test_data.py 
b/space2stats_api/src/space2stats_ingest/METADATA/generate_test_data.py new file mode 100644 index 0000000..f723a4c --- /dev/null +++ b/space2stats_api/src/space2stats_ingest/METADATA/generate_test_data.py @@ -0,0 +1,23 @@ +import numpy as np +import pyarrow as pa +import pyarrow.parquet as pq + +# Load the original Parquet file +input_file = "space2stats.parquet" +table = pq.read_table(input_file) + +# Select only the 'hex_id' column +table = table.select(["hex_id"]) + +# Create the new 'test_column' with random values +num_rows = table.num_rows +test_column = pa.array(np.random.random(size=num_rows), type=pa.float64()) + +# Add 'test_column' to the table +table = table.append_column("test_column", test_column) + +# Save the modified table to a new Parquet file +output_file = "space2stats_test.parquet" +pq.write_table(table, output_file) + +print(f"Modified Parquet file saved as {output_file}") diff --git a/space2stats_api/src/space2stats_ingest/METADATA/stac/space2stats/space2stats_population_2020/space2stats_population_2020.json b/space2stats_api/src/space2stats_ingest/METADATA/stac/space2stats/space2stats_population_2020/space2stats_population_2020.json new file mode 100644 index 0000000..76a6621 --- /dev/null +++ b/space2stats_api/src/space2stats_ingest/METADATA/stac/space2stats/space2stats_population_2020/space2stats_population_2020.json @@ -0,0 +1,336 @@ +{ + "type": "Feature", + "stac_version": "1.0.0", + "stac_extensions": [ + "https://stac-extensions.github.io/table/v1.2.0/schema.json", + "https://stac-extensions.github.io/scientific/v1.0.0/schema.json" + ], + "id": "space2stats_population_2020", + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [ + -179.99999561620714, + -89.98750455101016 + ], + [ + -179.99999561620714, + 89.98750455101016 + ], + [ + 179.99999096313272, + 89.98750455101016 + ], + [ + 179.99999096313272, + -89.98750455101016 + ], + [ + -179.99999561620714, + -89.98750455101016 + ] + ] + ] + }, + "bbox": [ + -179.99999561620714, + -89.98750455101016, + 179.99999096313272, + 89.98750455101016 + ], + "properties": { + "name": "Population Data", + "description": "Gridded population disaggregated by gender for the year 2020, with data available for different age groups.", + "methodological_notes": "Global raster files are processed for each hexagonal grid using zonal statistics.", + "source_data": "WorldPop gridded population, 2020, Unconstrained, UN-Adjusted", + "sci:citation": "Stevens FR, Gaughan AE, Linard C, Tatem AJ (2015) Disaggregating Census Data for Population Mapping Using Random Forests with Remotely-Sensed and Ancillary Data.", + "organization": "WorldPop, https://www.worldpop.org", + "method": "sum", + "resolution": "100 meters", + "table:primary_geometry": "geometry", + "table:columns": [ + { + "name": "hex_id", + "description": "H3 unique identifier", + "type": "object" + }, + { + "name": "sum_pop_f_0_2020", + "description": "Total population female, ages 0 to 1, 2020", + "type": "float64" + }, + { + "name": "sum_pop_f_10_2020", + "description": "Total population female, ages 10 to 15, 2020", + "type": "float64" + }, + { + "name": "sum_pop_f_15_2020", + "description": "Total population female, ages 15 to 20, 2020", + "type": "float64" + }, + { + "name": "sum_pop_f_1_2020", + "description": "Total population female, ages 1 to 10, 2020", + "type": "float64" + }, + { + "name": "sum_pop_f_20_2020", + "description": "Total population female, ages 20 to 25, 2020", + "type": "float64" + }, + { + "name": "sum_pop_f_25_2020", + "description": 
"Total population female, ages 25 to 30, 2020", + "type": "float64" + }, + { + "name": "sum_pop_f_30_2020", + "description": "Total population female, ages 30 to 35, 2020", + "type": "float64" + }, + { + "name": "sum_pop_f_35_2020", + "description": "Total population female, ages 35 to 40, 2020", + "type": "float64" + }, + { + "name": "sum_pop_f_40_2020", + "description": "Total population female, ages 40 to 45, 2020", + "type": "float64" + }, + { + "name": "sum_pop_f_45_2020", + "description": "Total population female, ages 45 to 50, 2020", + "type": "float64" + }, + { + "name": "sum_pop_f_50_2020", + "description": "Total population female, ages 50 to 55, 2020", + "type": "float64" + }, + { + "name": "sum_pop_f_55_2020", + "description": "Total population female, ages 55 to 60, 2020", + "type": "float64" + }, + { + "name": "sum_pop_f_5_2020", + "description": "Total population female, ages 5 to 10, 2020", + "type": "float64" + }, + { + "name": "sum_pop_f_60_2020", + "description": "Total population female, ages 60 to 65, 2020", + "type": "float64" + }, + { + "name": "sum_pop_f_65_2020", + "description": "Total population female, ages 65 to 70, 2020", + "type": "float64" + }, + { + "name": "sum_pop_f_70_2020", + "description": "Total population female, ages 70 to 75, 2020", + "type": "float64" + }, + { + "name": "sum_pop_f_75_2020", + "description": "Total population female, ages 75 to 80, 2020", + "type": "float64" + }, + { + "name": "sum_pop_f_80_2020", + "description": "Total population female, ages 80 and above, 2020", + "type": "float64" + }, + { + "name": "sum_pop_m_0_2020", + "description": "Total population male, ages 0 to 1, 2020", + "type": "float64" + }, + { + "name": "sum_pop_m_10_2020", + "description": "Total population male, ages 10 to 15, 2020", + "type": "float64" + }, + { + "name": "sum_pop_m_15_2020", + "description": "Total population male, ages 15 to 20, 2020", + "type": "float64" + }, + { + "name": "sum_pop_m_1_2020", + "description": "Total population male, ages 1 to 10, 2020", + "type": "float64" + }, + { + "name": "sum_pop_m_20_2020", + "description": "Total population male, ages 20 to 25, 2020", + "type": "float64" + }, + { + "name": "sum_pop_m_25_2020", + "description": "Total population male, ages 25 to 30, 2020", + "type": "float64" + }, + { + "name": "sum_pop_m_30_2020", + "description": "Total population male, ages 30 to 35, 2020", + "type": "float64" + }, + { + "name": "sum_pop_m_35_2020", + "description": "Total population male, ages 35 to 40, 2020", + "type": "float64" + }, + { + "name": "sum_pop_m_40_2020", + "description": "Total population male, ages 40 to 45, 2020", + "type": "float64" + }, + { + "name": "sum_pop_m_45_2020", + "description": "Total population male, ages 45 to 50, 2020", + "type": "float64" + }, + { + "name": "sum_pop_m_50_2020", + "description": "Total population male, ages 50 to 55, 2020", + "type": "float64" + }, + { + "name": "sum_pop_m_55_2020", + "description": "Total population male, ages 55 to 60, 2020", + "type": "float64" + }, + { + "name": "sum_pop_m_5_2020", + "description": "Total population male, ages 5 to 10, 2020", + "type": "float64" + }, + { + "name": "sum_pop_m_60_2020", + "description": "Total population male, ages 60 to 65, 2020", + "type": "float64" + }, + { + "name": "sum_pop_m_65_2020", + "description": "Total population male, ages 65 to 70, 2020", + "type": "float64" + }, + { + "name": "sum_pop_m_70_2020", + "description": "Total population male, ages 70 to 75, 2020", + "type": "float64" + }, + { + "name": 
"sum_pop_m_75_2020", + "description": "Total population male, ages 75 to 80, 2020", + "type": "float64" + }, + { + "name": "sum_pop_m_80_2020", + "description": "Total population male, ages 80 and above, 2020", + "type": "float64" + }, + { + "name": "sum_pop_f_2020", + "description": "Total population female, 2020", + "type": "float64" + }, + { + "name": "sum_pop_m_2020", + "description": "Total population male, 2020", + "type": "float64" + }, + { + "name": "sum_pop_2020", + "description": "Total population, 2020", + "type": "float64" + } + ], + "vector:layers": { + "space2stats": { + "hex_id": "object", + "sum_pop_f_0_2020": "float64", + "sum_pop_f_10_2020": "float64", + "sum_pop_f_15_2020": "float64", + "sum_pop_f_1_2020": "float64", + "sum_pop_f_20_2020": "float64", + "sum_pop_f_25_2020": "float64", + "sum_pop_f_30_2020": "float64", + "sum_pop_f_35_2020": "float64", + "sum_pop_f_40_2020": "float64", + "sum_pop_f_45_2020": "float64", + "sum_pop_f_50_2020": "float64", + "sum_pop_f_55_2020": "float64", + "sum_pop_f_5_2020": "float64", + "sum_pop_f_60_2020": "float64", + "sum_pop_f_65_2020": "float64", + "sum_pop_f_70_2020": "float64", + "sum_pop_f_75_2020": "float64", + "sum_pop_f_80_2020": "float64", + "sum_pop_m_0_2020": "float64", + "sum_pop_m_10_2020": "float64", + "sum_pop_m_15_2020": "float64", + "sum_pop_m_1_2020": "float64", + "sum_pop_m_20_2020": "float64", + "sum_pop_m_25_2020": "float64", + "sum_pop_m_30_2020": "float64", + "sum_pop_m_35_2020": "float64", + "sum_pop_m_40_2020": "float64", + "sum_pop_m_45_2020": "float64", + "sum_pop_m_50_2020": "float64", + "sum_pop_m_55_2020": "float64", + "sum_pop_m_5_2020": "float64", + "sum_pop_m_60_2020": "float64", + "sum_pop_m_65_2020": "float64", + "sum_pop_m_70_2020": "float64", + "sum_pop_m_75_2020": "float64", + "sum_pop_m_80_2020": "float64", + "sum_pop_f_2020": "float64", + "sum_pop_m_2020": "float64", + "sum_pop_2020": "float64", + "geometry": "geometry" + } + }, + "themes": [ + "Demographics", + "Population" + ], + "datetime": "2024-10-24T14:54:26.131129Z" + }, + "links": [ + { + "rel": "root", + "href": "../../catalog.json", + "type": "application/json", + "title": "Space2Stats Database" + }, + { + "rel": "parent", + "href": "../collection.json", + "type": "application/json", + "title": "Space2Stats Collection" + }, + { + "rel": "collection", + "href": "../collection.json", + "type": "application/json", + "title": "Space2Stats Collection" + } + ], + "assets": { + "api-docs": { + "href": "https://space2stats.ds.io/docs", + "type": "text/html", + "title": "API Documentation", + "roles": [ + "metadata" + ] + } + }, + "collection": "space2stats-collection" + } \ No newline at end of file diff --git a/space2stats_api/src/space2stats_ingest/METADATA/stac/space2stats/space2stats_population_2020/space2stats_reupload_test.json b/space2stats_api/src/space2stats_ingest/METADATA/stac/space2stats/space2stats_population_2020/space2stats_reupload_test.json new file mode 100644 index 0000000..a527395 --- /dev/null +++ b/space2stats_api/src/space2stats_ingest/METADATA/stac/space2stats/space2stats_population_2020/space2stats_reupload_test.json @@ -0,0 +1,30 @@ +{ + "type": "Feature", + "stac_version": "1.0.0", + "id": "space2stats_test", + "properties": { + "table:columns": [ + { + "name": "hex_id", + "type": "string", + "description": "Unique identifier for hexagonal regions." + }, + { + "name": "test_column", + "type": "float", + "description": "Random test values for validation of re-upload capabilities." 
diff --git a/space2stats_api/src/space2stats_ingest/cli.py b/space2stats_api/src/space2stats_ingest/cli.py
index c23a838..85934a7 100644
--- a/space2stats_api/src/space2stats_ingest/cli.py
+++ b/space2stats_api/src/space2stats_ingest/cli.py
@@ -2,7 +2,7 @@
 
 import typer
 
-from .main import download_parquet_from_s3, load_parquet_to_db
+from .main import load_parquet_to_db
 
 app = typer.Typer()
 
@@ -22,52 +22,17 @@ def wrapper(*args, **kwargs):
     return wrapper
 
 
-@app.command()
-@handle_errors
-def download(s3_path: str, local_path: str = typer.Option("local.parquet")):
-    """
-    Download a Parquet file from an S3 bucket.
-    """
-    typer.echo(f"Starting download from S3: {s3_path}")
-    download_parquet_from_s3(s3_path, local_path)
-    typer.echo(f"Download complete: {local_path}")
-
-
 @app.command()
 @handle_errors
 def load(
     connection_string: str,
-    stac_catalog_path: str,  # Add the STAC metadata file path as an argument
-    parquet_file: str = typer.Option("local.parquet"),
+    stac_item_path: str,  # Path to the STAC item describing the Parquet file
+    parquet_file: str,
     chunksize: int = 64_000,
 ):
     """
     Load a Parquet file into a PostgreSQL database after verifying columns with the STAC metadata.
     """
     typer.echo(f"Loading data into PostgreSQL database from {parquet_file}")
-    load_parquet_to_db(parquet_file, connection_string, stac_catalog_path, chunksize)
+    load_parquet_to_db(parquet_file, connection_string, stac_item_path, chunksize)
    typer.echo("Data loaded successfully to PostgreSQL!")
-
-
-@app.command()
-@handle_errors
-def download_and_load(
-    s3_path: str,
-    connection_string: str,
-    stac_catalog_path: str,
-    parquet_file: str = typer.Option("local.parquet"),
-    chunksize: int = 64_000,
-):
-    """
-    Download a Parquet file from S3, verify columns with the STAC metadata, and load it into a PostgreSQL database.
-    """
-    download(
-        s3_path=s3_path,
-        local_path=parquet_file,
-    )
-    load(
-        parquet_file=parquet_file,
-        connection_string=connection_string,
-        stac_catalog_path=stac_catalog_path,  # Ensure this is passed along
-        chunksize=chunksize,
-    )
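With `download` and `download-and-load` removed, `load` is the only command left on the Typer app, so it can be invoked without a subcommand name (the updated tests rely on this). A sketch using Typer's test runner; the connection string and paths below are placeholders, not part of this change:

```python
from typer.testing import CliRunner

from space2stats_ingest.cli import app

runner = CliRunner()
result = runner.invoke(
    app,
    [
        "postgresql://username:password@localhost:5439/postgis",  # connection string
        "space2stats_population_2020.json",  # STAC item describing the file
        "local.parquet",  # Parquet file to load
    ],
)
print(result.output)
```
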
- """ +def read_parquet_file(file_path: str) -> pa.Table: + """Reads a Parquet file either from a local path or an S3 path.""" if file_path.startswith("s3://"): - # Read from S3 s3 = boto3.client("s3") bucket, key = file_path[5:].split("/", 1) with tempfile.NamedTemporaryFile() as tmp_file: s3.download_file(bucket, key, tmp_file.name) table = pq.read_table(tmp_file.name) else: - # Read from local path table = pq.read_table(file_path) - return table -def get_all_stac_fields(stac_catalog_path: str) -> Set[str]: - catalog = Catalog.from_file(stac_catalog_path) - items = catalog.get_items(recursive=True) - columns = [] - for it in items: - columns.extend([col["name"] for col in it.properties.get("table:columns")]) - print(columns) +def get_stac_fields_from_item(stac_item_path: str) -> Set[str]: + item = Item.from_file(stac_item_path) + columns = [c["name"] for c in item.properties.get("table:columns")] return set(columns) -def verify_columns(parquet_file: str, stac_catalog_path: str) -> bool: - """ - Verifies that the Parquet file columns match the STAC item metadata columns. +def validate_stac_item(stac_item_path: str) -> bool: + item = Item.from_file(stac_item_path) + try: + item.validate() + return True + except STACValidationError as e: + raise STACValidationError(f"Expected valid STAC item, error: {e}") - Args: - parquet_file (str): Path to the Parquet file. - stac_metadata_file (str): Path to the STAC item metadata JSON file. - Returns: - bool: True if the columns match, False otherwise. - """ +def verify_columns( + parquet_file: str, stac_item_path: str, connection_string: str +) -> bool: + """Verifies that the Parquet file columns match the STAC item metadata columns, + ensures that 'hex_id' column is present, and checks that new columns don't already exist in the database.""" + + # Read Parquet columns and STAC fields parquet_table = read_parquet_file(parquet_file) parquet_columns = set(parquet_table.column_names) + stac_fields = get_stac_fields_from_item(stac_item_path) - stac_fields = get_all_stac_fields(stac_catalog_path) + # Check if 'hex_id' is present in the Parquet columns + if "hex_id" not in parquet_columns: + raise ValueError("The 'hex_id' column is missing from the Parquet file.") + # Verify Parquet columns match the STAC fields if parquet_columns != stac_fields: extra_in_parquet = parquet_columns - stac_fields extra_in_stac = stac_fields - parquet_columns @@ -67,42 +62,139 @@ def verify_columns(parquet_file: str, stac_catalog_path: str) -> bool: f"Column mismatch: Extra in Parquet: {extra_in_parquet}, Extra in STAC: {extra_in_stac}" ) - return True - + # Retrieve columns already present in the main table in the database + with pg.connect(connection_string) as conn: + with conn.cursor() as cur: + cur.execute(f""" + SELECT column_name + FROM information_schema.columns + WHERE table_name = '{TABLE_NAME}' + """) + existing_columns = set(row[0] for row in cur.fetchall()) + + # Check for overlap in columns (excluding 'hex_id') + overlapping_columns = parquet_columns.intersection(existing_columns) - {"hex_id"} + if overlapping_columns: + raise ValueError( + f"Columns already exist in the database: {overlapping_columns}" + ) -def download_parquet_from_s3(s3_path: str, local_path: str): - """ - Downloads a Parquet file from an S3 bucket and saves it locally. 
- """ - s3 = boto3.client("s3") + return True - # Split the S3 path into bucket and key - if s3_path.startswith("s3://"): - s3_path = s3_path[5:] - bucket, key = s3_path.split("/", 1) - s3.download_file(bucket, key, local_path) +def merge_tables(db_table: pa.Table, parquet_table: pa.Table) -> pa.Table: + """Adds columns from the Parquet table to the database table in memory.""" + for column in parquet_table.column_names: + if column != "hex_id": # Exclude hex_id to prevent duplicates + db_table = db_table.append_column(column, parquet_table[column]) + return db_table def load_parquet_to_db( parquet_file: str, connection_string: str, - stac_catalog_path: str, + stac_item_path: str, chunksize: int = 64_000, ): - # Verify column consistency between Parquet file and STAC metadata - if not verify_columns(parquet_file, stac_catalog_path): - raise ValueError("Column mismatch between Parquet file and STAC metadata") - - table = pq.read_table(parquet_file) - with ( - pg.connect(connection_string) as conn, - conn.cursor() as cur, - tqdm(total=table.num_rows, desc="Loading to PostgreSQL", unit="rows") as pbar, - ): - cur.adbc_ingest(TABLE_NAME, table.slice(0, 0), mode="replace") - for batch in table.to_batches(max_chunksize=chunksize): - count = cur.adbc_ingest(TABLE_NAME, batch, mode="append") - pbar.update(count) - cur.execute("CREATE INDEX ON space2stats (hex_id);") + """Main function to load and update data in PostgreSQL using Arrow in replace mode.""" + validate_stac_item(stac_item_path) + verify_columns(parquet_file, stac_item_path, connection_string) + + # Check if the table already exists in the database + with pg.connect(connection_string) as conn: + with conn.cursor() as cur: + cur.execute(f"SELECT to_regclass('{TABLE_NAME}');") + table_exists = cur.fetchone()[0] is not None + + if not table_exists: + # If the table does not exist, directly ingest the Parquet file in batches + parquet_table = read_parquet_file(parquet_file) + + with pg.connect(connection_string) as conn, tqdm( + total=parquet_table.num_rows, desc="Ingesting Data", unit="rows" + ) as pbar: + with conn.cursor() as cur: + # Create an empty table with the same schema + cur.adbc_ingest(TABLE_NAME, parquet_table.slice(0, 0), mode="replace") + + for batch in parquet_table.to_batches(max_chunksize=chunksize): + cur.adbc_ingest(TABLE_NAME, batch, mode="append") + pbar.update(batch.num_rows) + + # Create an index on hex_id for future joins + print("Creating index") + cur.execute( + f"CREATE INDEX idx_{TABLE_NAME}_hex_id ON {TABLE_NAME} (hex_id)" + ) + conn.commit() + return + + # Load Parquet file into a temporary table + parquet_table = read_parquet_file(parquet_file) + temp_table = f"{TABLE_NAME}_temp" + with pg.connect(connection_string) as conn, tqdm( + total=parquet_table.num_rows, desc="Ingesting Temporary Table", unit="rows" + ) as pbar: + with conn.cursor() as cur: + cur.adbc_ingest(temp_table, parquet_table.slice(0, 0), mode="replace") + + for batch in parquet_table.to_batches(max_chunksize=chunksize): + cur.adbc_ingest(temp_table, batch, mode="append") + pbar.update(batch.num_rows) + + conn.commit() + + # Fetch columns to add to the main table + with pg.connect(connection_string) as conn: + with conn.cursor() as cur: + cur.execute(f""" + SELECT column_name, data_type + FROM information_schema.columns + WHERE table_name = '{temp_table}' + AND column_name NOT IN ( + SELECT column_name FROM information_schema.columns WHERE table_name = '{TABLE_NAME}' + ) + """) + new_columns = cur.fetchall() + + # Add new columns and attempt 
to update in a transaction + try: + with pg.connect(connection_string) as conn: + with conn.cursor() as cur: + # Add new columns to the main table + for column, column_type in new_columns: + cur.execute( + f"ALTER TABLE {TABLE_NAME} ADD COLUMN IF NOT EXISTS {column} {column_type}" + ) + + print(f"Adding new columns: {[c[0] for c in new_columns]}...") + + # Construct the SET clause for the update query + update_columns = [ + f"{column} = temp.{column}" for column, _ in new_columns + ] + set_clause = ", ".join(update_columns) + + # Update TABLE_NAME with data from temp_table based on matching hex_id + print( + "Adding columns to dataset... All or nothing operation may take some time." + ) + cur.execute(f""" + UPDATE {TABLE_NAME} AS main + SET {set_clause} + FROM {temp_table} AS temp + WHERE main.hex_id = temp.hex_id + """) + + conn.commit() # Commit transaction if all operations succeed + except Exception as e: + # Rollback if any error occurs during the update + print("An error occurred during update. Rolling back changes.") + conn.rollback() + raise e # Re-raise the exception to alert calling code + + # Drop the temporary table + with pg.connect(connection_string) as conn: + with conn.cursor() as cur: + cur.execute(f"DROP TABLE IF EXISTS {temp_table}") conn.commit() diff --git a/space2stats_api/src/tests/conftest.py b/space2stats_api/src/tests/conftest.py index 81d9277..8a6aabe 100644 --- a/space2stats_api/src/tests/conftest.py +++ b/space2stats_api/src/tests/conftest.py @@ -29,6 +29,19 @@ def aws_credentials(): os.environ["AWS_DEFAULT_REGION"] = "us-east-1" +@pytest.fixture(scope="function") +def clean_database(postgresql_proc): + with DatabaseJanitor( + user=postgresql_proc.user, + host=postgresql_proc.host, + port=postgresql_proc.port, + dbname="cleantestdb", + version=postgresql_proc.version, + password="password", + ) as jan: + yield jan + + @pytest.fixture(scope="function") def database(postgresql_proc): """Set up a PostgreSQL database for testing and clean up afterwards.""" @@ -111,29 +124,3 @@ def aoi_example(): }, properties={}, ) - - -@pytest.fixture -def stac_catalog_path(): - return "./space2stats_ingest/METADATA/stac/catalog.json" - - -@pytest.fixture -def stac_file_path(): - current_dir = os.path.dirname(os.path.abspath(__file__)) - root_dir = os.path.abspath(os.path.join(current_dir, "../../..")) - json_file_path = os.path.join( - root_dir, - "space2stats_api/src/space2stats_ingest/METADATA/stac/space2stats-collection/space2stats_population_2020/space2stats_population_2020.json", - ) - return json_file_path - - -@pytest.fixture -def types_json_file_path(): - current_dir = os.path.dirname(os.path.abspath(__file__)) - root_dir = os.path.abspath(os.path.join(current_dir, "../../..")) - types_json_file_path = os.path.join( - root_dir, "space2stats_api/src/space2stats_ingest/METADATA/types.json" - ) - return types_json_file_path diff --git a/space2stats_api/src/tests/metadata_tests/test_stac_columns.py b/space2stats_api/src/tests/metadata_tests/test_stac_columns.py deleted file mode 100644 index 303790f..0000000 --- a/space2stats_api/src/tests/metadata_tests/test_stac_columns.py +++ /dev/null @@ -1,30 +0,0 @@ -import json - - -def test_stac_columns_vs_types_json(stac_file_path, types_json_file_path): - # Load the expected column types from the types JSON file - with open(types_json_file_path, "r") as f: - expected_columns = json.load(f) - - # Load the STAC item from the JSON file - with open(stac_file_path, "r") as f: - stac_item = json.load(f) - - # Extract column names and 
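Taken together, `load_parquet_to_db` now behaves differently on first load versus re-upload: the first call creates and indexes the table, while later calls stage the file in `space2stats_temp` and join new columns onto the main table via `hex_id`. A hypothetical two-step session (file names and connection string are placeholders, not part of this change):

```python
from space2stats_ingest.main import load_parquet_to_db

conn = "postgresql://username:password@localhost:5439/postgis"

# First call: the table does not exist yet, so the Parquet file is
# ingested directly and an index is created on hex_id.
load_parquet_to_db("space2stats.parquet", conn, "space2stats_population_2020.json")

# Second call: the table exists, so only the new columns are added,
# matched row-by-row on hex_id.
load_parquet_to_db("space2stats_test.parquet", conn, "space2stats_reupload_test.json")
```

diff --git a/space2stats_api/src/tests/conftest.py b/space2stats_api/src/tests/conftest.py
index 81d9277..8a6aabe 100644
--- a/space2stats_api/src/tests/conftest.py
+++ b/space2stats_api/src/tests/conftest.py
@@ -29,6 +29,19 @@ def aws_credentials():
     os.environ["AWS_DEFAULT_REGION"] = "us-east-1"
 
 
+@pytest.fixture(scope="function")
+def clean_database(postgresql_proc):
+    with DatabaseJanitor(
+        user=postgresql_proc.user,
+        host=postgresql_proc.host,
+        port=postgresql_proc.port,
+        dbname="cleantestdb",
+        version=postgresql_proc.version,
+        password="password",
+    ) as jan:
+        yield jan
+
+
 @pytest.fixture(scope="function")
 def database(postgresql_proc):
     """Set up a PostgreSQL database for testing and clean up afterwards."""
@@ -111,29 +124,3 @@ def aoi_example():
         },
         properties={},
     )
-
-
-@pytest.fixture
-def stac_catalog_path():
-    return "./space2stats_ingest/METADATA/stac/catalog.json"
-
-
-@pytest.fixture
-def stac_file_path():
-    current_dir = os.path.dirname(os.path.abspath(__file__))
-    root_dir = os.path.abspath(os.path.join(current_dir, "../../.."))
-    json_file_path = os.path.join(
-        root_dir,
-        "space2stats_api/src/space2stats_ingest/METADATA/stac/space2stats-collection/space2stats_population_2020/space2stats_population_2020.json",
-    )
-    return json_file_path
-
-
-@pytest.fixture
-def types_json_file_path():
-    current_dir = os.path.dirname(os.path.abspath(__file__))
-    root_dir = os.path.abspath(os.path.join(current_dir, "../../.."))
-    types_json_file_path = os.path.join(
-        root_dir, "space2stats_api/src/space2stats_ingest/METADATA/types.json"
-    )
-    return types_json_file_path
diff --git a/space2stats_api/src/tests/metadata_tests/test_stac_columns.py b/space2stats_api/src/tests/metadata_tests/test_stac_columns.py
deleted file mode 100644
index 303790f..0000000
--- a/space2stats_api/src/tests/metadata_tests/test_stac_columns.py
+++ /dev/null
@@ -1,30 +0,0 @@
-import json
-
-
-def test_stac_columns_vs_types_json(stac_file_path, types_json_file_path):
-    # Load the expected column types from the types JSON file
-    with open(types_json_file_path, "r") as f:
-        expected_columns = json.load(f)
-
-    # Load the STAC item from the JSON file
-    with open(stac_file_path, "r") as f:
-        stac_item = json.load(f)
-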
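To see what the overlap check in `verify_columns` compares against, one can query `information_schema` directly; an illustrative snippet with a placeholder connection string:

```python
# Lists the columns currently in the space2stats table, the same query
# verify_columns uses to reject duplicate column uploads.
import psycopg

connection_string = "postgresql://username:password@localhost:5439/postgis"

with psycopg.connect(connection_string) as conn:
    with conn.cursor() as cur:
        cur.execute(
            "SELECT column_name FROM information_schema.columns "
            "WHERE table_name = 'space2stats'"
        )
        print([row[0] for row in cur.fetchall()])
```
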
-    # Extract column names and types from the STAC item
-    stac_columns = {
-        col["name"]: col["type"] for col in stac_item["properties"]["table:columns"]
-    }
-
-    # Assert that the number of columns in the STAC file matches the number of columns in the types JSON file
-    assert (
-        len(stac_columns) == len(expected_columns)
-    ), f"Mismatch in column count: STAC ({len(stac_columns)}) vs JSON ({len(expected_columns)})"
-
-    # Assert that column names and types match
-    for column_name, column_type in expected_columns.items():
-        assert (
-            column_name in stac_columns
-        ), f"Column {column_name} is missing in the STAC file"
-        assert (
-            stac_columns[column_name] == column_type
-        ), f"Mismatch in column type for {column_name}: STAC ({stac_columns[column_name]}) vs JSON ({column_type})"
diff --git a/space2stats_api/src/tests/test_ingest.py b/space2stats_api/src/tests/test_ingest.py
index 03a65d8..4b99f2d 100644
--- a/space2stats_api/src/tests/test_ingest.py
+++ b/space2stats_api/src/tests/test_ingest.py
@@ -1,45 +1,64 @@
 import json
-import os
 
 import psycopg
 import pyarrow as pa
 import pyarrow.parquet as pq
 
-from space2stats_ingest.main import (
-    download_parquet_from_s3,
-    get_all_stac_fields,
-    load_parquet_to_db,
-)
+from space2stats_ingest.main import load_parquet_to_db
 
 
-def test_get_all_stac_fields(stac_catalog_path):
-    print(stac_catalog_path)
-    fields = get_all_stac_fields(stac_catalog_path)
-    print(fields)
-    assert (
-        len(fields) > 0 and len(fields) < 100
-    ), f"Fields have unexpected length: {fields}"
+def test_load_parquet_to_db(clean_database, tmpdir):
+    connection_string = f"postgresql://{clean_database.user}:{clean_database.password}@{clean_database.host}:{clean_database.port}/{clean_database.dbname}"
+    parquet_file = tmpdir.join("local.parquet")
+    item_file = tmpdir.join("space2stats_population_2020.json")
 
-def test_download_parquet_from_s3(s3_mock):
-    s3_path = "s3://mybucket/myfile.parquet"
-    parquet_file = "local.parquet"
+    data = {
+        "hex_id": ["hex_1", "hex_2"],
+        "sum_pop_f_10_2020": [100, 200],
+        "sum_pop_m_10_2020": [150, 250],
+    }
 
-    s3_mock.put_object(
-        Bucket="mybucket", Key="myfile.parquet", Body=b"mock_parquet_data"
-    )
+    table = pa.table(data)
+    pq.write_table(table, parquet_file)
+
+    stac_item = {
+        "type": "Feature",
+        "stac_version": "1.0.0",
+        "id": "space2stats_population_2020",
+        "properties": {
+            "table:columns": [
+                {"name": "hex_id", "type": "string"},
+                {"name": "sum_pop_f_10_2020", "type": "int64"},
+                {"name": "sum_pop_m_10_2020", "type": "int64"},
+            ],
+            "datetime": "2024-10-07T11:21:25.944150Z",
+        },
+        "geometry": None,
+        "bbox": [-180, -90, 180, 90],
+        "links": [],
+        "assets": {},
+    }
 
-    download_parquet_from_s3(s3_path, parquet_file)
+    with open(item_file, "w") as f:
+        json.dump(stac_item, f)
+
+    load_parquet_to_db(str(parquet_file), connection_string, str(item_file))
+
+    with psycopg.connect(connection_string) as conn:
+        with conn.cursor() as cur:
+            cur.execute("SELECT * FROM space2stats WHERE hex_id = 'hex_1'")
+            result = cur.fetchone()
+            assert result == ("hex_1", 100, 150)
 
-    assert os.path.exists(parquet_file)
+            cur.execute("SELECT * FROM space2stats WHERE hex_id = 'hex_2'")
+            result = cur.fetchone()
+            assert result == ("hex_2", 200, 250)
 
-def test_load_parquet_to_db(database, tmpdir):
-    connection_string = f"postgresql://{database.user}:{database.password}@{database.host}:{database.port}/{database.dbname}"
+def test_updating_table(clean_database, tmpdir):
+    connection_string = f"postgresql://{clean_database.user}:{clean_database.password}@{clean_database.host}:{clean_database.port}/{clean_database.dbname}"
     parquet_file = tmpdir.join("local.parquet")
-
-    catalog_file = tmpdir.join("catalog.json")
-    collection_file = tmpdir.join("collection.json")
     item_file = tmpdir.join("space2stats_population_2020.json")
 
     data = {
f"postgresql://{clean_database.user}:{clean_database.password}@{clean_database.host}:{clean_database.port}/{clean_database.dbname}" parquet_file = tmpdir.join("local.parquet") - - catalog_file = tmpdir.join("catalog.json") - collection_file = tmpdir.join("collection.json") item_file = tmpdir.join("space2stats_population_2020.json") data = { @@ -72,44 +91,206 @@ def test_load_parquet_to_db(database, tmpdir): with open(item_file, "w") as f: json.dump(stac_item, f) - stac_collection = { - "type": "Collection", + load_parquet_to_db(str(parquet_file), connection_string, str(item_file)) + + update_item_file = tmpdir.join("space2stats_population_2020.json") + update_parquet_file = tmpdir.join("update_local_parquet.json") + update_data = { + "hex_id": ["hex_1", "hex_2"], + "nighttime_lights": [10_000, 20_000], + } + update_table = pa.table(update_data) + pq.write_table(update_table, update_parquet_file) + + update_stac_item = { + "type": "Feature", "stac_version": "1.0.0", - "id": "space2stats-collection", - "description": "Test collection for Space2Stats.", - "license": "CC-BY-4.0", - "extent": { - "spatial": {"bbox": [[-180, -90, 180, 90]]}, - "temporal": {"interval": [["2020-01-01T00:00:00Z", None]]}, + "id": "space2stats_nighttime_lights_2020", + "properties": { + "table:columns": [ + {"name": "hex_id", "type": "string"}, + {"name": "nighttime_lights", "type": "int64"}, + ], + "datetime": "2024-10-07T11:21:25.944150Z", }, - "links": [{"rel": "item", "href": str(item_file), "type": "application/json"}], + "geometry": None, + "bbox": [-180, -90, 180, 90], + "links": [], + "assets": {}, } - with open(collection_file, "w") as f: - json.dump(stac_collection, f) + with open(update_item_file, "w") as f: + json.dump(update_stac_item, f) + + load_parquet_to_db( + str(update_parquet_file), connection_string, str(update_item_file) + ) + + with psycopg.connect(connection_string) as conn: + with conn.cursor() as cur: + cur.execute("SELECT * FROM space2stats WHERE hex_id = 'hex_1'") + result = cur.fetchone() + assert result == ("hex_1", 100, 150, 10_000) + + cur.execute("SELECT * FROM space2stats WHERE hex_id = 'hex_2'") + result = cur.fetchone() + assert result == ("hex_2", 200, 250, 20_000) + - stac_catalog = { - "type": "Catalog", +def test_columns_already_exist_in_db(clean_database, tmpdir): + connection_string = f"postgresql://{clean_database.user}:{clean_database.password}@{clean_database.host}:{clean_database.port}/{clean_database.dbname}" + + parquet_file = tmpdir.join("local.parquet") + data = { + "hex_id": ["hex_1", "hex_2"], + "existing_column": [123, 456], # Simulates an existing column in DB + "new_column": [789, 1011], + } + table = pa.table(data) + pq.write_table(table, parquet_file) + + stac_item = { + "type": "Feature", "stac_version": "1.0.0", - "id": "space2stats-catalog", - "description": "Test catalog for Space2Stats.", - "license": "CC-BY-4.0", - "links": [ - {"rel": "child", "href": str(collection_file), "type": "application/json"} - ], + "id": "space2stats_population_2020", + "properties": { + "table:columns": [ + {"name": "hex_id", "type": "string"}, + {"name": "existing_column", "type": "int64"}, + {"name": "new_column", "type": "int64"}, + ], + "datetime": "2024-10-07T11:21:25.944150Z", + }, + "geometry": None, + "bbox": [-180, -90, 180, 90], + "links": [], + "assets": {}, } - with open(catalog_file, "w") as f: - json.dump(stac_catalog, f) + item_file = tmpdir.join("space2stats_population_2020.json") + with open(item_file, "w") as f: + json.dump(stac_item, f) - 
+
+def test_rollback_on_update_failure(clean_database, tmpdir):
+    connection_string = f"postgresql://{clean_database.user}:{clean_database.password}@{clean_database.host}:{clean_database.port}/{clean_database.dbname}"
+
+    parquet_file = tmpdir.join("local.parquet")
+    data = {
+        "hex_id": ["hex_1", "hex_2"],
+        "sum_pop_f_10_2020": [100, 200],
+        "sum_pop_m_10_2020": [150, 250],
+    }
+    table = pa.table(data)
+    pq.write_table(table, parquet_file)
+
+    stac_item = {
+        "type": "Feature",
+        "stac_version": "1.0.0",
+        "id": "space2stats_population_2020",
+        "properties": {
+            "table:columns": [
+                {"name": "hex_id", "type": "string"},
+                {"name": "sum_pop_f_10_2020", "type": "int64"},
+                {"name": "sum_pop_m_10_2020", "type": "int64"},
+            ],
+            "datetime": "2024-10-07T11:21:25.944150Z",
+        },
+        "geometry": None,
+        "bbox": [-180, -90, 180, 90],
+        "links": [],
+        "assets": {},
+    }
+
+    item_file = tmpdir.join("space2stats_population_2020.json")
+    with open(item_file, "w") as f:
+        json.dump(stac_item, f)
+
+    load_parquet_to_db(str(parquet_file), connection_string, str(item_file))
+
+    # Invalid Parquet without `hex_id`
+    update_parquet_file = tmpdir.join("update_local.parquet")
+    update_data = {
+        "new_column": [1000, 2000],
+    }
+    update_table = pa.table(update_data)
+    pq.write_table(update_table, update_parquet_file)
+
+    update_item_file = tmpdir.join("update_item.json")
+    update_stac_item = {
+        "type": "Feature",
+        "stac_version": "1.0.0",
+        "id": "space2stats_population_2021",
+        "properties": {
+            "table:columns": [{"name": "new_column", "type": "int64"}],
+            "datetime": "2024-10-07T11:21:25.944150Z",
+        },
+        "geometry": None,
+        "bbox": [-180, -90, 180, 90],
+        "links": [],
+        "assets": {},
+    }
+
+    with open(update_item_file, "w") as f:
+        json.dump(update_stac_item, f)
+
+    try:
+        load_parquet_to_db(
+            str(update_parquet_file), connection_string, str(update_item_file)
+        )
+    except ValueError:
+        pass
+
+    with psycopg.connect(connection_string) as conn:
+        with conn.cursor() as cur:
+            cur.execute(
+                "SELECT column_name FROM information_schema.columns WHERE table_name = 'space2stats'"
+            )
+            columns = [row[0] for row in cur.fetchall()]
+            assert "new_column" not in columns  # Verify no unwanted columns were added
"assets": {}, + } + + item_file = tmpdir.join("space2stats_population_2020.json") + with open(item_file, "w") as f: + json.dump(stac_item, f) + + try: + load_parquet_to_db(str(parquet_file), connection_string, str(item_file)) + except ValueError as e: + assert "The 'hex_id' column is missing from the Parquet file." in str(e) diff --git a/space2stats_api/src/tests/test_ingest_cli.py b/space2stats_api/src/tests/test_ingest_cli.py index 8b77fee..2e146e3 100644 --- a/space2stats_api/src/tests/test_ingest_cli.py +++ b/space2stats_api/src/tests/test_ingest_cli.py @@ -1,5 +1,4 @@ import json -import os import pyarrow as pa import pyarrow.parquet as pq @@ -64,27 +63,8 @@ def create_stac_catalog(catalog_file, collection_file): json.dump(stac_catalog, f) -def test_download_command(tmpdir, s3_mock): - s3_path = "s3://mybucket/myfile.parquet" - parquet_file = tmpdir.join("local.parquet") - - s3_mock.put_object( - Bucket="mybucket", Key="myfile.parquet", Body=b"mock_parquet_data" - ) - - result = runner.invoke( - app, ["download", s3_path, "--local-path", str(parquet_file)] - ) - print(result.output) - - assert result.exit_code == 0 - assert "Starting download from S3" in result.stdout - assert "Download complete" in result.stdout - assert os.path.exists(parquet_file) - - -def test_load_command(tmpdir, database): - connection_string = f"postgresql://{database.user}:{database.password}@{database.host}:{database.port}/{database.dbname}" +def test_load_command(tmpdir, clean_database): + connection_string = f"postgresql://{clean_database.user}:{clean_database.password}@{clean_database.host}:{clean_database.port}/{clean_database.dbname}" parquet_file = tmpdir.join("local.parquet") catalog_file = tmpdir.join("catalog.json") collection_file = tmpdir.join("collection.json") @@ -102,10 +82,8 @@ def test_load_command(tmpdir, database): result = runner.invoke( app, [ - "load", connection_string, - str(catalog_file), - "--parquet-file", + str(item_file), str(parquet_file), ], ) @@ -115,69 +93,30 @@ def test_load_command(tmpdir, database): assert "Loading data into PostgreSQL" in result.stdout -def test_load_command_column_mismatch(tmpdir, database): - connection_string = f"postgresql://{database.user}:{database.password}@{database.host}:{database.port}/{database.dbname}" +def test_load_command_column_mismatch(tmpdir, clean_database): + connection_string = f"postgresql://{clean_database.user}:{clean_database.password}@{clean_database.host}:{clean_database.port}/{clean_database.dbname}" parquet_file = tmpdir.join("local.parquet") catalog_file = tmpdir.join("catalog.json") collection_file = tmpdir.join("collection.json") item_file = tmpdir.join("space2stats_population_2020.json") - create_mock_parquet_file(parquet_file, [("different_column", pa.float64())]) - - create_stac_item(item_file, [("mock_column", "float64")]) - - create_stac_collection(collection_file, item_file) - create_stac_catalog(catalog_file, collection_file) - - result = runner.invoke( - app, - [ - "load", - connection_string, - str(catalog_file), - "--parquet-file", - str(parquet_file), - ], - ) - print(result.output) - - assert result.exit_code != 0 - assert "Column mismatch" in result.stdout - - -def test_download_and_load_command(tmpdir, database, s3_mock): - s3_path = "s3://mybucket/myfile.parquet" - parquet_file = tmpdir.join("local.parquet") - catalog_file = tmpdir.join("catalog.json") - collection_file = tmpdir.join("collection.json") - item_file = tmpdir.join("space2stats_population_2020.json") - connection_string = 
f"postgresql://{database.user}:{database.password}@{database.host}:{database.port}/{database.dbname}" - create_mock_parquet_file( - parquet_file, [("hex_id", pa.string()), ("mock_column", pa.float64())] + parquet_file, [("hex_id", pa.string()), ("different_column", pa.float64())] ) - create_stac_item(item_file, [("hex_id", "string"), ("mock_column", "float64")]) create_stac_collection(collection_file, item_file) create_stac_catalog(catalog_file, collection_file) - with open(parquet_file, "rb") as f: - s3_mock.put_object(Bucket="mybucket", Key="myfile.parquet", Body=f.read()) - result = runner.invoke( app, [ - "download-and-load", - s3_path, connection_string, - str(catalog_file), - "--parquet-file", + str(item_file), str(parquet_file), ], ) print(result.output) - assert result.exit_code == 0 - assert "Starting download from S3" in result.stdout - assert "Loading data into PostgreSQL" in result.stdout + assert result.exit_code != 0 + assert "Column mismatch" in result.stdout