diff --git a/README.md b/README.md index eb94209..c7e0844 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,34 @@ # Space2Stats -Consistent, comparable, authoritative data describing sub-national variation is a constant point of complication for World Bank teams, our development partners, and client countries when assessing and investigating economic issues and national policy. This project will focus on creating and disseminating such data through aggregation of geospatial information at standard administrative divisions, and through the attribution of household survey data with foundational geospatial variables. \ No newline at end of file +Consistent, comparable, authoritative data describing sub-national variation is a constant point of complication for World Bank teams, our development partners, and client countries when assessing and investigating economic issues and national policy. This project will focus on creating and disseminating such data through aggregation of geospatial information at standard administrative divisions, and through the attribution of household survey data with foundational geospatial variables. + +## Getting Started Locally + +- Set up the database: +``` +docker-compose up -d +``` + +- Create a `db.env` file: +```.env +DB_HOST=localhost +DB_PORT=5439 +DB_NAME=postgis +DB_USER=username +DB_PASSWORD=password +DB_TABLE_NAME=space2stats +``` + +- Load our dataset into the database: +``` +./postgres/download_parquet.sh +python postgres/chunk_parquet.py +./postgres/load_parquet_chunks.sh +``` + +> You can get started with a subset of data for NYC with `./load_nyc_sample.sh`, which requires changing your `db.env` value for `DB_TABLE_NAME` to `space2stats_nyc_sample`. + +- Access your data using the Space2Stats API! See the [example notebook](notebooks/space2stats_api_demo.ipynb). 
+ + + diff --git a/notebooks/space2stats_api_demo.ipynb b/notebooks/space2stats_api_demo.ipynb new file mode 100644 index 0000000..f9cdb5c --- /dev/null +++ b/notebooks/space2stats_api_demo.ipynb @@ -0,0 +1,527 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "from typing import Dict\n", + "\n", + "import requests\n", + "import pandas as pd\n", + "import geopandas as gpd\n", + "import h3\n", + "from shapely.geometry import Point\n", + "\n", + "from lonboard.colormap import apply_continuous_cmap\n", + "from lonboard import Map, ScatterplotLayer\n", + "from palettable.cartocolors.sequential import BurgYl_2\n", + "from geojson_pydantic import Feature, Polygon" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "BASE_URL = \"http://localhost:8000\"\n", + "FIELDS_ENDPOINT = f\"{BASE_URL}/fields\"\n", + "SUMMARY_ENDPOINT = f\"{BASE_URL}/summary\"" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Available Fields: ['sum_pop_2020', 'ogc_fid', 'sum_pop_f_0_2020', 'sum_pop_f_10_2020', 'sum_pop_f_15_2020', 'sum_pop_f_1_2020', 'sum_pop_f_20_2020', 'sum_pop_f_25_2020', 'sum_pop_f_30_2020', 'sum_pop_f_35_2020', 'sum_pop_f_40_2020', 'sum_pop_f_45_2020', 'sum_pop_f_50_2020', 'sum_pop_f_55_2020', 'sum_pop_f_5_2020', 'sum_pop_f_60_2020', 'sum_pop_f_65_2020', 'sum_pop_f_70_2020', 'sum_pop_f_75_2020', 'sum_pop_f_80_2020', 'sum_pop_m_0_2020', 'sum_pop_m_10_2020', 'sum_pop_m_15_2020', 'sum_pop_m_1_2020', 'sum_pop_m_20_2020', 'sum_pop_m_25_2020', 'sum_pop_m_30_2020', 'sum_pop_m_35_2020', 'sum_pop_m_40_2020', 'sum_pop_m_45_2020', 'sum_pop_m_50_2020', 'sum_pop_m_55_2020', 'sum_pop_m_5_2020', 'sum_pop_m_60_2020', 'sum_pop_m_65_2020', 'sum_pop_m_70_2020', 'sum_pop_m_75_2020', 'sum_pop_m_80_2020', 'sum_pop_m_2020', 'sum_pop_f_2020']\n" + ] + } + ], + 
"source": [ + "response = requests.get(FIELDS_ENDPOINT)\n", + "if response.status_code != 200:\n", + " raise Exception(f\"Failed to get fields: {response.text}\")\n", + "\n", + "available_fields = response.json()\n", + "print(\"Available Fields:\", available_fields)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "AOIModel = Feature[Polygon, Dict]\n", + "\n", + "# kenya\n", + "aoi = {\n", + " \"type\": \"Feature\",\n", + " \"geometry\": {\n", + " \"type\": \"Polygon\",\n", + " \"coordinates\": [\n", + " [\n", + " [41.85508309264397, -1.68325],\n", + " [40.98105, -2.49979],\n", + " [40.993, -3.444],\n", + " [41.58513, -3.91909],\n", + " [40.88477, -4.95913],\n", + " [39.55938425876585, -4.437641590288629],\n", + " [39.25451, -3.42206],\n", + " [37.7669, -3.67712],\n", + " [37.69869, -3.09699],\n", + " [34.07262, -1.05982],\n", + " [33.90371119710453, -0.95],\n", + " [33.893568969666944, 0.109813537861896],\n", + " [34.18, 0.515],\n", + " [34.6721, 1.17694],\n", + " [35.03599, 1.90584],\n", + " [34.59607, 3.05374],\n", + " [34.47913, 3.5556],\n", + " [35.298007118232946, 4.77696566346189],\n", + " [35.817447662353516, 4.77696566346189],\n", + " [36.159078632855646, 4.447864127672769],\n", + " [36.85509323800812, 4.447864127672769],\n", + " [38.120915, 3.598605],\n", + " [38.43697, 3.58851],\n", + " [38.67114, 3.61607],\n", + " [38.89251, 3.50074],\n", + " [39.55938425876585, 3.42206],\n", + " [39.85494, 3.83879],\n", + " [40.76848, 4.25702],\n", + " [41.1718, 3.91909],\n", + " [41.85508309264397, 2.97959],\n", + " [41.58513, 2.09],\n", + " [40.993, 1.657],\n", + " [40.98105, 1.002],\n", + " [41.85508309264397, -1.68325]\n", + " ]\n", + " ]\n", + " },\n", + " \"properties\": {\n", + " \"name\": \"Kenya\"\n", + " }\n", + " }\n", + "\n", + "\n", + "feat = AOIModel(**aoi)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | hex_id | \n", + "sum_pop_2020 | \n", + "sum_pop_f_2020 | \n", + "sum_pop_m_2020 | \n", + "
---|---|---|---|---|
0 | \n", + "866a4a48fffffff | \n", + "399.860905 | \n", + "189.675539 | \n", + "210.185366 | \n", + "
1 | \n", + "866a4a497ffffff | \n", + "582.555159 | \n", + "276.337255 | \n", + "306.217904 | \n", + "
2 | \n", + "866a4a49fffffff | \n", + "749.911237 | \n", + "355.723245 | \n", + "394.187992 | \n", + "
3 | \n", + "866a4a4d7ffffff | \n", + "863.888290 | \n", + "418.309236 | \n", + "445.579054 | \n", + "
4 | \n", + "866a5820fffffff | \n", + "525.085147 | \n", + "249.076134 | \n", + "276.009012 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
16212 | \n", + "867b5dd77ffffff | \n", + "-36.000000 | \n", + "-18.000000 | \n", + "-18.000000 | \n", + "
16213 | \n", + "867b5dd87ffffff | \n", + "-36.000000 | \n", + "-18.000000 | \n", + "-18.000000 | \n", + "
16214 | \n", + "867b5dd8fffffff | \n", + "-36.000000 | \n", + "-18.000000 | \n", + "-18.000000 | \n", + "
16215 | \n", + "867b5dd9fffffff | \n", + "-36.000000 | \n", + "-18.000000 | \n", + "-18.000000 | \n", + "
16216 | \n", + "867b5ddafffffff | \n", + "-36.000000 | \n", + "-18.000000 | \n", + "-18.000000 | \n", + "
16217 rows × 4 columns
\n", + "\n", + " | hex_id | \n", + "sum_pop_2020 | \n", + "sum_pop_f_2020 | \n", + "sum_pop_m_2020 | \n", + "geometry | \n", + "
---|---|---|---|---|---|
0 | \n", + "866a4a48fffffff | \n", + "399.860905 | \n", + "189.675539 | \n", + "210.185366 | \n", + "POINT (35.77461 4.75647) | \n", + "
1 | \n", + "866a4a497ffffff | \n", + "582.555159 | \n", + "276.337255 | \n", + "306.217904 | \n", + "POINT (35.67197 4.74377) | \n", + "
2 | \n", + "866a4a49fffffff | \n", + "749.911237 | \n", + "355.723245 | \n", + "394.187992 | \n", + "POINT (35.72824 4.72169) | \n", + "
3 | \n", + "866a4a4d7ffffff | \n", + "863.888290 | \n", + "418.309236 | \n", + "445.579054 | \n", + "POINT (35.83087 4.73438) | \n", + "
4 | \n", + "866a5820fffffff | \n", + "525.085147 | \n", + "249.076134 | \n", + "276.009012 | \n", + "POINT (34.87996 4.14901) | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
16212 | \n", + "867b5dd77ffffff | \n", + "-36.000000 | \n", + "-18.000000 | \n", + "-18.000000 | \n", + "POINT (40.83955 -4.90064) | \n", + "
16213 | \n", + "867b5dd87ffffff | \n", + "-36.000000 | \n", + "-18.000000 | \n", + "-18.000000 | \n", + "POINT (41.02377 -4.74704) | \n", + "
16214 | \n", + "867b5dd8fffffff | \n", + "-36.000000 | \n", + "-18.000000 | \n", + "-18.000000 | \n", + "POINT (40.96649 -4.72508) | \n", + "
16215 | \n", + "867b5dd9fffffff | \n", + "-36.000000 | \n", + "-18.000000 | \n", + "-18.000000 | \n", + "POINT (41.01249 -4.68672) | \n", + "
16216 | \n", + "867b5ddafffffff | \n", + "-36.000000 | \n", + "-18.000000 | \n", + "-18.000000 | \n", + "POINT (40.97776 -4.78541) | \n", + "
16217 rows × 5 columns
\n", + "