From 3c072415b5f0fb12af169b4b092aa1f18baa2ac1 Mon Sep 17 00:00:00 2001 From: Jesus Bracho Date: Fri, 15 Nov 2024 22:36:42 -0500 Subject: [PATCH 01/18] Update package-lock.json dependencies to pass precommit hook This commit updates several nodejs dependencies to their latest versions with `npm audit fix` in order to pass the precommit hook. --- package-lock.json | 168 +++++++++++++++++++++++++--------------------- 1 file changed, 93 insertions(+), 75 deletions(-) diff --git a/package-lock.json b/package-lock.json index 2d80ad35..d71d083d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -17,20 +17,20 @@ "clsx": "^2.1.1", "framer-motion": "^11.3.8", "lodash": "^4.17.21", - "mapbox-gl": "^3.2.0", - "maplibre-gl": "^4.1.1", - "multi-range-slider-react": "^2.0.5", - "next": "^14.0.3", - "pmtiles": "^3.0.5", - "postcss": "8.4.29", - "protobufjs": "^7.2.5", - "react": "18.2.0", - "react-dom": "18.2.0", - "react-icons": "^5.0.1", - "react-map-gl": "^7.1.6", - "sharp": "^0.33.0", - "tailwindcss": "^3.4.1", - "typescript": "5.2.2" + "mapbox-gl": "3.2.0", + "maplibre-gl": "^4.5.0", + "multi-range-slider-react": "^2.0.7", + "next": "^14.2.5", + "pmtiles": "^3.0.6", + "postcss": "8.4.39", + "protobufjs": "^7.3.2", + "react": "18.3.1", + "react-dom": "18.3.1", + "react-icons": "^5.2.1", + "react-map-gl": "^7.1.7", + "sharp": "^0.33.4", + "tailwindcss": "^3.4.6", + "typescript": "5.5.3" }, "devDependencies": { "@semantic-release/changelog": "^6.0.3", @@ -40,14 +40,15 @@ "@semantic-release/npm": "^12.0.1", "@semantic-release/release-notes-generator": "^14.0.1", "@types/mapbox__mapbox-gl-geocoder": "^5.0.0", - "@types/node": "^20.11.30", - "@types/pbf": "^3.0.2", - "@types/pg": "^8.10.2", - "@types/react": "^18.2.69", - "@types/react-dom": "^18.2.22", - "@typescript-eslint/parser": "^7.1.1", - "eslint": "^8.57.0", - "eslint-config-next": "^14.2.2", + "@types/node": "^20.14.11", + "@types/pbf": "^3.0.5", + "@types/pg": "^8.11.6", + "@types/react": "^18.3.3", + "@types/react-dom": "^18.3.0", + "@typescript-eslint/eslint-plugin": "^7.16.1", + "@typescript-eslint/parser": "^7.16.1", + "eslint": "^8.56.0", + "eslint-config-next": "^14.2.5", "eslint-config-prettier": "^9.1.0", "eslint-plugin-custom-rules": "file:./eslint-plugin-custom-rules", "eslint-plugin-prettier": "^5.0.0", @@ -2011,9 +2012,9 @@ "integrity": "sha512-gRa9gwYU3ECmQYv3lslts5hxuIa90veaEcxDYuu3QGOIAEM2mOZkVHp48ANJuu1CURtRdHKUBY5Lm1tHV+sD4g==" }, "node_modules/@next/env": { - "version": "14.1.4", - "resolved": "https://registry.npmjs.org/@next/env/-/env-14.1.4.tgz", - "integrity": "sha512-e7X7bbn3Z6DWnDi75UWn+REgAbLEqxI8Tq2pkFOFAMpWAWApz/YCUhtWMWn410h8Q2fYiYL7Yg5OlxMOCfFjJQ==" + "version": "14.2.18", + "resolved": "https://registry.npmjs.org/@next/env/-/env-14.2.18.tgz", + "integrity": "sha512-2vWLOUwIPgoqMJKG6dt35fVXVhgM09tw4tK3/Q34GFXDrfiHlG7iS33VA4ggnjWxjiz9KV5xzfsQzJX6vGAekA==" }, "node_modules/@next/eslint-plugin-next": { "version": "14.2.13", @@ -2025,9 +2026,9 @@ } }, "node_modules/@next/swc-darwin-arm64": { - "version": "14.1.4", - "resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-14.1.4.tgz", - "integrity": "sha512-ubmUkbmW65nIAOmoxT1IROZdmmJMmdYvXIe8211send9ZYJu+SqxSnJM4TrPj9wmL6g9Atvj0S/2cFmMSS99jg==", + "version": "14.2.18", + "resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-14.2.18.tgz", + "integrity": "sha512-tOBlDHCjGdyLf0ube/rDUs6VtwNOajaWV+5FV/ajPgrvHeisllEdymY/oDgv2cx561+gJksfMUtqf8crug7sbA==", "cpu": [ "arm64" ], @@ -2040,9 +2041,9 @@ } }, 
"node_modules/@next/swc-darwin-x64": { - "version": "14.1.4", - "resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-14.1.4.tgz", - "integrity": "sha512-b0Xo1ELj3u7IkZWAKcJPJEhBop117U78l70nfoQGo4xUSvv0PJSTaV4U9xQBLvZlnjsYkc8RwQN1HoH/oQmLlQ==", + "version": "14.2.18", + "resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-14.2.18.tgz", + "integrity": "sha512-uJCEjutt5VeJ30jjrHV1VIHCsbMYnEqytQgvREx+DjURd/fmKy15NaVK4aR/u98S1LGTnjq35lRTnRyygglxoA==", "cpu": [ "x64" ], @@ -2055,9 +2056,9 @@ } }, "node_modules/@next/swc-linux-arm64-gnu": { - "version": "14.1.4", - "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-14.1.4.tgz", - "integrity": "sha512-457G0hcLrdYA/u1O2XkRMsDKId5VKe3uKPvrKVOyuARa6nXrdhJOOYU9hkKKyQTMru1B8qEP78IAhf/1XnVqKA==", + "version": "14.2.18", + "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-14.2.18.tgz", + "integrity": "sha512-IL6rU8vnBB+BAm6YSWZewc+qvdL1EaA+VhLQ6tlUc0xp+kkdxQrVqAnh8Zek1ccKHlTDFRyAft0e60gteYmQ4A==", "cpu": [ "arm64" ], @@ -2070,9 +2071,9 @@ } }, "node_modules/@next/swc-linux-arm64-musl": { - "version": "14.1.4", - "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-14.1.4.tgz", - "integrity": "sha512-l/kMG+z6MB+fKA9KdtyprkTQ1ihlJcBh66cf0HvqGP+rXBbOXX0dpJatjZbHeunvEHoBBS69GYQG5ry78JMy3g==", + "version": "14.2.18", + "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-14.2.18.tgz", + "integrity": "sha512-RCaENbIZqKKqTlL8KNd+AZV/yAdCsovblOpYFp0OJ7ZxgLNbV5w23CUU1G5On+0fgafrsGcW+GdMKdFjaRwyYA==", "cpu": [ "arm64" ], @@ -2085,9 +2086,9 @@ } }, "node_modules/@next/swc-linux-x64-gnu": { - "version": "14.1.4", - "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-14.1.4.tgz", - "integrity": "sha512-BapIFZ3ZRnvQ1uWbmqEGJuPT9cgLwvKtxhK/L2t4QYO7l+/DxXuIGjvp1x8rvfa/x1FFSsipERZK70pewbtJtw==", + "version": "14.2.18", + "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-14.2.18.tgz", + "integrity": "sha512-3kmv8DlyhPRCEBM1Vavn8NjyXtMeQ49ID0Olr/Sut7pgzaQTo4h01S7Z8YNE0VtbowyuAL26ibcz0ka6xCTH5g==", "cpu": [ "x64" ], @@ -2100,9 +2101,9 @@ } }, "node_modules/@next/swc-linux-x64-musl": { - "version": "14.1.4", - "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-14.1.4.tgz", - "integrity": "sha512-mqVxTwk4XuBl49qn2A5UmzFImoL1iLm0KQQwtdRJRKl21ylQwwGCxJtIYo2rbfkZHoSKlh/YgztY0qH3wG1xIg==", + "version": "14.2.18", + "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-14.2.18.tgz", + "integrity": "sha512-mliTfa8seVSpTbVEcKEXGjC18+TDII8ykW4a36au97spm9XMPqQTpdGPNBJ9RySSFw9/hLuaCMByluQIAnkzlw==", "cpu": [ "x64" ], @@ -2115,9 +2116,9 @@ } }, "node_modules/@next/swc-win32-arm64-msvc": { - "version": "14.1.4", - "resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-14.1.4.tgz", - "integrity": "sha512-xzxF4ErcumXjO2Pvg/wVGrtr9QQJLk3IyQX1ddAC/fi6/5jZCZ9xpuL9Tzc4KPWMFq8GGWFVDMshZOdHGdkvag==", + "version": "14.2.18", + "resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-14.2.18.tgz", + "integrity": "sha512-J5g0UFPbAjKYmqS3Cy7l2fetFmWMY9Oao32eUsBPYohts26BdrMUyfCJnZFQkX9npYaHNDOWqZ6uV9hSDPw9NA==", "cpu": [ "arm64" ], @@ -2130,9 +2131,9 @@ } }, "node_modules/@next/swc-win32-ia32-msvc": { - "version": "14.1.4", - "resolved": 
"https://registry.npmjs.org/@next/swc-win32-ia32-msvc/-/swc-win32-ia32-msvc-14.1.4.tgz", - "integrity": "sha512-WZiz8OdbkpRw6/IU/lredZWKKZopUMhcI2F+XiMAcPja0uZYdMTZQRoQ0WZcvinn9xZAidimE7tN9W5v9Yyfyw==", + "version": "14.2.18", + "resolved": "https://registry.npmjs.org/@next/swc-win32-ia32-msvc/-/swc-win32-ia32-msvc-14.2.18.tgz", + "integrity": "sha512-Ynxuk4ZgIpdcN7d16ivJdjsDG1+3hTvK24Pp8DiDmIa2+A4CfhJSEHHVndCHok6rnLUzAZD+/UOKESQgTsAZGg==", "cpu": [ "ia32" ], @@ -2145,9 +2146,9 @@ } }, "node_modules/@next/swc-win32-x64-msvc": { - "version": "14.1.4", - "resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-14.1.4.tgz", - "integrity": "sha512-4Rto21sPfw555sZ/XNLqfxDUNeLhNYGO2dlPqsnuCg8N8a2a9u1ltqBOPQ4vj1Gf7eJC0W2hHG2eYUHuiXgY2w==", + "version": "14.2.18", + "resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-14.2.18.tgz", + "integrity": "sha512-dtRGMhiU9TN5nyhwzce+7c/4CCeykYS+ipY/4mIrGzJ71+7zNo55ZxCB7cAVuNqdwtYniFNR2c9OFQ6UdFIMcg==", "cpu": [ "x64" ], @@ -5948,6 +5949,23 @@ "url": "https://github.com/sindresorhus/is?sponsor=1" } }, + "node_modules/@sindresorhus/merge-streams": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/@sindresorhus/merge-streams/-/merge-streams-2.3.0.tgz", + "integrity": "sha512-LtoMMhxAlorcGhmFYI+LhPgbPZCkgP6ra1YL604EeF6U98pLlQ3iWIGMdWSC+vWmPBWBNgmDBAhnAobLROJmwg==", + "dev": true, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/@swc/counter": { + "version": "0.1.3", + "resolved": "https://registry.npmjs.org/@swc/counter/-/counter-0.1.3.tgz", + "integrity": "sha512-e2BR4lsJkkRlKZ/qCHPw9ZaSxc0MVUd7gtbtaB7aMvHeJVYe8sOB8DBZkP2DtISHGSku9sCK6T6cnY0CtXrOCQ==" + }, "node_modules/@swc/helpers": { "version": "0.5.13", "resolved": "https://registry.npmjs.org/@swc/helpers/-/helpers-0.5.13.tgz", @@ -11302,12 +11320,12 @@ "dev": true }, "node_modules/next": { - "version": "14.1.4", - "resolved": "https://registry.npmjs.org/next/-/next-14.1.4.tgz", - "integrity": "sha512-1WTaXeSrUwlz/XcnhGTY7+8eiaFvdet5z9u3V2jb+Ek1vFo0VhHKSAIJvDWfQpttWjnyw14kBeq28TPq7bTeEQ==", + "version": "14.2.18", + "resolved": "https://registry.npmjs.org/next/-/next-14.2.18.tgz", + "integrity": "sha512-H9qbjDuGivUDEnK6wa+p2XKO+iMzgVgyr9Zp/4Iv29lKa+DYaxJGjOeEA+5VOvJh/M7HLiskehInSa0cWxVXUw==", "dependencies": { - "@next/env": "14.1.4", - "@swc/helpers": "0.5.2", + "@next/env": "14.2.18", + "@swc/helpers": "0.5.5", "busboy": "1.6.0", "caniuse-lite": "^1.0.30001579", "graceful-fs": "^4.2.11", @@ -11321,15 +11339,15 @@ "node": ">=18.17.0" }, "optionalDependencies": { - "@next/swc-darwin-arm64": "14.1.4", - "@next/swc-darwin-x64": "14.1.4", - "@next/swc-linux-arm64-gnu": "14.1.4", - "@next/swc-linux-arm64-musl": "14.1.4", - "@next/swc-linux-x64-gnu": "14.1.4", - "@next/swc-linux-x64-musl": "14.1.4", - "@next/swc-win32-arm64-msvc": "14.1.4", - "@next/swc-win32-ia32-msvc": "14.1.4", - "@next/swc-win32-x64-msvc": "14.1.4" + "@next/swc-darwin-arm64": "14.2.18", + "@next/swc-darwin-x64": "14.2.18", + "@next/swc-linux-arm64-gnu": "14.2.18", + "@next/swc-linux-arm64-musl": "14.2.18", + "@next/swc-linux-x64-gnu": "14.2.18", + "@next/swc-linux-x64-musl": "14.2.18", + "@next/swc-win32-arm64-msvc": "14.2.18", + "@next/swc-win32-ia32-msvc": "14.2.18", + "@next/swc-win32-x64-msvc": "14.2.18" }, "peerDependencies": { "@opentelemetry/api": "^1.1.0", @@ -14466,9 +14484,9 @@ } }, "node_modules/picocolors": { - "version": "1.0.0", - "resolved": 
"https://registry.npmjs.org/picocolors/-/picocolors-1.0.0.tgz", - "integrity": "sha512-1fygroTLlHu66zi26VoTDv8yRgm0Fccecssto+MhsZ0D/DGW2sm8E8AjW7NU5VVTRt5GxbeZ5qBuJr+HyLYkjQ==" + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", + "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==" }, "node_modules/picomatch": { "version": "2.3.1", @@ -14588,9 +14606,9 @@ } }, "node_modules/postcss": { - "version": "8.4.29", - "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.29.tgz", - "integrity": "sha512-cbI+jaqIeu/VGqXEarWkRCCffhjgXc0qjBtXpqJhTBohMUjUQnbBr0xqX3vEKudc4iviTewcJo5ajcec5+wdJw==", + "version": "8.4.39", + "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.39.tgz", + "integrity": "sha512-0vzE+lAiG7hZl1/9I8yzKLx3aR9Xbof3fBHKunvMfOCYAtMhrsnccJY2iTURb9EZd5+pLuiNV9/c/GZJOHsgIw==", "funding": [ { "type": "opencollective", @@ -14606,9 +14624,9 @@ } ], "dependencies": { - "nanoid": "^3.3.6", - "picocolors": "^1.0.0", - "source-map-js": "^1.0.2" + "nanoid": "^3.3.7", + "picocolors": "^1.0.1", + "source-map-js": "^1.2.0" }, "engines": { "node": "^10 || ^12 || >=14" From 450e16afba70c84aa9ad7c284c4d1520edd25920 Mon Sep 17 00:00:00 2001 From: Jesus Bracho Date: Sat, 16 Nov 2024 11:46:38 -0500 Subject: [PATCH 02/18] Add @maptiler/geocoding-control dependency Included @maptiler/geocoding-control in dependencies as a replacement for @mapbox/mapbox-gl-geocoder. --- package-lock.json | 137 ++++++++++++++++++++++++++++++++++++++++++++++ package.json | 17 +++--- 2 files changed, 146 insertions(+), 8 deletions(-) diff --git a/package-lock.json b/package-lock.json index d71d083d..e48c1c18 100644 --- a/package-lock.json +++ b/package-lock.json @@ -10,6 +10,7 @@ "dependencies": { "@heroicons/react": "^2.1.5", "@mapbox/mapbox-gl-geocoder": "^5.0.2", + "@maptiler/geocoding-control": "^1.4.1", "@nextui-org/react": "^2.4.6", "@phosphor-icons/react": "^2.1.7", "@turf/centroid": "^7.0.0", @@ -2011,6 +2012,47 @@ "resolved": "https://registry.npmjs.org/tinyqueue/-/tinyqueue-3.0.0.tgz", "integrity": "sha512-gRa9gwYU3ECmQYv3lslts5hxuIa90veaEcxDYuu3QGOIAEM2mOZkVHp48ANJuu1CURtRdHKUBY5Lm1tHV+sD4g==" }, + "node_modules/@maptiler/geocoding-control": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/@maptiler/geocoding-control/-/geocoding-control-1.4.1.tgz", + "integrity": "sha512-/NMM8oaKKAdF36KbJuucJc18RaY+VpwkJ2V098yoG7H+9K7Rkyen+XKuLDA8gmvrgTeX1m48Pb9RP+e5zCrRvA==", + "dependencies": { + "@turf/bbox": "^7.1.0", + "@turf/clone": "^7.1.0", + "@turf/difference": "^7.1.0", + "@turf/flatten": "^7.1.0", + "@turf/union": "^7.1.0", + "geo-coordinates-parser": "^1.7.3" + }, + "peerDependencies": { + "@maptiler/sdk": "^1 || ^2", + "leaflet": "^1.7 || ^1.8 || ^1.9", + "maplibre-gl": "^2 || ^3 || ^4", + "ol": "^6 || ^7 || ^8 || ^9 || ^10", + "react": "^17 || ^18", + "svelte": "^4.2" + }, + "peerDependenciesMeta": { + "@maptiler/sdk": { + "optional": true + }, + "leaflet": { + "optional": true + }, + "maplibre-gl": { + "optional": true + }, + "ol": { + "optional": true + }, + "react": { + "optional": true + }, + "svelte": { + "optional": true + } + } + }, "node_modules/@next/env": { "version": "14.2.18", "resolved": "https://registry.npmjs.org/@next/env/-/env-14.2.18.tgz", @@ -5985,6 +6027,20 @@ "node": ">=10" } }, + "node_modules/@turf/bbox": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/@turf/bbox/-/bbox-7.1.0.tgz", + "integrity": 
"sha512-PdWPz9tW86PD78vSZj2fiRaB8JhUHy6piSa/QXb83lucxPK+HTAdzlDQMTKj5okRCU8Ox/25IR2ep9T8NdopRA==", + "dependencies": { + "@turf/helpers": "^7.1.0", + "@turf/meta": "^7.1.0", + "@types/geojson": "^7946.0.10", + "tslib": "^2.6.2" + }, + "funding": { + "url": "https://opencollective.com/turf" + } + }, "node_modules/@turf/centroid": { "version": "7.1.0", "resolved": "https://registry.npmjs.org/@turf/centroid/-/centroid-7.1.0.tgz", @@ -5999,6 +6055,48 @@ "url": "https://opencollective.com/turf" } }, + "node_modules/@turf/clone": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/@turf/clone/-/clone-7.1.0.tgz", + "integrity": "sha512-5R9qeWvL7FDdBIbEemd0eCzOStr09oburDvJ1hRiPCFX6rPgzcZBQ0gDmZzoF4AFcNLb5IwknbLZjVLaUGWtFA==", + "dependencies": { + "@turf/helpers": "^7.1.0", + "@types/geojson": "^7946.0.10", + "tslib": "^2.6.2" + }, + "funding": { + "url": "https://opencollective.com/turf" + } + }, + "node_modules/@turf/difference": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/@turf/difference/-/difference-7.1.0.tgz", + "integrity": "sha512-+JVzdskICQ8ULKQ9CpWUM5kBvoXxN4CO78Ez/Ki3/7NXl7+HM/nb12B0OyM8hkJchpb8TsOi0YwyJiKMqEpTBA==", + "dependencies": { + "@turf/helpers": "^7.1.0", + "@turf/meta": "^7.1.0", + "@types/geojson": "^7946.0.10", + "polygon-clipping": "^0.15.3", + "tslib": "^2.6.2" + }, + "funding": { + "url": "https://opencollective.com/turf" + } + }, + "node_modules/@turf/flatten": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/@turf/flatten/-/flatten-7.1.0.tgz", + "integrity": "sha512-Kb23pqEarcLsdBqnQcK0qTrSMiWNTVb9tOFrNlZc66DIhDLAdpOKG4eqk00CMoUzWTixlnawDgJRqcStRrR4WA==", + "dependencies": { + "@turf/helpers": "^7.1.0", + "@turf/meta": "^7.1.0", + "@types/geojson": "^7946.0.10", + "tslib": "^2.6.2" + }, + "funding": { + "url": "https://opencollective.com/turf" + } + }, "node_modules/@turf/helpers": { "version": "7.1.0", "resolved": "https://registry.npmjs.org/@turf/helpers/-/helpers-7.1.0.tgz", @@ -6023,6 +6121,21 @@ "url": "https://opencollective.com/turf" } }, + "node_modules/@turf/union": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/@turf/union/-/union-7.1.0.tgz", + "integrity": "sha512-7VI8jONdBg9qmbfNlLQycPr93l5aU9HGMgWI9M6pb4ERuU2+p8KgffCgs2NyMtP2HxPrKSybzj31g7bnbEKofQ==", + "dependencies": { + "@turf/helpers": "^7.1.0", + "@turf/meta": "^7.1.0", + "@types/geojson": "^7946.0.10", + "polygon-clipping": "^0.15.3", + "tslib": "^2.6.2" + }, + "funding": { + "url": "https://opencollective.com/turf" + } + }, "node_modules/@types/cacheable-request": { "version": "6.0.3", "resolved": "https://registry.npmjs.org/@types/cacheable-request/-/cacheable-request-6.0.3.tgz", @@ -9442,6 +9555,11 @@ "node": ">= 0.6.0" } }, + "node_modules/geo-coordinates-parser": { + "version": "1.7.4", + "resolved": "https://registry.npmjs.org/geo-coordinates-parser/-/geo-coordinates-parser-1.7.4.tgz", + "integrity": "sha512-gVGxBW+s1csexXVMf5bIwz3TH9n4sCEglOOOqmrPk8YazUI5f79jCowKjTw05m/0h1//3+Z2m/nv8IIozgZyUw==" + }, "node_modules/geojson-vt": { "version": "3.2.1", "resolved": "https://registry.npmjs.org/geojson-vt/-/geojson-vt-3.2.1.tgz", @@ -14596,6 +14714,15 @@ "fflate": "^0.8.0" } }, + "node_modules/polygon-clipping": { + "version": "0.15.7", + "resolved": "https://registry.npmjs.org/polygon-clipping/-/polygon-clipping-0.15.7.tgz", + "integrity": "sha512-nhfdr83ECBg6xtqOAJab1tbksbBAOMUltN60bU+llHVOL0e5Onm1WpAXXWXVB39L8AJFssoIhEVuy/S90MmotA==", + "dependencies": { + "robust-predicates": "^3.0.2", + "splaytree": "^3.1.0" + } + }, 
"node_modules/possible-typed-array-names": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/possible-typed-array-names/-/possible-typed-array-names-1.0.0.tgz", @@ -16311,6 +16438,11 @@ "node": "*" } }, + "node_modules/robust-predicates": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/robust-predicates/-/robust-predicates-3.0.2.tgz", + "integrity": "sha512-IXgzBWvWQwE6PrDI05OvmXUIruQTcoMDzRsOd5CDvHCVLcLHMTSYvOK5Cm46kWqlV3yAbuSpBZdJ5oP5OUoStg==" + }, "node_modules/run-parallel": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/run-parallel/-/run-parallel-1.2.0.tgz", @@ -17089,6 +17221,11 @@ "resolved": "https://registry.npmjs.org/spdx-license-ids/-/spdx-license-ids-3.0.20.tgz", "integrity": "sha512-jg25NiDV/1fLtSgEgyvVyDunvaNHbuwF9lfNV17gSmPFAlYzdfNBlLtLzXTevwkPj7DhGbmN9VnmJIgLnhvaBw==" }, + "node_modules/splaytree": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/splaytree/-/splaytree-3.1.2.tgz", + "integrity": "sha512-4OM2BJgC5UzrhVnnJA4BkHKGtjXNzzUfpQjCO8I05xYPsfS/VuQDwjCGGMi8rYQilHEV4j8NBqTFbls/PZEE7A==" + }, "node_modules/split-string": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/split-string/-/split-string-3.1.0.tgz", diff --git a/package.json b/package.json index ad430062..39292210 100644 --- a/package.json +++ b/package.json @@ -14,6 +14,7 @@ "dependencies": { "@heroicons/react": "^2.1.5", "@mapbox/mapbox-gl-geocoder": "^5.0.2", + "@maptiler/geocoding-control": "^1.4.1", "@nextui-org/react": "^2.4.6", "@phosphor-icons/react": "^2.1.7", "@turf/centroid": "^7.0.0", @@ -37,14 +38,6 @@ "typescript": "5.5.3" }, "devDependencies": { - "@typescript-eslint/eslint-plugin": "^7.16.1", - "@typescript-eslint/parser": "^7.16.1", - "eslint": "^8.56.0", - "eslint-config-next": "^14.2.5", - "eslint-config-prettier": "^9.1.0", - "eslint-plugin-custom-rules": "file:./eslint-plugin-custom-rules", - "eslint-plugin-react": "^7.34.4", - "eslint-plugin-prettier": "^5.0.0", "@semantic-release/changelog": "^6.0.3", "@semantic-release/commit-analyzer": "^13.0.0", "@semantic-release/git": "^10.0.1", @@ -57,6 +50,14 @@ "@types/pg": "^8.11.6", "@types/react": "^18.3.3", "@types/react-dom": "^18.3.0", + "@typescript-eslint/eslint-plugin": "^7.16.1", + "@typescript-eslint/parser": "^7.16.1", + "eslint": "^8.56.0", + "eslint-config-next": "^14.2.5", + "eslint-config-prettier": "^9.1.0", + "eslint-plugin-custom-rules": "file:./eslint-plugin-custom-rules", + "eslint-plugin-prettier": "^5.0.0", + "eslint-plugin-react": "^7.34.4", "postcss-nesting": "^12.1.5", "postcss-preset-env": "^9.6.0", "semantic-release": "^24.0.0" From e67b5d0baad7e230feb8cdad38199cad21e584d2 Mon Sep 17 00:00:00 2001 From: Jesus Bracho Date: Sat, 16 Nov 2024 14:46:06 -0500 Subject: [PATCH 03/18] Switch to MapTiler GeocodingControl Replaced Mapbox Geocoder with MapTiler GeocodingControl for address lookup and map navigation. 
--- src/components/PropertyMap.tsx | 99 +++++++++++++++++----------------- 1 file changed, 49 insertions(+), 50 deletions(-) diff --git a/src/components/PropertyMap.tsx b/src/components/PropertyMap.tsx index c1d636dd..f6d3ed25 100644 --- a/src/components/PropertyMap.tsx +++ b/src/components/PropertyMap.tsx @@ -11,7 +11,6 @@ import { } from 'react'; import { maptilerApiKey, - mapboxAccessToken, useStagingTiles, googleCloudBucketName, } from '../config/config'; @@ -39,10 +38,10 @@ import maplibregl, { LngLat, } from 'maplibre-gl'; import 'maplibre-gl/dist/maplibre-gl.css'; -import mapboxgl from 'mapbox-gl'; import { Protocol } from 'pmtiles'; -import MapboxGeocoder from '@mapbox/mapbox-gl-geocoder'; -import '@mapbox/mapbox-gl-geocoder/dist/mapbox-gl-geocoder.css'; +import { GeocodingControl } from '@maptiler/geocoding-control/react'; +import { createMapLibreGlMapController } from '@maptiler/geocoding-control/maplibregl-controller'; +import '@maptiler/geocoding-control/style.css'; import { MapLegendControl } from './MapLegendControl'; import { createPortal } from 'react-dom'; import { Tooltip } from '@nextui-org/react'; @@ -164,7 +163,7 @@ const PropertyMap: FC = ({ const { appFilter } = useFilter(); const [popupInfo, setPopupInfo] = useState(null); const [map, setMap] = useState(null); - const geocoderRef = useRef(null); + const [mapController, setMapController] = useState(); const [searchedProperty, setSearchedProperty] = useState({ coordinates: [-75.1628565788269, 39.97008211622267], address: '', @@ -305,53 +304,10 @@ const PropertyMap: FC = ({ ); } - // Add Geocoder - if (!geocoderRef.current) { - const center = map.getCenter(); - geocoderRef.current = new MapboxGeocoder({ - accessToken: mapboxAccessToken, - bbox: [-75.288283, 39.864114, -74.945063, 40.140129], - filter: function (item) { - return item.context.some((i) => { - return ( - (i.id.split('.').shift() === 'place' && - i.text === 'Philadelphia') || - (i.id.split('.').shift() === 'district' && - i.text === 'Philadelphia County') - ); - }); - }, - mapboxgl: mapboxgl, - marker: false, - proximity: { - longitude: center.lng, - latitude: center.lat, - }, - }); - - map.addControl(geocoderRef.current as unknown as IControl, 'top-right'); - - geocoderRef.current.on('result', (e) => { - const address = e.result.place_name.split(',')[0]; - setSelectedProperty(null); - setSearchedProperty({ - coordinates: e.result.center, - address: address, - }); - map.easeTo({ - center: e.result.center, - }); - }); - } + setMapController(createMapLibreGlMapController(map, maplibregl) as any); } - return () => { - // Remove Geocoder - if (map && geocoderRef.current) { - map.removeControl(geocoderRef.current as unknown as IControl); - geocoderRef.current = null; - } - }; + return () => {}; }, [map, setSelectedProperty]); useEffect(() => { @@ -451,6 +407,49 @@ const PropertyMap: FC = ({ }} onMoveEnd={handleSetFeatures} > +
+ <GeocodingControl apiKey={maptilerApiKey} mapController={mapController} markerOnSelected={false} filter={(feature: any) => { return feature.context.some((i: any) => { return ( (i.id.split('.').shift() === 'place' && i.text === 'Philadelphia') || (i.id.split('.').shift() === 'district' && i.text === 'Philadelphia County') ); }); }} proximity={[ { type: 'map-center', }, ]} onPick={(feature) => { if (feature) { const address = feature.place_name.split(',')[0]; setSelectedProperty(null); setSearchedProperty({ coordinates: feature.center, address: address, }); map?.easeTo({ center: feature.center, }); } }} />
{popupInfo && ( Date: Sat, 16 Nov 2024 15:06:00 -0500 Subject: [PATCH 04/18] Simplify filter condition in PropertyMap.tsx The original filter condition seems to yield no results. --- src/components/PropertyMap.tsx | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/components/PropertyMap.tsx b/src/components/PropertyMap.tsx index f6d3ed25..df99d174 100644 --- a/src/components/PropertyMap.tsx +++ b/src/components/PropertyMap.tsx @@ -422,12 +422,7 @@ const PropertyMap: FC = ({ markerOnSelected={false} filter={(feature: any) => { return feature.context.some((i: any) => { - return ( - (i.id.split('.').shift() === 'place' && - i.text === 'Philadelphia') || - (i.id.split('.').shift() === 'district' && - i.text === 'Philadelphia County') - ); + return i.text.includes("Philadelphia"); }); }} proximity={[ From f1c68459459318dee6ce35d7981423498b250447 Mon Sep 17 00:00:00 2001 From: Jesus Bracho Date: Sat, 16 Nov 2024 15:22:48 -0500 Subject: [PATCH 05/18] Cast expression operator to a string for now --- src/components/MapLegendControl/MapLegend.tsx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/components/MapLegendControl/MapLegend.tsx b/src/components/MapLegendControl/MapLegend.tsx index 7bc8f0f8..816bdcd5 100644 --- a/src/components/MapLegendControl/MapLegend.tsx +++ b/src/components/MapLegendControl/MapLegend.tsx @@ -1,4 +1,3 @@ -import { ExpressionName } from 'mapbox-gl'; import React, { ReactElement, Dispatch, SetStateAction } from 'react'; import { renderToStaticMarkup } from 'react-dom/server'; import { FillLayerSpecification } from 'maplibre-gl'; @@ -25,7 +24,7 @@ function parseBlocks( if (value && Array.isArray(value) && value.length > 0) { const [name, ...args] = value; - switch (name as ExpressionName) { + switch (name as string) { case 'match': { const [getter, ...paneLabels] = args; const elements: ReactElement[] = []; From 7f3eed9ff7ffc93b4902e97e338310b7c0cac28f Mon Sep 17 00:00:00 2001 From: Jesus Bracho Date: Sat, 16 Nov 2024 15:24:00 -0500 Subject: [PATCH 06/18] Remove Mapbox GL dependencies --- package-lock.json | 875 ++++------------------------------------------ package.json | 3 --- 2 files changed, 66 insertions(+), 812 deletions(-) diff --git a/package-lock.json b/package-lock.json index e48c1c18..e95c2dde 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,7 +9,6 @@ "version": "0.1.0", "dependencies": { "@heroicons/react": "^2.1.5", - "@mapbox/mapbox-gl-geocoder": "^5.0.2", "@maptiler/geocoding-control": "^1.4.1", "@nextui-org/react": "^2.4.6", "@phosphor-icons/react": "^2.1.7", "@turf/centroid": "^7.0.0", "clsx": "^2.1.1", "framer-motion": "^11.3.8", "lodash": "^4.17.21", - "mapbox-gl": "3.2.0", "maplibre-gl": "^4.5.0", "multi-range-slider-react": "^2.0.7", "next": "^14.2.5", "pmtiles": "^3.0.6", "postcss": "8.4.39", "protobufjs": "^7.3.2", "react": "18.3.1", "react-dom": "18.3.1", "react-icons": "^5.2.1", "react-map-gl": "^7.1.7", "sharp": "^0.33.4", "tailwindcss": "^3.4.6", "typescript": "5.5.3" }, "devDependencies": { "@semantic-release/changelog": "^6.0.3", @@ -40,7 +38,6 @@ "@semantic-release/github": "^10.1.1", "@semantic-release/npm": "^12.0.1", "@semantic-release/release-notes-generator": "^14.0.1", - "@types/mapbox__mapbox-gl-geocoder": "^5.0.0", "@types/node": "^20.14.11", "@types/pbf": "^3.0.5", "@types/pg": "^8.11.6", "@types/react": "^18.3.3", "@types/react-dom": "^18.3.0", "@typescript-eslint/eslint-plugin": "^7.16.1", "@typescript-eslint/parser": "^7.16.1", "eslint": "^8.56.0", "eslint-config-next": "^14.2.5", "eslint-config-prettier": "^9.1.0", "eslint-plugin-custom-rules": "file:./eslint-plugin-custom-rules", "eslint-plugin-prettier": "^5.0.0", "eslint-plugin-react": "^7.34.4", "postcss-nesting": "^12.1.5", "postcss-preset-env": "^9.6.0", "semantic-release": "^24.0.0" }, "engines": { "node": ">=20.0.0" } }, "node_modules/@babel/code-frame": { "version": "7.24.7", "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.24.7.tgz", "integrity": "sha512-BcYH1CVJBO9tvyIZ2jVeXgSIMvGZ2FDRvDdOIVQyuklNKSsx+eppDEBq/g47Ayw+RqNFE+URvOShmf+f/qwAlA==", "dev": true, "dependencies": { "@babel/highlight": "^7.24.7", "picocolors": "^1.0.0" }, "engines": { "node": ">=6.9.0" } }, "node_modules/@babel/helper-validator-identifier": { "version": "7.24.7", "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.24.7.tgz", "integrity":
"sha512-rR+PBcQ1SMQDDyF6X0wxtG8QyLCgUB0eRAGguqRLfkCA87l7yAP7ehq8SNj96OOGTO8OBV70KhuFYcIkHXOg0w==", + "dev": true, "engines": { "node": ">=6.9.0" } @@ -99,6 +98,7 @@ "version": "7.24.7", "resolved": "https://registry.npmjs.org/@babel/highlight/-/highlight-7.24.7.tgz", "integrity": "sha512-EStJpq4OuY8xYfhGVXngigBJRWxftKX9ksiGDnmlY3o7B/V7KIAc9X4oiK87uPJSc/vs5L869bem5fhZa8caZw==", + "dev": true, "dependencies": { "@babel/helper-validator-identifier": "^7.24.7", "chalk": "^2.4.2", @@ -113,6 +113,7 @@ "version": "3.2.1", "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-3.2.1.tgz", "integrity": "sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==", + "dev": true, "dependencies": { "color-convert": "^1.9.0" }, @@ -124,6 +125,7 @@ "version": "2.4.2", "resolved": "https://registry.npmjs.org/chalk/-/chalk-2.4.2.tgz", "integrity": "sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==", + "dev": true, "dependencies": { "ansi-styles": "^3.2.1", "escape-string-regexp": "^1.0.5", @@ -137,6 +139,7 @@ "version": "1.9.3", "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz", "integrity": "sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==", + "dev": true, "dependencies": { "color-name": "1.1.3" } @@ -144,12 +147,14 @@ "node_modules/@babel/highlight/node_modules/color-name": { "version": "1.1.3", "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz", - "integrity": "sha512-72fSenhMw2HZMTVHeCA9KCmpEIbzWiQsjN+BHcBbS9vr1mtt+vJjPdksIBNUmKAW8TFUDPJK5SUU3QhE9NEXDw==" + "integrity": "sha512-72fSenhMw2HZMTVHeCA9KCmpEIbzWiQsjN+BHcBbS9vr1mtt+vJjPdksIBNUmKAW8TFUDPJK5SUU3QhE9NEXDw==", + "dev": true }, "node_modules/@babel/highlight/node_modules/escape-string-regexp": { "version": "1.0.5", "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz", "integrity": "sha512-vbRorB5FUQWvla16U8R/qgaFIya2qGzwDrNmCZuYKrbdSUMG6I1ZCGQRefkRVhuOkIGVne7BQ35DSfo1qvJqFg==", + "dev": true, "engines": { "node": ">=0.8.0" } @@ -158,6 +163,7 @@ "version": "3.0.0", "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz", "integrity": "sha512-sKJf1+ceQBr4SMkvQnBDNDtf4TXpVhVGateu0t918bl30FnbE2m4vNLX+VWe/dpjlb+HugGYzW7uQXH98HPEYw==", + "dev": true, "engines": { "node": ">=4" } @@ -166,6 +172,7 @@ "version": "5.5.0", "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz", "integrity": "sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==", + "dev": true, "dependencies": { "has-flag": "^3.0.0" }, @@ -1857,15 +1864,6 @@ "@jridgewell/sourcemap-codec": "^1.4.14" } }, - "node_modules/@mapbox/fusspot": { - "version": "0.4.0", - "resolved": "https://registry.npmjs.org/@mapbox/fusspot/-/fusspot-0.4.0.tgz", - "integrity": "sha512-6sys1vUlhNCqMvJOqPEPSi0jc9tg7aJ//oG1A16H3PXoIt9whtNngD7UzBHUVTH15zunR/vRvMtGNVsogm1KzA==", - "dependencies": { - "is-plain-obj": "^1.1.0", - "xtend": "^4.0.1" - } - }, "node_modules/@mapbox/geojson-rewind": { "version": "0.5.2", "resolved": "https://registry.npmjs.org/@mapbox/geojson-rewind/-/geojson-rewind-0.5.2.tgz", @@ -1897,70 +1895,18 @@ "node": ">= 0.6" } }, - "node_modules/@mapbox/mapbox-gl-geocoder": { - "version": "5.0.3", - "resolved": "https://registry.npmjs.org/@mapbox/mapbox-gl-geocoder/-/mapbox-gl-geocoder-5.0.3.tgz", - "integrity": 
"sha512-aeu2ZM+UKoMUGqqKy4UVVEKsIaNj2KSsiQ4p4YbNSAjZj2vcP33KSod+DPeRwhvoY+MU6KgyvdZ/1xdmH+C62g==", - "dependencies": { - "@mapbox/mapbox-sdk": "^0.16.1", - "events": "^3.3.0", - "lodash.debounce": "^4.0.6", - "nanoid": "^3.1.31", - "subtag": "^0.5.0", - "suggestions": "^1.6.0", - "xtend": "^4.0.1" - }, - "engines": { - "node": ">=6" - } - }, "node_modules/@mapbox/mapbox-gl-supported": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/@mapbox/mapbox-gl-supported/-/mapbox-gl-supported-3.0.0.tgz", - "integrity": "sha512-2XghOwu16ZwPJLOFVuIOaLbN0iKMn867evzXFyf0P22dqugezfJwLmdanAgU25ITvz1TvOfVP4jsDImlDJzcWg==" - }, - "node_modules/@mapbox/mapbox-sdk": { - "version": "0.16.1", - "resolved": "https://registry.npmjs.org/@mapbox/mapbox-sdk/-/mapbox-sdk-0.16.1.tgz", - "integrity": "sha512-dyZrmg+UL/Gp5mGG3CDbcwGSUMYYrfbd9hdp0rcA3pHSf3A9eYoXO9nFiIk6SzBwBVMzHENJz84ZHdqM0MDncQ==", - "dependencies": { - "@mapbox/fusspot": "^0.4.0", - "@mapbox/parse-mapbox-token": "^0.2.0", - "@mapbox/polyline": "^1.0.0", - "eventemitter3": "^3.1.0", - "form-data": "^3.0.0", - "got": "^11.8.5", - "is-plain-obj": "^1.1.0", - "xtend": "^4.0.1" - }, - "engines": { - "node": ">=6" - } - }, - "node_modules/@mapbox/parse-mapbox-token": { - "version": "0.2.0", - "resolved": "https://registry.npmjs.org/@mapbox/parse-mapbox-token/-/parse-mapbox-token-0.2.0.tgz", - "integrity": "sha512-BjeuG4sodYaoTygwXIuAWlZV6zUv4ZriYAQhXikzx+7DChycMUQ9g85E79Htat+AsBg+nStFALehlOhClYm5cQ==", - "dependencies": { - "base-64": "^0.1.0" - } + "integrity": "sha512-2XghOwu16ZwPJLOFVuIOaLbN0iKMn867evzXFyf0P22dqugezfJwLmdanAgU25ITvz1TvOfVP4jsDImlDJzcWg==", + "optional": true, + "peer": true }, "node_modules/@mapbox/point-geometry": { "version": "0.1.0", "resolved": "https://registry.npmjs.org/@mapbox/point-geometry/-/point-geometry-0.1.0.tgz", "integrity": "sha512-6j56HdLTwWGO0fJPlrZtdU/B13q8Uwmo18Ck2GnGgN9PCFyKTZ3UbXeEdRFh18i9XQ92eH2VdtpJHpBD3aripQ==" }, - "node_modules/@mapbox/polyline": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/@mapbox/polyline/-/polyline-1.2.1.tgz", - "integrity": "sha512-sn0V18O3OzW4RCcPoUIVDWvEGQaBNH9a0y5lgqrf5hUycyw1CzrhEoxV5irzrMNXKCkw1xRsZXcaVbsVZggHXA==", - "dependencies": { - "meow": "^9.0.0" - }, - "bin": { - "polyline": "bin/polyline.bin.js" - } - }, "node_modules/@mapbox/tiny-sdf": { "version": "2.0.6", "resolved": "https://registry.npmjs.org/@mapbox/tiny-sdf/-/tiny-sdf-2.0.6.tgz", @@ -5984,6 +5930,7 @@ "version": "4.6.0", "resolved": "https://registry.npmjs.org/@sindresorhus/is/-/is-4.6.0.tgz", "integrity": "sha512-t09vSN3MdfsyCHoFcTRCH/iUtG7OJ0CsjzB8cjAmKc/va/kIgeDI/TxsigdncE/4be734m0cvIYwNaV4i2XqAw==", + "dev": true, "engines": { "node": ">=10" }, @@ -6016,17 +5963,6 @@ "tslib": "^2.4.0" } }, - "node_modules/@szmarczak/http-timer": { - "version": "4.0.6", - "resolved": "https://registry.npmjs.org/@szmarczak/http-timer/-/http-timer-4.0.6.tgz", - "integrity": "sha512-4BAffykYOgO+5nzBWYwE3W90sBgLJoUPRWWcL8wlyiM8IB8ipJz3UMJ9KXQd1RKQXpKp8Tutn80HZtWsu2u76w==", - "dependencies": { - "defer-to-connect": "^2.0.0" - }, - "engines": { - "node": ">=10" - } - }, "node_modules/@turf/bbox": { "version": "7.1.0", "resolved": "https://registry.npmjs.org/@turf/bbox/-/bbox-7.1.0.tgz", @@ -6136,17 +6072,6 @@ "url": "https://opencollective.com/turf" } }, - "node_modules/@types/cacheable-request": { - "version": "6.0.3", - "resolved": "https://registry.npmjs.org/@types/cacheable-request/-/cacheable-request-6.0.3.tgz", - "integrity": 
"sha512-IQ3EbTzGxIigb1I3qPZc1rWJnH0BmSKv5QYTalEwweFvyBDLSAe24zP0le/hyi7ecGfZVlIVAg4BZqb8WBwKqw==", - "dependencies": { - "@types/http-cache-semantics": "*", - "@types/keyv": "^3.1.4", - "@types/node": "*", - "@types/responselike": "^1.0.0" - } - }, "node_modules/@types/geojson": { "version": "7946.0.14", "resolved": "https://registry.npmjs.org/@types/geojson/-/geojson-7946.0.14.tgz", @@ -6160,25 +6085,12 @@ "@types/geojson": "*" } }, - "node_modules/@types/http-cache-semantics": { - "version": "4.0.4", - "resolved": "https://registry.npmjs.org/@types/http-cache-semantics/-/http-cache-semantics-4.0.4.tgz", - "integrity": "sha512-1m0bIFVc7eJWyve9S0RnuRgcQqF/Xd5QsUZAZeQFr1Q3/p9JWoQQEqmVy+DPTNpGXwhgIetAoYF8JSc33q29QA==" - }, "node_modules/@types/json5": { "version": "0.0.29", "resolved": "https://registry.npmjs.org/@types/json5/-/json5-0.0.29.tgz", "integrity": "sha512-dRLjCWHYg4oaA77cxO64oO+7JwCwnIzkZPdrrC71jQmQtlhM556pwKo5bUzqvZndkVbeFLIIi+9TC40JNF5hNQ==", "dev": true }, - "node_modules/@types/keyv": { - "version": "3.1.4", - "resolved": "https://registry.npmjs.org/@types/keyv/-/keyv-3.1.4.tgz", - "integrity": "sha512-BQ5aZNSCpj7D6K2ksrRCTmKRLEpnPvWDiLPfoGyhZ++8YtiK9d/3DBKPJgry359X/P1PfruyYwvnvwFjuEiEIg==", - "dependencies": { - "@types/node": "*" - } - }, "node_modules/@types/leaflet": { "version": "1.9.12", "resolved": "https://registry.npmjs.org/@types/leaflet/-/leaflet-1.9.12.tgz", @@ -6200,16 +6112,6 @@ "@types/lodash": "*" } }, - "node_modules/@types/mapbox__mapbox-gl-geocoder": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/@types/mapbox__mapbox-gl-geocoder/-/mapbox__mapbox-gl-geocoder-5.0.0.tgz", - "integrity": "sha512-eGBWdFiP2QgmwndPyhwK6eBeOfyB8vRscp2C6Acqasx5dH8FvTo/VgXWCrCKFR3zkWek/H4w4/CwmBFOs7OLBA==", - "dev": true, - "dependencies": { - "@types/geojson": "*", - "@types/mapbox-gl": "*" - } - }, "node_modules/@types/mapbox__point-geometry": { "version": "0.1.4", "resolved": "https://registry.npmjs.org/@types/mapbox__point-geometry/-/mapbox__point-geometry-0.1.4.tgz", @@ -6233,11 +6135,6 @@ "@types/geojson": "*" } }, - "node_modules/@types/minimist": { - "version": "1.2.5", - "resolved": "https://registry.npmjs.org/@types/minimist/-/minimist-1.2.5.tgz", - "integrity": "sha512-hov8bUuiLiyFPGyFPE1lwWhmzYbirOXQNNo40+y3zow8aFVTeyn3VWL0VFFfdNddA8S4Vf0Tc062rzyNr7Paag==" - }, "node_modules/@types/node": { "version": "20.16.6", "resolved": "https://registry.npmjs.org/@types/node/-/node-20.16.6.tgz", @@ -6249,7 +6146,8 @@ "node_modules/@types/normalize-package-data": { "version": "2.4.4", "resolved": "https://registry.npmjs.org/@types/normalize-package-data/-/normalize-package-data-2.4.4.tgz", - "integrity": "sha512-37i+OaWTh9qeK4LSHPsyRC7NahnGotNuZvjLSgcPzblpHB3rrCJxAOgI5gCdKm7coonsaX1Of0ILiTcnZjbfxA==" + "integrity": "sha512-37i+OaWTh9qeK4LSHPsyRC7NahnGotNuZvjLSgcPzblpHB3rrCJxAOgI5gCdKm7coonsaX1Of0ILiTcnZjbfxA==", + "dev": true }, "node_modules/@types/pbf": { "version": "3.0.5", @@ -6292,14 +6190,6 @@ "@types/react": "*" } }, - "node_modules/@types/responselike": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/@types/responselike/-/responselike-1.0.3.tgz", - "integrity": "sha512-H/+L+UkTV33uf49PH5pCAUBVPNj2nDBXTN+qS1dOwyyg24l3CcicicCA7ca+HMvJBZcFgl5r8e+RR6elsb4Lyw==", - "dependencies": { - "@types/node": "*" - } - }, "node_modules/@types/semver": { "version": "7.5.8", "resolved": "https://registry.npmjs.org/@types/semver/-/semver-7.5.8.tgz", @@ -6849,14 +6739,6 @@ "url": "https://github.com/sponsors/ljharb" } }, - "node_modules/arrify": { - 
"version": "1.0.1", - "resolved": "https://registry.npmjs.org/arrify/-/arrify-1.0.1.tgz", - "integrity": "sha512-3CYzex9M9FGQjCGMGyi6/31c8GJbgb0qGyrx5HWxPd0aCwh4cB2YjMb2Xf9UuoogrMrlO9cTqnB5rI5GHZTcUA==", - "engines": { - "node": ">=0.10.0" - } - }, "node_modules/assign-symbols": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/assign-symbols/-/assign-symbols-1.0.0.tgz", @@ -6871,11 +6753,6 @@ "integrity": "sha512-OH/2E5Fg20h2aPrbe+QL8JZQFko0YZaF+j4mnQ7BGhfavO7OpSLa8a0y9sBwomHdSbkhTS8TQNayBfnW5DwbvQ==", "dev": true }, - "node_modules/asynckit": { - "version": "0.4.0", - "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", - "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==" - }, "node_modules/autoprefixer": { "version": "10.4.19", "resolved": "https://registry.npmjs.org/autoprefixer/-/autoprefixer-10.4.19.tgz", @@ -6950,11 +6827,6 @@ "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==" }, - "node_modules/base-64": { - "version": "0.1.0", - "resolved": "https://registry.npmjs.org/base-64/-/base-64-0.1.0.tgz", - "integrity": "sha512-Y5gU45svrR5tI2Vt/X9GPd3L0HNIKzGu202EjxrXMpuc2V2CiKgemAbUUsqYmZJvPtCXoUKjNZwBJzsNScUbXA==" - }, "node_modules/before-after-hook": { "version": "3.0.2", "resolved": "https://registry.npmjs.org/before-after-hook/-/before-after-hook-3.0.2.tgz", @@ -7056,56 +6928,6 @@ "typewise-core": "^1.2" } }, - "node_modules/cacheable-lookup": { - "version": "5.0.4", - "resolved": "https://registry.npmjs.org/cacheable-lookup/-/cacheable-lookup-5.0.4.tgz", - "integrity": "sha512-2/kNscPhpcxrOigMZzbiWF7dz8ilhb/nIHU3EyZiXWXpeq/au8qJ8VhdftMkty3n7Gj6HIGalQG8oiBNB3AJgA==", - "engines": { - "node": ">=10.6.0" - } - }, - "node_modules/cacheable-request": { - "version": "7.0.4", - "resolved": "https://registry.npmjs.org/cacheable-request/-/cacheable-request-7.0.4.tgz", - "integrity": "sha512-v+p6ongsrp0yTGbJXjgxPow2+DL93DASP4kXCDKb8/bwRtt9OEF3whggkkDkGNzgcWy2XaF4a8nZglC7uElscg==", - "dependencies": { - "clone-response": "^1.0.2", - "get-stream": "^5.1.0", - "http-cache-semantics": "^4.0.0", - "keyv": "^4.0.0", - "lowercase-keys": "^2.0.0", - "normalize-url": "^6.0.1", - "responselike": "^2.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/cacheable-request/node_modules/get-stream": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-5.2.0.tgz", - "integrity": "sha512-nBF+F1rAZVCu/p7rjzgA+Yb4lfYXrpl7a6VmJrU8wF9I1CKvP/QwPNZHnOlwbTkY6dvtFIzFMSyQXbLoTQPRpA==", - "dependencies": { - "pump": "^3.0.0" - }, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/cacheable-request/node_modules/normalize-url": { - "version": "6.1.0", - "resolved": "https://registry.npmjs.org/normalize-url/-/normalize-url-6.1.0.tgz", - "integrity": "sha512-DlL+XwOy3NxAQ8xuC0okPgK46iuVNAK01YN7RueYBqqFeGsBjV9XmCAzAdgt+667bCl5kPh9EqKKDwnaPG1I7A==", - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, "node_modules/call-bind": { "version": "1.0.7", "resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.7.tgz", @@ -7134,14 +6956,6 @@ "node": ">=6" } }, - "node_modules/camelcase": { - "version": "5.3.1", - "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-5.3.1.tgz", - "integrity": 
"sha512-L28STB170nwWS63UjtlEOE3dldQApaJXZkOI1uMFfzf3rRuPegHaHesyee+YxQ+W6SvRDQV6UrdOdRiR153wJg==", - "engines": { - "node": ">=6" - } - }, "node_modules/camelcase-css": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/camelcase-css/-/camelcase-css-2.0.1.tgz", @@ -7150,30 +6964,6 @@ "node": ">= 6" } }, - "node_modules/camelcase-keys": { - "version": "6.2.2", - "resolved": "https://registry.npmjs.org/camelcase-keys/-/camelcase-keys-6.2.2.tgz", - "integrity": "sha512-YrwaA0vEKazPBkn0ipTiMpSajYDSe+KjQfrjhcBMxJt/znbvlHd8Pw/Vamaz5EB4Wfhs3SUR3Z9mwRu/P3s3Yg==", - "dependencies": { - "camelcase": "^5.3.1", - "map-obj": "^4.0.0", - "quick-lru": "^4.0.1" - }, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/camelcase-keys/node_modules/quick-lru": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/quick-lru/-/quick-lru-4.0.1.tgz", - "integrity": "sha512-ARhCpm70fzdcvNQfPoy49IaanKkTlRWF2JMzqhcJbhSFRZv7nPTvZJdcY7301IPmvW+/p0RgIWnQDLJxifsQ7g==", - "engines": { - "node": ">=8" - } - }, "node_modules/caniuse-lite": { "version": "1.0.30001663", "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001663.tgz", @@ -7221,7 +7011,9 @@ "node_modules/cheap-ruler": { "version": "3.0.2", "resolved": "https://registry.npmjs.org/cheap-ruler/-/cheap-ruler-3.0.2.tgz", - "integrity": "sha512-02T332h1/HTN6cDSufLP8x4JzDs2+VC+8qZ/N0kWIVPyc2xUkWwWh3B2fJxR7raXkL4Mq7k554mfuM9ofv/vGg==" + "integrity": "sha512-02T332h1/HTN6cDSufLP8x4JzDs2+VC+8qZ/N0kWIVPyc2xUkWwWh3B2fJxR7raXkL4Mq7k554mfuM9ofv/vGg==", + "optional": true, + "peer": true }, "node_modules/chokidar": { "version": "3.6.0", @@ -7444,17 +7236,6 @@ "url": "https://github.com/chalk/wrap-ansi?sponsor=1" } }, - "node_modules/clone-response": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/clone-response/-/clone-response-1.0.3.tgz", - "integrity": "sha512-ROoL94jJH2dUVML2Y/5PEDNaSHgeOdSDicUyS7izcF63G6sTc/FTjLub4b8Il9S8S0beOfYt0TaA5qvFK+w0wA==", - "dependencies": { - "mimic-response": "^1.0.0" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, "node_modules/clsx": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/clsx/-/clsx-2.1.1.tgz", @@ -7505,17 +7286,6 @@ "resolved": "https://registry.npmjs.org/color2k/-/color2k-2.0.3.tgz", "integrity": "sha512-zW190nQTIoXcGCaU08DvVNFTmQhUpnJfVuAKfWqUQkflXKpaDdpaYoM0iluLS9lgJNHyBF58KKA2FBEwkD7wog==" }, - "node_modules/combined-stream": { - "version": "1.0.8", - "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", - "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", - "dependencies": { - "delayed-stream": "~1.0.0" - }, - "engines": { - "node": ">= 0.8" - } - }, "node_modules/commander": { "version": "4.1.1", "resolved": "https://registry.npmjs.org/commander/-/commander-4.1.1.tgz", @@ -7819,7 +7589,9 @@ "node_modules/csscolorparser": { "version": "1.0.3", "resolved": "https://registry.npmjs.org/csscolorparser/-/csscolorparser-1.0.3.tgz", - "integrity": "sha512-umPSgYwZkdFoUrH5hIq5kf0wPSXiro51nPw0j2K/c83KflkPSTBGMz6NJvMB+07VlL0y7VPo6QJcDjcgKTTm3w==" + "integrity": "sha512-umPSgYwZkdFoUrH5hIq5kf0wPSXiro51nPw0j2K/c83KflkPSTBGMz6NJvMB+07VlL0y7VPo6QJcDjcgKTTm3w==", + "optional": true, + "peer": true }, "node_modules/cssdb": { "version": "8.1.1", @@ -7928,62 +7700,6 @@ } } }, - "node_modules/decamelize": { - "version": "1.2.0", - "resolved": 
"https://registry.npmjs.org/decamelize/-/decamelize-1.2.0.tgz", - "integrity": "sha512-z2S+W9X73hAUUki+N+9Za2lBlun89zigOyGrsax+KUQ6wKW4ZoWpEYBkGhQjwAjjDCkWxhY0VKEhk8wzY7F5cA==", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/decamelize-keys": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/decamelize-keys/-/decamelize-keys-1.1.1.tgz", - "integrity": "sha512-WiPxgEirIV0/eIOMcnFBA3/IJZAZqKnwAwWyvvdi4lsr1WCN22nhdf/3db3DoZcUjTV2SqfzIwNyp6y2xs3nmg==", - "dependencies": { - "decamelize": "^1.1.0", - "map-obj": "^1.0.0" - }, - "engines": { - "node": ">=0.10.0" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/decamelize-keys/node_modules/map-obj": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/map-obj/-/map-obj-1.0.1.tgz", - "integrity": "sha512-7N/q3lyZ+LVCp7PzuxrJr4KMbBE2hW7BT7YNia330OFxIf4d3r5zVpicP2650l7CPN6RM9zOJRl3NGpqSiw3Eg==", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/decompress-response": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-6.0.0.tgz", - "integrity": "sha512-aW35yZM6Bb/4oJlZncMH2LCoZtJXTRxES17vE3hoRiowU2kWHaJKFkSBDnDR+cm9J+9QhXmREyIfv0pji9ejCQ==", - "dependencies": { - "mimic-response": "^3.1.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/decompress-response/node_modules/mimic-response": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-3.1.0.tgz", - "integrity": "sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ==", - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, "node_modules/deep-equal": { "version": "2.2.3", "resolved": "https://registry.npmjs.org/deep-equal/-/deep-equal-2.2.3.tgz", @@ -8039,14 +7755,6 @@ "node": ">=0.10.0" } }, - "node_modules/defer-to-connect": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/defer-to-connect/-/defer-to-connect-2.0.1.tgz", - "integrity": "sha512-4tvttepXG1VaYGrRibk5EwJd1t4udunSOVMdLSAL6mId1ix438oPwPZMALY41FCijukO1L0twNcGsdzS7dHgDg==", - "engines": { - "node": ">=10" - } - }, "node_modules/define-data-property": { "version": "1.1.4", "resolved": "https://registry.npmjs.org/define-data-property/-/define-data-property-1.1.4.tgz", @@ -8081,14 +7789,6 @@ "url": "https://github.com/sponsors/ljharb" } }, - "node_modules/delayed-stream": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", - "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", - "engines": { - "node": ">=0.4.0" - } - }, "node_modules/detect-libc": { "version": "2.0.3", "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.0.3.tgz", @@ -8160,7 +7860,9 @@ "node_modules/earcut": { "version": "2.2.4", "resolved": "https://registry.npmjs.org/earcut/-/earcut-2.2.4.tgz", - "integrity": "sha512-/pjZsA1b4RPHbeWZQn66SWS8nZZWLQQ23oE3Eam7aroEFGEvwKAsJfZ9ytiEMycfzXWpca4FA9QIOehf7PocBQ==" + "integrity": "sha512-/pjZsA1b4RPHbeWZQn66SWS8nZZWLQQ23oE3Eam7aroEFGEvwKAsJfZ9ytiEMycfzXWpca4FA9QIOehf7PocBQ==", + "optional": true, + "peer": true }, "node_modules/eastasianwidth": { "version": "0.2.0", @@ -8183,14 +7885,6 @@ "integrity": "sha512-5U0rVMU5Y2n2+ykNLQqMoqklN9ICBT/KsvC1Gz6vqHbz2AXXGkG+Pm5rMWk/8Vjrr/mY9985Hi8DYzn1F09Nyw==", "dev": 
true }, - "node_modules/end-of-stream": { - "version": "1.4.4", - "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.4.tgz", - "integrity": "sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q==", - "dependencies": { - "once": "^1.4.0" - } - }, "node_modules/enhanced-resolve": { "version": "5.17.1", "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.17.1.tgz", @@ -8376,6 +8070,7 @@ "version": "1.3.2", "resolved": "https://registry.npmjs.org/error-ex/-/error-ex-1.3.2.tgz", "integrity": "sha512-7dFHNmqeFSEt2ZBsCriorKnn3Z2pj+fd9kmI6QoWw4//DL+icEBfc0U7qJCisqrTsKTjw4fNFy2pW9OqStD84g==", + "dev": true, "dependencies": { "is-arrayish": "^0.2.1" } @@ -8383,7 +8078,8 @@ "node_modules/error-ex/node_modules/is-arrayish": { "version": "0.2.1", "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.2.1.tgz", - "integrity": "sha512-zz06S8t0ozoDXMG+ube26zeCTNXcKIPJZJi8hBrF4idCLms4CG9QtK7qBl1boi5ODzFpjswb5JPmHCbMpjaYzg==" + "integrity": "sha512-zz06S8t0ozoDXMG+ube26zeCTNXcKIPJZJi8hBrF4idCLms4CG9QtK7qBl1boi5ODzFpjswb5JPmHCbMpjaYzg==", + "dev": true }, "node_modules/es-abstract": { "version": "1.23.3", @@ -9141,19 +8837,6 @@ "node": ">=0.10.0" } }, - "node_modules/eventemitter3": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/eventemitter3/-/eventemitter3-3.1.2.tgz", - "integrity": "sha512-tvtQIeLVHjDkJYnzf2dgVMxfuSGJeM/7UCG17TT4EumTfNtF+0nebF/4zWOIkCreAbtNqhGEboB6BWrwqNaw4Q==" - }, - "node_modules/events": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/events/-/events-3.3.0.tgz", - "integrity": "sha512-mQw+2fkQbALzQ7V0MY0IqdnXNOeTtP4r0lN9z7AAawCXgqea7bDii20AYrIBrFd/Hx0M2Ocz6S111CaFkUcb0Q==", - "engines": { - "node": ">=0.8.x" - } - }, "node_modules/execa": { "version": "5.1.1", "resolved": "https://registry.npmjs.org/execa/-/execa-5.1.1.tgz", @@ -9408,19 +9091,6 @@ "url": "https://github.com/sponsors/isaacs" } }, - "node_modules/form-data": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/form-data/-/form-data-3.0.1.tgz", - "integrity": "sha512-RHkBKtLWUVwd7SqRIvCZMEvAMoGUp0XU+seQiZejj0COz3RI3hWP4sCv3gZWWLjJTd7rGwcsF5eKZGii0r/hbg==", - "dependencies": { - "asynckit": "^0.4.0", - "combined-stream": "^1.0.8", - "mime-types": "^2.1.12" - }, - "engines": { - "node": ">= 6" - } - }, "node_modules/fraction.js": { "version": "4.3.7", "resolved": "https://registry.npmjs.org/fraction.js/-/fraction.js-4.3.7.tgz", @@ -9547,14 +9217,6 @@ "url": "https://github.com/sponsors/ljharb" } }, - "node_modules/fuzzy": { - "version": "0.1.3", - "resolved": "https://registry.npmjs.org/fuzzy/-/fuzzy-0.1.3.tgz", - "integrity": "sha512-/gZffu4ykarLrCiP3Ygsa86UAo1E5vEVlvTrpkKywXSbP9Xhln3oSp9QSV57gEq3JFFpGJ4GZ+5zdEp3FcUh4w==", - "engines": { - "node": ">= 0.6.0" - } - }, "node_modules/geo-coordinates-parser": { "version": "1.7.4", "resolved": "https://registry.npmjs.org/geo-coordinates-parser/-/geo-coordinates-parser-1.7.4.tgz", @@ -9563,7 +9225,9 @@ "node_modules/geojson-vt": { "version": "3.2.1", "resolved": "https://registry.npmjs.org/geojson-vt/-/geojson-vt-3.2.1.tgz", - "integrity": "sha512-EvGQQi/zPrDA6zr6BnJD/YhwAkBP8nnJ9emh3EnHQKVMfg/MRVtPbMYdgVy/IaEmn4UfagD2a6fafPDL5hbtwg==" + "integrity": "sha512-EvGQQi/zPrDA6zr6BnJD/YhwAkBP8nnJ9emh3EnHQKVMfg/MRVtPbMYdgVy/IaEmn4UfagD2a6fafPDL5hbtwg==", + "optional": true, + "peer": true }, "node_modules/get-caller-file": { "version": "2.0.5", @@ -9811,30 +9475,6 @@ "url": "https://github.com/sponsors/ljharb" } }, - 
"node_modules/got": { - "version": "11.8.6", - "resolved": "https://registry.npmjs.org/got/-/got-11.8.6.tgz", - "integrity": "sha512-6tfZ91bOr7bOXnK7PRDCGBLa1H4U080YHNaAQ2KsMGlLEzRbk44nsZF2E1IeRc3vtJHPVbKCYgdFbaGO2ljd8g==", - "dependencies": { - "@sindresorhus/is": "^4.0.0", - "@szmarczak/http-timer": "^4.0.5", - "@types/cacheable-request": "^6.0.1", - "@types/responselike": "^1.0.0", - "cacheable-lookup": "^5.0.3", - "cacheable-request": "^7.0.2", - "decompress-response": "^6.0.0", - "http2-wrapper": "^1.0.0-beta.5.2", - "lowercase-keys": "^2.0.0", - "p-cancelable": "^2.0.0", - "responselike": "^2.0.0" - }, - "engines": { - "node": ">=10.19.0" - }, - "funding": { - "url": "https://github.com/sindresorhus/got?sponsor=1" - } - }, "node_modules/graceful-fs": { "version": "4.2.11", "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", @@ -9849,7 +9489,9 @@ "node_modules/grid-index": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/grid-index/-/grid-index-1.1.0.tgz", - "integrity": "sha512-HZRwumpOGUrHyxO5bqKZL0B0GlUpwtCAzZ42sgxUPniu33R1LSFH5yrIcBCHjkctCAh3mtWKcKd9J4vDDdeVHA==" + "integrity": "sha512-HZRwumpOGUrHyxO5bqKZL0B0GlUpwtCAzZ42sgxUPniu33R1LSFH5yrIcBCHjkctCAh3mtWKcKd9J4vDDdeVHA==", + "optional": true, + "peer": true }, "node_modules/handlebars": { "version": "4.7.8", @@ -9872,14 +9514,6 @@ "uglify-js": "^3.1.4" } }, - "node_modules/hard-rejection": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/hard-rejection/-/hard-rejection-2.1.0.tgz", - "integrity": "sha512-VIZB+ibDhx7ObhAe7OVtoEbuP4h/MuOTHJ+J8h/eBXotJYl0fBgR72xDFCKgIh22OJZIOVNxBMWuhAr10r8HdA==", - "engines": { - "node": ">=6" - } - }, "node_modules/has-bigints": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/has-bigints/-/has-bigints-1.0.2.tgz", @@ -9981,22 +9615,6 @@ "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/hosted-git-info": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/hosted-git-info/-/hosted-git-info-4.1.0.tgz", - "integrity": "sha512-kyCuEOWjJqZuDbRHzL8V93NzQhwIB71oFWSyzVo+KPZI+pnQPPxucdkrOZvkLRnrf5URsQM+IJ09Dw29cRALIA==", - "dependencies": { - "lru-cache": "^6.0.0" - }, - "engines": { - "node": ">=10" - } - }, - "node_modules/http-cache-semantics": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/http-cache-semantics/-/http-cache-semantics-4.1.1.tgz", - "integrity": "sha512-er295DKPVsV82j5kw1Gjt+ADA/XYHsajl82cGNQG2eyoPkvgUhX+nDIyelzhIWbbsXP39EHcI6l5tYs2FYqYXQ==" - }, "node_modules/http-proxy-agent": { "version": "7.0.2", "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz", @@ -10010,18 +9628,6 @@ "node": ">= 14" } }, - "node_modules/http2-wrapper": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/http2-wrapper/-/http2-wrapper-1.0.3.tgz", - "integrity": "sha512-V+23sDMr12Wnz7iTcDeJr3O6AIxlnvT/bmaAAAP/Xda35C90p9599p0F1eHR/N1KILWSoWVAiOMFjBBXaXSMxg==", - "dependencies": { - "quick-lru": "^5.1.1", - "resolve-alpn": "^1.0.0" - }, - "engines": { - "node": ">=10.19.0" - } - }, "node_modules/https-proxy-agent": { "version": "7.0.5", "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.5.tgz", @@ -10124,6 +9730,7 @@ "version": "4.0.0", "resolved": "https://registry.npmjs.org/indent-string/-/indent-string-4.0.0.tgz", "integrity": "sha512-EdDDZu4A2OyIK7Lr/2zG+w5jmbuk1DVBnEwREQvBzspBJkCEbRa8GxU1lghYcaGJCnRWibjDXlq779X1/y5xwg==", + "dev": true, "engines": { "node": ">=8" } @@ -10497,14 +10104,6 @@ "node": ">=8" } }, - 
"node_modules/is-plain-obj": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/is-plain-obj/-/is-plain-obj-1.1.0.tgz", - "integrity": "sha512-yvkRyxmFKEOQ4pNXCmJG5AEQNlXJS5LaONXo5/cLdTZdWvsZ1ioJEonLGAosKlMWE8lwUy/bJzMjcw8az73+Fg==", - "engines": { - "node": ">=0.10.0" - } - }, "node_modules/is-plain-object": { "version": "2.0.4", "resolved": "https://registry.npmjs.org/is-plain-object/-/is-plain-object-2.0.4.tgz", @@ -10770,7 +10369,8 @@ "node_modules/json-buffer": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/json-buffer/-/json-buffer-3.0.1.tgz", - "integrity": "sha512-4bV5BfR2mqfQTJm+V5tPPdf+ZpuhiIvTuAB5g8kcrXOZpTT/QwwVRWBywX1ozr6lEuPdbHxwaJlm9G6mI2sfSQ==" + "integrity": "sha512-4bV5BfR2mqfQTJm+V5tPPdf+ZpuhiIvTuAB5g8kcrXOZpTT/QwwVRWBywX1ozr6lEuPdbHxwaJlm9G6mI2sfSQ==", + "dev": true }, "node_modules/json-parse-better-errors": { "version": "1.0.2", @@ -10781,7 +10381,8 @@ "node_modules/json-parse-even-better-errors": { "version": "2.3.1", "resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz", - "integrity": "sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==" + "integrity": "sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==", + "dev": true }, "node_modules/json-schema-traverse": { "version": "0.4.1", @@ -10848,6 +10449,7 @@ "version": "4.5.4", "resolved": "https://registry.npmjs.org/keyv/-/keyv-4.5.4.tgz", "integrity": "sha512-oxVHkHR/EJf2CNXnWxRLW6mg7JyCCUcG0DtEGmL2ctUo1PNTin1PUil+r/+4r5MpVgC/fn1kjsx7mjSujKqIpw==", + "dev": true, "dependencies": { "json-buffer": "3.0.1" } @@ -10967,7 +10569,9 @@ "node_modules/lodash.clonedeep": { "version": "4.5.0", "resolved": "https://registry.npmjs.org/lodash.clonedeep/-/lodash.clonedeep-4.5.0.tgz", - "integrity": "sha512-H5ZhCF25riFd9uB5UCkVKo61m3S/xZk1x4wA6yp/L3RFP6Z/eHH1ymQcGLo7J3GMPfm0V/7m1tryHuGVxpqEBQ==" + "integrity": "sha512-H5ZhCF25riFd9uB5UCkVKo61m3S/xZk1x4wA6yp/L3RFP6Z/eHH1ymQcGLo7J3GMPfm0V/7m1tryHuGVxpqEBQ==", + "optional": true, + "peer": true }, "node_modules/lodash.debounce": { "version": "4.0.8", @@ -11045,40 +10649,12 @@ "loose-envify": "cli.js" } }, - "node_modules/lowercase-keys": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/lowercase-keys/-/lowercase-keys-2.0.0.tgz", - "integrity": "sha512-tqNXrS78oMOE73NMxK4EMLQsQowWf8jKooH9g7xPavRT706R6bkQJ6DY2Te7QukaZsulxa30wQ7bk0pm4XiHmA==", - "engines": { - "node": ">=8" - } - }, - "node_modules/lru-cache": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz", - "integrity": "sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA==", - "dependencies": { - "yallist": "^4.0.0" - }, - "engines": { - "node": ">=10" - } - }, - "node_modules/map-obj": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/map-obj/-/map-obj-4.3.0.tgz", - "integrity": "sha512-hdN1wVrZbb29eBGiGjJbeP8JbKjq1urkHJ/LIP/NY48MZ1QVXUsQBV1G1zvYFHn1XE06cwjBsOI2K3Ulnj1YXQ==", - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, "node_modules/mapbox-gl": { "version": "3.2.0", "resolved": "https://registry.npmjs.org/mapbox-gl/-/mapbox-gl-3.2.0.tgz", "integrity": "sha512-v8S7x+wTr35kJ9nqzgn/VPiSFZxBkyQhwCk9bdyiFHVwCukNGG3LXt03FoaHHTsOuB9JWenWE96k0Uw+HGMZ8w==", + "optional": true, + "peer": true, "dependencies": { "@mapbox/geojson-rewind": "^0.5.2", 
"@mapbox/jsonlint-lines-primitives": "^2.0.2", @@ -11212,42 +10788,6 @@ "url": "https://github.com/chalk/chalk?sponsor=1" } }, - "node_modules/meow": { - "version": "9.0.0", - "resolved": "https://registry.npmjs.org/meow/-/meow-9.0.0.tgz", - "integrity": "sha512-+obSblOQmRhcyBt62furQqRAQpNyWXo8BuQ5bN7dG8wmwQ+vwHKp/rCFD4CrTP8CsDQD1sjoZ94K417XEUk8IQ==", - "dependencies": { - "@types/minimist": "^1.2.0", - "camelcase-keys": "^6.2.2", - "decamelize": "^1.2.0", - "decamelize-keys": "^1.1.0", - "hard-rejection": "^2.1.0", - "minimist-options": "4.1.0", - "normalize-package-data": "^3.0.0", - "read-pkg-up": "^7.0.1", - "redent": "^3.0.0", - "trim-newlines": "^3.0.0", - "type-fest": "^0.18.0", - "yargs-parser": "^20.2.3" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/meow/node_modules/type-fest": { - "version": "0.18.1", - "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.18.1.tgz", - "integrity": "sha512-OIAYXk8+ISY+qTOwkHtKqzAuxchoMiD9Udx+FSGQDuiRR+PJKJHc2NJAXlbhkGwTt/4/nKZxELY1w3ReWOL8mw==", - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, "node_modules/merge-stream": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/merge-stream/-/merge-stream-2.0.0.tgz", @@ -11289,25 +10829,6 @@ "node": ">=16" } }, - "node_modules/mime-db": { - "version": "1.52.0", - "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", - "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/mime-types": { - "version": "2.1.35", - "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", - "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", - "dependencies": { - "mime-db": "1.52.0" - }, - "engines": { - "node": ">= 0.6" - } - }, "node_modules/mimic-fn": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/mimic-fn/-/mimic-fn-2.1.0.tgz", @@ -11317,22 +10838,6 @@ "node": ">=6" } }, - "node_modules/mimic-response": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-1.0.1.tgz", - "integrity": "sha512-j5EctnkH7amfV/q5Hgmoal1g2QHFJRraOtmx0JpIqkxhBhI/lJSl1nMpQ45hVarwNETOoWEimndZ4QK0RHxuxQ==", - "engines": { - "node": ">=4" - } - }, - "node_modules/min-indent": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/min-indent/-/min-indent-1.0.1.tgz", - "integrity": "sha512-I9jwMn07Sy/IwOj3zVkVik2JTvgpaykDZEigL6Rx6N9LbMywwUSMtxET+7lVoDLLd3O3IXwJwvuuns8UB/HeAg==", - "engines": { - "node": ">=4" - } - }, "node_modules/minimatch": { "version": "9.0.5", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz", @@ -11355,19 +10860,6 @@ "url": "https://github.com/sponsors/ljharb" } }, - "node_modules/minimist-options": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/minimist-options/-/minimist-options-4.1.0.tgz", - "integrity": "sha512-Q4r8ghd80yhO/0j1O3B2BjweX3fiHg9cdOwjJd2J76Q135c+NDxGCqdYKQ1SKBuFfgWbAUzBfvYjPUEeNgqN1A==", - "dependencies": { - "arrify": "^1.0.1", - "is-plain-obj": "^1.1.0", - "kind-of": "^6.0.3" - }, - "engines": { - "node": ">= 6" - } - }, "node_modules/minipass": { "version": "7.1.2", "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.2.tgz", @@ -11542,20 +11034,6 @@ "resolved": 
"https://registry.npmjs.org/node-releases/-/node-releases-2.0.18.tgz", "integrity": "sha512-d9VeXT4SJ7ZeOqGX6R5EM022wpL+eWPooLI+5UpWn2jCT1aosUQEhQP214x33Wkwx3JQMvIm+tIoVOdodFS40g==" }, - "node_modules/normalize-package-data": { - "version": "3.0.3", - "resolved": "https://registry.npmjs.org/normalize-package-data/-/normalize-package-data-3.0.3.tgz", - "integrity": "sha512-p2W1sgqij3zMMyRC067Dg16bfzVH+w7hyegmpIvZ4JNjqtGOVAIvLmjBx3yP7YTe9vKJgkoNOPjwQGogDoMXFA==", - "dependencies": { - "hosted-git-info": "^4.0.1", - "is-core-module": "^2.5.0", - "semver": "^7.3.4", - "validate-npm-package-license": "^3.0.1" - }, - "engines": { - "node": ">=10" - } - }, "node_modules/normalize-path": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz", @@ -14274,6 +13752,7 @@ "version": "1.4.0", "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", + "dev": true, "dependencies": { "wrappy": "1" } @@ -14310,14 +13789,6 @@ "node": ">= 0.8.0" } }, - "node_modules/p-cancelable": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/p-cancelable/-/p-cancelable-2.1.1.tgz", - "integrity": "sha512-BZOr3nRQHOntUjTrH8+Lh54smKHoHyur8We1V8DSMVrl5A2malOOwuJRnKRDjSnkoeBh4at6BwEnb5I7Jl31wg==", - "engines": { - "node": ">=8" - } - }, "node_modules/p-each-series": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/p-each-series/-/p-each-series-3.0.0.tgz", @@ -14492,6 +13963,7 @@ "version": "4.0.0", "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz", "integrity": "sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w==", + "dev": true, "engines": { "node": ">=8" } @@ -15703,15 +15175,6 @@ "resolved": "https://registry.npmjs.org/protocol-buffers-schema/-/protocol-buffers-schema-3.6.0.tgz", "integrity": "sha512-TdDRD+/QNdrCGCE7v8340QyuXd4kIWIgapsE2+n/SaGiSSbomYl4TjHlvIoCWRpE7wFt02EpB35VVA2ImcBVqw==" }, - "node_modules/pump": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.2.tgz", - "integrity": "sha512-tUPXtzlGM8FE3P0ZL6DVs/3P58k9nk8/jZeQCurTJylQA8qFYzHFfhBJkuqyE0FifOsQ0uKWekiZ5g8wtr28cw==", - "dependencies": { - "end-of-stream": "^1.1.0", - "once": "^1.3.1" - } - }, "node_modules/punycode": { "version": "2.3.1", "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz", @@ -15740,17 +15203,6 @@ } ] }, - "node_modules/quick-lru": { - "version": "5.1.1", - "resolved": "https://registry.npmjs.org/quick-lru/-/quick-lru-5.1.1.tgz", - "integrity": "sha512-WuyALRjWPDGtt/wzJiadO5AXY+8hZ80hVpe6MyivgraREW751X3SbhRvG3eLKOYN+8VEvqLcf3wdnt44Z4S4SA==", - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, "node_modules/quickselect": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/quickselect/-/quickselect-2.0.0.tgz", @@ -16016,149 +15468,6 @@ "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/read-pkg-up": { - "version": "7.0.1", - "resolved": "https://registry.npmjs.org/read-pkg-up/-/read-pkg-up-7.0.1.tgz", - "integrity": "sha512-zK0TB7Xd6JpCLmlLmufqykGE+/TlOePD6qKClNW7hHDKFh/J7/7gCWGR7joEQEW1bKq3a3yUZSObOoWLFQ4ohg==", - "dependencies": { - "find-up": "^4.1.0", - "read-pkg": "^5.2.0", - "type-fest": "^0.8.1" - }, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - 
"node_modules/read-pkg-up/node_modules/find-up": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/find-up/-/find-up-4.1.0.tgz", - "integrity": "sha512-PpOwAdQ/YlXQ2vj8a3h8IipDuYRi3wceVQQGYWxNINccq40Anw7BlsEXCMbt1Zt+OLA6Fq9suIpIWD0OsnISlw==", - "dependencies": { - "locate-path": "^5.0.0", - "path-exists": "^4.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/read-pkg-up/node_modules/hosted-git-info": { - "version": "2.8.9", - "resolved": "https://registry.npmjs.org/hosted-git-info/-/hosted-git-info-2.8.9.tgz", - "integrity": "sha512-mxIDAb9Lsm6DoOJ7xH+5+X4y1LU/4Hi50L9C5sIswK3JzULS4bwk1FvjdBgvYR4bzT4tuUQiC15FE2f5HbLvYw==" - }, - "node_modules/read-pkg-up/node_modules/locate-path": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-5.0.0.tgz", - "integrity": "sha512-t7hw9pI+WvuwNJXwk5zVHpyhIqzg2qTlklJOf0mVxGSbe3Fp2VieZcduNYjaLDoy6p9uGpQEGWG87WpMKlNq8g==", - "dependencies": { - "p-locate": "^4.1.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/read-pkg-up/node_modules/normalize-package-data": { - "version": "2.5.0", - "resolved": "https://registry.npmjs.org/normalize-package-data/-/normalize-package-data-2.5.0.tgz", - "integrity": "sha512-/5CMN3T0R4XTj4DcGaexo+roZSdSFW/0AOOTROrjxzCG1wrWXEsGbRKevjlIL+ZDE4sZlJr5ED4YW0yqmkK+eA==", - "dependencies": { - "hosted-git-info": "^2.1.4", - "resolve": "^1.10.0", - "semver": "2 || 3 || 4 || 5", - "validate-npm-package-license": "^3.0.1" - } - }, - "node_modules/read-pkg-up/node_modules/p-limit": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-2.3.0.tgz", - "integrity": "sha512-//88mFWSJx8lxCzwdAABTJL2MyWB12+eIY7MDL2SqLmAkeKU9qxRvWuSyTjm3FUmpBEMuFfckAIqEaVGUDxb6w==", - "dependencies": { - "p-try": "^2.0.0" - }, - "engines": { - "node": ">=6" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/read-pkg-up/node_modules/p-locate": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/p-locate/-/p-locate-4.1.0.tgz", - "integrity": "sha512-R79ZZ/0wAxKGu3oYMlz8jy/kbhsNrS7SKZ7PxEHBgJ5+F2mtFW2fK2cOtBh1cHYkQsbzFV7I+EoRKe6Yt0oK7A==", - "dependencies": { - "p-limit": "^2.2.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/read-pkg-up/node_modules/p-try": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/p-try/-/p-try-2.2.0.tgz", - "integrity": "sha512-R4nPAVTAU0B9D35/Gk3uJf/7XYbQcyohSKdvAxIRSNghFl4e71hVoGnBNQz9cWaXxO2I10KTC+3jMdvvoKw6dQ==", - "engines": { - "node": ">=6" - } - }, - "node_modules/read-pkg-up/node_modules/parse-json": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/parse-json/-/parse-json-5.2.0.tgz", - "integrity": "sha512-ayCKvm/phCGxOkYRSCM82iDwct8/EonSEgCSxWxD7ve6jHggsFl4fZVQBPRNgQoKiuV/odhFrGzQXZwbifC8Rg==", - "dependencies": { - "@babel/code-frame": "^7.0.0", - "error-ex": "^1.3.1", - "json-parse-even-better-errors": "^2.3.0", - "lines-and-columns": "^1.1.6" - }, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/read-pkg-up/node_modules/read-pkg": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/read-pkg/-/read-pkg-5.2.0.tgz", - "integrity": "sha512-Ug69mNOpfvKDAc2Q8DRpMjjzdtrnv9HcSMX+4VsZxD1aZ6ZzrIE7rlzXBtWTyhULSMKg076AW6WR5iZpD0JiOg==", - "dependencies": { - "@types/normalize-package-data": "^2.4.0", - "normalize-package-data": "^2.5.0", - "parse-json": "^5.0.0", - "type-fest": "^0.6.0" - }, - "engines": { - "node": ">=8" - } 
- }, - "node_modules/read-pkg-up/node_modules/read-pkg/node_modules/type-fest": { - "version": "0.6.0", - "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.6.0.tgz", - "integrity": "sha512-q+MB8nYR1KDLrgr4G5yemftpMC7/QLqVndBmEEdqzmNj5dcFOO4Oo8qlwZE3ULT3+Zim1F8Kq4cBnikNhlCMlg==", - "engines": { - "node": ">=8" - } - }, - "node_modules/read-pkg-up/node_modules/semver": { - "version": "5.7.2", - "resolved": "https://registry.npmjs.org/semver/-/semver-5.7.2.tgz", - "integrity": "sha512-cBznnQ9KjJqU67B52RMC65CMarK2600WFnbkcaiwWq3xy/5haFJlshgnpjovMVJ+Hff49d8GEn0b87C5pDQ10g==", - "bin": { - "semver": "bin/semver" - } - }, - "node_modules/read-pkg-up/node_modules/type-fest": { - "version": "0.8.1", - "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.8.1.tgz", - "integrity": "sha512-4dbzIzqvjtgiM5rw1k5rEHtBANKmdudhGyBEajN01fEyhaAIhsoKNy6y7+IN93IfpFtwY9iqi7kD+xwKhQsNJA==", - "engines": { - "node": ">=8" - } - }, "node_modules/read-pkg/node_modules/hosted-git-info": { "version": "7.0.2", "resolved": "https://registry.npmjs.org/hosted-git-info/-/hosted-git-info-7.0.2.tgz", @@ -16235,18 +15544,6 @@ "node": ">=8.10.0" } }, - "node_modules/redent": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/redent/-/redent-3.0.0.tgz", - "integrity": "sha512-6tDA8g98We0zd0GvVeMT9arEOnTw9qM03L9cJXaCjrip1OO764RDBLBfrB4cwzNGDj5OA5ioymC9GkizgWJDUg==", - "dependencies": { - "indent-string": "^4.0.0", - "strip-indent": "^3.0.0" - }, - "engines": { - "node": ">=8" - } - }, "node_modules/reflect.getprototypeof": { "version": "1.0.6", "resolved": "https://registry.npmjs.org/reflect.getprototypeof/-/reflect.getprototypeof-1.0.6.tgz", @@ -16328,11 +15625,6 @@ "url": "https://github.com/sponsors/ljharb" } }, - "node_modules/resolve-alpn": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/resolve-alpn/-/resolve-alpn-1.2.1.tgz", - "integrity": "sha512-0a1F4l73/ZFZOakJnQ3FvkJ2+gSTQWz/r2KE5OdDY0TxPm5h4GkqkWWfM47T7HsbnOtcJVEF4epCVy6u7Q3K+g==" - }, "node_modules/resolve-from": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz", @@ -16359,17 +15651,6 @@ "protocol-buffers-schema": "^3.3.1" } }, - "node_modules/responselike": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/responselike/-/responselike-2.0.1.tgz", - "integrity": "sha512-4gl03wn3hj1HP3yzgdI7d3lCkF95F21Pz4BPGvKHinyQzALR5CapwC8yIi0Rh58DEMQ/SguC03wFj2k0M/mHhw==", - "dependencies": { - "lowercase-keys": "^2.0.0" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, "node_modules/reusify": { "version": "1.0.4", "resolved": "https://registry.npmjs.org/reusify/-/reusify-1.0.4.tgz", @@ -16878,6 +16159,8 @@ "version": "3.1.2", "resolved": "https://registry.npmjs.org/serialize-to-js/-/serialize-to-js-3.1.2.tgz", "integrity": "sha512-owllqNuDDEimQat7EPG0tH7JjO090xKNzUtYz6X+Sk2BXDnOCilDdNLwjWeFywG9xkJul1ULvtUQa9O4pUaY0w==", + "optional": true, + "peer": true, "engines": { "node": ">=4.0.0" } @@ -17197,6 +16480,7 @@ "version": "3.2.0", "resolved": "https://registry.npmjs.org/spdx-correct/-/spdx-correct-3.2.0.tgz", "integrity": "sha512-kN9dJbvnySHULIluDHy32WHRUu3Og7B9sbY7tsFLctQkIqnMh3hErYgdMjTYuqmcXX+lK5T1lnUt3G7zNswmZA==", + "dev": true, "dependencies": { "spdx-expression-parse": "^3.0.0", "spdx-license-ids": "^3.0.0" @@ -17205,12 +16489,14 @@ "node_modules/spdx-exceptions": { "version": "2.5.0", "resolved": "https://registry.npmjs.org/spdx-exceptions/-/spdx-exceptions-2.5.0.tgz", - "integrity": 
"sha512-PiU42r+xO4UbUS1buo3LPJkjlO7430Xn5SVAhdpzzsPHsjbYVflnnFdATgabnLude+Cqu25p6N+g2lw/PFsa4w==" + "integrity": "sha512-PiU42r+xO4UbUS1buo3LPJkjlO7430Xn5SVAhdpzzsPHsjbYVflnnFdATgabnLude+Cqu25p6N+g2lw/PFsa4w==", + "dev": true }, "node_modules/spdx-expression-parse": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/spdx-expression-parse/-/spdx-expression-parse-3.0.1.tgz", "integrity": "sha512-cbqHunsQWnJNE6KhVSMsMeH5H/L9EpymbzqTQ3uLwNCLZ1Q481oWaofqH7nO6V07xlXwY6PhQdQ2IedWx/ZK4Q==", + "dev": true, "dependencies": { "spdx-exceptions": "^2.1.0", "spdx-license-ids": "^3.0.0" @@ -17219,7 +16505,8 @@ "node_modules/spdx-license-ids": { "version": "3.0.20", "resolved": "https://registry.npmjs.org/spdx-license-ids/-/spdx-license-ids-3.0.20.tgz", - "integrity": "sha512-jg25NiDV/1fLtSgEgyvVyDunvaNHbuwF9lfNV17gSmPFAlYzdfNBlLtLzXTevwkPj7DhGbmN9VnmJIgLnhvaBw==" + "integrity": "sha512-jg25NiDV/1fLtSgEgyvVyDunvaNHbuwF9lfNV17gSmPFAlYzdfNBlLtLzXTevwkPj7DhGbmN9VnmJIgLnhvaBw==", + "dev": true }, "node_modules/splaytree": { "version": "3.1.2", @@ -17504,17 +16791,6 @@ "node": ">=6" } }, - "node_modules/strip-indent": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/strip-indent/-/strip-indent-3.0.0.tgz", - "integrity": "sha512-laJTa3Jb+VQpaC6DseHhF7dXVqHTfJPCRDaEbid/drOhgitgYku/letMUqOXFoWV0zIIUbjpdH2t+tYj4bQMRQ==", - "dependencies": { - "min-indent": "^1.0.0" - }, - "engines": { - "node": ">=8" - } - }, "node_modules/strip-json-comments": { "version": "3.1.1", "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-3.1.1.tgz", @@ -17549,11 +16825,6 @@ } } }, - "node_modules/subtag": { - "version": "0.5.0", - "resolved": "https://registry.npmjs.org/subtag/-/subtag-0.5.0.tgz", - "integrity": "sha512-CaIBcTSb/nyk4xiiSOtZYz1B+F12ZxW8NEp54CdT+84vmh/h4sUnHGC6+KQXUfED8u22PQjCYWfZny8d2ELXwg==" - }, "node_modules/sucrase": { "version": "3.35.0", "resolved": "https://registry.npmjs.org/sucrase/-/sucrase-3.35.0.tgz", @@ -17575,15 +16846,6 @@ "node": ">=16 || 14 >=14.17" } }, - "node_modules/suggestions": { - "version": "1.7.1", - "resolved": "https://registry.npmjs.org/suggestions/-/suggestions-1.7.1.tgz", - "integrity": "sha512-gl5YPAhPYl07JZ5obiD9nTZsg4SyZswAQU/NNtnYiSnFkI3+ZHuXAiEsYm7AaZ71E0LXSFaGVaulGSWN3Gd71A==", - "dependencies": { - "fuzzy": "^0.1.1", - "xtend": "^4.0.0" - } - }, "node_modules/super-regex": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/super-regex/-/super-regex-1.0.0.tgz", @@ -17836,7 +17098,9 @@ "node_modules/tinyqueue": { "version": "2.0.3", "resolved": "https://registry.npmjs.org/tinyqueue/-/tinyqueue-2.0.3.tgz", - "integrity": "sha512-ppJZNDuKGgxzkHihX8v9v9G5f+18gzaTfrukGrq6ueg0lmH4nqVnA2IPG0AEH3jKEk2GRJCUhDoqpoiw3PHLBA==" + "integrity": "sha512-ppJZNDuKGgxzkHihX8v9v9G5f+18gzaTfrukGrq6ueg0lmH4nqVnA2IPG0AEH3jKEk2GRJCUhDoqpoiw3PHLBA==", + "optional": true, + "peer": true }, "node_modules/to-regex-range": { "version": "5.0.1", @@ -17861,14 +17125,6 @@ "url": "https://github.com/sponsors/ljharb" } }, - "node_modules/trim-newlines": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/trim-newlines/-/trim-newlines-3.0.1.tgz", - "integrity": "sha512-c1PTsA3tYrIsLGkJkzHF+w9F2EyxfXGo4UyJc4pFL++FMjnq0HJS69T3M7d//gKrFKwy429bouPescbjecU+Zw==", - "engines": { - "node": ">=8" - } - }, "node_modules/ts-api-utils": { "version": "1.3.0", "resolved": "https://registry.npmjs.org/ts-api-utils/-/ts-api-utils-1.3.0.tgz", @@ -17907,6 +17163,8 @@ "version": "4.0.4", "resolved": 
"https://registry.npmjs.org/tweakpane/-/tweakpane-4.0.4.tgz", "integrity": "sha512-RkWD54zDlEbnN01wQPk0ANHGbdCvlJx/E8A1QxhTfCbX+ROWos1Ws2MnhOm39aUGMOh+36TjUwpDmLfmwTr1Fg==", + "optional": true, + "peer": true, "funding": { "url": "https://github.com/sponsors/cocopon" } @@ -18265,6 +17523,7 @@ "version": "3.0.4", "resolved": "https://registry.npmjs.org/validate-npm-package-license/-/validate-npm-package-license-3.0.4.tgz", "integrity": "sha512-DpKm2Ui/xN7/HQKCtpZxoRWBhZ9Z0kqtygG8XCgNQ8ZlDnxuQmWhj566j8fN4Cu3/JmbhsDo7fcAJq4s9h27Ew==", + "dev": true, "dependencies": { "spdx-correct": "^3.0.0", "spdx-expression-parse": "^3.0.0" @@ -18478,12 +17737,14 @@ "node_modules/wrappy": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", - "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==" + "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", + "dev": true }, "node_modules/xtend": { "version": "4.0.2", "resolved": "https://registry.npmjs.org/xtend/-/xtend-4.0.2.tgz", "integrity": "sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ==", + "dev": true, "engines": { "node": ">=0.4" } @@ -18497,11 +17758,6 @@ "node": ">=10" } }, - "node_modules/yallist": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", - "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==" - }, "node_modules/yaml": { "version": "2.5.1", "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.5.1.tgz", @@ -18535,6 +17791,7 @@ "version": "20.2.9", "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-20.2.9.tgz", "integrity": "sha512-y11nGElTIV+CT3Zv9t7VKl+Q3hTQoT9a1Qzezhhl6Rp21gJ/IVTW7Z3y9EWXhuUBC2Shnf+DX0antecpAwSP8w==", + "dev": true, "engines": { "node": ">=10" } diff --git a/package.json b/package.json index 39292210..544ef107 100644 --- a/package.json +++ b/package.json @@ -13,7 +13,6 @@ }, "dependencies": { "@heroicons/react": "^2.1.5", - "@mapbox/mapbox-gl-geocoder": "^5.0.2", "@maptiler/geocoding-control": "^1.4.1", "@nextui-org/react": "^2.4.6", "@phosphor-icons/react": "^2.1.7", @@ -22,7 +21,6 @@ "clsx": "^2.1.1", "framer-motion": "^11.3.8", "lodash": "^4.17.21", - "mapbox-gl": "3.2.0", "maplibre-gl": "^4.5.0", "multi-range-slider-react": "^2.0.7", "next": "^14.2.5", @@ -44,7 +42,6 @@ "@semantic-release/github": "^10.1.1", "@semantic-release/npm": "^12.0.1", "@semantic-release/release-notes-generator": "^14.0.1", - "@types/mapbox__mapbox-gl-geocoder": "^5.0.0", "@types/node": "^20.14.11", "@types/pbf": "^3.0.5", "@types/pg": "^8.11.6", From 22d4afc3fed0a1a87b93ddec8c96d045340d3ae2 Mon Sep 17 00:00:00 2001 From: Jesus Bracho Date: Sat, 16 Nov 2024 15:52:12 -0500 Subject: [PATCH 07/18] Run eslint on PropertyMap.tsx --- src/components/PropertyMap.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/PropertyMap.tsx b/src/components/PropertyMap.tsx index df99d174..ea8c9eb5 100644 --- a/src/components/PropertyMap.tsx +++ b/src/components/PropertyMap.tsx @@ -422,7 +422,7 @@ const PropertyMap: FC = ({ markerOnSelected={false} filter={(feature: any) => { return feature.context.some((i: any) => { - return i.text.includes("Philadelphia"); + return i.text.includes('Philadelphia'); }); }} proximity={[ From cd87b341057ac2a268fa8449fa12534642dea813 Mon Sep 17 00:00:00 2001 From: nlebovits Date: Mon, 18 Nov 
2024 08:43:24 -0500 Subject: [PATCH 08/18] run precommit --- data/Dockerfile-pg | 30 +-- data/src/classes/featurelayer.py | 205 ++++++++++-------- data/src/constants/services.py | 20 +- data/src/data_utils/__init__.py | 4 +- data/src/data_utils/access_process.py | 2 +- data/src/data_utils/city_owned_properties.py | 8 +- data/src/data_utils/community_gardens.py | 19 +- data/src/data_utils/contig_neighbors.py | 33 ++- data/src/data_utils/council_dists.py | 36 +++ .../{deliquencies.py => delinquencies.py} | 20 +- data/src/data_utils/l_and_i.py | 31 ++- data/src/data_utils/nbhoods.py | 14 +- data/src/data_utils/negligent_devs.py | 169 +++------------ data/src/data_utils/opa_properties.py | 117 +++++++++- data/src/data_utils/owner_type.py | 3 +- data/src/data_utils/phs_properties.py | 9 +- data/src/data_utils/ppr_properties.py | 29 +-- data/src/data_utils/pwd_parcels.py | 71 ++++++ data/src/data_utils/rco_geoms.py | 18 +- data/src/data_utils/utils.py | 8 +- data/src/data_utils/vacant_properties.py | 158 ++++---------- 21 files changed, 548 insertions(+), 456 deletions(-) create mode 100644 data/src/data_utils/council_dists.py rename data/src/data_utils/{deliquencies.py => delinquencies.py} (56%) create mode 100644 data/src/data_utils/pwd_parcels.py diff --git a/data/Dockerfile-pg b/data/Dockerfile-pg index d4d62d77..8fbca869 100644 --- a/data/Dockerfile-pg +++ b/data/Dockerfile-pg @@ -1,26 +1,26 @@ -# -# NOTE: THIS DOCKERFILE IS GENERATED VIA "make update"! PLEASE DO NOT EDIT IT DIRECTLY. -# - FROM postgres:16-bullseye LABEL maintainer="PostGIS Project - https://postgis.net" \ - org.opencontainers.image.description="PostGIS 3.4.3+dfsg-2.pgdg110+1 spatial database extension with PostgreSQL 16 bullseye" \ + org.opencontainers.image.description="PostGIS with PostgreSQL 16" \ org.opencontainers.image.source="https://github.com/postgis/docker-postgis" ENV POSTGIS_MAJOR 3 -ENV POSTGIS_VERSION 3.4.3+dfsg-2.pgdg110+1 +# Install dependencies and PostGIS RUN apt-get update \ - && apt-cache showpkg postgresql-$PG_MAJOR-postgis-$POSTGIS_MAJOR \ - && apt-get install -y --no-install-recommends \ - # ca-certificates: for accessing remote raster files; - # fix: https://github.com/postgis/docker-postgis/issues/307 + && apt-get install -y --no-install-recommends \ + gnupg \ + postgresql-common \ + apt-transport-https \ + lsb-release \ + wget \ ca-certificates \ - \ - postgresql-$PG_MAJOR-postgis-$POSTGIS_MAJOR=$POSTGIS_VERSION \ - postgresql-$PG_MAJOR-postgis-$POSTGIS_MAJOR-scripts \ - && rm -rf /var/lib/apt/lists/* + && yes | /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh \ + && apt-get update \ + && apt-get install -y --no-install-recommends \ + postgresql-16-postgis-3 \ + postgresql-16-postgis-3-scripts \ + postgresql-client-16 \ + && rm -rf /var/lib/apt/lists/* RUN mkdir -p /docker-entrypoint-initdb.d - diff --git a/data/src/classes/featurelayer.py b/data/src/classes/featurelayer.py index e704ce97..9f10d8fc 100644 --- a/data/src/classes/featurelayer.py +++ b/data/src/classes/featurelayer.py @@ -10,7 +10,6 @@ from config.config import ( FORCE_RELOAD, USE_CRS, - log_level, min_tiles_file_size_in_bytes, write_production_tiles_file, ) @@ -18,9 +17,15 @@ from esridump.dumper import EsriDumper from google.cloud import storage from google.cloud.storage.bucket import Bucket -from shapely import Point, wkb +from shapely import wkb -log.basicConfig(level=log_level) +from concurrent.futures import ThreadPoolExecutor, as_completed +from tqdm.auto import tqdm + +from tqdm import tqdm + + 
+log.basicConfig(level=log.INFO) def google_cloud_bucket() -> Bucket: @@ -46,10 +51,6 @@ def google_cloud_bucket() -> Bucket: class FeatureLayer: - """ - FeatureLayer is a class to represent a GIS dataset. It can be initialized with a URL to an Esri Feature Service, a SQL query to Carto, or a GeoDataFrame. - """ - def __init__( self, name, @@ -61,6 +62,8 @@ def __init__( from_xy=False, use_wkb_geom_field=None, cols: list[str] = None, + max_workers=16, + chunk_size=100000, ): self.name = name self.esri_rest_urls = ( @@ -77,147 +80,163 @@ def __init__( self.psql_table = name.lower().replace(" ", "_") self.input_crs = "EPSG:4326" if not from_xy else USE_CRS self.use_wkb_geom_field = use_wkb_geom_field + self.max_workers = max_workers + self.chunk_size = chunk_size inputs = [self.esri_rest_urls, self.carto_sql_queries, self.gdf] non_none_inputs = [i for i in inputs if i is not None] if len(non_none_inputs) > 0: - if self.esri_rest_urls is not None: - self.type = "esri" - elif self.carto_sql_queries is not None: - self.type = "carto" - elif self.gdf is not None: - self.type = "gdf" - - if force_reload: + self.type = ( + "esri" + if self.esri_rest_urls + else "carto" if self.carto_sql_queries else "gdf" + ) + if force_reload or not self.check_psql(): self.load_data() - else: - psql_exists = self.check_psql() - if not psql_exists: - self.load_data() else: - print(f"Initialized FeatureLayer {self.name} with no data.") + log.info(f"Initialized FeatureLayer {self.name} with no data.") def check_psql(self): try: if not sa.inspect(local_engine).has_table(self.psql_table): - print(f"Table {self.psql_table} does not exist") + log.debug(f"Table {self.psql_table} does not exist") return False psql_table = gpd.read_postgis( f"SELECT * FROM {self.psql_table}", conn, geom_col="geometry" ) if len(psql_table) == 0: return False - else: - print(f"Loading data for {self.name} from psql...") - self.gdf = psql_table - return True + log.info(f"Loading data for {self.name} from psql...") + self.gdf = psql_table + return True except Exception as e: - print(f"Error loading data for {self.name}: {e}") + log.error(f"Error loading data for {self.name}: {e}") return False def load_data(self): - print(f"Loading data for {self.name} from {self.type}...") + log.info(f"Loading data for {self.name} from {self.type}...") if self.type == "gdf": pass else: try: if self.type == "esri": - if self.esri_rest_urls is None: + if not self.esri_rest_urls: raise ValueError("Must provide a URL to load data from Esri") gdfs = [] for url in self.esri_rest_urls: - parcel_type = ( - "Land" - if "Vacant_Indicators_Land" in url - else "Building" - if "Vacant_Indicators_Bldg" in url - else None - ) - self.dumper = EsriDumper(url) - features = [feature for feature in self.dumper] + print(f"Processing URL: {url}") # Debugging: Print the URL + + # Use EsriDumper to get features + dumper = EsriDumper(url) + features = [feature for feature in dumper] + + if not features: + log.error(f"No features returned for URL: {url}") + continue geojson_features = { "type": "FeatureCollection", "features": features, } - - this_gdf = gpd.GeoDataFrame.from_features( + gdf = gpd.GeoDataFrame.from_features( geojson_features, crs=self.input_crs ) + gdf = gdf.to_crs(self.crs) - # Check if 'X' and 'Y' columns exist and create geometry if necessary - if "X" in this_gdf.columns and "Y" in this_gdf.columns: - this_gdf["geometry"] = this_gdf.apply( - lambda row: Point(row["X"], row["Y"]), axis=1 - ) - elif "geometry" not in this_gdf.columns: - raise ValueError( - "No 
geometry information found in the data." - ) - - this_gdf = this_gdf.to_crs(USE_CRS) + # Add parcel_type based on the URL + if "Vacant_Indicators_Land" in url: + gdf["parcel_type"] = "Land" + elif "Vacant_Indicators_Bldg" in url: + gdf["parcel_type"] = "Building" - # Assign the parcel_type to the GeoDataFrame - if parcel_type: - this_gdf["parcel_type"] = parcel_type - - gdfs.append(this_gdf) + gdfs.append(gdf) + # Concatenate all dataframes self.gdf = pd.concat(gdfs, ignore_index=True) elif self.type == "carto": - if self.carto_sql_queries is None: - raise ValueError( - "Must provide a SQL query to load data from Carto" - ) + self._load_carto_data() - gdfs = [] - for sql_query in self.carto_sql_queries: - response = requests.get( - "https://phl.carto.com/api/v2/sql", params={"q": sql_query} - ) + # Convert all column names to lowercase + if not self.gdf.empty: + self.gdf.columns = [col.lower() for col in self.gdf.columns] - data = response.json()["rows"] - df = pd.DataFrame(data) - geometry = ( - wkb.loads(df[self.use_wkb_geom_field], hex=True) - if self.use_wkb_geom_field - else gpd.points_from_xy(df.x, df.y) - ) - - gdf = gpd.GeoDataFrame( - df, - geometry=geometry, - crs=self.input_crs, - ) - gdf = gdf.to_crs(USE_CRS) - - gdfs.append(gdf) - self.gdf = pd.concat(gdfs, ignore_index=True) - - # Drop columns + # Drop columns not in self.cols, if specified if self.cols: + self.cols = [ + col.lower() for col in self.cols + ] # Ensure self.cols is lowercase self.cols.append("geometry") - self.gdf = self.gdf[self.cols] + self.gdf = self.gdf[ + [col for col in self.cols if col in self.gdf.columns] + ] - # save self.gdf to psql - # rename columns to lowercase for table creation in postgres - if self.cols: - self.gdf = self.gdf.rename( - columns={x: x.lower() for x in self.cols} - ) + # Save to PostGIS self.gdf.to_postgis( name=self.psql_table, con=conn, if_exists="replace", chunksize=1000, ) + except Exception as e: - print(f"Error loading data for {self.name}: {e}") + log.error(f"Error loading data for {self.name}: {e}") traceback.print_exc() - self.gdf = None + self.gdf = gpd.GeoDataFrame() + + def _load_carto_data(self): + if not self.carto_sql_queries: + raise ValueError("Must provide SQL query to load data from Carto") + gdfs = [] + with ThreadPoolExecutor(max_workers=self.max_workers) as executor: + futures = [] + for query in self.carto_sql_queries: + total_rows = self._get_carto_total_rows(query) + for offset in range(0, total_rows, self.chunk_size): + futures.append( + executor.submit( + self._fetch_carto_chunk, query, offset, self.chunk_size + ) + ) + for future in tqdm( + as_completed(futures), + total=len(futures), + desc="Processing Carto chunks", + ): + try: + gdfs.append(future.result()) + except Exception as e: + log.error(f"Error processing Carto chunk: {e}") + self.gdf = pd.concat(gdfs, ignore_index=True) + + def _fetch_carto_chunk(self, query, offset, chunk_size): + chunk_query = f"{query} LIMIT {chunk_size} OFFSET {offset}" + response = requests.get( + "https://phl.carto.com/api/v2/sql", params={"q": chunk_query} + ) + response.raise_for_status() + data = response.json().get("rows", []) + if not data: + return gpd.GeoDataFrame() + df = pd.DataFrame(data) + geometry = ( + wkb.loads(df[self.use_wkb_geom_field], hex=True) + if self.use_wkb_geom_field + else gpd.points_from_xy(df.x, df.y) + ) + return gpd.GeoDataFrame(df, geometry=geometry, crs=self.input_crs).to_crs( + self.crs + ) + + def _get_carto_total_rows(self, query): + count_query = f"SELECT COUNT(*) as count FROM 
({query}) as subquery" + response = requests.get( + "https://phl.carto.com/api/v2/sql", params={"q": count_query} + ) + response.raise_for_status() + return response.json()["rows"][0]["count"] def spatial_join(self, other_layer, how="left", predicate="intersects"): """ @@ -314,7 +333,7 @@ def build_and_publish(self, tiles_file_id_prefix: str) -> None: self.centroid_gdf["geometry"] = self.centroid_gdf["geometry"].centroid self.centroid_gdf = self.centroid_gdf.to_crs(epsg=4326) self.centroid_gdf.to_file(temp_geojson_points, driver="GeoJSON") - + # Command for generating PMTiles for points up to zoom level zoom_threshold points_command: list[str] = [ "tippecanoe", diff --git a/data/src/constants/services.py b/data/src/constants/services.py index ecc095e1..e08a38fb 100644 --- a/data/src/constants/services.py +++ b/data/src/constants/services.py @@ -5,6 +5,10 @@ "https://services.arcgis.com/fLeGjb7u4uXqeF9q/ArcGIS/rest/services/Vacant_Indicators_Bldg/FeatureServer/0/", ] +COUNCIL_DISTRICTS_TO_LOAD = [ + "https://services.arcgis.com/fLeGjb7u4uXqeF9q/arcgis/rest/services/Council_Districts_2024/FeatureServer/0/" +] + CITY_OWNED_PROPERTIES_TO_LOAD = [ "https://services.arcgis.com/fLeGjb7u4uXqeF9q/ArcGIS/rest/services/LAMAAssets/FeatureServer/0/" ] @@ -30,17 +34,19 @@ ) # Load data for complaints from L&I -COMPLAINTS_SQL_QUERY = f"SELECT address, service_request_id, subject, status, service_name, service_code, lat AS y, lon AS x FROM public_cases_fc WHERE requested_datetime >= '{one_year_ago}' AND lat IS NOT NULL;" +COMPLAINTS_SQL_QUERY = f"SELECT address, service_request_id, subject, status, service_name, service_code, lat AS y, lon AS x FROM public_cases_fc WHERE requested_datetime >= '{one_year_ago}' AND lat IS NOT NULL" -VIOLATIONS_SQL_QUERY = f"SELECT parcel_id_num, casenumber, casecreateddate, casetype, casestatus, violationnumber, violationcodetitle, violationstatus, opa_account_num, address, opa_owner, geocode_x AS x, geocode_y AS y FROM violations WHERE violationdate >= '{one_year_ago}' AND geocode_x IS NOT NULL;" +VIOLATIONS_SQL_QUERY = f"SELECT parcel_id_num, casenumber, casecreateddate, casetype, casestatus, violationnumber, violationcodetitle, violationstatus, opa_account_num, address, opa_owner, geocode_x AS x, geocode_y AS y FROM violations WHERE violationdate >= '{one_year_ago}' AND geocode_x IS NOT NULL" -GUNCRIME_SQL_QUERY = f"SELECT text_general_code, dispatch_date, point_x AS x, point_y AS y FROM incidents_part1_part2 WHERE dispatch_date_time >= '{one_year_ago}' AND text_general_code IN ('Aggravated Assault Firearm', 'Robbery Firearm') AND point_x IS NOT NULL;" +GUNCRIME_SQL_QUERY = f"SELECT text_general_code, dispatch_date, point_x AS x, point_y AS y FROM incidents_part1_part2 WHERE dispatch_date_time >= '{one_year_ago}' AND text_general_code IN ('Aggravated Assault Firearm', 'Robbery Firearm') AND point_x IS NOT NULL" -DRUGCRIME_SQL_QUERY = f"SELECT text_general_code, dispatch_date, point_x AS x, point_y AS y FROM incidents_part1_part2 WHERE dispatch_date_time >= '{one_year_ago}' AND text_general_code IN ('Narcotic / Drug Law Violations') AND point_x IS NOT NULL;" +DRUGCRIME_SQL_QUERY = f"SELECT text_general_code, dispatch_date, point_x AS x, point_y AS y FROM incidents_part1_part2 WHERE dispatch_date_time >= '{one_year_ago}' AND text_general_code IN ('Narcotic / Drug Law Violations') AND point_x IS NOT NULL" DELINQUENCIES_QUERY = "SELECT * FROM real_estate_tax_delinquencies" -OPA_PROPERTIES_QUERY = "SELECT market_value, sale_date, sale_price, parcel_number, mailing_address_1, 
mailing_address_2, mailing_care_of, mailing_street, mailing_zip, mailing_city_state, the_geom FROM opa_properties_public" +OPA_PROPERTIES_QUERY = "SELECT building_code_description, market_value, sale_date, sale_price, parcel_number, owner_1, owner_2, mailing_address_1, mailing_address_2, mailing_care_of, mailing_street, mailing_zip, mailing_city_state, zip_code, zoning, the_geom FROM opa_properties_public" + +PWD_PARCELS_QUERY = "SELECT *, the_geom FROM pwd_parcels" UNSAFE_BUILDINGS_QUERY = "SELECT * FROM unsafe" @@ -67,4 +73,6 @@ NBHOODS_URL = "https://raw.githubusercontent.com/opendataphilly/open-geo-data/master/philadelphia-neighborhoods/philadelphia-neighborhoods.geojson" -CENSUS_BGS_URL = "https://opendata.arcgis.com/datasets/2f982bada233478ea0100528227febce_0.geojson" \ No newline at end of file +CENSUS_BGS_URL = ( + "https://opendata.arcgis.com/datasets/2f982bada233478ea0100528227febce_0.geojson" +) diff --git a/data/src/data_utils/__init__.py b/data/src/data_utils/__init__.py index e1709a69..9fc453ac 100644 --- a/data/src/data_utils/__init__.py +++ b/data/src/data_utils/__init__.py @@ -5,7 +5,7 @@ from .tree_canopy import tree_canopy from .nbhoods import nbhoods from .gun_crimes import gun_crimes -from .deliquencies import deliquencies +from .delinquencies import delinquencies from .opa_properties import opa_properties from .vacant_properties import vacant_properties from .priority_level import priority_level @@ -19,7 +19,7 @@ "tree_canopy", "nbhoods", "gun_crimes", - "deliquencies", + "delinquencies", "opa_properties", "vacant_properties", "priority_level", diff --git a/data/src/data_utils/access_process.py b/data/src/data_utils/access_process.py index 7c8e79de..ae3af8e6 100644 --- a/data/src/data_utils/access_process.py +++ b/data/src/data_utils/access_process.py @@ -39,5 +39,5 @@ def access_process(dataset: Any) -> Any: access_processes.append(access_process) dataset.gdf["access_process"] = access_processes - + return dataset diff --git a/data/src/data_utils/city_owned_properties.py b/data/src/data_utils/city_owned_properties.py index a5b21980..dc6f708a 100644 --- a/data/src/data_utils/city_owned_properties.py +++ b/data/src/data_utils/city_owned_properties.py @@ -1,7 +1,7 @@ -from typing import Any from classes.featurelayer import FeatureLayer from constants.services import CITY_OWNED_PROPERTIES_TO_LOAD + def city_owned_properties(primary_featurelayer: FeatureLayer) -> FeatureLayer: """ Processes city-owned property data by joining it with the primary feature layer, @@ -9,11 +9,11 @@ def city_owned_properties(primary_featurelayer: FeatureLayer) -> FeatureLayer: All instances where the "city_owner_agency" is "PLB" are changed to "Land Bank (PHDC)". Args: - primary_featurelayer (FeatureLayer): The primary feature layer to which city-owned + primary_featurelayer (FeatureLayer): The primary feature layer to which city-owned property data will be joined. Returns: - FeatureLayer: The updated primary feature layer with processed city ownership + FeatureLayer: The updated primary feature layer with processed city ownership information. 
""" city_owned_properties = FeatureLayer( @@ -22,6 +22,8 @@ def city_owned_properties(primary_featurelayer: FeatureLayer) -> FeatureLayer: cols=["OPABRT", "AGENCY", "SIDEYARDELIGIBLE"], ) + print("Columns for city_owned_properties:", city_owned_properties.gdf.columns) + city_owned_properties.gdf.dropna(subset=["opabrt"], inplace=True) primary_featurelayer.opa_join(city_owned_properties.gdf, "opabrt") diff --git a/data/src/data_utils/community_gardens.py b/data/src/data_utils/community_gardens.py index 4bed0284..603d2b7c 100644 --- a/data/src/data_utils/community_gardens.py +++ b/data/src/data_utils/community_gardens.py @@ -8,21 +8,28 @@ def community_gardens(primary_featurelayer): name="Community Gardens", esri_rest_urls=COMMUNITY_GARDENS_TO_LOAD ) - community_gardens.gdf = community_gardens.gdf[["Site_Name", "geometry"]] - + community_gardens.gdf = community_gardens.gdf[["site_name", "geometry"]] + primary_featurelayer.spatial_join(community_gardens) - # Create a boolean mask where 'site_Name' is not null - mask = primary_featurelayer.gdf["Site_Name"].notnull() + # Print the columns to debug and confirm that "site_name" exists + print("Columns in primary_featurelayer.gdf:", primary_featurelayer.gdf.columns) + + # Create a boolean mask where 'site_name' is not null + mask = primary_featurelayer.gdf["site_name"].notnull() count_dropped = mask.sum() print(f"Number of community gardens being dropped: {count_dropped}") - # Use this mask to drop rows where 'site_Name' is not null + # Use this mask to drop rows where 'site_name' is not null primary_featurelayer.gdf = primary_featurelayer.gdf.drop( primary_featurelayer.gdf[mask].index ) - primary_featurelayer.gdf = primary_featurelayer.gdf.drop(columns=["Site_Name"]) + # Ensure 'site_name' exists before attempting to drop it + if "site_name" in primary_featurelayer.gdf.columns: + primary_featurelayer.gdf = primary_featurelayer.gdf.drop(columns=["site_name"]) + else: + print("'site_name' column is missing, cannot drop.") return primary_featurelayer diff --git a/data/src/data_utils/contig_neighbors.py b/data/src/data_utils/contig_neighbors.py index 1c811a15..0d58abe3 100644 --- a/data/src/data_utils/contig_neighbors.py +++ b/data/src/data_utils/contig_neighbors.py @@ -1,11 +1,18 @@ import warnings - import networkx as nx from libpysal.weights import Queen def contig_neighbors(primary_featurelayer): - parcels = primary_featurelayer.gdf + # Filter the parcels to only consider vacant properties + parcels = primary_featurelayer.gdf[primary_featurelayer.gdf["vacant"] == 1] + + if parcels.empty: + print("No vacant properties found in the dataset.") + primary_featurelayer.gdf["n_contiguous"] = 0 + return primary_featurelayer + + print(f"Found {len(parcels)} vacant properties.") with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=FutureWarning) @@ -15,13 +22,29 @@ def contig_neighbors(primary_featurelayer): message="The weights matrix is not fully connected", ) + # Create a spatial weights matrix for vacant parcels + print("Creating spatial weights matrix for vacant parcels...") w = Queen.from_dataframe(parcels) + print("Converting spatial weights matrix to NetworkX graph...") g = w.to_networkx() - # Calculate the number of contiguous neighbors for each feature in parcels - n_contiguous = [len(nx.node_connected_component(g, i)) for i in range(len(parcels))] + # Calculate the number of contiguous neighbors for each vacant property + print("Calculating number of contiguous vacant neighbors for each property...") + n_contiguous = { + 
node: len(nx.node_connected_component(g, node)) - 1 for node in g.nodes + } + + # Assign the number of contiguous vacant neighbors to vacant properties + parcels["n_contiguous"] = parcels.index.map(n_contiguous).fillna(0).astype(int) + + print("Joining results back to primary feature layer...") + primary_featurelayer.gdf = primary_featurelayer.gdf.merge( + parcels[["opa_id", "n_contiguous"]], on="opa_id", how="left" + ) - primary_featurelayer.gdf["n_contiguous"] = n_contiguous + # For non-vacant properties, set the number of contiguous vacant neighbors to 0 + primary_featurelayer.gdf["n_contiguous"].fillna(0, inplace=True) + print("Process completed. Returning updated primary feature layer.") return primary_featurelayer diff --git a/data/src/data_utils/council_dists.py b/data/src/data_utils/council_dists.py new file mode 100644 index 00000000..4b87f259 --- /dev/null +++ b/data/src/data_utils/council_dists.py @@ -0,0 +1,36 @@ +from classes.featurelayer import FeatureLayer +from constants.services import COUNCIL_DISTRICTS_TO_LOAD +import pandas as pd + + +pd.set_option("future.no_silent_downcasting", True) + + +def council_dists(primary_featurelayer): + # Load council districts + council_dists = FeatureLayer( + name="Council Districts", esri_rest_urls=COUNCIL_DISTRICTS_TO_LOAD + ) + + # Check that the required columns exist in the DataFrame + required_columns = ["district", "geometry"] + missing_columns = [ + col for col in required_columns if col not in council_dists.gdf.columns + ] + if missing_columns: + raise KeyError( + f"Missing required columns in council districts data: {', '.join(missing_columns)}" + ) + + # Use only the required columns + council_dists.gdf = council_dists.gdf[required_columns].copy() + council_dists.rebuild_gdf() + + # Perform spatial join + primary_featurelayer.spatial_join(council_dists) + + # Drop duplicates in the primary feature layer + primary_featurelayer.gdf.drop_duplicates(inplace=True) + primary_featurelayer.rebuild_gdf() + + return primary_featurelayer diff --git a/data/src/data_utils/deliquencies.py b/data/src/data_utils/delinquencies.py similarity index 56% rename from data/src/data_utils/deliquencies.py rename to data/src/data_utils/delinquencies.py index 16f8f205..48c474ac 100644 --- a/data/src/data_utils/deliquencies.py +++ b/data/src/data_utils/delinquencies.py @@ -2,8 +2,8 @@ from constants.services import DELINQUENCIES_QUERY -def deliquencies(primary_featurelayer): - tax_deliquencies = FeatureLayer( +def delinquencies(primary_featurelayer): + tax_delinquencies = FeatureLayer( name="Property Tax Delinquencies", carto_sql_queries=DELINQUENCIES_QUERY, use_wkb_geom_field="the_geom", @@ -20,11 +20,23 @@ def deliquencies(primary_featurelayer): ) primary_featurelayer.opa_join( - tax_deliquencies.gdf, + tax_delinquencies.gdf, "opa_number", ) - primary_featurelayer.gdf.loc[:, "sheriff_sale"] = primary_featurelayer.gdf[ + delinquency_cols = [ + "total_due", + "is_actionable", + "payment_agreement", + "num_years_owed", + "most_recent_year_owed", + "total_assessment", + ] + primary_featurelayer.gdf[delinquency_cols] = primary_featurelayer.gdf[ + delinquency_cols + ].fillna("NA") + + primary_featurelayer.gdf["sheriff_sale"] = primary_featurelayer.gdf[ "sheriff_sale" ].fillna("N") diff --git a/data/src/data_utils/l_and_i.py b/data/src/data_utils/l_and_i.py index 27f28147..6cf277ba 100644 --- a/data/src/data_utils/l_and_i.py +++ b/data/src/data_utils/l_and_i.py @@ -4,6 +4,7 @@ from classes.featurelayer import FeatureLayer from constants.services import 
COMPLAINTS_SQL_QUERY, VIOLATIONS_SQL_QUERY + def l_and_i(primary_featurelayer: FeatureLayer) -> FeatureLayer: """ Process L&I (Licenses and Inspections) data for complaints and violations. @@ -19,20 +20,27 @@ def l_and_i(primary_featurelayer: FeatureLayer) -> FeatureLayer: FeatureLayer: The primary feature layer updated with L&I data. """ keywords: List[str] = [ - 'dumping', 'blight', 'rubbish', 'weeds', 'graffiti', - 'abandoned', 'sanitation', 'litter', 'vacant', 'trash', - 'unsafe' + "dumping", + "blight", + "rubbish", + "weeds", + "graffiti", + "abandoned", + "sanitation", + "litter", + "vacant", + "trash", + "unsafe", ] # Load complaints data from L&I l_and_i_complaints: FeatureLayer = FeatureLayer( - name="LI Complaints", - carto_sql_queries=COMPLAINTS_SQL_QUERY + name="LI Complaints", carto_sql_queries=COMPLAINTS_SQL_QUERY ) # Filter for rows where 'subject' contains any of the keywords l_and_i_complaints.gdf = l_and_i_complaints.gdf[ - l_and_i_complaints.gdf["subject"].str.lower().str.contains('|'.join(keywords)) + l_and_i_complaints.gdf["subject"].str.lower().str.contains("|".join(keywords)) ] # Filter for only Status = 'Open' @@ -56,14 +64,15 @@ def l_and_i(primary_featurelayer: FeatureLayer) -> FeatureLayer: # Load data for violations from L&I l_and_i_violations: FeatureLayer = FeatureLayer( - name="LI Violations", - carto_sql_queries=VIOLATIONS_SQL_QUERY, - from_xy=True + name="LI Violations", carto_sql_queries=VIOLATIONS_SQL_QUERY, from_xy=True ) # Filter for rows where 'casetype' contains any of the keywords, handling NaN values l_and_i_violations.gdf = l_and_i_violations.gdf[ - l_and_i_violations.gdf["violationcodetitle"].fillna('').str.lower().str.contains('|'.join(keywords)) + l_and_i_violations.gdf["violationcodetitle"] + .fillna("") + .str.lower() + .str.contains("|".join(keywords)) ] all_violations_count_df: pd.DataFrame = ( @@ -175,4 +184,4 @@ def remove_nan_strings(x: str) -> str | None: .astype(int) ) - return primary_featurelayer \ No newline at end of file + return primary_featurelayer diff --git a/data/src/data_utils/nbhoods.py b/data/src/data_utils/nbhoods.py index 6fde4bd0..d0de302b 100644 --- a/data/src/data_utils/nbhoods.py +++ b/data/src/data_utils/nbhoods.py @@ -7,19 +7,19 @@ def nbhoods(primary_featurelayer): phl_nbhoods = gpd.read_file(NBHOODS_URL) - + # Correct the column name to uppercase if needed - if 'MAPNAME' in phl_nbhoods.columns: + if "MAPNAME" in phl_nbhoods.columns: phl_nbhoods.rename(columns={"MAPNAME": "neighborhood"}, inplace=True) - + phl_nbhoods = phl_nbhoods.to_crs(USE_CRS) - + nbhoods = FeatureLayer("Neighborhoods") nbhoods.gdf = phl_nbhoods - + red_cols_to_keep = ["neighborhood", "geometry"] nbhoods.gdf = nbhoods.gdf[red_cols_to_keep] - + primary_featurelayer.spatial_join(nbhoods) - + return primary_featurelayer diff --git a/data/src/data_utils/negligent_devs.py b/data/src/data_utils/negligent_devs.py index aa95532c..edac75b1 100644 --- a/data/src/data_utils/negligent_devs.py +++ b/data/src/data_utils/negligent_devs.py @@ -1,60 +1,5 @@ -import re - import pandas as pd -replacements = { - "STREET": "ST", - "AVENUE": "AVE", - "ROAD": "RD", - "BOULEVARD": "BLVD", - "PLACE": "PL", - "FLOOR": "FL", - "FLR": "FL", - "FIRST": "1ST", - "SECOND": "2ND", - "THIRD": "3RD", - "FOURTH": "4TH", - "FIFTH": "5TH", - "SIXTH": "6TH", - "SEVENTH": "7TH", - "EIGHTH": "8TH", - "NINTH": "9TH", - "NORTH": "N", - "SOUTH": "S", - "EAST": "E", - "WEST": "W", - "SUITE": "STE", - "LA": "LN", - "LANE": "LN", - "PARKWAY": "PKY", -} - - -def standardize_street(street): 
- if not isinstance(street, str): - return "" - for full, abbr in replacements.items(): - street = re.sub(r"\b{}\b".format(full), abbr, street, flags=re.IGNORECASE) - return street - - -def create_standardized_address(row): - parts = [ - row["mailing_address_1"].strip() - if pd.notnull(row["mailing_address_1"]) - else "", - row["mailing_address_2"].strip() - if pd.notnull(row["mailing_address_2"]) - else "", - row["mailing_street"].strip() if pd.notnull(row["mailing_street"]) else "", - row["mailing_city_state"].strip() - if pd.notnull(row["mailing_city_state"]) - else "", - row["mailing_zip"].strip() if pd.notnull(row["mailing_zip"]) else "", - ] - standardized_address = ", ".join([part for part in parts if part]) - return standardized_address.lower() - def negligent_devs(primary_featurelayer): devs = primary_featurelayer.gdf @@ -62,105 +7,43 @@ def negligent_devs(primary_featurelayer): print("Columns in 'devs' DataFrame:", devs.columns) print("Initial properties data:") - print(devs[['opa_id', 'city_owner_agency', 'mailing_street']].head(10)) - - city_owners = devs.loc[~devs["city_owner_agency"].isna() & (devs["city_owner_agency"] != "")].copy() - non_city_owners = devs.loc[devs["city_owner_agency"].isna() | (devs["city_owner_agency"] == "")].copy() - - print(f"City owners shape: {city_owners.shape}, Non-city owners shape: {non_city_owners.shape}") - - # Log before standardizing addresses - print("Non-city owners mailing streets before standardization:") - print(non_city_owners[['opa_id', 'mailing_street']].head(10)) - - non_city_owners.loc[:, "mailing_street"] = ( - non_city_owners["mailing_street"].astype(str).apply(standardize_street) - ) - - print("Non-city owners mailing streets after standardization:") - print(non_city_owners[['opa_id', 'mailing_street']].head(10)) - - for term in ["ST", "AVE", "RD", "BLVD"]: - non_city_owners.loc[:, "mailing_street"] = non_city_owners[ - "mailing_street" - ].replace(regex={f"{term}.*": term}) - - # Log after applying term replacement - print("Non-city owners mailing streets after term replacement:") - print(non_city_owners[['opa_id', 'mailing_street']].head(10)) - - # Fill missing address components - non_city_owners.loc[:, "mailing_address_1"] = non_city_owners[ - "mailing_address_1" - ].fillna("") - non_city_owners.loc[:, "mailing_address_2"] = non_city_owners[ - "mailing_address_2" - ].fillna("") - non_city_owners.loc[:, "mailing_street"] = non_city_owners["mailing_street"].fillna( - "" - ) - non_city_owners.loc[:, "mailing_city_state"] = non_city_owners[ - "mailing_city_state" - ].fillna("") - non_city_owners.loc[:, "mailing_zip"] = non_city_owners["mailing_zip"].fillna("") - - # Log addresses before creating standardized address - print("Non-city owners mailing details before creating standardized address:") - print(non_city_owners[['opa_id', 'mailing_street', 'mailing_city_state', 'mailing_zip']].head(10)) - - non_city_owners.loc[:, "standardized_address"] = non_city_owners.apply( - create_standardized_address, axis=1 + print( + devs[["opa_id", "city_owner_agency", "standardized_address", "vacant"]].head(10) ) - # Log standardized addresses and counts - print("Standardized addresses with counts:") - address_counts = ( - non_city_owners.groupby("standardized_address") + # Count observations where vacant == 1 by standardized_address + vacant_counts = ( + devs[devs["vacant"] == 1] + .groupby("standardized_address") .size() - .reset_index(name="property_count") - ) - print(address_counts.head(10)) - - sorted_address_counts = 
address_counts.sort_values( - by="property_count", ascending=False - ) - print("Top standardized addresses by property count:") - print(sorted_address_counts.head(10)) - - non_city_owners = non_city_owners.merge( - sorted_address_counts, on="standardized_address", how="left" + .reset_index(name="vacant_property_count") ) - # Log merged data for city owners - city_owner_counts = ( - city_owners.groupby("city_owner_agency") - .size() - .reset_index(name="property_count") - ) - print("City owner counts:") - print(city_owner_counts.head(10)) + print("Head of resulting DataFrame with vacant counts:") + print(vacant_counts.head(10)) - city_owners = city_owners.merge( - city_owner_counts, on="city_owner_agency", how="left" + # Merge the vacant counts back to the main DataFrame + primary_featurelayer.gdf = primary_featurelayer.gdf.merge( + vacant_counts, on="standardized_address", how="left" ) - devs_combined = pd.concat([city_owners, non_city_owners], axis=0) - - # Final check on the merged data before updating primary_featurelayer - print("Combined data with property counts:") - print(devs_combined[['opa_id', 'property_count']].head(10)) + # Identify negligent developers: non-city owned entities owning 5+ vacant properties + primary_featurelayer.gdf["n_properties_owned"] = primary_featurelayer.gdf.groupby( + "opa_id" + )["vacant_property_count"].transform("sum") - primary_featurelayer.gdf = primary_featurelayer.gdf.merge( - devs_combined[["opa_id", "property_count"]], on="opa_id", how="left" - ) - primary_featurelayer.gdf.rename( - columns={"property_count": "n_properties_owned"}, inplace=True + primary_featurelayer.gdf["negligent_dev"] = ( + primary_featurelayer.gdf["n_properties_owned"] >= 5 + ) & ( + primary_featurelayer.gdf["city_owner_agency"].isna() + | (primary_featurelayer.gdf["city_owner_agency"] == "") ) - primary_featurelayer.gdf.loc[:, "negligent_dev"] = ( - primary_featurelayer.gdf["n_properties_owned"] > 5 - ) & (primary_featurelayer.gdf["city_owner_agency"].isna() | (primary_featurelayer.gdf["city_owner_agency"] == "")) print("Final feature layer data with negligent_dev flag:") - print(primary_featurelayer.gdf[['opa_id', 'n_properties_owned', 'negligent_dev']].head(10)) + print( + primary_featurelayer.gdf[ + ["opa_id", "n_properties_owned", "negligent_dev"] + ].head(10) + ) return primary_featurelayer diff --git a/data/src/data_utils/opa_properties.py b/data/src/data_utils/opa_properties.py index 2d02f42f..0b71eccd 100644 --- a/data/src/data_utils/opa_properties.py +++ b/data/src/data_utils/opa_properties.py @@ -1,8 +1,69 @@ from classes.featurelayer import FeatureLayer from constants.services import OPA_PROPERTIES_QUERY +import pandas as pd +import re +replacements = { + "STREET": "ST", + "AVENUE": "AVE", + "ROAD": "RD", + "BOULEVARD": "BLVD", + "PLACE": "PL", + "FLOOR": "FL", + "FLR": "FL", + "FIRST": "1ST", + "SECOND": "2ND", + "THIRD": "3RD", + "FOURTH": "4TH", + "FIFTH": "5TH", + "SIXTH": "6TH", + "SEVENTH": "7TH", + "EIGHTH": "8TH", + "NINTH": "9TH", + "NORTH": "N", + "SOUTH": "S", + "EAST": "E", + "WEST": "W", + "SUITE": "STE", + "LA": "LN", + "LANE": "LN", + "PARKWAY": "PKY", +} -def opa_properties(primary_featurelayer): + +def standardize_street(street): + if not isinstance(street, str): + return "" + for full, abbr in replacements.items(): + street = re.sub(r"\b{}\b".format(full), abbr, street, flags=re.IGNORECASE) + return street + + +def create_standardized_address(row): + parts = [ + ( + row["mailing_address_1"].strip() + if pd.notnull(row["mailing_address_1"]) + else 
"" + ), + ( + row["mailing_address_2"].strip() + if pd.notnull(row["mailing_address_2"]) + else "" + ), + row["mailing_street"].strip() if pd.notnull(row["mailing_street"]) else "", + ( + row["mailing_city_state"].strip() + if pd.notnull(row["mailing_city_state"]) + else "" + ), + row["mailing_zip"].strip() if pd.notnull(row["mailing_zip"]) else "", + ] + standardized_address = ", ".join([part for part in parts if part]) + return standardized_address.lower() + + +def opa_properties(): opa = FeatureLayer( name="OPA Properties", carto_sql_queries=OPA_PROPERTIES_QUERY, @@ -12,18 +73,54 @@ def opa_properties(primary_featurelayer): "sale_date", "sale_price", "parcel_number", - "mailing_address_1", - "mailing_address_2", - "mailing_care_of", + "owner_1", + "owner_2", + "mailing_address_1", + "mailing_address_2", + "mailing_care_of", "mailing_city_state", "mailing_street", - "mailing_zip" - ] + "mailing_zip", + "building_code_description", + "zip_code", + "zoning", + ], + ) + + # Rename columns + opa.gdf = opa.gdf.rename(columns={"parcel_number": "opa_id"}) + + # Convert 'sale_price' and 'market_value' to numeric values + opa.gdf["sale_price"] = pd.to_numeric(opa.gdf["sale_price"], errors="coerce") + opa.gdf["market_value"] = pd.to_numeric(opa.gdf["market_value"], errors="coerce") + + # Add parcel_type + opa.gdf["parcel_type"] = ( + opa.gdf["building_code_description"] + .str.contains("VACANT LAND", case=False, na=False) + .map({True: "Land", False: "Building"}) ) - primary_featurelayer.opa_join( - opa.gdf, - "parcel_number", + # Standardize mailing street addresses + opa.gdf["mailing_street"] = ( + opa.gdf["mailing_street"].astype(str).apply(standardize_street) ) - return primary_featurelayer + # Create standardized address column + opa.gdf["standardized_address"] = opa.gdf.apply(create_standardized_address, axis=1) + + # Drop columns starting with "mailing_" + opa.gdf = opa.gdf.loc[:, ~opa.gdf.columns.str.startswith("mailing_")] + + # Use GeoSeries.make_valid to repair geometries + opa.gdf["geometry"] = opa.gdf["geometry"].make_valid() + + # Drop empty geometries + opa.gdf = opa.gdf[~opa.gdf.is_empty] + final_row_count = len(opa.gdf) + print(f"Final row count after cleaning geometries: {final_row_count}") + + # Exclude the geometry column when checking NA counts + print("NA Counts:\n", opa.gdf.drop(columns="geometry").isna().sum()) + + return opa diff --git a/data/src/data_utils/owner_type.py b/data/src/data_utils/owner_type.py index 291364df..bd2aa7fd 100644 --- a/data/src/data_utils/owner_type.py +++ b/data/src/data_utils/owner_type.py @@ -1,9 +1,10 @@ import pandas as pd from classes.featurelayer import FeatureLayer + def owner_type(primary_featurelayer: FeatureLayer) -> FeatureLayer: """ - Determines the ownership type for each property in the primary feature layer based on + Determines the ownership type for each property in the primary feature layer based on the 'owner_1', 'owner_2', and 'city_owner_agency' columns. The ownership type is set as: - "Public" if 'city_owner_agency' is not NA. - "Business (LLC)" if 'city_owner_agency' is NA and "LLC" is found in 'owner_1' or 'owner_2'. 
diff --git a/data/src/data_utils/phs_properties.py b/data/src/data_utils/phs_properties.py index c906c2d1..aeeac757 100644 --- a/data/src/data_utils/phs_properties.py +++ b/data/src/data_utils/phs_properties.py @@ -1,6 +1,7 @@ from classes.featurelayer import FeatureLayer from constants.services import PHS_LAYERS_TO_LOAD + def phs_properties(primary_featurelayer: FeatureLayer) -> FeatureLayer: """ Perform a spatial join between the primary feature layer and the PHS properties layer, @@ -13,7 +14,7 @@ def phs_properties(primary_featurelayer: FeatureLayer) -> FeatureLayer: Returns: FeatureLayer: The updated primary feature layer with the 'phs_care_program' column. """ - + phs_properties = FeatureLayer( name="PHS Properties", esri_rest_urls=PHS_LAYERS_TO_LOAD, cols=["program"] ) @@ -23,9 +24,11 @@ def phs_properties(primary_featurelayer: FeatureLayer) -> FeatureLayer: # Initialize 'phs_care_program' column with default "no" for all rows primary_featurelayer.gdf["phs_care_program"] = "No" - + # Set 'phs_care_program' to "yes" for matched rows - primary_featurelayer.gdf.loc[primary_featurelayer.gdf["program"].notna(), "phs_care_program"] = "Yes" + primary_featurelayer.gdf.loc[ + primary_featurelayer.gdf["program"].notna(), "phs_care_program" + ] = "Yes" # Rebuild the GeoDataFrame after updates primary_featurelayer.rebuild_gdf() diff --git a/data/src/data_utils/ppr_properties.py b/data/src/data_utils/ppr_properties.py index 48111b35..67e7ce28 100644 --- a/data/src/data_utils/ppr_properties.py +++ b/data/src/data_utils/ppr_properties.py @@ -9,29 +9,30 @@ def ppr_properties(primary_featurelayer): - fallback_url = 'https://opendata.arcgis.com/datasets/d52445160ab14380a673e5849203eb64_0.geojson' + fallback_url = "https://opendata.arcgis.com/datasets/d52445160ab14380a673e5849203eb64_0.geojson" try: - ppr_properties = FeatureLayer( - name="PPR Properties", - esri_rest_urls=PPR_PROPERTIES_TO_LOAD, - cols=["PUBLIC_NAME"] + name="PPR Properties", + esri_rest_urls=PPR_PROPERTIES_TO_LOAD, + cols=["PUBLIC_NAME"], ) if ppr_properties.gdf is None or ppr_properties.gdf.empty: - raise ValueError("PPR properties GeoDataFrame is empty or failed to load from Esri REST URL.") - + raise ValueError( + "PPR properties GeoDataFrame is empty or failed to load from Esri REST URL." 
+ ) + print("Loaded PPR properties from Esri REST URL.") - + except Exception as e: print(f"Error loading PPR properties from Esri REST URL: {e}") print("Falling back to loading from GeoJSON URL.") - + response = requests.get(fallback_url) response.raise_for_status() ppr_properties_gdf = gpd.read_file(io.BytesIO(response.content)) - + ppr_properties = FeatureLayer(name="PPR Properties") ppr_properties.gdf = ppr_properties_gdf @@ -42,12 +43,14 @@ def ppr_properties(primary_featurelayer): primary_featurelayer.spatial_join(ppr_properties) mask = primary_featurelayer.gdf["public_name"].notnull() - + count_dropped = mask.sum() print(f"Number of PPR properties being dropped: {count_dropped}") - primary_featurelayer.gdf = primary_featurelayer.gdf.drop(primary_featurelayer.gdf[mask].index) + primary_featurelayer.gdf = primary_featurelayer.gdf.drop( + primary_featurelayer.gdf[mask].index + ) primary_featurelayer.gdf = primary_featurelayer.gdf.drop(columns=["public_name"]) - return primary_featurelayer \ No newline at end of file + return primary_featurelayer diff --git a/data/src/data_utils/pwd_parcels.py b/data/src/data_utils/pwd_parcels.py new file mode 100644 index 00000000..81cd5e02 --- /dev/null +++ b/data/src/data_utils/pwd_parcels.py @@ -0,0 +1,71 @@ +from classes.featurelayer import FeatureLayer +from constants.services import PWD_PARCELS_QUERY + + +def pwd_parcels(primary_featurelayer: FeatureLayer) -> FeatureLayer: + """ + Updates the primary feature layer by replacing its geometry column with validated + geometries from PWD parcels data. Retains point geometry for rows with no polygon + geometry available. + + Args: + primary_featurelayer (FeatureLayer): The primary feature layer to update. + + Returns: + FeatureLayer: The updated primary feature layer with geometries replaced + by those from PWD parcels or retained from the original layer if no match. 
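+
+    Note: PWD's 'brt_id' is used here as the parcel's OPA account number; it is
+    renamed to 'opa_id' below so parcel geometries can be joined to the primary
+    feature layer on that key.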
+ """ + # Load PWD parcels + pwd_parcels = FeatureLayer( + name="PWD Parcels", + carto_sql_queries=PWD_PARCELS_QUERY, + use_wkb_geom_field="the_geom", + cols=["brt_id"], + ) + + print("Columns in PWD Parcels:", pwd_parcels.gdf.columns) + + # Drop rows with null brt_id, rename to opa_id, and validate geometries + pwd_parcels.gdf.dropna(subset=["brt_id"], inplace=True) + pwd_parcels.gdf.rename(columns={"brt_id": "opa_id"}, inplace=True) + pwd_parcels.gdf["geometry"] = pwd_parcels.gdf["geometry"].make_valid() + + # Ensure geometries are polygons or multipolygons + if not all(pwd_parcels.gdf.geometry.type.isin(["Polygon", "MultiPolygon"])): + raise ValueError("Some geometries are not polygons or multipolygons.") + + # Log initial feature counts + print("Size of primary feature layer:", len(primary_featurelayer.gdf)) + print("Size of PWD parcels:", len(pwd_parcels.gdf)) + print( + "Number of valid geometries in PWD parcels:", + pwd_parcels.gdf.geometry.notnull().sum(), + ) + + # Temporarily drop geometry from the primary feature layer + primary_df = primary_featurelayer.gdf.drop(columns=["geometry"]) + + # Join geometries from PWD parcels + merged_gdf = primary_df.merge( + pwd_parcels.gdf[["opa_id", "geometry"]], + on="opa_id", + how="left", + ) + + # Log observations with no polygon geometry + no_geometry_count = merged_gdf["geometry"].isnull().sum() + print("Number of observations with no polygon geometry:", no_geometry_count) + + # Retain point geometry for rows with no polygon geometry + merged_gdf["geometry"] = merged_gdf["geometry"].combine_first( + primary_featurelayer.gdf["geometry"] + ) + print("Number of observations retaining point geometry:", no_geometry_count) + + # Validate the merged GeoDataFrame + updated_gdf = FeatureLayer( + name=primary_featurelayer.name, + gdf=merged_gdf, + ) + + return updated_gdf diff --git a/data/src/data_utils/rco_geoms.py b/data/src/data_utils/rco_geoms.py index 6aa3dca6..8f293c2a 100644 --- a/data/src/data_utils/rco_geoms.py +++ b/data/src/data_utils/rco_geoms.py @@ -9,33 +9,31 @@ def rco_geoms(primary_featurelayer): rco_geoms = FeatureLayer(name="RCOs", esri_rest_urls=RCOS_LAYERS_TO_LOAD) rco_aggregate_cols = [ - "ORGANIZATION_NAME", - "ORGANIZATION_ADDRESS", - "PRIMARY_EMAIL", - "PRIMARY_PHONE", + "organization_name", + "organization_address", + "primary_email", + "primary_phone", ] rco_use_cols = ["rco_info", "rco_names", "geometry"] - rco_geoms.gdf.loc[:, "rco_info"] = rco_geoms.gdf[rco_aggregate_cols].apply( + rco_geoms.gdf["rco_info"] = rco_geoms.gdf[rco_aggregate_cols].apply( lambda x: "; ".join(map(str, x)), axis=1 ) - rco_geoms.gdf.loc[:, "rco_names"] = rco_geoms.gdf["ORGANIZATION_NAME"] + rco_geoms.gdf["rco_names"] = rco_geoms.gdf["organization_name"] - rco_geoms.gdf = rco_geoms.gdf.loc[:, rco_use_cols].copy() + rco_geoms.gdf = rco_geoms.gdf[rco_use_cols].copy() rco_geoms.rebuild_gdf() primary_featurelayer.spatial_join(rco_geoms) - # Collapse columns and aggregate rco_info group_columns = [ col for col in primary_featurelayer.gdf.columns if col not in rco_use_cols ] for col in group_columns: - # Use .infer_objects() after fillna() to fix the warning - primary_featurelayer.gdf.loc[:, col] = ( + primary_featurelayer.gdf[col] = ( primary_featurelayer.gdf[col].fillna("").infer_objects(copy=False) ) diff --git a/data/src/data_utils/utils.py b/data/src/data_utils/utils.py index b7b9ef4e..0b55b4fa 100644 --- a/data/src/data_utils/utils.py +++ b/data/src/data_utils/utils.py @@ -17,7 +17,7 @@ def mask_password(value: str): def save_stream_url(url: 
str) -> str: - """download the file from this url to the tmp/ directory by streaming in a memory-friendly way. + """download the file from this url to the tmp/ directory by streaming in a memory-friendly way. If local file already exists, use it and don't download. Args: url (str): the url of the zip file @@ -25,17 +25,17 @@ def save_stream_url(url: str) -> str: Returns: str: the relative local path of the saved zip file """ - local_filename = "tmp/" + url.split('/')[-1] + local_filename = "tmp/" + url.split("/")[-1] if os.path.exists(local_filename): return local_filename with requests.get(url, stream=True) as r: r.raise_for_status() - with open(local_filename, 'wb') as f: + with open(local_filename, "wb") as f: for chunk in r.iter_content(chunk_size=8192): # If you have chunk encoded response uncomment if # and set chunk_size parameter to None. - #if chunk: + # if chunk: f.write(chunk) f.close() r.close() diff --git a/data/src/data_utils/vacant_properties.py b/data/src/data_utils/vacant_properties.py index d6573218..49867650 100644 --- a/data/src/data_utils/vacant_properties.py +++ b/data/src/data_utils/vacant_properties.py @@ -1,13 +1,11 @@ from classes.featurelayer import FeatureLayer, google_cloud_bucket from constants.services import VACANT_PROPS_LAYERS_TO_LOAD import geopandas as gpd -from config.config import USE_CRS from io import BytesIO - import pandas as pd -def load_backup_data_from_gcs(file_name: str) -> gpd.GeoDataFrame: +def load_backup_data_from_gcs(file_name: str) -> pd.DataFrame: bucket = google_cloud_bucket() blob = bucket.blob(file_name) if not blob.exists(): @@ -15,25 +13,18 @@ def load_backup_data_from_gcs(file_name: str) -> gpd.GeoDataFrame: file_bytes = blob.download_as_bytes() try: + # Read GeoJSON as a GeoDataFrame gdf = gpd.read_file(BytesIO(file_bytes)) except Exception as e: raise ValueError(f"Error reading GeoJSON file: {e}") print("Loaded backup data from GCS.") - # Ensure column names are consistent - gdf = gdf.rename( - columns={ - "ADDRESS": "address", - "OWNER1": "owner_1", - "OWNER2": "owner_2", - "BLDG_DESC": "building_description", - "CouncilDistrict": "council_district", - "ZoningBaseDistrict": "zoning_base_district", - "ZipCode": "zipcode", - "OPA_ID": "opa_id", - } - ) + # Ensure only opa_id is retained and convert to DataFrame (drop geometry) + gdf = gdf[["OPA_ID"]].rename(columns={"OPA_ID": "opa_id"}) + + # Drop the geometry column to avoid CRS issues (we don't need the geometry for matching) + gdf = gdf.drop(columns=["geometry"], errors="ignore") return gdf @@ -42,145 +33,74 @@ def check_null_percentage(df: pd.DataFrame, threshold: float = 0.05): """Checks if any column in the dataframe has more than the given threshold of null values.""" null_percentages = df.isnull().mean() for col, pct in null_percentages.items(): - if col not in ["owner1", "owner2"] and pct > threshold: + if pct > threshold: raise ValueError( f"Column '{col}' has more than {threshold * 100}% null values ({pct * 100}%)." 
) -def vacant_properties() -> FeatureLayer: +def vacant_properties(primary_featurelayer) -> FeatureLayer: vacant_properties = FeatureLayer( name="Vacant Properties", esri_rest_urls=VACANT_PROPS_LAYERS_TO_LOAD, cols=[ - "ADDRESS", - "OWNER1", - "OWNER2", - "BLDG_DESC", - "COUNCILDISTRICT", - "ZONINGBASEDISTRICT", - "ZIPCODE", "OPA_ID", "parcel_type", - ], + ], # Only need opa_id and parcel_type from the vacancy layers ) + print("Columns in vacant properties dataset:", vacant_properties.gdf.columns) + # Rename columns for consistency in the original data - vacant_properties.gdf = vacant_properties.gdf.rename( - columns={ - "ADDRESS": "address", - "OWNER1": "owner_1", - "OWNER2": "owner_2", - "BLDG_DESC": "building_description", - "COUNCILDISTRICT": "council_district", - "ZONINGBASEDISTRICT": "zoning_base_district", - "ZIPCODE": "zipcode", - "OPA_ID": "opa_id", - } - ) + vacant_properties.gdf = vacant_properties.gdf.rename(columns={"OPA_ID": "opa_id"}) + # Check for "Land" properties in the default dataset vacant_land_gdf = vacant_properties.gdf[ vacant_properties.gdf["parcel_type"] == "Land" ] - print(f"Vacant land data size: {len(vacant_land_gdf)} rows.") + print(f"Vacant land data size in the default dataset: {len(vacant_land_gdf)} rows.") + # If vacant land properties are below the threshold (20,000 rows), load backup data if len(vacant_land_gdf) < 20000: - print("Vacant land data is below the threshold. Loading backup data from GCS.") - backup_gdf = load_backup_data_from_gcs("vacant_indicators_land_06_2024.geojson") - - # Ensure CRS is consistent with project-wide CRS (USE_CRS) - if backup_gdf.crs != USE_CRS: - print(f"Reprojecting backup data from {backup_gdf.crs} to {USE_CRS}") - backup_gdf = backup_gdf.to_crs(USE_CRS) - - # Ensure CRS is the same - if backup_gdf.crs != vacant_properties.gdf.crs: - backup_gdf = backup_gdf.to_crs(vacant_properties.gdf.crs) - - # Map backup dataset column names to match the original dataset - backup_gdf = backup_gdf.rename( - columns={ - "owner_1": "owner1", - "owner_2": "owner2", - "building_description": "bldg_desc", - "council_district": "councildistrict", - "zoning_base_district": "zoningbasedistrict", - } + print( + "Vacant land data is below the threshold. Removing vacant land rows and loading backup data from GCS." ) - # Set parcel_type to "Land" for backup data - backup_gdf["parcel_type"] = "Land" - - # Select only the columns present in the original dataset - backup_gdf = backup_gdf[vacant_properties.gdf.columns] - - # Ensure all necessary columns are present in backup data - for col in vacant_properties.gdf.columns: - if col not in backup_gdf.columns: - backup_gdf[col] = None - - # Check for column mismatches between original and backup datasets - for col in vacant_properties.gdf.columns: - if vacant_properties.gdf[col].dtype != backup_gdf[col].dtype: - print( - f"Warning: Data type mismatch in column '{col}'. 
Original: {vacant_properties.gdf[col].dtype}, Backup: {backup_gdf[col].dtype}" - ) - - # Verify if backup data contains more than expected null values - check_null_percentage(backup_gdf) - - # Remove existing Land data + # Drop vacant land rows from the current dataset vacant_properties.gdf = vacant_properties.gdf[ vacant_properties.gdf["parcel_type"] != "Land" ] + # Load backup data and ensure it's a DataFrame (dropping geometry) + backup_gdf = load_backup_data_from_gcs("vacant_indicators_land_06_2024.geojson") + + # Add a parcel_type column with value "Land" for all rows in the backup data + backup_gdf["parcel_type"] = "Land" + # Concatenate the backup data with the existing data print(f"Appending backup data ({len(backup_gdf)} rows) to the existing data.") vacant_properties.gdf = pd.concat( [vacant_properties.gdf, backup_gdf], ignore_index=True ) - # Ensure concatenated data is still a GeoDataFrame - vacant_properties.gdf = gpd.GeoDataFrame( - vacant_properties.gdf, geometry="geometry" - ) + # Drop the geometry column to convert to a regular DataFrame + df = vacant_properties.gdf.drop(columns=["geometry"], errors="ignore") - vacant_properties.gdf.dropna(subset=["opa_id"], inplace=True) + # Drop rows where opa_id is missing + df.dropna(subset=["opa_id"], inplace=True) # Final null value check before returning - check_null_percentage(vacant_properties.gdf) - - # Final column renaming and selection - vacant_properties.gdf = vacant_properties.gdf.rename( - columns={ - "owner1": "owner_1", - "owner2": "owner_2", - "councildistrict": "council_district", - "zoningbasedistrict": "zoning_base_district", - } - ) + check_null_percentage(df) - # Select only the final columns needed - final_columns = [ - "address", - "owner_1", - "owner_2", - "council_district", - "zoning_base_district", - "zipcode", - "opa_id", - "parcel_type", - "geometry", - ] - - vacant_properties.gdf = vacant_properties.gdf[final_columns] + # Create vacant column in the primary feature layer based on opa_id match + primary_featurelayer.gdf["vacant"] = ( + primary_featurelayer.gdf["opa_id"].isin(df["opa_id"]).astype(int) + ) - # Ensure concatenated data is still a GeoDataFrame - vacant_properties.gdf = gpd.GeoDataFrame(vacant_properties.gdf, geometry="geometry") + print("Vacant column added based on opa_id match.") - before_drop = vacant_properties.gdf.shape[0] - vacant_properties.gdf = vacant_properties.gdf.drop_duplicates(subset="opa_id") - after_drop = vacant_properties.gdf.shape[0] - print(f"Duplicate vacant properties dropped: {before_drop - after_drop}") + # Drop the parcel_type column once the decision has been made + df.drop(columns=["parcel_type"], inplace=True) - return vacant_properties + # Return primary_featurelayer after adding vacant column + return primary_featurelayer From 4465aebfcdc39f8e8eaa564420dc4d0f9e91b97c Mon Sep 17 00:00:00 2001 From: nlebovits Date: Mon, 18 Nov 2024 08:52:39 -0500 Subject: [PATCH 09/18] remove references to mapbox key, which we are no longer using --- data/docker-compose.yml | 1 - data/src/config/config.py | 5 +---- data/src/script.py | 2 +- src/config/config.ts | 4 ---- 4 files changed, 2 insertions(+), 10 deletions(-) diff --git a/data/docker-compose.yml b/data/docker-compose.yml index 5d797d9e..ebf2a10a 100644 --- a/data/docker-compose.yml +++ b/data/docker-compose.yml @@ -6,7 +6,6 @@ services: image: vacant-lots-proj:latest environment: - GOOGLE_APPLICATION_CREDENTIALS=/app/service-account-key.json - - CFP_MAPBOX_TOKEN_UPLOADER - VACANT_LOTS_DB - CLEAN_GREEN_GOOGLE_KEY - 
PYTHONUNBUFFERED=1 diff --git a/data/src/config/config.py b/data/src/config/config.py index 16630a54..79bd315b 100644 --- a/data/src/config/config.py +++ b/data/src/config/config.py @@ -2,15 +2,12 @@ import os from pathlib import Path -FORCE_RELOAD = False +FORCE_RELOAD = True """ During the data load, whether to query the various GIS API services for the data to load into the postgres tables. If True, will query the API services, backup the database, reload the database and report on data differences. If false will read the data from postgres.""" USE_CRS = "EPSG:2272" """ the standard geospatial code for Pennsylvania South (ftUS) """ -MAPBOX_TOKEN = os.environ.get("CFP_MAPBOX_TOKEN_UPLOADER") -""" The location of the token for your mapbox account in your environment """ - log_level: int = logging.WARN """ overall log level for the project """ diff --git a/data/src/script.py b/data/src/script.py index 0b7bcc3e..cf42ace6 100644 --- a/data/src/script.py +++ b/data/src/script.py @@ -127,7 +127,7 @@ conn.commit() - # Post to Mapbox + # Post to GCP dataset.build_and_publish(tiles_file_id_prefix) # if we are reloading, run the diff report, then archive the backup and finally prune old archives diff --git a/src/config/config.ts b/src/config/config.ts index 6002630e..23cb2ad5 100644 --- a/src/config/config.ts +++ b/src/config/config.ts @@ -1,9 +1,5 @@ const EMPTY_STRING = ''; -export const mapboxAccessToken = - 'pk.eyJ1IjoibmxlYm92aXRzIiwiYSI6ImNsZXQ2Nzd3ZDBjZnYzcHFvYXhib2RqYzQifQ.PWg2LuNCH1E6-REjmYvdOg' || - EMPTY_STRING; - export const maptilerApiKey = process.env.NEXT_PUBLIC_MAPTILER_KEY || EMPTY_STRING; From 8485a30b24b1c8a75f87a56086c014ac30a62e5e Mon Sep 17 00:00:00 2001 From: nlebovits Date: Mon, 18 Nov 2024 08:53:56 -0500 Subject: [PATCH 10/18] set force reload to False by default --- data/src/config/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/src/config/config.py b/data/src/config/config.py index 79bd315b..7b7fa01c 100644 --- a/data/src/config/config.py +++ b/data/src/config/config.py @@ -2,7 +2,7 @@ import os from pathlib import Path -FORCE_RELOAD = True +FORCE_RELOAD = False """ During the data load, whether to query the various GIS API services for the data to load into the postgres tables. If True, will query the API services, backup the database, reload the database and report on data differences. 
If false will read the data from postgres.""" USE_CRS = "EPSG:2272" From b40c1cd2f5c2a693825161ed741708c847fba28f Mon Sep 17 00:00:00 2001 From: nlebovits Date: Wed, 20 Nov 2024 21:36:25 -0500 Subject: [PATCH 11/18] skip precommit hook --- data/src/constants/services.py | 20 +++++- data/src/data_utils/__init__.py | 6 +- data/src/data_utils/contig_neighbors.py | 34 ++++++----- data/src/data_utils/li_complaints.py | 8 +++ .../{l_and_i.py => li_violations.py} | 61 ++----------------- data/src/data_utils/negligent_devs.py | 31 +++++----- data/src/data_utils/phs_properties.py | 10 +-- data/src/data_utils/priority_level.py | 6 +- data/src/data_utils/pwd_parcels.py | 24 ++++++-- data/src/data_utils/vacant_properties.py | 6 +- 10 files changed, 97 insertions(+), 109 deletions(-) create mode 100644 data/src/data_utils/li_complaints.py rename data/src/data_utils/{l_and_i.py => li_violations.py} (64%) diff --git a/data/src/constants/services.py b/data/src/constants/services.py index e08a38fb..34f2eb41 100644 --- a/data/src/constants/services.py +++ b/data/src/constants/services.py @@ -34,7 +34,25 @@ ) # Load data for complaints from L&I -COMPLAINTS_SQL_QUERY = f"SELECT address, service_request_id, subject, status, service_name, service_code, lat AS y, lon AS x FROM public_cases_fc WHERE requested_datetime >= '{one_year_ago}' AND lat IS NOT NULL" +COMPLAINTS_SQL_QUERY = f""" +SELECT address, service_request_id, subject, status, service_name, service_code, lat AS y, lon AS x +FROM public_cases_fc +WHERE requested_datetime >= '{one_year_ago}' + AND lat IS NOT NULL + AND ( + subject ILIKE '%dumping%' + OR subject ILIKE '%blight%' + OR subject ILIKE '%rubbish%' + OR subject ILIKE '%weeds%' + OR subject ILIKE '%graffiti%' + OR subject ILIKE '%abandoned%' + OR subject ILIKE '%sanitation%' + OR subject ILIKE '%litter%' + OR subject ILIKE '%vacant%' + OR subject ILIKE '%trash%' + OR subject ILIKE '%unsafe%' + ) +""" VIOLATIONS_SQL_QUERY = f"SELECT parcel_id_num, casenumber, casecreateddate, casetype, casestatus, violationnumber, violationcodetitle, violationstatus, opa_account_num, address, opa_owner, geocode_x AS x, geocode_y AS y FROM violations WHERE violationdate >= '{one_year_ago}' AND geocode_x IS NOT NULL" diff --git a/data/src/data_utils/__init__.py b/data/src/data_utils/__init__.py index 9fc453ac..d351ecd4 100644 --- a/data/src/data_utils/__init__.py +++ b/data/src/data_utils/__init__.py @@ -1,6 +1,7 @@ from .city_owned_properties import city_owned_properties from .phs_properties import phs_properties -from .l_and_i import l_and_i +from .li_violations import li_violations +from .li_complaints import li_complaints from .rco_geoms import rco_geoms from .tree_canopy import tree_canopy from .nbhoods import nbhoods @@ -14,7 +15,8 @@ __all__ = [ "city_owned_properties", "phs_properties", - "l_and_i", + "li_violations", + "li_complaints", "rco_geoms", "tree_canopy", "nbhoods", diff --git a/data/src/data_utils/contig_neighbors.py b/data/src/data_utils/contig_neighbors.py index 0d58abe3..d91eef80 100644 --- a/data/src/data_utils/contig_neighbors.py +++ b/data/src/data_utils/contig_neighbors.py @@ -1,18 +1,23 @@ import warnings import networkx as nx from libpysal.weights import Queen +import numpy as np def contig_neighbors(primary_featurelayer): - # Filter the parcels to only consider vacant properties - parcels = primary_featurelayer.gdf[primary_featurelayer.gdf["vacant"] == 1] - - if parcels.empty: + # Create a filtered dataframe with only vacant properties and polygon geometries + vacant_parcels = 
primary_featurelayer.gdf.loc[ + (primary_featurelayer.gdf["vacant"]) & + (primary_featurelayer.gdf.geometry.type.isin(["Polygon", "MultiPolygon"])), + ["opa_id", "geometry"] + ] + + if vacant_parcels.empty: print("No vacant properties found in the dataset.") - primary_featurelayer.gdf["n_contiguous"] = 0 + primary_featurelayer.gdf["n_contiguous"] = np.nan return primary_featurelayer - print(f"Found {len(parcels)} vacant properties.") + print(f"Found {len(vacant_parcels)} vacant properties.") with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=FutureWarning) @@ -24,27 +29,28 @@ def contig_neighbors(primary_featurelayer): # Create a spatial weights matrix for vacant parcels print("Creating spatial weights matrix for vacant parcels...") - w = Queen.from_dataframe(parcels) + w = Queen.from_dataframe(vacant_parcels) + # Convert the spatial weights matrix to a NetworkX graph print("Converting spatial weights matrix to NetworkX graph...") g = w.to_networkx() - # Calculate the number of contiguous neighbors for each vacant property + # Calculate the number of contiguous vacant properties for each vacant parcel print("Calculating number of contiguous vacant neighbors for each property...") n_contiguous = { node: len(nx.node_connected_component(g, node)) - 1 for node in g.nodes } - # Assign the number of contiguous vacant neighbors to vacant properties - parcels["n_contiguous"] = parcels.index.map(n_contiguous).fillna(0).astype(int) + # Assign the contiguous neighbor count to the filtered vacant parcels + vacant_parcels["n_contiguous"] = vacant_parcels.index.map(n_contiguous) - print("Joining results back to primary feature layer...") + # Merge the results back to the primary feature layer primary_featurelayer.gdf = primary_featurelayer.gdf.merge( - parcels[["opa_id", "n_contiguous"]], on="opa_id", how="left" + vacant_parcels[["opa_id", "n_contiguous"]], on="opa_id", how="left" ) - # For non-vacant properties, set the number of contiguous vacant neighbors to 0 - primary_featurelayer.gdf["n_contiguous"].fillna(0, inplace=True) + # Assign NA for non-vacant properties + primary_featurelayer.gdf.loc[~primary_featurelayer.gdf["vacant"], "n_contiguous"] = np.nan print("Process completed. Returning updated primary feature layer.") return primary_featurelayer diff --git a/data/src/data_utils/li_complaints.py b/data/src/data_utils/li_complaints.py new file mode 100644 index 00000000..ca086032 --- /dev/null +++ b/data/src/data_utils/li_complaints.py @@ -0,0 +1,8 @@ +from constants.services import COMPLAINTS_SQL_QUERY + + +from data_utils.kde import apply_kde_to_primary + + +def li_complaints(primary_featurelayer): + return apply_kde_to_primary(primary_featurelayer, "L and I Complaints", COMPLAINTS_SQL_QUERY) diff --git a/data/src/data_utils/l_and_i.py b/data/src/data_utils/li_violations.py similarity index 64% rename from data/src/data_utils/l_and_i.py rename to data/src/data_utils/li_violations.py index 6cf277ba..c207bf4c 100644 --- a/data/src/data_utils/l_and_i.py +++ b/data/src/data_utils/li_violations.py @@ -2,14 +2,14 @@ import geopandas as gpd from typing import List from classes.featurelayer import FeatureLayer -from constants.services import COMPLAINTS_SQL_QUERY, VIOLATIONS_SQL_QUERY +from constants.services import VIOLATIONS_SQL_QUERY -def l_and_i(primary_featurelayer: FeatureLayer) -> FeatureLayer: +def li_violations(primary_featurelayer: FeatureLayer) -> FeatureLayer: """ - Process L&I (Licenses and Inspections) data for complaints and violations. 
+ Process L&I (Licenses and Inspections) data for violations. - This function filters and processes L&I complaints and violations data, + This function filters and processes L&I violations data, joining it with the primary feature layer based on spatial relationships and OPA (Office of Property Assessment) identifiers. @@ -33,35 +33,6 @@ def l_and_i(primary_featurelayer: FeatureLayer) -> FeatureLayer: "unsafe", ] - # Load complaints data from L&I - l_and_i_complaints: FeatureLayer = FeatureLayer( - name="LI Complaints", carto_sql_queries=COMPLAINTS_SQL_QUERY - ) - - # Filter for rows where 'subject' contains any of the keywords - l_and_i_complaints.gdf = l_and_i_complaints.gdf[ - l_and_i_complaints.gdf["subject"].str.lower().str.contains("|".join(keywords)) - ] - - # Filter for only Status = 'Open' - l_and_i_complaints.gdf = l_and_i_complaints.gdf[ - l_and_i_complaints.gdf["status"].str.lower() == "open" - ] - - # Group by geometry and concatenate the violationcodetitle values into a list with a semicolon separator - l_and_i_complaints.gdf = ( - l_and_i_complaints.gdf.groupby("geometry")["service_name"] - .apply(lambda x: "; ".join([val for val in x if val is not None])) - .reset_index() - ) - - l_and_i_complaints.rebuild_gdf() - - # rename the column to 'li_complaints' - l_and_i_complaints.gdf.rename( - columns={"service_name": "li_complaints"}, inplace=True - ) - # Load data for violations from L&I l_and_i_violations: FeatureLayer = FeatureLayer( name="LI Violations", carto_sql_queries=VIOLATIONS_SQL_QUERY, from_xy=True @@ -121,7 +92,6 @@ def l_and_i(primary_featurelayer: FeatureLayer) -> FeatureLayer: .apply(lambda x: "; ".join([val for val in x if val is not None])) .reset_index() ) - l_and_i_complaints.rebuild_gdf() # rename the column to 'li_violations' l_and_i_violations.gdf.rename( @@ -134,19 +104,6 @@ def l_and_i(primary_featurelayer: FeatureLayer) -> FeatureLayer: "opa_account_num", ) - # Complaints need a spatial join, but we need to take special care to merge on just the parcel geoms first to get opa_id - complaints_with_opa_id: gpd.GeoDataFrame = primary_featurelayer.gdf.sjoin( - l_and_i_complaints.gdf, how="left", predicate="contains" - ) - complaints_with_opa_id.drop(columns=["index_right"], inplace=True) - - # Concatenate the complaints values into a list with a semicolon separator by opa_id - complaints_with_opa_id = ( - complaints_with_opa_id.groupby("opa_id")["li_complaints"] - .apply(lambda x: "; ".join([str(val) for val in x if val is not None])) - .reset_index()[["opa_id", "li_complaints"]] - ) - # Clean up the NaN values in the li_complaints column def remove_nan_strings(x: str) -> str | None: """ @@ -163,16 +120,6 @@ def remove_nan_strings(x: str) -> str | None: else: return x - complaints_with_opa_id["li_complaints"] = complaints_with_opa_id[ - "li_complaints" - ].apply(remove_nan_strings) - - # Merge the complaints values back into the primary_featurelayer - primary_featurelayer.opa_join( - complaints_with_opa_id, - "opa_id", - ) - primary_featurelayer.gdf[ ["all_violations_past_year", "open_violations_past_year"] ] = ( diff --git a/data/src/data_utils/negligent_devs.py b/data/src/data_utils/negligent_devs.py index edac75b1..b627c3aa 100644 --- a/data/src/data_utils/negligent_devs.py +++ b/data/src/data_utils/negligent_devs.py @@ -1,6 +1,5 @@ import pandas as pd - def negligent_devs(primary_featurelayer): devs = primary_featurelayer.gdf @@ -11,29 +10,27 @@ def negligent_devs(primary_featurelayer): devs[["opa_id", "city_owner_agency", "standardized_address", 
"vacant"]].head(10) ) - # Count observations where vacant == 1 by standardized_address - vacant_counts = ( - devs[devs["vacant"] == 1] - .groupby("standardized_address") - .size() - .reset_index(name="vacant_property_count") + # Count total properties and vacant properties by standardized_address + property_counts = ( + devs.groupby("standardized_address") + .agg( + n_total_properties_owned=("opa_id", "size"), + n_vacant_properties_owned=("vacant", "sum"), + ) + .reset_index() ) - print("Head of resulting DataFrame with vacant counts:") - print(vacant_counts.head(10)) + print("Head of resulting DataFrame with property counts:") + print(property_counts.head(10)) - # Merge the vacant counts back to the main DataFrame + # Merge the property counts back to the main DataFrame primary_featurelayer.gdf = primary_featurelayer.gdf.merge( - vacant_counts, on="standardized_address", how="left" + property_counts, on="standardized_address", how="left" ) # Identify negligent developers: non-city owned entities owning 5+ vacant properties - primary_featurelayer.gdf["n_properties_owned"] = primary_featurelayer.gdf.groupby( - "opa_id" - )["vacant_property_count"].transform("sum") - primary_featurelayer.gdf["negligent_dev"] = ( - primary_featurelayer.gdf["n_properties_owned"] >= 5 + primary_featurelayer.gdf["n_vacant_properties_owned"] >= 5 ) & ( primary_featurelayer.gdf["city_owner_agency"].isna() | (primary_featurelayer.gdf["city_owner_agency"] == "") @@ -42,7 +39,7 @@ def negligent_devs(primary_featurelayer): print("Final feature layer data with negligent_dev flag:") print( primary_featurelayer.gdf[ - ["opa_id", "n_properties_owned", "negligent_dev"] + ["opa_id", "n_total_properties_owned", "n_vacant_properties_owned", "negligent_dev"] ].head(10) ) diff --git a/data/src/data_utils/phs_properties.py b/data/src/data_utils/phs_properties.py index aeeac757..172a2575 100644 --- a/data/src/data_utils/phs_properties.py +++ b/data/src/data_utils/phs_properties.py @@ -1,5 +1,6 @@ from classes.featurelayer import FeatureLayer from constants.services import PHS_LAYERS_TO_LOAD +import pandas as pd def phs_properties(primary_featurelayer: FeatureLayer) -> FeatureLayer: @@ -22,13 +23,8 @@ def phs_properties(primary_featurelayer: FeatureLayer) -> FeatureLayer: # Perform spatial join between primary feature layer and PHS properties primary_featurelayer.spatial_join(phs_properties) - # Initialize 'phs_care_program' column with default "no" for all rows - primary_featurelayer.gdf["phs_care_program"] = "No" - - # Set 'phs_care_program' to "yes" for matched rows - primary_featurelayer.gdf.loc[ - primary_featurelayer.gdf["program"].notna(), "phs_care_program" - ] = "Yes" + # Create 'phs_care_program' column with values from 'program', drop 'program' + primary_featurelayer.gdf["phs_care_program"] = primary_featurelayer.gdf.pop("program") # Rebuild the GeoDataFrame after updates primary_featurelayer.rebuild_gdf() diff --git a/data/src/data_utils/priority_level.py b/data/src/data_utils/priority_level.py index 33097de3..e23e3f6f 100644 --- a/data/src/data_utils/priority_level.py +++ b/data/src/data_utils/priority_level.py @@ -1,3 +1,5 @@ +import pandas as pd + def priority_level(dataset): priority_levels = [] for idx, row in dataset.gdf.iterrows(): @@ -5,11 +7,11 @@ def priority_level(dataset): # Decision Points guncrime_density_percentile = row["gun_crimes_density_percentile"] - in_phs_landcare = row["phs_care_program"] == "yes" + in_phs_landcare = pd.notna(row["phs_care_program"]) has_li_complaint_or_violation = ( 
row["li_complaints"] is not None and float(row["all_violations_past_year"]) > 0 - ) + ) or (row["l_and_i_complaints_density_percentile"] > 50) very_low_tree_canopy = row["tree_canopy_gap"] >= 0.3 # Updated logic based on percentile values diff --git a/data/src/data_utils/pwd_parcels.py b/data/src/data_utils/pwd_parcels.py index 81cd5e02..d4dd2856 100644 --- a/data/src/data_utils/pwd_parcels.py +++ b/data/src/data_utils/pwd_parcels.py @@ -1,5 +1,6 @@ from classes.featurelayer import FeatureLayer from constants.services import PWD_PARCELS_QUERY +import geopandas as gpd def pwd_parcels(primary_featurelayer: FeatureLayer) -> FeatureLayer: @@ -52,6 +53,13 @@ def pwd_parcels(primary_featurelayer: FeatureLayer) -> FeatureLayer: how="left", ) + # Coerce merged_gdf into a GeoDataFrame + merged_gdf = gpd.GeoDataFrame( + merged_gdf, + geometry="geometry", + crs=primary_featurelayer.gdf.crs, # Ensure the CRS matches the original + ) + # Log observations with no polygon geometry no_geometry_count = merged_gdf["geometry"].isnull().sum() print("Number of observations with no polygon geometry:", no_geometry_count) @@ -62,10 +70,16 @@ def pwd_parcels(primary_featurelayer: FeatureLayer) -> FeatureLayer: ) print("Number of observations retaining point geometry:", no_geometry_count) - # Validate the merged GeoDataFrame - updated_gdf = FeatureLayer( - name=primary_featurelayer.name, - gdf=merged_gdf, + # Count observations with point geometry grouped by 'vacant' + point_geometry_counts = ( + merged_gdf[merged_gdf["geometry"].geom_type == "Point"] + .groupby("vacant") + .size() ) - return updated_gdf + # Log the results + print("Counts of point geometry grouped by 'vacant':") + print(point_geometry_counts) + + # Wrap the GeoDataFrame back into a FeatureLayer + return FeatureLayer(name=primary_featurelayer.name, gdf=merged_gdf) diff --git a/data/src/data_utils/vacant_properties.py b/data/src/data_utils/vacant_properties.py index 49867650..c4d9347e 100644 --- a/data/src/data_utils/vacant_properties.py +++ b/data/src/data_utils/vacant_properties.py @@ -92,10 +92,8 @@ def vacant_properties(primary_featurelayer) -> FeatureLayer: # Final null value check before returning check_null_percentage(df) - # Create vacant column in the primary feature layer based on opa_id match - primary_featurelayer.gdf["vacant"] = ( - primary_featurelayer.gdf["opa_id"].isin(df["opa_id"]).astype(int) - ) + # Create vacant column in the primary feature layer as True/False + primary_featurelayer.gdf["vacant"] = primary_featurelayer.gdf["opa_id"].isin(df["opa_id"]) print("Vacant column added based on opa_id match.") From e28ded3c4127c6fc9c72641867c713cfea71d31a Mon Sep 17 00:00:00 2001 From: nlebovits Date: Wed, 20 Nov 2024 21:59:24 -0500 Subject: [PATCH 12/18] precommit hook --- data/src/data_utils/tactical_urbanism.py | 25 -------------- data/src/new_etl/__init__.py | 0 data/src/new_etl/classes/__init__.py | 0 .../src/{ => new_etl}/classes/featurelayer.py | 4 ++- data/src/new_etl/constants/__init__.py | 0 data/src/{ => new_etl}/constants/services.py | 0 data/src/new_etl/data_utils/__init__.py | 0 .../data_utils}/data_utils/__init__.py | 0 .../data_utils}/data_utils/access_process.py | 0 .../data_utils/city_owned_properties.py | 0 .../data_utils/community_gardens.py | 0 .../data_utils}/data_utils/conservatorship.py | 0 .../data_utils/contig_neighbors.py | 10 +++--- .../data_utils}/data_utils/council_dists.py | 0 .../data_utils}/data_utils/delinquencies.py | 0 .../data_utils}/data_utils/dev_probability.py | 0 
.../data_utils}/data_utils/drug_crimes.py | 0 .../data_utils}/data_utils/gun_crimes.py | 0 .../data_utils/imm_dang_buildings.py | 0 .../data_utils}/data_utils/kde.py | 0 .../data_utils}/data_utils/li_complaints.py | 4 ++- .../data_utils}/data_utils/li_violations.py | 0 .../data_utils}/data_utils/nbhoods.py | 0 .../data_utils}/data_utils/negligent_devs.py | 9 +++-- .../data_utils}/data_utils/opa_properties.py | 0 .../data_utils}/data_utils/owner_type.py | 0 .../data_utils}/data_utils/park_priority.py | 0 .../data_utils}/data_utils/phs_properties.py | 5 +-- .../data_utils}/data_utils/ppr_properties.py | 0 .../data_utils}/data_utils/priority_level.py | 1 + .../data_utils}/data_utils/pwd_parcels.py | 4 +-- .../data_utils}/data_utils/rco_geoms.py | 0 .../data_utils/tactical_urbanism.py | 34 +++++++++++++++++++ .../data_utils}/data_utils/tree_canopy.py | 0 .../data_utils/unsafe_buildings.py | 0 .../data_utils}/data_utils/utils.py | 0 .../data_utils/vacant_properties.py | 4 ++- 37 files changed, 60 insertions(+), 40 deletions(-) delete mode 100644 data/src/data_utils/tactical_urbanism.py create mode 100644 data/src/new_etl/__init__.py create mode 100644 data/src/new_etl/classes/__init__.py rename data/src/{ => new_etl}/classes/featurelayer.py (99%) create mode 100644 data/src/new_etl/constants/__init__.py rename data/src/{ => new_etl}/constants/services.py (100%) create mode 100644 data/src/new_etl/data_utils/__init__.py rename data/src/{ => new_etl/data_utils}/data_utils/__init__.py (100%) rename data/src/{ => new_etl/data_utils}/data_utils/access_process.py (100%) rename data/src/{ => new_etl/data_utils}/data_utils/city_owned_properties.py (100%) rename data/src/{ => new_etl/data_utils}/data_utils/community_gardens.py (100%) rename data/src/{ => new_etl/data_utils}/data_utils/conservatorship.py (100%) rename data/src/{ => new_etl/data_utils}/data_utils/contig_neighbors.py (87%) rename data/src/{ => new_etl/data_utils}/data_utils/council_dists.py (100%) rename data/src/{ => new_etl/data_utils}/data_utils/delinquencies.py (100%) rename data/src/{ => new_etl/data_utils}/data_utils/dev_probability.py (100%) rename data/src/{ => new_etl/data_utils}/data_utils/drug_crimes.py (100%) rename data/src/{ => new_etl/data_utils}/data_utils/gun_crimes.py (100%) rename data/src/{ => new_etl/data_utils}/data_utils/imm_dang_buildings.py (100%) rename data/src/{ => new_etl/data_utils}/data_utils/kde.py (100%) rename data/src/{ => new_etl/data_utils}/data_utils/li_complaints.py (56%) rename data/src/{ => new_etl/data_utils}/data_utils/li_violations.py (100%) rename data/src/{ => new_etl/data_utils}/data_utils/nbhoods.py (100%) rename data/src/{ => new_etl/data_utils}/data_utils/negligent_devs.py (88%) rename data/src/{ => new_etl/data_utils}/data_utils/opa_properties.py (100%) rename data/src/{ => new_etl/data_utils}/data_utils/owner_type.py (100%) rename data/src/{ => new_etl/data_utils}/data_utils/park_priority.py (100%) rename data/src/{ => new_etl/data_utils}/data_utils/phs_properties.py (96%) rename data/src/{ => new_etl/data_utils}/data_utils/ppr_properties.py (100%) rename data/src/{ => new_etl/data_utils}/data_utils/priority_level.py (99%) rename data/src/{ => new_etl/data_utils}/data_utils/pwd_parcels.py (96%) rename data/src/{ => new_etl/data_utils}/data_utils/rco_geoms.py (100%) create mode 100644 data/src/new_etl/data_utils/data_utils/tactical_urbanism.py rename data/src/{ => new_etl/data_utils}/data_utils/tree_canopy.py (100%) rename data/src/{ => new_etl/data_utils}/data_utils/unsafe_buildings.py 
(100%) rename data/src/{ => new_etl/data_utils}/data_utils/utils.py (100%) rename data/src/{ => new_etl/data_utils}/data_utils/vacant_properties.py (98%) diff --git a/data/src/data_utils/tactical_urbanism.py b/data/src/data_utils/tactical_urbanism.py deleted file mode 100644 index df15a0f2..00000000 --- a/data/src/data_utils/tactical_urbanism.py +++ /dev/null @@ -1,25 +0,0 @@ -def tactical_urbanism(dataset): - unsafe_words = [ - "dangerous", - ] - - tactical_urbanism_values = [] - - for idx, row in dataset.gdf.iterrows(): - li_complaints_lower = str(row["li_complaints"]).lower().split(" ") - contains_unsafe_word = any(word in li_complaints_lower for word in unsafe_words) - - if ( - row["parcel_type"] == "Land" - and row["unsafe_building"] == "N" - and row["imm_dang_building"] == "N" - and not contains_unsafe_word - ): - tactical_urbanism = "Yes" - else: - tactical_urbanism = "No" - - tactical_urbanism_values.append(tactical_urbanism) - - dataset.gdf["tactical_urbanism"] = tactical_urbanism_values - return dataset diff --git a/data/src/new_etl/__init__.py b/data/src/new_etl/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/data/src/new_etl/classes/__init__.py b/data/src/new_etl/classes/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/data/src/classes/featurelayer.py b/data/src/new_etl/classes/featurelayer.py similarity index 99% rename from data/src/classes/featurelayer.py rename to data/src/new_etl/classes/featurelayer.py index 9f10d8fc..fce57fbf 100644 --- a/data/src/classes/featurelayer.py +++ b/data/src/new_etl/classes/featurelayer.py @@ -90,7 +90,9 @@ def __init__( self.type = ( "esri" if self.esri_rest_urls - else "carto" if self.carto_sql_queries else "gdf" + else "carto" + if self.carto_sql_queries + else "gdf" ) if force_reload or not self.check_psql(): self.load_data() diff --git a/data/src/new_etl/constants/__init__.py b/data/src/new_etl/constants/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/data/src/constants/services.py b/data/src/new_etl/constants/services.py similarity index 100% rename from data/src/constants/services.py rename to data/src/new_etl/constants/services.py diff --git a/data/src/new_etl/data_utils/__init__.py b/data/src/new_etl/data_utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/data/src/data_utils/__init__.py b/data/src/new_etl/data_utils/data_utils/__init__.py similarity index 100% rename from data/src/data_utils/__init__.py rename to data/src/new_etl/data_utils/data_utils/__init__.py diff --git a/data/src/data_utils/access_process.py b/data/src/new_etl/data_utils/data_utils/access_process.py similarity index 100% rename from data/src/data_utils/access_process.py rename to data/src/new_etl/data_utils/data_utils/access_process.py diff --git a/data/src/data_utils/city_owned_properties.py b/data/src/new_etl/data_utils/data_utils/city_owned_properties.py similarity index 100% rename from data/src/data_utils/city_owned_properties.py rename to data/src/new_etl/data_utils/data_utils/city_owned_properties.py diff --git a/data/src/data_utils/community_gardens.py b/data/src/new_etl/data_utils/data_utils/community_gardens.py similarity index 100% rename from data/src/data_utils/community_gardens.py rename to data/src/new_etl/data_utils/data_utils/community_gardens.py diff --git a/data/src/data_utils/conservatorship.py b/data/src/new_etl/data_utils/data_utils/conservatorship.py similarity index 100% rename from data/src/data_utils/conservatorship.py rename to 
data/src/new_etl/data_utils/data_utils/conservatorship.py diff --git a/data/src/data_utils/contig_neighbors.py b/data/src/new_etl/data_utils/data_utils/contig_neighbors.py similarity index 87% rename from data/src/data_utils/contig_neighbors.py rename to data/src/new_etl/data_utils/data_utils/contig_neighbors.py index d91eef80..48656bbd 100644 --- a/data/src/data_utils/contig_neighbors.py +++ b/data/src/new_etl/data_utils/data_utils/contig_neighbors.py @@ -7,9 +7,9 @@ def contig_neighbors(primary_featurelayer): # Create a filtered dataframe with only vacant properties and polygon geometries vacant_parcels = primary_featurelayer.gdf.loc[ - (primary_featurelayer.gdf["vacant"]) & - (primary_featurelayer.gdf.geometry.type.isin(["Polygon", "MultiPolygon"])), - ["opa_id", "geometry"] + (primary_featurelayer.gdf["vacant"]) + & (primary_featurelayer.gdf.geometry.type.isin(["Polygon", "MultiPolygon"])), + ["opa_id", "geometry"], ] if vacant_parcels.empty: @@ -50,7 +50,9 @@ def contig_neighbors(primary_featurelayer): ) # Assign NA for non-vacant properties - primary_featurelayer.gdf.loc[~primary_featurelayer.gdf["vacant"], "n_contiguous"] = np.nan + primary_featurelayer.gdf.loc[ + ~primary_featurelayer.gdf["vacant"], "n_contiguous" + ] = np.nan print("Process completed. Returning updated primary feature layer.") return primary_featurelayer diff --git a/data/src/data_utils/council_dists.py b/data/src/new_etl/data_utils/data_utils/council_dists.py similarity index 100% rename from data/src/data_utils/council_dists.py rename to data/src/new_etl/data_utils/data_utils/council_dists.py diff --git a/data/src/data_utils/delinquencies.py b/data/src/new_etl/data_utils/data_utils/delinquencies.py similarity index 100% rename from data/src/data_utils/delinquencies.py rename to data/src/new_etl/data_utils/data_utils/delinquencies.py diff --git a/data/src/data_utils/dev_probability.py b/data/src/new_etl/data_utils/data_utils/dev_probability.py similarity index 100% rename from data/src/data_utils/dev_probability.py rename to data/src/new_etl/data_utils/data_utils/dev_probability.py diff --git a/data/src/data_utils/drug_crimes.py b/data/src/new_etl/data_utils/data_utils/drug_crimes.py similarity index 100% rename from data/src/data_utils/drug_crimes.py rename to data/src/new_etl/data_utils/data_utils/drug_crimes.py diff --git a/data/src/data_utils/gun_crimes.py b/data/src/new_etl/data_utils/data_utils/gun_crimes.py similarity index 100% rename from data/src/data_utils/gun_crimes.py rename to data/src/new_etl/data_utils/data_utils/gun_crimes.py diff --git a/data/src/data_utils/imm_dang_buildings.py b/data/src/new_etl/data_utils/data_utils/imm_dang_buildings.py similarity index 100% rename from data/src/data_utils/imm_dang_buildings.py rename to data/src/new_etl/data_utils/data_utils/imm_dang_buildings.py diff --git a/data/src/data_utils/kde.py b/data/src/new_etl/data_utils/data_utils/kde.py similarity index 100% rename from data/src/data_utils/kde.py rename to data/src/new_etl/data_utils/data_utils/kde.py diff --git a/data/src/data_utils/li_complaints.py b/data/src/new_etl/data_utils/data_utils/li_complaints.py similarity index 56% rename from data/src/data_utils/li_complaints.py rename to data/src/new_etl/data_utils/data_utils/li_complaints.py index ca086032..ee219f36 100644 --- a/data/src/data_utils/li_complaints.py +++ b/data/src/new_etl/data_utils/data_utils/li_complaints.py @@ -5,4 +5,6 @@ def li_complaints(primary_featurelayer): - return apply_kde_to_primary(primary_featurelayer, "L and I Complaints", 
COMPLAINTS_SQL_QUERY) + return apply_kde_to_primary( + primary_featurelayer, "L and I Complaints", COMPLAINTS_SQL_QUERY + ) diff --git a/data/src/data_utils/li_violations.py b/data/src/new_etl/data_utils/data_utils/li_violations.py similarity index 100% rename from data/src/data_utils/li_violations.py rename to data/src/new_etl/data_utils/data_utils/li_violations.py diff --git a/data/src/data_utils/nbhoods.py b/data/src/new_etl/data_utils/data_utils/nbhoods.py similarity index 100% rename from data/src/data_utils/nbhoods.py rename to data/src/new_etl/data_utils/data_utils/nbhoods.py diff --git a/data/src/data_utils/negligent_devs.py b/data/src/new_etl/data_utils/data_utils/negligent_devs.py similarity index 88% rename from data/src/data_utils/negligent_devs.py rename to data/src/new_etl/data_utils/data_utils/negligent_devs.py index b627c3aa..9e0cf440 100644 --- a/data/src/data_utils/negligent_devs.py +++ b/data/src/new_etl/data_utils/data_utils/negligent_devs.py @@ -1,5 +1,3 @@ -import pandas as pd - def negligent_devs(primary_featurelayer): devs = primary_featurelayer.gdf @@ -39,7 +37,12 @@ def negligent_devs(primary_featurelayer): print("Final feature layer data with negligent_dev flag:") print( primary_featurelayer.gdf[ - ["opa_id", "n_total_properties_owned", "n_vacant_properties_owned", "negligent_dev"] + [ + "opa_id", + "n_total_properties_owned", + "n_vacant_properties_owned", + "negligent_dev", + ] ].head(10) ) diff --git a/data/src/data_utils/opa_properties.py b/data/src/new_etl/data_utils/data_utils/opa_properties.py similarity index 100% rename from data/src/data_utils/opa_properties.py rename to data/src/new_etl/data_utils/data_utils/opa_properties.py diff --git a/data/src/data_utils/owner_type.py b/data/src/new_etl/data_utils/data_utils/owner_type.py similarity index 100% rename from data/src/data_utils/owner_type.py rename to data/src/new_etl/data_utils/data_utils/owner_type.py diff --git a/data/src/data_utils/park_priority.py b/data/src/new_etl/data_utils/data_utils/park_priority.py similarity index 100% rename from data/src/data_utils/park_priority.py rename to data/src/new_etl/data_utils/data_utils/park_priority.py diff --git a/data/src/data_utils/phs_properties.py b/data/src/new_etl/data_utils/data_utils/phs_properties.py similarity index 96% rename from data/src/data_utils/phs_properties.py rename to data/src/new_etl/data_utils/data_utils/phs_properties.py index 172a2575..f1331b28 100644 --- a/data/src/data_utils/phs_properties.py +++ b/data/src/new_etl/data_utils/data_utils/phs_properties.py @@ -1,6 +1,5 @@ from classes.featurelayer import FeatureLayer from constants.services import PHS_LAYERS_TO_LOAD -import pandas as pd def phs_properties(primary_featurelayer: FeatureLayer) -> FeatureLayer: @@ -24,7 +23,9 @@ def phs_properties(primary_featurelayer: FeatureLayer) -> FeatureLayer: primary_featurelayer.spatial_join(phs_properties) # Create 'phs_care_program' column with values from 'program', drop 'program' - primary_featurelayer.gdf["phs_care_program"] = primary_featurelayer.gdf.pop("program") + primary_featurelayer.gdf["phs_care_program"] = primary_featurelayer.gdf.pop( + "program" + ) # Rebuild the GeoDataFrame after updates primary_featurelayer.rebuild_gdf() diff --git a/data/src/data_utils/ppr_properties.py b/data/src/new_etl/data_utils/data_utils/ppr_properties.py similarity index 100% rename from data/src/data_utils/ppr_properties.py rename to data/src/new_etl/data_utils/data_utils/ppr_properties.py diff --git a/data/src/data_utils/priority_level.py 
b/data/src/new_etl/data_utils/data_utils/priority_level.py similarity index 99% rename from data/src/data_utils/priority_level.py rename to data/src/new_etl/data_utils/data_utils/priority_level.py index e23e3f6f..84890404 100644 --- a/data/src/data_utils/priority_level.py +++ b/data/src/new_etl/data_utils/data_utils/priority_level.py @@ -1,5 +1,6 @@ import pandas as pd + def priority_level(dataset): priority_levels = [] for idx, row in dataset.gdf.iterrows(): diff --git a/data/src/data_utils/pwd_parcels.py b/data/src/new_etl/data_utils/data_utils/pwd_parcels.py similarity index 96% rename from data/src/data_utils/pwd_parcels.py rename to data/src/new_etl/data_utils/data_utils/pwd_parcels.py index d4dd2856..00fadc8c 100644 --- a/data/src/data_utils/pwd_parcels.py +++ b/data/src/new_etl/data_utils/data_utils/pwd_parcels.py @@ -72,9 +72,7 @@ def pwd_parcels(primary_featurelayer: FeatureLayer) -> FeatureLayer: # Count observations with point geometry grouped by 'vacant' point_geometry_counts = ( - merged_gdf[merged_gdf["geometry"].geom_type == "Point"] - .groupby("vacant") - .size() + merged_gdf[merged_gdf["geometry"].geom_type == "Point"].groupby("vacant").size() ) # Log the results diff --git a/data/src/data_utils/rco_geoms.py b/data/src/new_etl/data_utils/data_utils/rco_geoms.py similarity index 100% rename from data/src/data_utils/rco_geoms.py rename to data/src/new_etl/data_utils/data_utils/rco_geoms.py diff --git a/data/src/new_etl/data_utils/data_utils/tactical_urbanism.py b/data/src/new_etl/data_utils/data_utils/tactical_urbanism.py new file mode 100644 index 00000000..205ae264 --- /dev/null +++ b/data/src/new_etl/data_utils/data_utils/tactical_urbanism.py @@ -0,0 +1,34 @@ +from classes.featurelayer import ( + FeatureLayer, +) # Replace with the actual module where FeatureLayer is defined + + +def tactical_urbanism(primary_featurelayer: FeatureLayer) -> FeatureLayer: + """ + Assigns a 'tactical_urbanism' value to each row in the primary feature layer based on specific conditions. + + Tactical urbanism is marked as "Yes" if the property is a parcel of type 'Land', + and does not have any unsafe or immediately dangerous buildings. Otherwise, it is "No". + + Args: + primary_featurelayer: A FeatureLayer object containing a GeoDataFrame (`gdf`) as an attribute. + + Returns: + The input FeatureLayer with a new column 'tactical_urbanism' added to its GeoDataFrame. 
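+
+    Assumes 'unsafe_building' and 'imm_dang_building' are "Y"/"N" flags set by
+    the unsafe_buildings and imm_dang_buildings steps earlier in the pipeline.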
+ """ + tactical_urbanism_values = [] + + for idx, row in primary_featurelayer.gdf.iterrows(): + if ( + row["parcel_type"] == "Land" + and row["unsafe_building"] == "N" + and row["imm_dang_building"] == "N" + ): + tactical_urbanism = "Yes" + else: + tactical_urbanism = "No" + + tactical_urbanism_values.append(tactical_urbanism) + + primary_featurelayer.gdf["tactical_urbanism"] = tactical_urbanism_values + return primary_featurelayer diff --git a/data/src/data_utils/tree_canopy.py b/data/src/new_etl/data_utils/data_utils/tree_canopy.py similarity index 100% rename from data/src/data_utils/tree_canopy.py rename to data/src/new_etl/data_utils/data_utils/tree_canopy.py diff --git a/data/src/data_utils/unsafe_buildings.py b/data/src/new_etl/data_utils/data_utils/unsafe_buildings.py similarity index 100% rename from data/src/data_utils/unsafe_buildings.py rename to data/src/new_etl/data_utils/data_utils/unsafe_buildings.py diff --git a/data/src/data_utils/utils.py b/data/src/new_etl/data_utils/data_utils/utils.py similarity index 100% rename from data/src/data_utils/utils.py rename to data/src/new_etl/data_utils/data_utils/utils.py diff --git a/data/src/data_utils/vacant_properties.py b/data/src/new_etl/data_utils/data_utils/vacant_properties.py similarity index 98% rename from data/src/data_utils/vacant_properties.py rename to data/src/new_etl/data_utils/data_utils/vacant_properties.py index c4d9347e..f3457be9 100644 --- a/data/src/data_utils/vacant_properties.py +++ b/data/src/new_etl/data_utils/data_utils/vacant_properties.py @@ -93,7 +93,9 @@ def vacant_properties(primary_featurelayer) -> FeatureLayer: check_null_percentage(df) # Create vacant column in the primary feature layer as True/False - primary_featurelayer.gdf["vacant"] = primary_featurelayer.gdf["opa_id"].isin(df["opa_id"]) + primary_featurelayer.gdf["vacant"] = primary_featurelayer.gdf["opa_id"].isin( + df["opa_id"] + ) print("Vacant column added based on opa_id match.") From 7bdec829d9b74413efe3be9496d1918888023234 Mon Sep 17 00:00:00 2001 From: nlebovits Date: Wed, 20 Nov 2024 22:01:36 -0500 Subject: [PATCH 13/18] reset regular pipeline to staging --- data/src/data_utils/__init__.py | 27 +++ data/src/data_utils/access_process.py | 43 +++++ data/src/data_utils/city_owned_properties.py | 81 ++++++++ data/src/data_utils/community_gardens.py | 28 +++ data/src/data_utils/conservatorship.py | 59 ++++++ data/src/data_utils/contig_neighbors.py | 27 +++ data/src/data_utils/deliquencies.py | 31 ++++ data/src/data_utils/dev_probability.py | 59 ++++++ data/src/data_utils/drug_crimes.py | 10 + data/src/data_utils/gun_crimes.py | 8 + data/src/data_utils/imm_dang_buildings.py | 28 +++ data/src/data_utils/kde.py | 159 ++++++++++++++++ data/src/data_utils/l_and_i.py | 178 ++++++++++++++++++ data/src/data_utils/nbhoods.py | 25 +++ data/src/data_utils/negligent_devs.py | 166 +++++++++++++++++ data/src/data_utils/opa_properties.py | 29 +++ data/src/data_utils/owner_type.py | 37 ++++ data/src/data_utils/park_priority.py | 135 ++++++++++++++ data/src/data_utils/phs_properties.py | 33 ++++ data/src/data_utils/ppr_properties.py | 53 ++++++ data/src/data_utils/priority_level.py | 51 +++++ data/src/data_utils/rco_geoms.py | 57 ++++++ data/src/data_utils/tactical_urbanism.py | 25 +++ data/src/data_utils/tree_canopy.py | 32 ++++ data/src/data_utils/unsafe_buildings.py | 28 +++ data/src/data_utils/utils.py | 42 +++++ data/src/data_utils/vacant_properties.py | 186 +++++++++++++++++++ 27 files changed, 1637 insertions(+) create mode 100644 
data/src/data_utils/__init__.py create mode 100644 data/src/data_utils/access_process.py create mode 100644 data/src/data_utils/city_owned_properties.py create mode 100644 data/src/data_utils/community_gardens.py create mode 100644 data/src/data_utils/conservatorship.py create mode 100644 data/src/data_utils/contig_neighbors.py create mode 100644 data/src/data_utils/deliquencies.py create mode 100644 data/src/data_utils/dev_probability.py create mode 100644 data/src/data_utils/drug_crimes.py create mode 100644 data/src/data_utils/gun_crimes.py create mode 100644 data/src/data_utils/imm_dang_buildings.py create mode 100644 data/src/data_utils/kde.py create mode 100644 data/src/data_utils/l_and_i.py create mode 100644 data/src/data_utils/nbhoods.py create mode 100644 data/src/data_utils/negligent_devs.py create mode 100644 data/src/data_utils/opa_properties.py create mode 100644 data/src/data_utils/owner_type.py create mode 100644 data/src/data_utils/park_priority.py create mode 100644 data/src/data_utils/phs_properties.py create mode 100644 data/src/data_utils/ppr_properties.py create mode 100644 data/src/data_utils/priority_level.py create mode 100644 data/src/data_utils/rco_geoms.py create mode 100644 data/src/data_utils/tactical_urbanism.py create mode 100644 data/src/data_utils/tree_canopy.py create mode 100644 data/src/data_utils/unsafe_buildings.py create mode 100644 data/src/data_utils/utils.py create mode 100644 data/src/data_utils/vacant_properties.py diff --git a/data/src/data_utils/__init__.py b/data/src/data_utils/__init__.py new file mode 100644 index 00000000..e1709a69 --- /dev/null +++ b/data/src/data_utils/__init__.py @@ -0,0 +1,27 @@ +from .city_owned_properties import city_owned_properties +from .phs_properties import phs_properties +from .l_and_i import l_and_i +from .rco_geoms import rco_geoms +from .tree_canopy import tree_canopy +from .nbhoods import nbhoods +from .gun_crimes import gun_crimes +from .deliquencies import deliquencies +from .opa_properties import opa_properties +from .vacant_properties import vacant_properties +from .priority_level import priority_level +from .access_process import access_process + +__all__ = [ + "city_owned_properties", + "phs_properties", + "l_and_i", + "rco_geoms", + "tree_canopy", + "nbhoods", + "gun_crimes", + "deliquencies", + "opa_properties", + "vacant_properties", + "priority_level", + "access_process", +] diff --git a/data/src/data_utils/access_process.py b/data/src/data_utils/access_process.py new file mode 100644 index 00000000..7c8e79de --- /dev/null +++ b/data/src/data_utils/access_process.py @@ -0,0 +1,43 @@ +from typing import Any + + +def access_process(dataset: Any) -> Any: + """ + Process a dataset to determine the access process for each property based on + city ownership and market value. The result is added as a new column in the dataset. + + Args: + dataset (Any): The dataset containing a GeoDataFrame named `gdf` with + columns "city_owner_agency" and "market_value". + + Returns: + Any: The updated dataset with an additional "access_process" column. + + Side Effects: + Prints the distribution of the "access_process" column. 
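+
+    Example (illustrative input -> output pairs, per the decision logic below):
+
+        city_owner_agency == "Land Bank (PHDC)"        -> "Go through Land Bank"
+        city_owner_agency == "PRA"                     -> "Do Nothing"
+        any other agency/none, market_value == 5000    -> "Private Land Use Agreement"
+        any other agency/none, market_value == 500     -> "Buy Property"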
+ """ + access_processes = [] + + for _, row in dataset.gdf.iterrows(): + # Decision Points + city_owner_agency = row["city_owner_agency"] + market_value_over_1000 = ( + row["market_value"] and float(row["market_value"]) > 1000 + ) + + # Simplified decision logic + if city_owner_agency == "Land Bank (PHDC)": + access_process = "Go through Land Bank" + elif city_owner_agency == "PRA": + access_process = "Do Nothing" + else: + if market_value_over_1000: + access_process = "Private Land Use Agreement" + else: + access_process = "Buy Property" + + access_processes.append(access_process) + + dataset.gdf["access_process"] = access_processes + + return dataset diff --git a/data/src/data_utils/city_owned_properties.py b/data/src/data_utils/city_owned_properties.py new file mode 100644 index 00000000..a5b21980 --- /dev/null +++ b/data/src/data_utils/city_owned_properties.py @@ -0,0 +1,81 @@ +from typing import Any +from classes.featurelayer import FeatureLayer +from constants.services import CITY_OWNED_PROPERTIES_TO_LOAD + +def city_owned_properties(primary_featurelayer: FeatureLayer) -> FeatureLayer: + """ + Processes city-owned property data by joining it with the primary feature layer, + renaming columns, and updating access information for properties based on ownership. + All instances where the "city_owner_agency" is "PLB" are changed to "Land Bank (PHDC)". + + Args: + primary_featurelayer (FeatureLayer): The primary feature layer to which city-owned + property data will be joined. + + Returns: + FeatureLayer: The updated primary feature layer with processed city ownership + information. + """ + city_owned_properties = FeatureLayer( + name="City Owned Properties", + esri_rest_urls=CITY_OWNED_PROPERTIES_TO_LOAD, + cols=["OPABRT", "AGENCY", "SIDEYARDELIGIBLE"], + ) + + city_owned_properties.gdf.dropna(subset=["opabrt"], inplace=True) + + primary_featurelayer.opa_join(city_owned_properties.gdf, "opabrt") + + rename_columns = { + "agency": "city_owner_agency", + "sideyardeligible": "side_yard_eligible", + } + primary_featurelayer.gdf.rename(columns=rename_columns, inplace=True) + + primary_featurelayer.gdf.loc[ + primary_featurelayer.gdf["owner_1"].isin( + [ + "PHILADELPHIA HOUSING AUTH", + "PHILADELPHIA LAND BANK", + "REDEVELOPMENT AUTHORITY", + "PHILA REDEVELOPMENT AUTH", + ] + ), + "city_owner_agency", + ] = primary_featurelayer.gdf["owner_1"].replace( + { + "PHILADELPHIA HOUSING AUTH": "PHA", + "PHILADELPHIA LAND BANK": "Land Bank (PHDC)", + "REDEVELOPMENT AUTHORITY": "PRA", + "PHILA REDEVELOPMENT AUTH": "PRA", + } + ) + + primary_featurelayer.gdf.loc[ + (primary_featurelayer.gdf["owner_1"] == "CITY OF PHILA") + & ( + primary_featurelayer.gdf["owner_2"].str.contains( + "PUBLIC PROP|PUBLC PROP", na=False + ) + ), + "city_owner_agency", + ] = "DPP" + + primary_featurelayer.gdf.loc[ + primary_featurelayer.gdf["owner_1"].isin( + ["CITY OF PHILADELPHIA", "CITY OF PHILA"] + ) + & primary_featurelayer.gdf["owner_2"].isna(), + "city_owner_agency", + ] = "City of Philadelphia" + + primary_featurelayer.gdf.loc[:, "side_yard_eligible"] = primary_featurelayer.gdf[ + "side_yard_eligible" + ].fillna("No") + + # Update all instances where city_owner_agency is "PLB" to "Land Bank (PHDC)" + primary_featurelayer.gdf.loc[ + primary_featurelayer.gdf["city_owner_agency"] == "PLB", "city_owner_agency" + ] = "Land Bank (PHDC)" + + return primary_featurelayer diff --git a/data/src/data_utils/community_gardens.py b/data/src/data_utils/community_gardens.py new file mode 100644 index 00000000..4bed0284 --- /dev/null +++ 
b/data/src/data_utils/community_gardens.py @@ -0,0 +1,28 @@ +from classes.featurelayer import FeatureLayer +from constants.services import COMMUNITY_GARDENS_TO_LOAD + + +def community_gardens(primary_featurelayer): + # this script *removes* (rather than adds) known community gardens from the dataset in order to protect them from potential predatory developers + community_gardens = FeatureLayer( + name="Community Gardens", esri_rest_urls=COMMUNITY_GARDENS_TO_LOAD + ) + + community_gardens.gdf = community_gardens.gdf[["Site_Name", "geometry"]] + + primary_featurelayer.spatial_join(community_gardens) + + # Create a boolean mask where 'site_Name' is not null + mask = primary_featurelayer.gdf["Site_Name"].notnull() + + count_dropped = mask.sum() + print(f"Number of community gardens being dropped: {count_dropped}") + + # Use this mask to drop rows where 'site_Name' is not null + primary_featurelayer.gdf = primary_featurelayer.gdf.drop( + primary_featurelayer.gdf[mask].index + ) + + primary_featurelayer.gdf = primary_featurelayer.gdf.drop(columns=["Site_Name"]) + + return primary_featurelayer diff --git a/data/src/data_utils/conservatorship.py b/data/src/data_utils/conservatorship.py new file mode 100644 index 00000000..5f9c9793 --- /dev/null +++ b/data/src/data_utils/conservatorship.py @@ -0,0 +1,59 @@ +import datetime +from dateutil.parser import parse +import pytz + +est = pytz.timezone("US/Eastern") +six_months_ago = (datetime.datetime.now() - datetime.timedelta(days=180)).astimezone( + est +) + +blight_words = [ + "weed", + "rubbish", + "garbage", + "tire", + "debris", + "clean", + "waste", + "vegetation", + "dumping", + "scrap", + "auto", + "vehicle", + "graffiti", + "dangerous", +] + + +def conservatorship(primary_featurelayer): + conservatorships = [] + + for idx, row in primary_featurelayer.gdf.iterrows(): + city_owner_agency = row["city_owner_agency"] + sheriff_sale = row["sheriff_sale"] == "Y" + market_value_over_1000 = ( + row["market_value"] and float(row["market_value"]) > 1000 + ) + li_complaints_lower = str(row["li_complaints"]).lower().split(" ") + contains_blight_word = any(word in li_complaints_lower for word in blight_words) + + try: + sale_date = parse(row["sale_date"]).astimezone(est) + sale_date_6_months_ago = sale_date <= six_months_ago + except (TypeError, ValueError): + sale_date_6_months_ago = False + + # Simplified decision logic + if city_owner_agency == "Land Bank (PHDC)" or ( + not sale_date_6_months_ago and market_value_over_1000 + ): + conservatorship = "No" + elif contains_blight_word and not sheriff_sale and sale_date_6_months_ago: + conservatorship = "Yes" + else: + conservatorship = "No" + + conservatorships.append(conservatorship) + + primary_featurelayer.gdf["conservatorship"] = conservatorships + return primary_featurelayer diff --git a/data/src/data_utils/contig_neighbors.py b/data/src/data_utils/contig_neighbors.py new file mode 100644 index 00000000..1c811a15 --- /dev/null +++ b/data/src/data_utils/contig_neighbors.py @@ -0,0 +1,27 @@ +import warnings + +import networkx as nx +from libpysal.weights import Queen + + +def contig_neighbors(primary_featurelayer): + parcels = primary_featurelayer.gdf + + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=FutureWarning) + warnings.filterwarnings( + "ignore", + category=UserWarning, + message="The weights matrix is not fully connected", + ) + + w = Queen.from_dataframe(parcels) + + g = w.to_networkx() + + # Calculate the number of contiguous neighbors for each feature in parcels + 
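+    # Note: nx.node_connected_component(g, i) returns every node reachable
+    # from i (including i itself), so each value below is the size of the
+    # parcel's whole contiguous cluster under Queen contiguity, not just the
+    # count of its immediate neighbors.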
n_contiguous = [len(nx.node_connected_component(g, i)) for i in range(len(parcels))] + + primary_featurelayer.gdf["n_contiguous"] = n_contiguous + + return primary_featurelayer diff --git a/data/src/data_utils/deliquencies.py b/data/src/data_utils/deliquencies.py new file mode 100644 index 00000000..16f8f205 --- /dev/null +++ b/data/src/data_utils/deliquencies.py @@ -0,0 +1,31 @@ +from classes.featurelayer import FeatureLayer +from constants.services import DELINQUENCIES_QUERY + + +def deliquencies(primary_featurelayer): + tax_deliquencies = FeatureLayer( + name="Property Tax Delinquencies", + carto_sql_queries=DELINQUENCIES_QUERY, + use_wkb_geom_field="the_geom", + cols=[ + "opa_number", + "total_due", + "is_actionable", + "payment_agreement", + "num_years_owed", + "most_recent_year_owed", + "total_assessment", + "sheriff_sale", + ], + ) + + primary_featurelayer.opa_join( + tax_deliquencies.gdf, + "opa_number", + ) + + primary_featurelayer.gdf.loc[:, "sheriff_sale"] = primary_featurelayer.gdf[ + "sheriff_sale" + ].fillna("N") + + return primary_featurelayer diff --git a/data/src/data_utils/dev_probability.py b/data/src/data_utils/dev_probability.py new file mode 100644 index 00000000..4c8a220f --- /dev/null +++ b/data/src/data_utils/dev_probability.py @@ -0,0 +1,59 @@ +import geopandas as gpd +import jenkspy +import pandas as pd +import requests +from classes.featurelayer import FeatureLayer +from constants.services import CENSUS_BGS_URL, PERMITS_QUERY + +from config.config import USE_CRS + + +def dev_probability(primary_featurelayer): + census_bgs_gdf = gpd.read_file(CENSUS_BGS_URL) + census_bgs_gdf = census_bgs_gdf.to_crs(USE_CRS) + + base_url = "https://phl.carto.com/api/v2/sql" + response = requests.get(f"{base_url}?q={PERMITS_QUERY}&format=GeoJSON") + + if response.status_code == 200: + try: + permits_gdf = gpd.GeoDataFrame.from_features( + response.json(), crs="EPSG:4326" + ) + print("GeoDataFrame created successfully.") + except Exception as e: + print(f"Failed to convert response to GeoDataFrame: {e}") + return primary_featurelayer + else: + truncated_response = response.content[:500] + print( + f"Failed to fetch permits data. HTTP status code: {response.status_code}. 
Response text: {truncated_response}" + ) + return primary_featurelayer + + permits_gdf = permits_gdf.to_crs(USE_CRS) + + joined_gdf = gpd.sjoin(permits_gdf, census_bgs_gdf, how="inner", predicate="within") + + permit_counts = joined_gdf.groupby("index_right").size() + census_bgs_gdf["permit_count"] = census_bgs_gdf.index.map(permit_counts) + census_bgs_gdf["permit_count"] = census_bgs_gdf["permit_count"].fillna(0) + + # Classify development probability using Jenks natural breaks + breaks = jenkspy.jenks_breaks(census_bgs_gdf["permit_count"], n_classes=3) + census_bgs_gdf["dev_rank"] = pd.cut( + census_bgs_gdf["permit_count"], bins=breaks, labels=["Low", "Medium", "High"] + ).astype(str) + + updated_census_bgs = FeatureLayer( + name="Updated Census Block Groups", + gdf=census_bgs_gdf[["permit_count", "dev_rank", "geometry"]], + use_wkb_geom_field="geometry", + cols=["permit_count", "dev_rank"], + ) + + updated_census_bgs.gdf = updated_census_bgs.gdf.to_crs(USE_CRS) + + primary_featurelayer.spatial_join(updated_census_bgs) + + return primary_featurelayer diff --git a/data/src/data_utils/drug_crimes.py b/data/src/data_utils/drug_crimes.py new file mode 100644 index 00000000..61a4a43c --- /dev/null +++ b/data/src/data_utils/drug_crimes.py @@ -0,0 +1,10 @@ +from constants.services import DRUGCRIME_SQL_QUERY + + +from data_utils.kde import apply_kde_to_primary + + +def drug_crimes(primary_featurelayer): + return apply_kde_to_primary( + primary_featurelayer, "Drug Crimes", DRUGCRIME_SQL_QUERY + ) diff --git a/data/src/data_utils/gun_crimes.py b/data/src/data_utils/gun_crimes.py new file mode 100644 index 00000000..27155546 --- /dev/null +++ b/data/src/data_utils/gun_crimes.py @@ -0,0 +1,8 @@ +from constants.services import GUNCRIME_SQL_QUERY + + +from data_utils.kde import apply_kde_to_primary + + +def gun_crimes(primary_featurelayer): + return apply_kde_to_primary(primary_featurelayer, "Gun Crimes", GUNCRIME_SQL_QUERY) diff --git a/data/src/data_utils/imm_dang_buildings.py b/data/src/data_utils/imm_dang_buildings.py new file mode 100644 index 00000000..7e7041ba --- /dev/null +++ b/data/src/data_utils/imm_dang_buildings.py @@ -0,0 +1,28 @@ +from classes.featurelayer import FeatureLayer +from constants.services import IMMINENT_DANGER_BUILDINGS_QUERY + + +def imm_dang_buildings(primary_featurelayer): + imm_dang_buildings = FeatureLayer( + name="Imminently Dangerous Buildings", + use_wkb_geom_field="the_geom", + carto_sql_queries=IMMINENT_DANGER_BUILDINGS_QUERY, + cols=["opa_account_num"], + ) + + imm_dang_buildings.gdf.loc[:, "imm_dang_building"] = "Y" + + imm_dang_buildings.gdf = imm_dang_buildings.gdf.rename( + columns={"opa_account_num": "opa_number"} + ) + + primary_featurelayer.opa_join( + imm_dang_buildings.gdf, + "opa_number", + ) + + primary_featurelayer.gdf.loc[:, "imm_dang_building"] = primary_featurelayer.gdf[ + "imm_dang_building" + ].fillna("N") + + return primary_featurelayer diff --git a/data/src/data_utils/kde.py b/data/src/data_utils/kde.py new file mode 100644 index 00000000..477cc306 --- /dev/null +++ b/data/src/data_utils/kde.py @@ -0,0 +1,159 @@ +import numpy as np +import rasterio +from awkde.awkde import GaussianKDE +from classes.featurelayer import FeatureLayer +from config.config import USE_CRS +from rasterio.transform import Affine +from tqdm import tqdm +from concurrent.futures import ProcessPoolExecutor, as_completed + +import mapclassify + +resolution = 1320 # 0.25 miles (in feet, bc the CRS is 2272) +batch_size = 100000 + + +def kde_predict_chunk(kde, chunk): + 
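+    # Note: this helper runs in worker processes via the ProcessPoolExecutor
+    # below, so the fitted `kde` object is pickled and shipped to each worker;
+    # this assumes awkde's GaussianKDE instances are picklable.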
"""Helper function to predict KDE for a chunk of grid points.""" + return kde.predict(chunk) + + +def generic_kde(name, query, resolution=resolution, batch_size=batch_size): + print(f"Initializing FeatureLayer for {name}") + + feature_layer = FeatureLayer(name=name, carto_sql_queries=query) + + coords = np.array([geom.xy for geom in feature_layer.gdf.geometry]) + x, y = coords[:, 0, :].flatten(), coords[:, 1, :].flatten() + + X = np.column_stack((x, y)) + + x_grid, y_grid = ( + np.linspace(x.min(), x.max(), resolution), + np.linspace(y.min(), y.max(), resolution), + ) + xx, yy = np.meshgrid(x_grid, y_grid) + grid_points = np.column_stack((xx.ravel(), yy.ravel())) + + print(f"Fitting KDE for {name} data") + kde = GaussianKDE(glob_bw=0.1, alpha=0.999, diag_cov=True) + kde.fit(X) + + print(f"Predicting KDE values for grid of size {grid_points.shape}") + + # Split grid points into chunks + chunks = [ + grid_points[i : i + batch_size] for i in range(0, len(grid_points), batch_size) + ] + + # Run predictions in parallel + z = np.zeros(len(grid_points)) # Placeholder for predicted values + + with ProcessPoolExecutor() as executor: + # Submit the tasks first, wrapped with tqdm to monitor as they're submitted + futures = { + executor.submit(kde_predict_chunk, kde, chunk): i + for i, chunk in enumerate(tqdm(chunks, desc="Submitting tasks")) + } + + # Now wrap the as_completed with tqdm for progress tracking + for future in tqdm( + as_completed(futures), total=len(futures), desc="Processing tasks" + ): + i = futures[future] + z[i * batch_size : (i + 1) * batch_size] = future.result() + + zz = z.reshape(xx.shape) + + x_res, y_res = ( + (x.max() - x.min()) / (resolution - 1), + (y.max() - y.min()) / (resolution - 1), + ) + min_x, min_y = x.min(), y.min() + + transform = Affine.translation(min_x, min_y) * Affine.scale(x_res, y_res) + + raster_filename = f"tmp/{name.lower().replace(' ', '_')}.tif" + print(f"Saving raster to {raster_filename}") + + with rasterio.open( + raster_filename, + "w", + driver="GTiff", + height=zz.shape[0], + width=zz.shape[1], + count=1, + dtype=zz.dtype, + crs=USE_CRS, + transform=transform, + ) as dst: + dst.write(zz, 1) + + return raster_filename, X + + +def apply_kde_to_primary(primary_featurelayer, name, query, resolution=resolution): + # Generate KDE and raster file + raster_filename, crime_coords = generic_kde(name, query, resolution) + + # Add centroid column temporarily + primary_featurelayer.gdf["centroid"] = primary_featurelayer.gdf.geometry.centroid + + # Create list of (x, y) coordinates for centroids + coord_list = [ + (x, y) + for x, y in zip( + primary_featurelayer.gdf["centroid"].x, + primary_featurelayer.gdf["centroid"].y, + ) + ] + + # Remove the temporary centroid column + primary_featurelayer.gdf = primary_featurelayer.gdf.drop(columns=["centroid"]) + + # Open the generated raster file and sample the KDE density values at the centroids + with rasterio.open(raster_filename) as src: + sampled_values = [x[0] for x in src.sample(coord_list)] + + # Create a column for the density values + density_column = f"{name.lower().replace(' ', '_')}_density" + primary_featurelayer.gdf[density_column] = sampled_values + + # Calculate percentiles using mapclassify.Percentiles + percentile_breaks = list(range(101)) # Percentile breaks from 0 to 100 + classifier = mapclassify.Percentiles( + primary_featurelayer.gdf[density_column], pct=percentile_breaks + ) + + # Assign the percentile bins to the density values + primary_featurelayer.gdf[density_column + "_percentile"] = ( + 
classifier.yb + ) # yb gives the bin index + + # Apply percentile labels (e.g., 1st Percentile, 2nd Percentile, etc.) + primary_featurelayer.gdf[density_column + "_label"] = primary_featurelayer.gdf[ + density_column + "_percentile" + ].apply(label_percentile) + + # Convert the percentile column to float and drop the density column + primary_featurelayer.gdf[density_column + "_percentile"] = primary_featurelayer.gdf[ + density_column + "_percentile" + ].astype(float) + + primary_featurelayer.gdf = primary_featurelayer.gdf.drop(columns=[density_column]) + + print(f"Finished processing {name}") + return primary_featurelayer + + +def label_percentile(value): + if 10 <= value % 100 <= 13: + return f"{value}th Percentile" + elif value % 10 == 1: + return f"{value}st Percentile" + elif value % 10 == 2: + return f"{value}nd Percentile" + elif value % 10 == 3: + return f"{value}rd Percentile" + else: + return f"{value}th Percentile" diff --git a/data/src/data_utils/l_and_i.py b/data/src/data_utils/l_and_i.py new file mode 100644 index 00000000..27f28147 --- /dev/null +++ b/data/src/data_utils/l_and_i.py @@ -0,0 +1,178 @@ +import pandas as pd +import geopandas as gpd +from typing import List +from classes.featurelayer import FeatureLayer +from constants.services import COMPLAINTS_SQL_QUERY, VIOLATIONS_SQL_QUERY + +def l_and_i(primary_featurelayer: FeatureLayer) -> FeatureLayer: + """ + Process L&I (Licenses and Inspections) data for complaints and violations. + + This function filters and processes L&I complaints and violations data, + joining it with the primary feature layer based on spatial relationships + and OPA (Office of Property Assessment) identifiers. + + Args: + primary_featurelayer (FeatureLayer): The primary feature layer to join L&I data to. + + Returns: + FeatureLayer: The primary feature layer updated with L&I data. 
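+
+    Example (illustrative) of the keyword filter used below, assuming a
+    pandas Series of free-text complaint subjects:
+
+        import pandas as pd
+        keywords = ["dumping", "blight"]
+        subjects = pd.Series(["Illegal DUMPING on lot", "Permit question"])
+        mask = subjects.str.lower().str.contains("|".join(keywords))
+        # mask -> [True, False]; only rows matching a keyword are kept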
+ """ + keywords: List[str] = [ + 'dumping', 'blight', 'rubbish', 'weeds', 'graffiti', + 'abandoned', 'sanitation', 'litter', 'vacant', 'trash', + 'unsafe' + ] + + # Load complaints data from L&I + l_and_i_complaints: FeatureLayer = FeatureLayer( + name="LI Complaints", + carto_sql_queries=COMPLAINTS_SQL_QUERY + ) + + # Filter for rows where 'subject' contains any of the keywords + l_and_i_complaints.gdf = l_and_i_complaints.gdf[ + l_and_i_complaints.gdf["subject"].str.lower().str.contains('|'.join(keywords)) + ] + + # Filter for only Status = 'Open' + l_and_i_complaints.gdf = l_and_i_complaints.gdf[ + l_and_i_complaints.gdf["status"].str.lower() == "open" + ] + + # Group by geometry and concatenate the violationcodetitle values into a list with a semicolon separator + l_and_i_complaints.gdf = ( + l_and_i_complaints.gdf.groupby("geometry")["service_name"] + .apply(lambda x: "; ".join([val for val in x if val is not None])) + .reset_index() + ) + + l_and_i_complaints.rebuild_gdf() + + # rename the column to 'li_complaints' + l_and_i_complaints.gdf.rename( + columns={"service_name": "li_complaints"}, inplace=True + ) + + # Load data for violations from L&I + l_and_i_violations: FeatureLayer = FeatureLayer( + name="LI Violations", + carto_sql_queries=VIOLATIONS_SQL_QUERY, + from_xy=True + ) + + # Filter for rows where 'casetype' contains any of the keywords, handling NaN values + l_and_i_violations.gdf = l_and_i_violations.gdf[ + l_and_i_violations.gdf["violationcodetitle"].fillna('').str.lower().str.contains('|'.join(keywords)) + ] + + all_violations_count_df: pd.DataFrame = ( + l_and_i_violations.gdf.groupby("opa_account_num") + .count() + .reset_index()[["opa_account_num", "violationnumber", "geometry"]] + ) + all_violations_count_df = all_violations_count_df.rename( + columns={"violationnumber": "all_violations_past_year"} + ) + # filter for only cases where the casestatus is 'IN VIOLATION' or 'UNDER INVESTIGATION' + violations_gdf: gpd.GeoDataFrame = l_and_i_violations.gdf[ + (l_and_i_violations.gdf["violationstatus"].str.lower() == "open") + ] + + open_violations_count_df: pd.DataFrame = ( + violations_gdf.groupby("opa_account_num") + .count() + .reset_index()[["opa_account_num", "violationnumber", "geometry"]] + ) + open_violations_count_df = open_violations_count_df.rename( + columns={"violationnumber": "open_violations_past_year"} + ) + # join the all_violations_count_df and open_violations_count_df dataframes on opa_account_num + violations_count_gdf: gpd.GeoDataFrame = all_violations_count_df.merge( + open_violations_count_df, how="left", on="opa_account_num" + ) + + # replace NaN values with 0 + violations_count_gdf.fillna(0, inplace=True) + + # convert the all_violations_past_year and open_violations_past_year columns to integers + violations_count_gdf["all_violations_past_year"] = violations_count_gdf[ + "all_violations_past_year" + ].astype(int) + violations_count_gdf["open_violations_past_year"] = violations_count_gdf[ + "open_violations_past_year" + ].astype(int) + violations_count_gdf = violations_count_gdf[ + ["opa_account_num", "all_violations_past_year", "open_violations_past_year"] + ] + + # collapse violations_gdf by address and concatenate the violationcodetitle values into a list with a semicolon separator + l_and_i_violations.gdf = ( + l_and_i_violations.gdf.groupby("geometry")["violationcodetitle"] + .apply(lambda x: "; ".join([val for val in x if val is not None])) + .reset_index() + ) + l_and_i_complaints.rebuild_gdf() + + # rename the column to 'li_violations' + 
l_and_i_violations.gdf.rename( + columns={"violationcodetitle": "li_code_violations"}, inplace=True + ) + + # Violations can work with an OPA join + primary_featurelayer.opa_join( + violations_count_gdf, + "opa_account_num", + ) + + # Complaints need a spatial join, but we need to take special care to merge on just the parcel geoms first to get opa_id + complaints_with_opa_id: gpd.GeoDataFrame = primary_featurelayer.gdf.sjoin( + l_and_i_complaints.gdf, how="left", predicate="contains" + ) + complaints_with_opa_id.drop(columns=["index_right"], inplace=True) + + # Concatenate the complaints values into a list with a semicolon separator by opa_id + complaints_with_opa_id = ( + complaints_with_opa_id.groupby("opa_id")["li_complaints"] + .apply(lambda x: "; ".join([str(val) for val in x if val is not None])) + .reset_index()[["opa_id", "li_complaints"]] + ) + + # Clean up the NaN values in the li_complaints column + def remove_nan_strings(x: str) -> str | None: + """ + Remove 'nan' strings from the input. + + Args: + x (str): Input string. + + Returns: + str | None: Cleaned string or None if only 'nan' values. + """ + if x == "nan" or ("nan;" in x): + return None + else: + return x + + complaints_with_opa_id["li_complaints"] = complaints_with_opa_id[ + "li_complaints" + ].apply(remove_nan_strings) + + # Merge the complaints values back into the primary_featurelayer + primary_featurelayer.opa_join( + complaints_with_opa_id, + "opa_id", + ) + + primary_featurelayer.gdf[ + ["all_violations_past_year", "open_violations_past_year"] + ] = ( + primary_featurelayer.gdf[ + ["all_violations_past_year", "open_violations_past_year"] + ] + .apply(lambda x: pd.to_numeric(x, errors="coerce")) + .fillna(0) + .astype(int) + ) + + return primary_featurelayer \ No newline at end of file diff --git a/data/src/data_utils/nbhoods.py b/data/src/data_utils/nbhoods.py new file mode 100644 index 00000000..6fde4bd0 --- /dev/null +++ b/data/src/data_utils/nbhoods.py @@ -0,0 +1,25 @@ +import geopandas as gpd +from classes.featurelayer import FeatureLayer +from constants.services import NBHOODS_URL + +from config.config import USE_CRS + + +def nbhoods(primary_featurelayer): + phl_nbhoods = gpd.read_file(NBHOODS_URL) + + # Correct the column name to uppercase if needed + if 'MAPNAME' in phl_nbhoods.columns: + phl_nbhoods.rename(columns={"MAPNAME": "neighborhood"}, inplace=True) + + phl_nbhoods = phl_nbhoods.to_crs(USE_CRS) + + nbhoods = FeatureLayer("Neighborhoods") + nbhoods.gdf = phl_nbhoods + + red_cols_to_keep = ["neighborhood", "geometry"] + nbhoods.gdf = nbhoods.gdf[red_cols_to_keep] + + primary_featurelayer.spatial_join(nbhoods) + + return primary_featurelayer diff --git a/data/src/data_utils/negligent_devs.py b/data/src/data_utils/negligent_devs.py new file mode 100644 index 00000000..aa95532c --- /dev/null +++ b/data/src/data_utils/negligent_devs.py @@ -0,0 +1,166 @@ +import re + +import pandas as pd + +replacements = { + "STREET": "ST", + "AVENUE": "AVE", + "ROAD": "RD", + "BOULEVARD": "BLVD", + "PLACE": "PL", + "FLOOR": "FL", + "FLR": "FL", + "FIRST": "1ST", + "SECOND": "2ND", + "THIRD": "3RD", + "FOURTH": "4TH", + "FIFTH": "5TH", + "SIXTH": "6TH", + "SEVENTH": "7TH", + "EIGHTH": "8TH", + "NINTH": "9TH", + "NORTH": "N", + "SOUTH": "S", + "EAST": "E", + "WEST": "W", + "SUITE": "STE", + "LA": "LN", + "LANE": "LN", + "PARKWAY": "PKY", +} + + +def standardize_street(street): + if not isinstance(street, str): + return "" + for full, abbr in replacements.items(): + street = re.sub(r"\b{}\b".format(full), abbr, street, 
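+                         # \b anchors make this a whole-word match;
+                         # IGNORECASE handles mixed-case street names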
flags=re.IGNORECASE) + return street + + +def create_standardized_address(row): + parts = [ + row["mailing_address_1"].strip() + if pd.notnull(row["mailing_address_1"]) + else "", + row["mailing_address_2"].strip() + if pd.notnull(row["mailing_address_2"]) + else "", + row["mailing_street"].strip() if pd.notnull(row["mailing_street"]) else "", + row["mailing_city_state"].strip() + if pd.notnull(row["mailing_city_state"]) + else "", + row["mailing_zip"].strip() if pd.notnull(row["mailing_zip"]) else "", + ] + standardized_address = ", ".join([part for part in parts if part]) + return standardized_address.lower() + + +def negligent_devs(primary_featurelayer): + devs = primary_featurelayer.gdf + + print("Columns in 'devs' DataFrame:", devs.columns) + + print("Initial properties data:") + print(devs[['opa_id', 'city_owner_agency', 'mailing_street']].head(10)) + + city_owners = devs.loc[~devs["city_owner_agency"].isna() & (devs["city_owner_agency"] != "")].copy() + non_city_owners = devs.loc[devs["city_owner_agency"].isna() | (devs["city_owner_agency"] == "")].copy() + + print(f"City owners shape: {city_owners.shape}, Non-city owners shape: {non_city_owners.shape}") + + # Log before standardizing addresses + print("Non-city owners mailing streets before standardization:") + print(non_city_owners[['opa_id', 'mailing_street']].head(10)) + + non_city_owners.loc[:, "mailing_street"] = ( + non_city_owners["mailing_street"].astype(str).apply(standardize_street) + ) + + print("Non-city owners mailing streets after standardization:") + print(non_city_owners[['opa_id', 'mailing_street']].head(10)) + + for term in ["ST", "AVE", "RD", "BLVD"]: + non_city_owners.loc[:, "mailing_street"] = non_city_owners[ + "mailing_street" + ].replace(regex={f"{term}.*": term}) + + # Log after applying term replacement + print("Non-city owners mailing streets after term replacement:") + print(non_city_owners[['opa_id', 'mailing_street']].head(10)) + + # Fill missing address components + non_city_owners.loc[:, "mailing_address_1"] = non_city_owners[ + "mailing_address_1" + ].fillna("") + non_city_owners.loc[:, "mailing_address_2"] = non_city_owners[ + "mailing_address_2" + ].fillna("") + non_city_owners.loc[:, "mailing_street"] = non_city_owners["mailing_street"].fillna( + "" + ) + non_city_owners.loc[:, "mailing_city_state"] = non_city_owners[ + "mailing_city_state" + ].fillna("") + non_city_owners.loc[:, "mailing_zip"] = non_city_owners["mailing_zip"].fillna("") + + # Log addresses before creating standardized address + print("Non-city owners mailing details before creating standardized address:") + print(non_city_owners[['opa_id', 'mailing_street', 'mailing_city_state', 'mailing_zip']].head(10)) + + non_city_owners.loc[:, "standardized_address"] = non_city_owners.apply( + create_standardized_address, axis=1 + ) + + # Log standardized addresses and counts + print("Standardized addresses with counts:") + address_counts = ( + non_city_owners.groupby("standardized_address") + .size() + .reset_index(name="property_count") + ) + print(address_counts.head(10)) + + sorted_address_counts = address_counts.sort_values( + by="property_count", ascending=False + ) + print("Top standardized addresses by property count:") + print(sorted_address_counts.head(10)) + + non_city_owners = non_city_owners.merge( + sorted_address_counts, on="standardized_address", how="left" + ) + + # Log merged data for city owners + city_owner_counts = ( + city_owners.groupby("city_owner_agency") + .size() + .reset_index(name="property_count") + ) + 
print("City owner counts:") + print(city_owner_counts.head(10)) + + city_owners = city_owners.merge( + city_owner_counts, on="city_owner_agency", how="left" + ) + + devs_combined = pd.concat([city_owners, non_city_owners], axis=0) + + # Final check on the merged data before updating primary_featurelayer + print("Combined data with property counts:") + print(devs_combined[['opa_id', 'property_count']].head(10)) + + primary_featurelayer.gdf = primary_featurelayer.gdf.merge( + devs_combined[["opa_id", "property_count"]], on="opa_id", how="left" + ) + primary_featurelayer.gdf.rename( + columns={"property_count": "n_properties_owned"}, inplace=True + ) + primary_featurelayer.gdf.loc[:, "negligent_dev"] = ( + primary_featurelayer.gdf["n_properties_owned"] > 5 + ) & (primary_featurelayer.gdf["city_owner_agency"].isna() | (primary_featurelayer.gdf["city_owner_agency"] == "")) + + print("Final feature layer data with negligent_dev flag:") + print(primary_featurelayer.gdf[['opa_id', 'n_properties_owned', 'negligent_dev']].head(10)) + + return primary_featurelayer diff --git a/data/src/data_utils/opa_properties.py b/data/src/data_utils/opa_properties.py new file mode 100644 index 00000000..2d02f42f --- /dev/null +++ b/data/src/data_utils/opa_properties.py @@ -0,0 +1,29 @@ +from classes.featurelayer import FeatureLayer +from constants.services import OPA_PROPERTIES_QUERY + + +def opa_properties(primary_featurelayer): + opa = FeatureLayer( + name="OPA Properties", + carto_sql_queries=OPA_PROPERTIES_QUERY, + use_wkb_geom_field="the_geom", + cols=[ + "market_value", + "sale_date", + "sale_price", + "parcel_number", + "mailing_address_1", + "mailing_address_2", + "mailing_care_of", + "mailing_city_state", + "mailing_street", + "mailing_zip" + ] + ) + + primary_featurelayer.opa_join( + opa.gdf, + "parcel_number", + ) + + return primary_featurelayer diff --git a/data/src/data_utils/owner_type.py b/data/src/data_utils/owner_type.py new file mode 100644 index 00000000..291364df --- /dev/null +++ b/data/src/data_utils/owner_type.py @@ -0,0 +1,37 @@ +import pandas as pd +from classes.featurelayer import FeatureLayer + +def owner_type(primary_featurelayer: FeatureLayer) -> FeatureLayer: + """ + Determines the ownership type for each property in the primary feature layer based on + the 'owner_1', 'owner_2', and 'city_owner_agency' columns. The ownership type is set as: + - "Public" if 'city_owner_agency' is not NA. + - "Business (LLC)" if 'city_owner_agency' is NA and "LLC" is found in 'owner_1' or 'owner_2'. + - "Individual" if 'city_owner_agency' is NA and "LLC" is not found in 'owner_1' or 'owner_2'. + + Args: + primary_featurelayer (FeatureLayer): The feature layer containing property ownership data. + + Returns: + FeatureLayer: The updated feature layer with the 'owner_type' column added. 
+ """ + owner_types = [] + + for _, row in primary_featurelayer.gdf.iterrows(): + # Extract owner1, owner2, and city_owner_agency + owner1 = str(row["owner_1"]).lower() + owner2 = str(row["owner_2"]).lower() + city_owner_agency = row["city_owner_agency"] + + # Determine ownership type based on the conditions + if pd.notna(city_owner_agency): + owner_types.append("Public") + elif " llc" in owner1 or " llc" in owner2: + owner_types.append("Business (LLC)") + else: + owner_types.append("Individual") + + # Add the 'owner_type' column to the GeoDataFrame + primary_featurelayer.gdf["owner_type"] = owner_types + + return primary_featurelayer diff --git a/data/src/data_utils/park_priority.py b/data/src/data_utils/park_priority.py new file mode 100644 index 00000000..7a97fb3b --- /dev/null +++ b/data/src/data_utils/park_priority.py @@ -0,0 +1,135 @@ +import os +import zipfile +from io import BytesIO +from typing import List, Union + +import geopandas as gpd +import requests +from bs4 import BeautifulSoup +from classes.featurelayer import FeatureLayer +from config.config import USE_CRS +from tqdm import tqdm +import pyogrio + + +def get_latest_shapefile_url() -> str: + """ + Scrapes the TPL website to get the URL of the latest shapefile. + + Returns: + str: The URL of the latest shapefile. + + Raises: + ValueError: If the shapefile link is not found on the page. + """ + url: str = "https://www.tpl.org/park-data-downloads" + response: requests.Response = requests.get(url) + soup: BeautifulSoup = BeautifulSoup(response.content, "html.parser") + + shapefile_link: Union[BeautifulSoup, None] = soup.find("a", string="Shapefile") + if shapefile_link: + return str(shapefile_link["href"]) + else: + raise ValueError("Shapefile link not found on the page") + + +def download_and_process_shapefile( + geojson_path: str, park_url: str, target_files: List[str], file_name_prefix: str +) -> gpd.GeoDataFrame: + """ + Downloads and processes the shapefile to create a GeoDataFrame for Philadelphia parks. + + Args: + geojson_path (str): Path to save the GeoJSON file. + park_url (str): URL to download the shapefile. + target_files (List[str]): List of files to extract from the shapefile. + file_name_prefix (str): Prefix for the file names to be extracted. + + Returns: + gpd.GeoDataFrame: GeoDataFrame containing the processed park data. 
+ """ + print("Downloading and processing park priority data...") + response: requests.Response = requests.get(park_url, stream=True) + total_size: int = int(response.headers.get("content-length", 0)) + + with tqdm( + total=total_size, unit="iB", unit_scale=True, desc="Downloading" + ) as progress_bar: + buffer: BytesIO = BytesIO() + for data in response.iter_content(1024): + size: int = buffer.write(data) + progress_bar.update(size) + + with zipfile.ZipFile(buffer) as zip_ref: + for file_name in tqdm(target_files, desc="Extracting"): + zip_ref.extract(file_name, "tmp/") + + print("Processing shapefile...") + pa_parks: gpd.GeoDataFrame = gpd.read_file( + "tmp/" + file_name_prefix + "_ParkPriorityAreas.shp" + ) + pa_parks = pa_parks.to_crs(USE_CRS) + + phl_parks: gpd.GeoDataFrame = pa_parks[pa_parks["ID"].str.startswith("42101")] + phl_parks = phl_parks.loc[:, ["ParkNeed", "geometry"]] + + if isinstance(phl_parks, gpd.GeoDataFrame): + phl_parks.rename(columns={"ParkNeed": "park_priority"}, inplace=True) + else: + raise TypeError("Expected a GeoDataFrame, got Series or another type instead") + + print(f"Writing filtered data to GeoJSON: {geojson_path}") + phl_parks.to_file(geojson_path, driver="GeoJSON") + + return phl_parks + + +def park_priority(primary_featurelayer: FeatureLayer) -> FeatureLayer: + """ + Downloads and processes park priority data, then joins it with the primary feature layer. + + Args: + primary_featurelayer (FeatureLayer): The primary feature layer to join with park priority data. + + Returns: + FeatureLayer: The primary feature layer with park priority data joined. + """ + park_url: str = get_latest_shapefile_url() + print(f"Downloading park priority data from: {park_url}") + + file_name_prefix: str = "Parkserve" + target_files: List[str] = [ + file_name_prefix + "_ParkPriorityAreas.shp", + file_name_prefix + "_ParkPriorityAreas.dbf", + file_name_prefix + "_ParkPriorityAreas.shx", + file_name_prefix + "_ParkPriorityAreas.prj", + file_name_prefix + "_ParkPriorityAreas.CPG", + file_name_prefix + "_ParkPriorityAreas.sbn", + file_name_prefix + "_ParkPriorityAreas.sbx", + ] + geojson_path: str = "tmp/phl_parks.geojson" + + os.makedirs("tmp/", exist_ok=True) + + try: + if os.path.exists(geojson_path): + print(f"GeoJSON file already exists, loading from {geojson_path}") + phl_parks: gpd.GeoDataFrame = gpd.read_file(geojson_path) + else: + raise pyogrio.errors.DataSourceError( + "GeoJSON file missing, forcing download." + ) + + except (pyogrio.errors.DataSourceError, ValueError) as e: + print(f"Error loading GeoJSON: {e}. 
Re-downloading and processing shapefile.") + if os.path.exists(geojson_path): + os.remove(geojson_path) # Delete the corrupted GeoJSON if it exists + phl_parks = download_and_process_shapefile( + geojson_path, park_url, target_files, file_name_prefix + ) + + park_priority_layer: FeatureLayer = FeatureLayer("Park Priority") + park_priority_layer.gdf = phl_parks + + primary_featurelayer.spatial_join(park_priority_layer) + return primary_featurelayer diff --git a/data/src/data_utils/phs_properties.py b/data/src/data_utils/phs_properties.py new file mode 100644 index 00000000..c906c2d1 --- /dev/null +++ b/data/src/data_utils/phs_properties.py @@ -0,0 +1,33 @@ +from classes.featurelayer import FeatureLayer +from constants.services import PHS_LAYERS_TO_LOAD + +def phs_properties(primary_featurelayer: FeatureLayer) -> FeatureLayer: + """ + Perform a spatial join between the primary feature layer and the PHS properties layer, + then update the primary feature layer with a new column 'phs_care_program' indicating + if the property is part of the PHS care program. + + Args: + primary_featurelayer (FeatureLayer): The primary feature layer to join with the PHS properties layer. + + Returns: + FeatureLayer: The updated primary feature layer with the 'phs_care_program' column. + """ + + phs_properties = FeatureLayer( + name="PHS Properties", esri_rest_urls=PHS_LAYERS_TO_LOAD, cols=["program"] + ) + + # Perform spatial join between primary feature layer and PHS properties + primary_featurelayer.spatial_join(phs_properties) + + # Initialize 'phs_care_program' column with default "no" for all rows + primary_featurelayer.gdf["phs_care_program"] = "No" + + # Set 'phs_care_program' to "yes" for matched rows + primary_featurelayer.gdf.loc[primary_featurelayer.gdf["program"].notna(), "phs_care_program"] = "Yes" + + # Rebuild the GeoDataFrame after updates + primary_featurelayer.rebuild_gdf() + + return primary_featurelayer diff --git a/data/src/data_utils/ppr_properties.py b/data/src/data_utils/ppr_properties.py new file mode 100644 index 00000000..48111b35 --- /dev/null +++ b/data/src/data_utils/ppr_properties.py @@ -0,0 +1,53 @@ +import io + +import geopandas as gpd +import requests +from classes.featurelayer import FeatureLayer +from constants.services import PPR_PROPERTIES_TO_LOAD + +from config.config import USE_CRS + + +def ppr_properties(primary_featurelayer): + fallback_url = 'https://opendata.arcgis.com/datasets/d52445160ab14380a673e5849203eb64_0.geojson' + + try: + + ppr_properties = FeatureLayer( + name="PPR Properties", + esri_rest_urls=PPR_PROPERTIES_TO_LOAD, + cols=["PUBLIC_NAME"] + ) + + if ppr_properties.gdf is None or ppr_properties.gdf.empty: + raise ValueError("PPR properties GeoDataFrame is empty or failed to load from Esri REST URL.") + + print("Loaded PPR properties from Esri REST URL.") + + except Exception as e: + print(f"Error loading PPR properties from Esri REST URL: {e}") + print("Falling back to loading from GeoJSON URL.") + + response = requests.get(fallback_url) + response.raise_for_status() + ppr_properties_gdf = gpd.read_file(io.BytesIO(response.content)) + + ppr_properties = FeatureLayer(name="PPR Properties") + ppr_properties.gdf = ppr_properties_gdf + + ppr_properties.gdf = ppr_properties.gdf[["public_name", "geometry"]] + + ppr_properties.gdf = ppr_properties.gdf.to_crs(USE_CRS) + + primary_featurelayer.spatial_join(ppr_properties) + + mask = primary_featurelayer.gdf["public_name"].notnull() + + count_dropped = mask.sum() + print(f"Number of PPR properties being dropped: 
{count_dropped}") + + primary_featurelayer.gdf = primary_featurelayer.gdf.drop(primary_featurelayer.gdf[mask].index) + + primary_featurelayer.gdf = primary_featurelayer.gdf.drop(columns=["public_name"]) + + return primary_featurelayer \ No newline at end of file diff --git a/data/src/data_utils/priority_level.py b/data/src/data_utils/priority_level.py new file mode 100644 index 00000000..33097de3 --- /dev/null +++ b/data/src/data_utils/priority_level.py @@ -0,0 +1,51 @@ +def priority_level(dataset): + priority_levels = [] + for idx, row in dataset.gdf.iterrows(): + priority_level = "" + + # Decision Points + guncrime_density_percentile = row["gun_crimes_density_percentile"] + in_phs_landcare = row["phs_care_program"] == "yes" + has_li_complaint_or_violation = ( + row["li_complaints"] is not None + and float(row["all_violations_past_year"]) > 0 + ) + very_low_tree_canopy = row["tree_canopy_gap"] >= 0.3 + + # Updated logic based on percentile values + if guncrime_density_percentile <= 50: + # Low Gun Crime Density (Bottom 50%) + priority_level = "Low" + + elif guncrime_density_percentile > 75: + # High Gun Crime Density (Top 25%) + + if has_li_complaint_or_violation: + priority_level = "High" + else: + if in_phs_landcare: + if very_low_tree_canopy: + priority_level = "High" + else: + priority_level = "Medium" + else: + priority_level = "High" + + else: + # Medium Gun Crime Density (Between 50% and 75%) + if has_li_complaint_or_violation: + if in_phs_landcare: + priority_level = "Medium" + else: + if very_low_tree_canopy: + priority_level = "High" + else: + priority_level = "Medium" + else: + priority_level = "Low" + + priority_levels.append(priority_level) + + dataset.gdf["priority_level"] = priority_levels + + return dataset diff --git a/data/src/data_utils/rco_geoms.py b/data/src/data_utils/rco_geoms.py new file mode 100644 index 00000000..6aa3dca6 --- /dev/null +++ b/data/src/data_utils/rco_geoms.py @@ -0,0 +1,57 @@ +from classes.featurelayer import FeatureLayer +from constants.services import RCOS_LAYERS_TO_LOAD +import pandas as pd + +pd.set_option("future.no_silent_downcasting", True) + + +def rco_geoms(primary_featurelayer): + rco_geoms = FeatureLayer(name="RCOs", esri_rest_urls=RCOS_LAYERS_TO_LOAD) + + rco_aggregate_cols = [ + "ORGANIZATION_NAME", + "ORGANIZATION_ADDRESS", + "PRIMARY_EMAIL", + "PRIMARY_PHONE", + ] + + rco_use_cols = ["rco_info", "rco_names", "geometry"] + + rco_geoms.gdf.loc[:, "rco_info"] = rco_geoms.gdf[rco_aggregate_cols].apply( + lambda x: "; ".join(map(str, x)), axis=1 + ) + + rco_geoms.gdf.loc[:, "rco_names"] = rco_geoms.gdf["ORGANIZATION_NAME"] + + rco_geoms.gdf = rco_geoms.gdf.loc[:, rco_use_cols].copy() + rco_geoms.rebuild_gdf() + + primary_featurelayer.spatial_join(rco_geoms) + + # Collapse columns and aggregate rco_info + group_columns = [ + col for col in primary_featurelayer.gdf.columns if col not in rco_use_cols + ] + + for col in group_columns: + # Use .infer_objects() after fillna() to fix the warning + primary_featurelayer.gdf.loc[:, col] = ( + primary_featurelayer.gdf[col].fillna("").infer_objects(copy=False) + ) + + primary_featurelayer.gdf = ( + primary_featurelayer.gdf.groupby(group_columns) + .agg( + { + "rco_info": lambda x: "|".join(map(str, x)), + "rco_names": lambda x: "|".join(map(str, x)), + "geometry": "first", + } + ) + .reset_index() + ) + + primary_featurelayer.gdf.drop_duplicates(inplace=True) + primary_featurelayer.rebuild_gdf() + + return primary_featurelayer diff --git a/data/src/data_utils/tactical_urbanism.py 
b/data/src/data_utils/tactical_urbanism.py new file mode 100644 index 00000000..df15a0f2 --- /dev/null +++ b/data/src/data_utils/tactical_urbanism.py @@ -0,0 +1,25 @@ +def tactical_urbanism(dataset): + unsafe_words = [ + "dangerous", + ] + + tactical_urbanism_values = [] + + for idx, row in dataset.gdf.iterrows(): + li_complaints_lower = str(row["li_complaints"]).lower().split(" ") + contains_unsafe_word = any(word in li_complaints_lower for word in unsafe_words) + + if ( + row["parcel_type"] == "Land" + and row["unsafe_building"] == "N" + and row["imm_dang_building"] == "N" + and not contains_unsafe_word + ): + tactical_urbanism = "Yes" + else: + tactical_urbanism = "No" + + tactical_urbanism_values.append(tactical_urbanism) + + dataset.gdf["tactical_urbanism"] = tactical_urbanism_values + return dataset diff --git a/data/src/data_utils/tree_canopy.py b/data/src/data_utils/tree_canopy.py new file mode 100644 index 00000000..bc133893 --- /dev/null +++ b/data/src/data_utils/tree_canopy.py @@ -0,0 +1,32 @@ +import requests +import io +import zipfile +import geopandas as gpd +from classes.featurelayer import FeatureLayer +from config.config import USE_CRS + + +def tree_canopy(primary_featurelayer): + tree_url = ( + "https://national-tes-data-share.s3.amazonaws.com/national_tes_share/pa.zip.zip" + ) + + tree_response = requests.get(tree_url) + + with io.BytesIO(tree_response.content) as f: + with zipfile.ZipFile(f, "r") as zip_ref: + zip_ref.extractall("tmp/") + + pa_trees = gpd.read_file("tmp/pa.shp") + pa_trees = pa_trees.to_crs(USE_CRS) + phl_trees = pa_trees[pa_trees["county"] == "Philadelphia County"] + phl_trees = phl_trees[["tc_gap", "geometry"]] + + phl_trees.rename(columns={"tc_gap": "tree_canopy_gap"}, inplace=True) + + tree_canopy = FeatureLayer("Tree Canopy") + tree_canopy.gdf = phl_trees + + primary_featurelayer.spatial_join(tree_canopy) + + return primary_featurelayer diff --git a/data/src/data_utils/unsafe_buildings.py b/data/src/data_utils/unsafe_buildings.py new file mode 100644 index 00000000..b44edd00 --- /dev/null +++ b/data/src/data_utils/unsafe_buildings.py @@ -0,0 +1,28 @@ +from classes.featurelayer import FeatureLayer +from constants.services import UNSAFE_BUILDINGS_QUERY + + +def unsafe_buildings(primary_featurelayer): + unsafe_buildings = FeatureLayer( + name="Unsafe Buildings", + carto_sql_queries=UNSAFE_BUILDINGS_QUERY, + use_wkb_geom_field="the_geom", + cols=["opa_account_num"], + ) + + unsafe_buildings.gdf.loc[:, "unsafe_building"] = "Y" + + unsafe_buildings.gdf = unsafe_buildings.gdf.rename( + columns={"opa_account_num": "opa_number"} + ) + + primary_featurelayer.opa_join( + unsafe_buildings.gdf, + "opa_number", + ) + + primary_featurelayer.gdf.loc[:, "unsafe_building"] = primary_featurelayer.gdf[ + "unsafe_building" + ].fillna("N") + + return primary_featurelayer diff --git a/data/src/data_utils/utils.py b/data/src/data_utils/utils.py new file mode 100644 index 00000000..b7b9ef4e --- /dev/null +++ b/data/src/data_utils/utils.py @@ -0,0 +1,42 @@ +import os +import re + +import requests + + +def mask_password(value: str): + """remove the password from this postgresql connect string so we don't write it to logs, etc. + + Args: + value (str): the unmasked string containing one or more postgres connect string. + + Returns: + _type_: the string with the password replaced by MASKED + """ + return re.sub(":\w+@", ":MASKED@", value) + + +def save_stream_url(url: str) -> str: + """download the file from this url to the tmp/ directory by streaming in a memory-friendly way. 
+ If local file already exists, use it and don't download. + Args: + url (str): the url of the zip file + + Returns: + str: the relative local path of the saved zip file + """ + local_filename = "tmp/" + url.split('/')[-1] + if os.path.exists(local_filename): + return local_filename + + with requests.get(url, stream=True) as r: + r.raise_for_status() + with open(local_filename, 'wb') as f: + for chunk in r.iter_content(chunk_size=8192): + # If you have chunk encoded response uncomment if + # and set chunk_size parameter to None. + #if chunk: + f.write(chunk) + f.close() + r.close() + return local_filename diff --git a/data/src/data_utils/vacant_properties.py b/data/src/data_utils/vacant_properties.py new file mode 100644 index 00000000..d6573218 --- /dev/null +++ b/data/src/data_utils/vacant_properties.py @@ -0,0 +1,186 @@ +from classes.featurelayer import FeatureLayer, google_cloud_bucket +from constants.services import VACANT_PROPS_LAYERS_TO_LOAD +import geopandas as gpd +from config.config import USE_CRS +from io import BytesIO + +import pandas as pd + + +def load_backup_data_from_gcs(file_name: str) -> gpd.GeoDataFrame: + bucket = google_cloud_bucket() + blob = bucket.blob(file_name) + if not blob.exists(): + raise FileNotFoundError(f"File {file_name} not found in the GCS bucket.") + + file_bytes = blob.download_as_bytes() + try: + gdf = gpd.read_file(BytesIO(file_bytes)) + except Exception as e: + raise ValueError(f"Error reading GeoJSON file: {e}") + + print("Loaded backup data from GCS.") + + # Ensure column names are consistent + gdf = gdf.rename( + columns={ + "ADDRESS": "address", + "OWNER1": "owner_1", + "OWNER2": "owner_2", + "BLDG_DESC": "building_description", + "CouncilDistrict": "council_district", + "ZoningBaseDistrict": "zoning_base_district", + "ZipCode": "zipcode", + "OPA_ID": "opa_id", + } + ) + + return gdf + + +def check_null_percentage(df: pd.DataFrame, threshold: float = 0.05): + """Checks if any column in the dataframe has more than the given threshold of null values.""" + null_percentages = df.isnull().mean() + for col, pct in null_percentages.items(): + if col not in ["owner1", "owner2"] and pct > threshold: + raise ValueError( + f"Column '{col}' has more than {threshold * 100}% null values ({pct * 100}%)." + ) + + +def vacant_properties() -> FeatureLayer: + vacant_properties = FeatureLayer( + name="Vacant Properties", + esri_rest_urls=VACANT_PROPS_LAYERS_TO_LOAD, + cols=[ + "ADDRESS", + "OWNER1", + "OWNER2", + "BLDG_DESC", + "COUNCILDISTRICT", + "ZONINGBASEDISTRICT", + "ZIPCODE", + "OPA_ID", + "parcel_type", + ], + ) + + # Rename columns for consistency in the original data + vacant_properties.gdf = vacant_properties.gdf.rename( + columns={ + "ADDRESS": "address", + "OWNER1": "owner_1", + "OWNER2": "owner_2", + "BLDG_DESC": "building_description", + "COUNCILDISTRICT": "council_district", + "ZONINGBASEDISTRICT": "zoning_base_district", + "ZIPCODE": "zipcode", + "OPA_ID": "opa_id", + } + ) + + vacant_land_gdf = vacant_properties.gdf[ + vacant_properties.gdf["parcel_type"] == "Land" + ] + print(f"Vacant land data size: {len(vacant_land_gdf)} rows.") + + if len(vacant_land_gdf) < 20000: + print("Vacant land data is below the threshold. 
Loading backup data from GCS.") + backup_gdf = load_backup_data_from_gcs("vacant_indicators_land_06_2024.geojson") + + # Ensure CRS is consistent with project-wide CRS (USE_CRS) + if backup_gdf.crs != USE_CRS: + print(f"Reprojecting backup data from {backup_gdf.crs} to {USE_CRS}") + backup_gdf = backup_gdf.to_crs(USE_CRS) + + # Ensure CRS is the same + if backup_gdf.crs != vacant_properties.gdf.crs: + backup_gdf = backup_gdf.to_crs(vacant_properties.gdf.crs) + + # Map backup dataset column names to match the original dataset + backup_gdf = backup_gdf.rename( + columns={ + "owner_1": "owner1", + "owner_2": "owner2", + "building_description": "bldg_desc", + "council_district": "councildistrict", + "zoning_base_district": "zoningbasedistrict", + } + ) + + # Set parcel_type to "Land" for backup data + backup_gdf["parcel_type"] = "Land" + + # Select only the columns present in the original dataset + backup_gdf = backup_gdf[vacant_properties.gdf.columns] + + # Ensure all necessary columns are present in backup data + for col in vacant_properties.gdf.columns: + if col not in backup_gdf.columns: + backup_gdf[col] = None + + # Check for column mismatches between original and backup datasets + for col in vacant_properties.gdf.columns: + if vacant_properties.gdf[col].dtype != backup_gdf[col].dtype: + print( + f"Warning: Data type mismatch in column '{col}'. Original: {vacant_properties.gdf[col].dtype}, Backup: {backup_gdf[col].dtype}" + ) + + # Verify if backup data contains more than expected null values + check_null_percentage(backup_gdf) + + # Remove existing Land data + vacant_properties.gdf = vacant_properties.gdf[ + vacant_properties.gdf["parcel_type"] != "Land" + ] + + # Concatenate the backup data with the existing data + print(f"Appending backup data ({len(backup_gdf)} rows) to the existing data.") + vacant_properties.gdf = pd.concat( + [vacant_properties.gdf, backup_gdf], ignore_index=True + ) + + # Ensure concatenated data is still a GeoDataFrame + vacant_properties.gdf = gpd.GeoDataFrame( + vacant_properties.gdf, geometry="geometry" + ) + + vacant_properties.gdf.dropna(subset=["opa_id"], inplace=True) + + # Final null value check before returning + check_null_percentage(vacant_properties.gdf) + + # Final column renaming and selection + vacant_properties.gdf = vacant_properties.gdf.rename( + columns={ + "owner1": "owner_1", + "owner2": "owner_2", + "councildistrict": "council_district", + "zoningbasedistrict": "zoning_base_district", + } + ) + + # Select only the final columns needed + final_columns = [ + "address", + "owner_1", + "owner_2", + "council_district", + "zoning_base_district", + "zipcode", + "opa_id", + "parcel_type", + "geometry", + ] + + vacant_properties.gdf = vacant_properties.gdf[final_columns] + + # Ensure concatenated data is still a GeoDataFrame + vacant_properties.gdf = gpd.GeoDataFrame(vacant_properties.gdf, geometry="geometry") + + before_drop = vacant_properties.gdf.shape[0] + vacant_properties.gdf = vacant_properties.gdf.drop_duplicates(subset="opa_id") + after_drop = vacant_properties.gdf.shape[0] + print(f"Duplicate vacant properties dropped: {before_drop - after_drop}") + + return vacant_properties From 4255b5d6f3e80ff6f374a6cfb03d8c2ff03528d0 Mon Sep 17 00:00:00 2001 From: nlebovits Date: Wed, 20 Nov 2024 22:02:50 -0500 Subject: [PATCH 14/18] add main.py for new etl script --- data/src/main.py | 178 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 178 insertions(+) create mode 100644 data/src/main.py diff --git a/data/src/main.py 
From 4255b5d6f3e80ff6f374a6cfb03d8c2ff03528d0 Mon Sep 17 00:00:00 2001
From: nlebovits
Date: Wed, 20 Nov 2024 22:02:50 -0500
Subject: [PATCH 14/18] add main.py for new etl script

---
 data/src/main.py | 178 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 178 insertions(+)
 create mode 100644 data/src/main.py

diff --git a/data/src/main.py b/data/src/main.py
new file mode 100644
index 00000000..82e54398
--- /dev/null
+++ b/data/src/main.py
@@ -0,0 +1,178 @@
+import sys
+import time
+
+from classes.backup_archive_database import BackupArchiveDatabase
+from config.config import FORCE_RELOAD
+from config.psql import conn
+from data_utils.access_process import access_process
+from data_utils.contig_neighbors import contig_neighbors
+from data_utils.dev_probability import dev_probability
+from data_utils.negligent_devs import negligent_devs
+from data_utils.opa_properties import opa_properties
+from data_utils.priority_level import priority_level
+from data_utils.vacant_properties import vacant_properties
+from data_utils.pwd_parcels import pwd_parcels
+from data_utils.city_owned_properties import city_owned_properties
+from data_utils.phs_properties import phs_properties
+from data_utils.li_violations import li_violations
+from data_utils.li_complaints import li_complaints
+from data_utils.rco_geoms import rco_geoms
+from data_utils.council_dists import council_dists
+from data_utils.tree_canopy import tree_canopy
+from data_utils.nbhoods import nbhoods
+from data_utils.gun_crimes import gun_crimes
+from data_utils.drug_crimes import drug_crimes
+from data_utils.delinquencies import delinquencies
+from data_utils.unsafe_buildings import unsafe_buildings
+from data_utils.imm_dang_buildings import imm_dang_buildings
+from data_utils.tactical_urbanism import tactical_urbanism
+from data_utils.conservatorship import conservatorship
+from data_utils.owner_type import owner_type
+from data_utils.community_gardens import community_gardens
+from data_utils.park_priority import park_priority
+from data_utils.ppr_properties import ppr_properties
+
+import pandas as pd
+import geopandas as gpd
+
+
+# Ensure the directory containing awkde is in the Python path
+awkde_path = "/usr/src/app"
+if awkde_path not in sys.path:
+    sys.path.append(awkde_path)
+
+services = [
+    # vacant designation
+    vacant_properties, # needs to run early so that other utils can make use of the `vacant` designation
+
+    # geometries/areas
+    pwd_parcels,
+    council_dists,
+    nbhoods,
+    rco_geoms,
+
+    # ownership
+    city_owned_properties,
+    phs_properties,
+    community_gardens, 
+    ppr_properties,
+    owner_type,
+
+    # quality of life
+    li_violations,
+    li_complaints,
+    tree_canopy,
+    gun_crimes,
+    drug_crimes,
+    delinquencies,
+    unsafe_buildings,
+    imm_dang_buildings,
+
+    # development
+    contig_neighbors,
+    dev_probability,
+    negligent_devs,
+
+    # access/interventions
+    tactical_urbanism,
+    conservatorship,
+    park_priority,
+
+
+]
+
+# backup sql schema if we are reloading data
+backup: BackupArchiveDatabase = None
+if FORCE_RELOAD:
+    # first archive any remaining backup that may exist from a previous run that errored
+    backup = BackupArchiveDatabase()
+    if backup.is_backup_schema_exists():
+        backup.archive_backup_schema()
+        conn.commit()
+        time.sleep(1)  # make sure we get a different timestamp
+        backup = BackupArchiveDatabase()  # create a new one so we get a new timestamp
+
+    backup.backup_schema()
+    conn.commit()
+
+dataset = opa_properties()
+
+print("Initial Dataset:")
+print("Shape:", dataset.gdf.shape)
+print("Head:\n", dataset.gdf.head())
+print("NA Counts:\n", dataset.gdf.isna().sum())
+
+for service in services:
+    dataset = service(dataset)
+    print(f"After {service.__name__}:")
+    print("Dataset type:", type(dataset.gdf).__name__)
+    print("Shape:", dataset.gdf.shape)
+    print("Head:\n", dataset.gdf.head())
+    print("NA Counts:\n", dataset.gdf.isna().sum())
+
+before_drop = dataset.gdf.shape[0]
+dataset.gdf = dataset.gdf.drop_duplicates(subset="opa_id")
+after_drop = dataset.gdf.shape[0]
+print(
+    f"Duplicate dataset rows dropped after initial services: {before_drop - after_drop}"
+)
+
+# Add Priority Level
+dataset = priority_level(dataset)
+
+# Print the distribution of "priority_level"
+distribution = dataset.gdf["priority_level"].value_counts()
+print("Distribution of priority level:")
+print(distribution)
+
+# Add Access Process
+dataset = access_process(dataset)
+
+# Print the distribution of "access_process"
+distribution = dataset.gdf["access_process"].value_counts()
+print("Distribution of access process:")
+print(distribution)
+
+before_drop = dataset.gdf.shape[0]
+dataset.gdf = dataset.gdf.drop_duplicates(subset="opa_id")
+after_drop = dataset.gdf.shape[0]
+print(f"Duplicate final dataset rows dropped: {before_drop - after_drop}")
+
+# Convert problematic columns to numeric
+numeric_columns = [
+    "market_value",
+    "sale_price",
+    "total_assessment",
+    "total_due",
+    "num_years_owed",
+    "permit_count",
+]
+for col in numeric_columns:
+    dataset.gdf[col] = pd.to_numeric(dataset.gdf[col], errors="coerce")
+
+dataset.gdf["most_recent_year_owed"] = dataset.gdf["most_recent_year_owed"].astype(str)
+
+print("Column data types before exporting to Parquet:")
+print(dataset.gdf.dtypes)
+
+# Quick dataset profiling
+print("\nQuick dataset profile:")
+
+# 1) Number of NA values per column
+print("\nNumber of NA values per column:")
+print(dataset.gdf.isna().sum())
+
+# 2) Mean, median, and std of numeric columns
+print("\nMean, Median, and Standard Deviation of numeric columns:")
+numeric_columns = dataset.gdf.select_dtypes(include=["float", "int"]).columns
+numeric_summary = dataset.gdf[numeric_columns].describe().loc[["mean", "50%", "std"]]
+numeric_summary.rename(index={"50%": "median"}, inplace=True)
+print(numeric_summary)
+
+# 3) Number of unique values in string columns
+print("\nNumber of unique values in string columns:")
+string_columns = dataset.gdf.select_dtypes(include=["object", "string"]).columns
+unique_values = dataset.gdf[string_columns].nunique()
+print(unique_values)
+
+dataset.gdf.to_parquet("tmp/test_output.parquet")
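
Note: main.py treats the ETL as an ordered list of functions, each taking the working layer and returning it enriched, with per-step prints that make it easy to bisect where a column or row count regresses. A stripped-down sketch of that composition pattern (assuming, as in this script, that the dataset object exposes a .gdf GeoDataFrame):

    from typing import Callable, Iterable


    def run_pipeline(dataset, services: Iterable[Callable]):
        # Order matters: early services add columns (e.g. the vacancy
        # designation) that later services read.
        for service in services:
            dataset = service(dataset)
            print(f"After {service.__name__}: shape={dataset.gdf.shape}")
        return dataset
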
From e8ceaaad599ce52c4206398e4c473537a296c14c Mon Sep 17 00:00:00 2001
From: nlebovits
Date: Wed, 20 Nov 2024 22:37:10 -0500
Subject: [PATCH 15/18] fix imports

---
 data/src/new_etl/data_utils/__init__.py       | 29 +++++++++++++++++++
 .../{data_utils => }/access_process.py        |  0
 .../{data_utils => }/city_owned_properties.py |  4 +--
 .../{data_utils => }/community_gardens.py     |  4 +--
 .../{data_utils => }/conservatorship.py       |  0
 .../{data_utils => }/contig_neighbors.py      |  0
 .../{data_utils => }/council_dists.py         |  4 +--
 .../new_etl/data_utils/data_utils/__init__.py | 29 -------------------
 .../{data_utils => }/delinquencies.py         |  4 +--
 .../{data_utils => }/dev_probability.py       |  5 ++--
 .../{data_utils => }/drug_crimes.py           |  4 +--
 .../data_utils/{data_utils => }/gun_crimes.py |  4 +--
 .../{data_utils => }/imm_dang_buildings.py    |  4 +--
 .../data_utils/{data_utils => }/kde.py        |  2 +-
 .../{data_utils => }/li_complaints.py         |  4 +--
 .../{data_utils => }/li_violations.py         |  4 +--
 .../data_utils/{data_utils => }/nbhoods.py    |  4 +--
 .../{data_utils => }/negligent_devs.py        |  0
 .../{data_utils => }/opa_properties.py        |  4 +--
 .../data_utils/{data_utils => }/owner_type.py |  2 +-
 .../{data_utils => }/park_priority.py         |  2 +-
 .../{data_utils => }/phs_properties.py        |  4 +--
 .../{data_utils => }/ppr_properties.py        |  4 +--
 .../{data_utils => }/priority_level.py        |  0
 .../{data_utils => }/pwd_parcels.py           |  4 +--
 .../data_utils/{data_utils => }/rco_geoms.py  |  4 +--
 .../{data_utils => }/tactical_urbanism.py     |  5 ++--
 .../{data_utils => }/tree_canopy.py           |  2 +-
 .../{data_utils => }/unsafe_buildings.py      |  4 +--
 .../data_utils/{data_utils => }/utils.py      |  0
 .../{data_utils => }/vacant_properties.py     |  4 +--
 31 files changed, 72 insertions(+), 72 deletions(-)
 rename data/src/new_etl/data_utils/{data_utils => }/access_process.py (100%)
 rename data/src/new_etl/data_utils/{data_utils => }/city_owned_properties.py (96%)
 rename data/src/new_etl/data_utils/{data_utils => }/community_gardens.py (92%)
 rename data/src/new_etl/data_utils/{data_utils => }/conservatorship.py (100%)
 rename data/src/new_etl/data_utils/{data_utils => }/contig_neighbors.py (100%)
 rename data/src/new_etl/data_utils/{data_utils => }/council_dists.py (90%)
 delete mode 100644 data/src/new_etl/data_utils/data_utils/__init__.py
 rename data/src/new_etl/data_utils/{data_utils => }/delinquencies.py (91%)
 rename data/src/new_etl/data_utils/{data_utils => }/dev_probability.py (94%)
 rename data/src/new_etl/data_utils/{data_utils => }/drug_crimes.py (57%)
 rename data/src/new_etl/data_utils/{data_utils => }/gun_crimes.py (54%)
 rename data/src/new_etl/data_utils/{data_utils => }/imm_dang_buildings.py (86%)
 rename data/src/new_etl/data_utils/{data_utils => }/kde.py (99%)
 rename data/src/new_etl/data_utils/{data_utils => }/li_complaints.py (60%)
 rename data/src/new_etl/data_utils/{data_utils => }/li_violations.py (97%)
 rename data/src/new_etl/data_utils/{data_utils => }/nbhoods.py (86%)
 rename data/src/new_etl/data_utils/{data_utils => }/negligent_devs.py (100%)
 rename data/src/new_etl/data_utils/{data_utils => }/opa_properties.py (97%)
 rename data/src/new_etl/data_utils/{data_utils => }/owner_type.py (96%)
 rename data/src/new_etl/data_utils/{data_utils => }/park_priority.py (99%)
 rename data/src/new_etl/data_utils/{data_utils => }/phs_properties.py (91%)
 rename data/src/new_etl/data_utils/{data_utils => }/ppr_properties.py (94%)
 rename data/src/new_etl/data_utils/{data_utils => }/priority_level.py (100%)
 rename data/src/new_etl/data_utils/{data_utils => }/pwd_parcels.py (96%)
 rename data/src/new_etl/data_utils/{data_utils => }/rco_geoms.py (93%)
 rename data/src/new_etl/data_utils/{data_utils => }/tactical_urbanism.py (90%)
 rename data/src/new_etl/data_utils/{data_utils => }/tree_canopy.py (94%)
 rename data/src/new_etl/data_utils/{data_utils => }/unsafe_buildings.py (86%)
 rename data/src/new_etl/data_utils/{data_utils => }/utils.py (100%)
 rename data/src/new_etl/data_utils/{data_utils => }/vacant_properties.py (96%)

diff --git a/data/src/new_etl/data_utils/__init__.py b/data/src/new_etl/data_utils/__init__.py
index e69de29b..d351ecd4 100644
--- a/data/src/new_etl/data_utils/__init__.py
+++ b/data/src/new_etl/data_utils/__init__.py
@@ -0,0 +1,29 @@
+from .city_owned_properties import city_owned_properties
+from .phs_properties import phs_properties
+from .li_violations import li_violations
+from .li_complaints import li_complaints
+from .rco_geoms import rco_geoms
+from .tree_canopy import tree_canopy
+from .nbhoods import nbhoods
+from .gun_crimes import gun_crimes
+from .delinquencies import delinquencies
+from .opa_properties import opa_properties
+from .vacant_properties import vacant_properties
+from .priority_level import priority_level
+from .access_process import access_process
+
+__all__ = [
+    "city_owned_properties",
+    "phs_properties",
+    "li_violations",
+    "li_complaints",
+    "rco_geoms",
+    "tree_canopy",
+ "nbhoods", + "gun_crimes", + "delinquencies", + "opa_properties", + "vacant_properties", + "priority_level", + "access_process", +] diff --git a/data/src/new_etl/data_utils/data_utils/access_process.py b/data/src/new_etl/data_utils/access_process.py similarity index 100% rename from data/src/new_etl/data_utils/data_utils/access_process.py rename to data/src/new_etl/data_utils/access_process.py diff --git a/data/src/new_etl/data_utils/data_utils/city_owned_properties.py b/data/src/new_etl/data_utils/city_owned_properties.py similarity index 96% rename from data/src/new_etl/data_utils/data_utils/city_owned_properties.py rename to data/src/new_etl/data_utils/city_owned_properties.py index dc6f708a..e9d93059 100644 --- a/data/src/new_etl/data_utils/data_utils/city_owned_properties.py +++ b/data/src/new_etl/data_utils/city_owned_properties.py @@ -1,5 +1,5 @@ -from classes.featurelayer import FeatureLayer -from constants.services import CITY_OWNED_PROPERTIES_TO_LOAD +from ..classes.featurelayer import FeatureLayer +from ..constants.services import CITY_OWNED_PROPERTIES_TO_LOAD def city_owned_properties(primary_featurelayer: FeatureLayer) -> FeatureLayer: diff --git a/data/src/new_etl/data_utils/data_utils/community_gardens.py b/data/src/new_etl/data_utils/community_gardens.py similarity index 92% rename from data/src/new_etl/data_utils/data_utils/community_gardens.py rename to data/src/new_etl/data_utils/community_gardens.py index 603d2b7c..1bb79ee2 100644 --- a/data/src/new_etl/data_utils/data_utils/community_gardens.py +++ b/data/src/new_etl/data_utils/community_gardens.py @@ -1,5 +1,5 @@ -from classes.featurelayer import FeatureLayer -from constants.services import COMMUNITY_GARDENS_TO_LOAD +from ..classes.featurelayer import FeatureLayer +from ..constants.services import COMMUNITY_GARDENS_TO_LOAD def community_gardens(primary_featurelayer): diff --git a/data/src/new_etl/data_utils/data_utils/conservatorship.py b/data/src/new_etl/data_utils/conservatorship.py similarity index 100% rename from data/src/new_etl/data_utils/data_utils/conservatorship.py rename to data/src/new_etl/data_utils/conservatorship.py diff --git a/data/src/new_etl/data_utils/data_utils/contig_neighbors.py b/data/src/new_etl/data_utils/contig_neighbors.py similarity index 100% rename from data/src/new_etl/data_utils/data_utils/contig_neighbors.py rename to data/src/new_etl/data_utils/contig_neighbors.py diff --git a/data/src/new_etl/data_utils/data_utils/council_dists.py b/data/src/new_etl/data_utils/council_dists.py similarity index 90% rename from data/src/new_etl/data_utils/data_utils/council_dists.py rename to data/src/new_etl/data_utils/council_dists.py index 4b87f259..6ef8b8ee 100644 --- a/data/src/new_etl/data_utils/data_utils/council_dists.py +++ b/data/src/new_etl/data_utils/council_dists.py @@ -1,5 +1,5 @@ -from classes.featurelayer import FeatureLayer -from constants.services import COUNCIL_DISTRICTS_TO_LOAD +from ..classes.featurelayer import FeatureLayer +from ..constants.services import COUNCIL_DISTRICTS_TO_LOAD import pandas as pd diff --git a/data/src/new_etl/data_utils/data_utils/__init__.py b/data/src/new_etl/data_utils/data_utils/__init__.py deleted file mode 100644 index d351ecd4..00000000 --- a/data/src/new_etl/data_utils/data_utils/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -from .city_owned_properties import city_owned_properties -from .phs_properties import phs_properties -from .li_violations import li_violations -from .li_complaints import li_complaints -from .rco_geoms import rco_geoms -from 
.tree_canopy import tree_canopy -from .nbhoods import nbhoods -from .gun_crimes import gun_crimes -from .delinquencies import delinquencies -from .opa_properties import opa_properties -from .vacant_properties import vacant_properties -from .priority_level import priority_level -from .access_process import access_process - -__all__ = [ - "city_owned_properties", - "phs_properties", - "li_violations", - "li_complaints", - "rco_geoms", - "tree_canopy", - "nbhoods", - "gun_crimes", - "delinquencies", - "opa_properties", - "vacant_properties", - "priority_level", - "access_process", -] diff --git a/data/src/new_etl/data_utils/data_utils/delinquencies.py b/data/src/new_etl/data_utils/delinquencies.py similarity index 91% rename from data/src/new_etl/data_utils/data_utils/delinquencies.py rename to data/src/new_etl/data_utils/delinquencies.py index 48c474ac..7512c163 100644 --- a/data/src/new_etl/data_utils/data_utils/delinquencies.py +++ b/data/src/new_etl/data_utils/delinquencies.py @@ -1,5 +1,5 @@ -from classes.featurelayer import FeatureLayer -from constants.services import DELINQUENCIES_QUERY +from ..classes.featurelayer import FeatureLayer +from ..constants.services import DELINQUENCIES_QUERY def delinquencies(primary_featurelayer): diff --git a/data/src/new_etl/data_utils/data_utils/dev_probability.py b/data/src/new_etl/data_utils/dev_probability.py similarity index 94% rename from data/src/new_etl/data_utils/data_utils/dev_probability.py rename to data/src/new_etl/data_utils/dev_probability.py index 4c8a220f..c3ede1cd 100644 --- a/data/src/new_etl/data_utils/data_utils/dev_probability.py +++ b/data/src/new_etl/data_utils/dev_probability.py @@ -2,12 +2,13 @@ import jenkspy import pandas as pd import requests -from classes.featurelayer import FeatureLayer -from constants.services import CENSUS_BGS_URL, PERMITS_QUERY +from ..classes.featurelayer import FeatureLayer +from ..constants.services import CENSUS_BGS_URL, PERMITS_QUERY from config.config import USE_CRS + def dev_probability(primary_featurelayer): census_bgs_gdf = gpd.read_file(CENSUS_BGS_URL) census_bgs_gdf = census_bgs_gdf.to_crs(USE_CRS) diff --git a/data/src/new_etl/data_utils/data_utils/drug_crimes.py b/data/src/new_etl/data_utils/drug_crimes.py similarity index 57% rename from data/src/new_etl/data_utils/data_utils/drug_crimes.py rename to data/src/new_etl/data_utils/drug_crimes.py index 61a4a43c..371fac36 100644 --- a/data/src/new_etl/data_utils/data_utils/drug_crimes.py +++ b/data/src/new_etl/data_utils/drug_crimes.py @@ -1,7 +1,7 @@ -from constants.services import DRUGCRIME_SQL_QUERY +from ..constants.services import DRUGCRIME_SQL_QUERY -from data_utils.kde import apply_kde_to_primary +from new_etl.data_utils.kde import apply_kde_to_primary def drug_crimes(primary_featurelayer): diff --git a/data/src/new_etl/data_utils/data_utils/gun_crimes.py b/data/src/new_etl/data_utils/gun_crimes.py similarity index 54% rename from data/src/new_etl/data_utils/data_utils/gun_crimes.py rename to data/src/new_etl/data_utils/gun_crimes.py index 27155546..0b038eef 100644 --- a/data/src/new_etl/data_utils/data_utils/gun_crimes.py +++ b/data/src/new_etl/data_utils/gun_crimes.py @@ -1,7 +1,7 @@ -from constants.services import GUNCRIME_SQL_QUERY +from ..constants.services import GUNCRIME_SQL_QUERY -from data_utils.kde import apply_kde_to_primary +from new_etl.data_utils.kde import apply_kde_to_primary def gun_crimes(primary_featurelayer): diff --git a/data/src/new_etl/data_utils/data_utils/imm_dang_buildings.py 
b/data/src/new_etl/data_utils/imm_dang_buildings.py similarity index 86% rename from data/src/new_etl/data_utils/data_utils/imm_dang_buildings.py rename to data/src/new_etl/data_utils/imm_dang_buildings.py index 7e7041ba..628c88b0 100644 --- a/data/src/new_etl/data_utils/data_utils/imm_dang_buildings.py +++ b/data/src/new_etl/data_utils/imm_dang_buildings.py @@ -1,5 +1,5 @@ -from classes.featurelayer import FeatureLayer -from constants.services import IMMINENT_DANGER_BUILDINGS_QUERY +from ..classes.featurelayer import FeatureLayer +from ..constants.services import IMMINENT_DANGER_BUILDINGS_QUERY def imm_dang_buildings(primary_featurelayer): diff --git a/data/src/new_etl/data_utils/data_utils/kde.py b/data/src/new_etl/data_utils/kde.py similarity index 99% rename from data/src/new_etl/data_utils/data_utils/kde.py rename to data/src/new_etl/data_utils/kde.py index 477cc306..b3789f74 100644 --- a/data/src/new_etl/data_utils/data_utils/kde.py +++ b/data/src/new_etl/data_utils/kde.py @@ -1,7 +1,7 @@ import numpy as np import rasterio from awkde.awkde import GaussianKDE -from classes.featurelayer import FeatureLayer +from ..classes.featurelayer import FeatureLayer from config.config import USE_CRS from rasterio.transform import Affine from tqdm import tqdm diff --git a/data/src/new_etl/data_utils/data_utils/li_complaints.py b/data/src/new_etl/data_utils/li_complaints.py similarity index 60% rename from data/src/new_etl/data_utils/data_utils/li_complaints.py rename to data/src/new_etl/data_utils/li_complaints.py index ee219f36..c6253d4c 100644 --- a/data/src/new_etl/data_utils/data_utils/li_complaints.py +++ b/data/src/new_etl/data_utils/li_complaints.py @@ -1,7 +1,7 @@ -from constants.services import COMPLAINTS_SQL_QUERY +from ..constants.services import COMPLAINTS_SQL_QUERY -from data_utils.kde import apply_kde_to_primary +from ..data_utils.kde import apply_kde_to_primary def li_complaints(primary_featurelayer): diff --git a/data/src/new_etl/data_utils/data_utils/li_violations.py b/data/src/new_etl/data_utils/li_violations.py similarity index 97% rename from data/src/new_etl/data_utils/data_utils/li_violations.py rename to data/src/new_etl/data_utils/li_violations.py index c207bf4c..52dfbe16 100644 --- a/data/src/new_etl/data_utils/data_utils/li_violations.py +++ b/data/src/new_etl/data_utils/li_violations.py @@ -1,8 +1,8 @@ import pandas as pd import geopandas as gpd from typing import List -from classes.featurelayer import FeatureLayer -from constants.services import VIOLATIONS_SQL_QUERY +from ..classes.featurelayer import FeatureLayer +from ..constants.services import VIOLATIONS_SQL_QUERY def li_violations(primary_featurelayer: FeatureLayer) -> FeatureLayer: diff --git a/data/src/new_etl/data_utils/data_utils/nbhoods.py b/data/src/new_etl/data_utils/nbhoods.py similarity index 86% rename from data/src/new_etl/data_utils/data_utils/nbhoods.py rename to data/src/new_etl/data_utils/nbhoods.py index d0de302b..f0fe7570 100644 --- a/data/src/new_etl/data_utils/data_utils/nbhoods.py +++ b/data/src/new_etl/data_utils/nbhoods.py @@ -1,6 +1,6 @@ import geopandas as gpd -from classes.featurelayer import FeatureLayer -from constants.services import NBHOODS_URL +from ..classes.featurelayer import FeatureLayer +from ..constants.services import NBHOODS_URL from config.config import USE_CRS diff --git a/data/src/new_etl/data_utils/data_utils/negligent_devs.py b/data/src/new_etl/data_utils/negligent_devs.py similarity index 100% rename from data/src/new_etl/data_utils/data_utils/negligent_devs.py rename to 
data/src/new_etl/data_utils/negligent_devs.py diff --git a/data/src/new_etl/data_utils/data_utils/opa_properties.py b/data/src/new_etl/data_utils/opa_properties.py similarity index 97% rename from data/src/new_etl/data_utils/data_utils/opa_properties.py rename to data/src/new_etl/data_utils/opa_properties.py index 0b71eccd..d74db05d 100644 --- a/data/src/new_etl/data_utils/data_utils/opa_properties.py +++ b/data/src/new_etl/data_utils/opa_properties.py @@ -1,5 +1,5 @@ -from classes.featurelayer import FeatureLayer -from constants.services import OPA_PROPERTIES_QUERY +from ..classes.featurelayer import FeatureLayer +from ..constants.services import OPA_PROPERTIES_QUERY import pandas as pd import re diff --git a/data/src/new_etl/data_utils/data_utils/owner_type.py b/data/src/new_etl/data_utils/owner_type.py similarity index 96% rename from data/src/new_etl/data_utils/data_utils/owner_type.py rename to data/src/new_etl/data_utils/owner_type.py index bd2aa7fd..bcae8e00 100644 --- a/data/src/new_etl/data_utils/data_utils/owner_type.py +++ b/data/src/new_etl/data_utils/owner_type.py @@ -1,5 +1,5 @@ import pandas as pd -from classes.featurelayer import FeatureLayer +from ..classes.featurelayer import FeatureLayer def owner_type(primary_featurelayer: FeatureLayer) -> FeatureLayer: diff --git a/data/src/new_etl/data_utils/data_utils/park_priority.py b/data/src/new_etl/data_utils/park_priority.py similarity index 99% rename from data/src/new_etl/data_utils/data_utils/park_priority.py rename to data/src/new_etl/data_utils/park_priority.py index 7a97fb3b..a35b652e 100644 --- a/data/src/new_etl/data_utils/data_utils/park_priority.py +++ b/data/src/new_etl/data_utils/park_priority.py @@ -6,7 +6,7 @@ import geopandas as gpd import requests from bs4 import BeautifulSoup -from classes.featurelayer import FeatureLayer +from ..classes.featurelayer import FeatureLayer from config.config import USE_CRS from tqdm import tqdm import pyogrio diff --git a/data/src/new_etl/data_utils/data_utils/phs_properties.py b/data/src/new_etl/data_utils/phs_properties.py similarity index 91% rename from data/src/new_etl/data_utils/data_utils/phs_properties.py rename to data/src/new_etl/data_utils/phs_properties.py index f1331b28..fb0f20be 100644 --- a/data/src/new_etl/data_utils/data_utils/phs_properties.py +++ b/data/src/new_etl/data_utils/phs_properties.py @@ -1,5 +1,5 @@ -from classes.featurelayer import FeatureLayer -from constants.services import PHS_LAYERS_TO_LOAD +from ..classes.featurelayer import FeatureLayer +from ..constants.services import PHS_LAYERS_TO_LOAD def phs_properties(primary_featurelayer: FeatureLayer) -> FeatureLayer: diff --git a/data/src/new_etl/data_utils/data_utils/ppr_properties.py b/data/src/new_etl/data_utils/ppr_properties.py similarity index 94% rename from data/src/new_etl/data_utils/data_utils/ppr_properties.py rename to data/src/new_etl/data_utils/ppr_properties.py index 67e7ce28..568ea048 100644 --- a/data/src/new_etl/data_utils/data_utils/ppr_properties.py +++ b/data/src/new_etl/data_utils/ppr_properties.py @@ -2,8 +2,8 @@ import geopandas as gpd import requests -from classes.featurelayer import FeatureLayer -from constants.services import PPR_PROPERTIES_TO_LOAD +from ..classes.featurelayer import FeatureLayer +from ..constants.services import PPR_PROPERTIES_TO_LOAD from config.config import USE_CRS diff --git a/data/src/new_etl/data_utils/data_utils/priority_level.py b/data/src/new_etl/data_utils/priority_level.py similarity index 100% rename from 
data/src/new_etl/data_utils/data_utils/priority_level.py rename to data/src/new_etl/data_utils/priority_level.py diff --git a/data/src/new_etl/data_utils/data_utils/pwd_parcels.py b/data/src/new_etl/data_utils/pwd_parcels.py similarity index 96% rename from data/src/new_etl/data_utils/data_utils/pwd_parcels.py rename to data/src/new_etl/data_utils/pwd_parcels.py index 00fadc8c..f8b5ccff 100644 --- a/data/src/new_etl/data_utils/data_utils/pwd_parcels.py +++ b/data/src/new_etl/data_utils/pwd_parcels.py @@ -1,5 +1,5 @@ -from classes.featurelayer import FeatureLayer -from constants.services import PWD_PARCELS_QUERY +from ..classes.featurelayer import FeatureLayer +from ..constants.services import PWD_PARCELS_QUERY import geopandas as gpd diff --git a/data/src/new_etl/data_utils/data_utils/rco_geoms.py b/data/src/new_etl/data_utils/rco_geoms.py similarity index 93% rename from data/src/new_etl/data_utils/data_utils/rco_geoms.py rename to data/src/new_etl/data_utils/rco_geoms.py index 8f293c2a..361a5daf 100644 --- a/data/src/new_etl/data_utils/data_utils/rco_geoms.py +++ b/data/src/new_etl/data_utils/rco_geoms.py @@ -1,5 +1,5 @@ -from classes.featurelayer import FeatureLayer -from constants.services import RCOS_LAYERS_TO_LOAD +from ..classes.featurelayer import FeatureLayer +from ..constants.services import RCOS_LAYERS_TO_LOAD import pandas as pd pd.set_option("future.no_silent_downcasting", True) diff --git a/data/src/new_etl/data_utils/data_utils/tactical_urbanism.py b/data/src/new_etl/data_utils/tactical_urbanism.py similarity index 90% rename from data/src/new_etl/data_utils/data_utils/tactical_urbanism.py rename to data/src/new_etl/data_utils/tactical_urbanism.py index 205ae264..63e4b0e6 100644 --- a/data/src/new_etl/data_utils/data_utils/tactical_urbanism.py +++ b/data/src/new_etl/data_utils/tactical_urbanism.py @@ -1,6 +1,5 @@ -from classes.featurelayer import ( - FeatureLayer, -) # Replace with the actual module where FeatureLayer is defined +from ..classes.featurelayer import FeatureLayer + def tactical_urbanism(primary_featurelayer: FeatureLayer) -> FeatureLayer: diff --git a/data/src/new_etl/data_utils/data_utils/tree_canopy.py b/data/src/new_etl/data_utils/tree_canopy.py similarity index 94% rename from data/src/new_etl/data_utils/data_utils/tree_canopy.py rename to data/src/new_etl/data_utils/tree_canopy.py index bc133893..108d505d 100644 --- a/data/src/new_etl/data_utils/data_utils/tree_canopy.py +++ b/data/src/new_etl/data_utils/tree_canopy.py @@ -2,7 +2,7 @@ import io import zipfile import geopandas as gpd -from classes.featurelayer import FeatureLayer +from ..classes.featurelayer import FeatureLayer from config.config import USE_CRS diff --git a/data/src/new_etl/data_utils/data_utils/unsafe_buildings.py b/data/src/new_etl/data_utils/unsafe_buildings.py similarity index 86% rename from data/src/new_etl/data_utils/data_utils/unsafe_buildings.py rename to data/src/new_etl/data_utils/unsafe_buildings.py index b44edd00..2aae8dfd 100644 --- a/data/src/new_etl/data_utils/data_utils/unsafe_buildings.py +++ b/data/src/new_etl/data_utils/unsafe_buildings.py @@ -1,5 +1,5 @@ -from classes.featurelayer import FeatureLayer -from constants.services import UNSAFE_BUILDINGS_QUERY +from ..classes.featurelayer import FeatureLayer +from ..constants.services import UNSAFE_BUILDINGS_QUERY def unsafe_buildings(primary_featurelayer): diff --git a/data/src/new_etl/data_utils/data_utils/utils.py b/data/src/new_etl/data_utils/utils.py similarity index 100% rename from 
data/src/new_etl/data_utils/data_utils/utils.py
rename to data/src/new_etl/data_utils/utils.py
diff --git a/data/src/new_etl/data_utils/data_utils/vacant_properties.py b/data/src/new_etl/data_utils/vacant_properties.py
similarity index 96%
rename from data/src/new_etl/data_utils/data_utils/vacant_properties.py
rename to data/src/new_etl/data_utils/vacant_properties.py
index f3457be9..5e52344d 100644
--- a/data/src/new_etl/data_utils/data_utils/vacant_properties.py
+++ b/data/src/new_etl/data_utils/vacant_properties.py
@@ -1,5 +1,5 @@
-from classes.featurelayer import FeatureLayer, google_cloud_bucket
-from constants.services import VACANT_PROPS_LAYERS_TO_LOAD
+from ..classes.featurelayer import FeatureLayer, google_cloud_bucket
+from ..constants.services import VACANT_PROPS_LAYERS_TO_LOAD
 import geopandas as gpd
 from io import BytesIO
 import pandas as pd

From b228c3adfd343cfc2c36ccb3b99a80e195a8e687 Mon Sep 17 00:00:00 2001
From: nlebovits
Date: Thu, 21 Nov 2024 00:43:43 -0500
Subject: [PATCH 16/18] clean up li complaints-related items; add dor parcel
 boundaries (not yet finished--need to handle mismatched number of
 observations)

---
 data/src/main.py                              | 72 ++++++-----------
 data/src/new_etl/constants/services.py        |  4 ++
 .../src/new_etl/data_utils/conservatorship.py | 23 +-----
 data/src/new_etl/data_utils/dor_parcels.py    | 71 ++++++++++++++++++
 data/src/new_etl/data_utils/li_violations.py  | 16 -----
 data/src/new_etl/data_utils/priority_level.py | 12 ++--
 6 files changed, 112 insertions(+), 86 deletions(-)
 create mode 100644 data/src/new_etl/data_utils/dor_parcels.py

diff --git a/data/src/main.py b/data/src/main.py
index 82e54398..35596318 100644
--- a/data/src/main.py
+++ b/data/src/main.py
@@ -1,36 +1,36 @@
 import sys
 import time
 
-from classes.backup_archive_database import BackupArchiveDatabase
 from config.config import FORCE_RELOAD
 from config.psql import conn
-from data_utils.access_process import access_process
-from data_utils.contig_neighbors import contig_neighbors
-from data_utils.dev_probability import dev_probability
-from data_utils.negligent_devs import negligent_devs
-from data_utils.opa_properties import opa_properties
-from data_utils.priority_level import priority_level
-from data_utils.vacant_properties import vacant_properties
-from data_utils.pwd_parcels import pwd_parcels
-from data_utils.city_owned_properties import city_owned_properties
-from data_utils.phs_properties import phs_properties
-from data_utils.li_violations import li_violations
-from data_utils.li_complaints import li_complaints
-from data_utils.rco_geoms import rco_geoms
-from data_utils.council_dists import council_dists
-from data_utils.tree_canopy import tree_canopy
-from data_utils.nbhoods import nbhoods
-from data_utils.gun_crimes import gun_crimes
-from data_utils.drug_crimes import drug_crimes
-from data_utils.delinquencies import delinquencies
-from data_utils.unsafe_buildings import unsafe_buildings
-from data_utils.imm_dang_buildings import imm_dang_buildings
-from data_utils.tactical_urbanism import tactical_urbanism
-from data_utils.conservatorship import conservatorship
-from data_utils.owner_type import owner_type
-from data_utils.community_gardens import community_gardens
-from data_utils.park_priority import park_priority
-from data_utils.ppr_properties import ppr_properties
+from new_etl.data_utils.access_process import access_process
+from new_etl.data_utils.contig_neighbors import contig_neighbors
+from new_etl.data_utils.dev_probability import dev_probability
+from 
new_etl.data_utils.negligent_devs import negligent_devs +from new_etl.data_utils.opa_properties import opa_properties +from new_etl.data_utils.priority_level import priority_level +from new_etl.data_utils.vacant_properties import vacant_properties +from new_etl.data_utils.pwd_parcels import pwd_parcels +from new_etl.data_utils.dor_parcels import dor_parcels +from new_etl.data_utils.city_owned_properties import city_owned_properties +from new_etl.data_utils.phs_properties import phs_properties +from new_etl.data_utils.li_violations import li_violations +from new_etl.data_utils.li_complaints import li_complaints +from new_etl.data_utils.rco_geoms import rco_geoms +from new_etl.data_utils.council_dists import council_dists +from new_etl.data_utils.tree_canopy import tree_canopy +from new_etl.data_utils.nbhoods import nbhoods +from new_etl.data_utils.gun_crimes import gun_crimes +from new_etl.data_utils.drug_crimes import drug_crimes +from new_etl.data_utils.delinquencies import delinquencies +from new_etl.data_utils.unsafe_buildings import unsafe_buildings +from new_etl.data_utils.imm_dang_buildings import imm_dang_buildings +from new_etl.data_utils.tactical_urbanism import tactical_urbanism +from new_etl.data_utils.conservatorship import conservatorship +from new_etl.data_utils.owner_type import owner_type +from new_etl.data_utils.community_gardens import community_gardens +from new_etl.data_utils.park_priority import park_priority +from new_etl.data_utils.ppr_properties import ppr_properties import pandas as pd import geopandas as gpd @@ -46,7 +46,7 @@ vacant_properties, # needs to run early so that other utils can make use of the `vacant` designation # geometries/areas - pwd_parcels, + dor_parcels, council_dists, nbhoods, rco_geoms, @@ -81,20 +81,6 @@ ] -# backup sql schema if we are reloading data -backup: BackupArchiveDatabase = None -if FORCE_RELOAD: - # first archive any remaining backup that may exist from a previous run that errored - backup = BackupArchiveDatabase() - if backup.is_backup_schema_exists(): - backup.archive_backup_schema() - conn.commit() - time.sleep(1) # make sure we get a different timestamp - backup = BackupArchiveDatabase() # create a new one so we get a new timestamp - - backup.backup_schema() - conn.commit() - dataset = opa_properties() print("Initial Dataset:") diff --git a/data/src/new_etl/constants/services.py b/data/src/new_etl/constants/services.py index 34f2eb41..f372fad5 100644 --- a/data/src/new_etl/constants/services.py +++ b/data/src/new_etl/constants/services.py @@ -94,3 +94,7 @@ CENSUS_BGS_URL = ( "https://opendata.arcgis.com/datasets/2f982bada233478ea0100528227febce_0.geojson" ) + +DOR_PARCELS_URL = ( + "https://opendata.arcgis.com/datasets/1c57dd1b3ff84449a4b0e3fb29d3cafd_0.geojson" + ) \ No newline at end of file diff --git a/data/src/new_etl/data_utils/conservatorship.py b/data/src/new_etl/data_utils/conservatorship.py index 5f9c9793..fda16a64 100644 --- a/data/src/new_etl/data_utils/conservatorship.py +++ b/data/src/new_etl/data_utils/conservatorship.py @@ -7,24 +7,6 @@ est ) -blight_words = [ - "weed", - "rubbish", - "garbage", - "tire", - "debris", - "clean", - "waste", - "vegetation", - "dumping", - "scrap", - "auto", - "vehicle", - "graffiti", - "dangerous", -] - - def conservatorship(primary_featurelayer): conservatorships = [] @@ -34,8 +16,7 @@ def conservatorship(primary_featurelayer): market_value_over_1000 = ( row["market_value"] and float(row["market_value"]) > 1000 ) - li_complaints_lower = str(row["li_complaints"]).lower().split(" ") 
- contains_blight_word = any(word in li_complaints_lower for word in blight_words) + violations_exist = float(row["all_violations_past_year"]) > 0 try: sale_date = parse(row["sale_date"]).astimezone(est) @@ -48,7 +29,7 @@ def conservatorship(primary_featurelayer): not sale_date_6_months_ago and market_value_over_1000 ): conservatorship = "No" - elif contains_blight_word and not sheriff_sale and sale_date_6_months_ago: + elif violations_exist and not sheriff_sale and sale_date_6_months_ago: conservatorship = "Yes" else: conservatorship = "No" diff --git a/data/src/new_etl/data_utils/dor_parcels.py b/data/src/new_etl/data_utils/dor_parcels.py new file mode 100644 index 00000000..74d73a24 --- /dev/null +++ b/data/src/new_etl/data_utils/dor_parcels.py @@ -0,0 +1,71 @@ +from ..classes.featurelayer import FeatureLayer +from ..constants.services import DOR_PARCELS_URL +import geopandas as gpd +from config.config import USE_CRS + +def dor_parcels(primary_featurelayer: FeatureLayer) -> FeatureLayer: + """ + Updates the primary feature layer by replacing its geometry column with + polygon geometries from DOR parcels where intersections occur. + + Args: + primary_featurelayer (FeatureLayer): The primary feature layer to update. + + Returns: + FeatureLayer: The updated primary feature layer with geometries replaced + by DOR parcel polygons where possible. + """ + print("Loading DOR properties from GeoJSON...") + # Load and preprocess DOR parcels + dor_parcels = gpd.read_file(DOR_PARCELS_URL).to_crs(USE_CRS) + dor_parcels["geometry"] = dor_parcels["geometry"].make_valid() + + # Filter only valid polygon or multipolygon geometries + dor_parcels = dor_parcels[dor_parcels.geometry.type.isin(["Polygon", "MultiPolygon"])] + print(f"Number of valid polygon/multipolygon geometries in DOR parcels: {len(dor_parcels)}") + + # Ensure the primary feature layer has the same CRS + primary_featurelayer.gdf = primary_featurelayer.gdf.to_crs(USE_CRS) + + # Perform spatial join to identify intersecting polygons + print("Performing spatial join between points and polygons...") + spatial_join_result = gpd.sjoin( + primary_featurelayer.gdf, + dor_parcels[["geometry"]], # Only keep geometry column + how="left", + predicate="intersects" + ) + + # Replace point geometries with polygon geometries where intersections occur + mask = ~spatial_join_result["index_right"].isna() + spatial_join_result.loc[mask, "geometry"] = dor_parcels.loc[ + spatial_join_result.loc[mask, "index_right"], + "geometry" + ].values + + # Drop spatial join index column + spatial_join_result.drop(columns=["index_right"], errors="ignore", inplace=True) + + # Update primary feature layer + primary_featurelayer.gdf = gpd.GeoDataFrame( + spatial_join_result, + geometry="geometry", + crs=USE_CRS + ) + + # Count match statistics + total_rows = len(spatial_join_result) + matched_rows = mask.sum() + unmatched_rows = total_rows - matched_rows + + print(f"Total rows: {total_rows}") + print(f"Matched rows (with polygons): {matched_rows}") + print(f"Unmatched rows: {unmatched_rows}") + + # Count and drop duplicate opa_ids in the primary feature layer + multiple_matches = primary_featurelayer.gdf.duplicated(subset="opa_id", keep=False).sum() + print(f"Rows with duplicate opa_id values in the primary feature layer: {multiple_matches}") + primary_featurelayer.gdf = primary_featurelayer.gdf[~primary_featurelayer.gdf.duplicated(subset="opa_id", keep=False)] + print(f"Updated size of primary feature layer after dropping duplicates: {len(primary_featurelayer.gdf)}") + + 
return primary_featurelayer
\ No newline at end of file
diff --git a/data/src/new_etl/data_utils/li_violations.py b/data/src/new_etl/data_utils/li_violations.py
index 52dfbe16..efd52db5 100644
--- a/data/src/new_etl/data_utils/li_violations.py
+++ b/data/src/new_etl/data_utils/li_violations.py
@@ -104,22 +104,6 @@ def li_violations(primary_featurelayer: FeatureLayer) -> FeatureLayer:
         "opa_account_num",
     )
 
-    # Clean up the NaN values in the li_complaints column
-    def remove_nan_strings(x: str) -> str | None:
-        """
-        Remove 'nan' strings from the input.
-
-        Args:
-            x (str): Input string.
-
-        Returns:
-            str | None: Cleaned string or None if only 'nan' values.
-        """
-        if x == "nan" or ("nan;" in x):
-            return None
-        else:
-            return x
-
     primary_featurelayer.gdf[
         ["all_violations_past_year", "open_violations_past_year"]
     ] = (
diff --git a/data/src/new_etl/data_utils/priority_level.py b/data/src/new_etl/data_utils/priority_level.py
index 84890404..5d2ea793 100644
--- a/data/src/new_etl/data_utils/priority_level.py
+++ b/data/src/new_etl/data_utils/priority_level.py
@@ -9,10 +9,10 @@ def priority_level(dataset):
         # Decision Points
         guncrime_density_percentile = row["gun_crimes_density_percentile"]
         in_phs_landcare = pd.notna(row["phs_care_program"])
-        has_li_complaint_or_violation = (
-            row["li_complaints"] is not None
-            and float(row["all_violations_past_year"]) > 0
-        ) or (row["l_and_i_complaints_density_percentile"] > 50)
+        has_violation_or_high_density = (
+            float(row["all_violations_past_year"]) > 0
+            or row["l_and_i_complaints_density_percentile"] > 50
+        )
         very_low_tree_canopy = row["tree_canopy_gap"] >= 0.3
 
         # Updated logic based on percentile values
@@ -23,7 +23,7 @@ def priority_level(dataset):
 
         elif guncrime_density_percentile > 75:
             # High Gun Crime Density (Top 25%)
-            if has_li_complaint_or_violation:
+            if has_violation_or_high_density:
                 priority_level = "High"
             else:
                 if in_phs_landcare:
@@ -36,7 +36,7 @@ def priority_level(dataset):
 
         else:
             # Medium Gun Crime Density (Between 50% and 75%)
-            if has_li_complaint_or_violation:
+            if has_violation_or_high_density:
                 if in_phs_landcare:
                     priority_level = "Medium"
                 else:
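
Note: the new dor_parcels step introduced in this patch rests on one geopandas idiom: left spatial join the point layer against the parcel polygons, then overwrite the point geometry wherever a match exists. A compact restatement of that join-and-swap step (toy inputs assumed; the real function also validates geometries and deduplicates on opa_id, since a point intersecting several parcels is duplicated by the join):

    import geopandas as gpd


    def swap_points_for_parcels(
        points: gpd.GeoDataFrame, parcels: gpd.GeoDataFrame
    ) -> gpd.GeoDataFrame:
        # Left join keeps every point; "index_right" holds the index of the
        # matching parcel where an intersection exists, NaN otherwise.
        joined = gpd.sjoin(
            points, parcels[["geometry"]], how="left", predicate="intersects"
        )
        hit = joined["index_right"].notna()
        idx = joined.loc[hit, "index_right"].astype(int)
        joined.loc[hit, "geometry"] = parcels.geometry.loc[idx].values
        return joined.drop(columns="index_right")
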
From e629bb2c981937826545f890b097b6df3c812c88 Mon Sep 17 00:00:00 2001
From: nlebovits
Date: Thu, 21 Nov 2024 19:31:58 -0500
Subject: [PATCH 17/18] more formatting

---
 data/src/main.py                              |  27 ++--
 data/src/new_etl/constants/services.py        |   2 +-
 .../new_etl/data_utils/community_gardens.py   | 113 +++++++++++++++---
 .../src/new_etl/data_utils/conservatorship.py |   1 +
 .../src/new_etl/data_utils/dev_probability.py |   1 -
 data/src/new_etl/data_utils/dor_parcels.py    |  91 ++++++++++----
 data/src/new_etl/data_utils/ppr_properties.py |  48 +++++--
 .../new_etl/data_utils/tactical_urbanism.py   |   1 -
 8 files changed, 211 insertions(+), 73 deletions(-)

diff --git a/data/src/main.py b/data/src/main.py
index 35596318..e0131edd 100644
--- a/data/src/main.py
+++ b/data/src/main.py
@@ -1,8 +1,5 @@
 import sys
-import time
 
-from config.config import FORCE_RELOAD
-from config.psql import conn
 from new_etl.data_utils.access_process import access_process
 from new_etl.data_utils.contig_neighbors import contig_neighbors
 from new_etl.data_utils.dev_probability import dev_probability
 from new_etl.data_utils.negligent_devs import negligent_devs
 from new_etl.data_utils.opa_properties import opa_properties
 from new_etl.data_utils.priority_level import priority_level
 from new_etl.data_utils.vacant_properties import vacant_properties
 from new_etl.data_utils.pwd_parcels import pwd_parcels
-from new_etl.data_utils.dor_parcels import dor_parcels
 from new_etl.data_utils.city_owned_properties import city_owned_properties
 from new_etl.data_utils.phs_properties import phs_properties
 from new_etl.data_utils.li_violations import li_violations
@@ -33,7 +29,6 @@
 from new_etl.data_utils.ppr_properties import ppr_properties
 
 import pandas as pd
-import geopandas as gpd
 
 
 # Ensure the directory containing awkde is in the Python path
@@ -43,21 +38,18 @@
 services = [
     # vacant designation
-    vacant_properties, # needs to run early so that other utils can make use of the `vacant` designation
-
+    vacant_properties,  # needs to run early so that other utils can make use of the `vacant` designation
     # geometries/areas
-    dor_parcels,
+    pwd_parcels,
     council_dists,
     nbhoods,
     rco_geoms,
-
     # ownership
     city_owned_properties,
     phs_properties,
-    community_gardens, 
+    community_gardens,
     ppr_properties,
     owner_type,
-
     # quality of life
     li_violations,
     li_complaints,
@@ -67,18 +59,14 @@
     delinquencies,
     unsafe_buildings,
     imm_dang_buildings,
-
     # development
     contig_neighbors,
     dev_probability,
     negligent_devs,
-
     # access/interventions
     tactical_urbanism,
     conservatorship,
     park_priority,
-
-
 ]
 
 dataset = opa_properties()
@@ -151,9 +139,12 @@
 # 2) Mean, median, and std of numeric columns
 print("\nMean, Median, and Standard Deviation of numeric columns:")
 numeric_columns = dataset.gdf.select_dtypes(include=["float", "int"]).columns
-numeric_summary = dataset.gdf[numeric_columns].describe().loc[["mean", "50%", "std"]]
-numeric_summary.rename(index={"50%": "median"}, inplace=True)
-print(numeric_summary)
+
+for column in numeric_columns:
+    mean = dataset.gdf[column].mean()
+    median = dataset.gdf[column].median()
+    std = dataset.gdf[column].std()
+    print(f"{column}:\n  Mean: {mean:.2f}\n  Median: {median:.2f}\n  Std: {std:.2f}")
 
 # 3) Number of unique values in string columns
 print("\nNumber of unique values in string columns:")
diff --git a/data/src/new_etl/constants/services.py b/data/src/new_etl/constants/services.py
index f372fad5..52f8237c 100644
--- a/data/src/new_etl/constants/services.py
+++ b/data/src/new_etl/constants/services.py
@@ -97,4 +97,4 @@
 
 DOR_PARCELS_URL = (
     "https://opendata.arcgis.com/datasets/1c57dd1b3ff84449a4b0e3fb29d3cafd_0.geojson"
-    )
\ No newline at end of file
+)
diff --git a/data/src/new_etl/data_utils/community_gardens.py b/data/src/new_etl/data_utils/community_gardens.py
index 1bb79ee2..a47b6760 100644
--- a/data/src/new_etl/data_utils/community_gardens.py
+++ b/data/src/new_etl/data_utils/community_gardens.py
@@ -1,35 +1,112 @@
 from ..classes.featurelayer import FeatureLayer
 from ..constants.services import COMMUNITY_GARDENS_TO_LOAD
+from config.config import USE_CRS
 
 
-def community_gardens(primary_featurelayer):
-    # this script *removes* (rather than adds) known community gardens from the dataset in order to protect them from potential predatory developers
+def community_gardens(primary_featurelayer: FeatureLayer) -> FeatureLayer:
+    """
+    Updates the 'vacant' column in the primary feature layer to ensure community gardens
+    are marked as not vacant. This protects known community gardens from being categorized
+    as vacant, preventing potential predatory development.
+    """
+    if "vacant" not in primary_featurelayer.gdf.columns:
+        raise ValueError("The 'vacant' column is missing in the primary feature layer.")
+
+    print(
+        "Geometry types in primary feature layer:",
+        primary_featurelayer.gdf.geometry.type.value_counts(),
+    )
+
+    # Load community gardens
     community_gardens = FeatureLayer(
         name="Community Gardens", esri_rest_urls=COMMUNITY_GARDENS_TO_LOAD
     )
 
-    community_gardens.gdf = community_gardens.gdf[["site_name", "geometry"]]
+    # Ensure both layers are in the same CRS
+    if community_gardens.gdf.crs != USE_CRS:
+        print(
+            f"Transforming community gardens from {community_gardens.gdf.crs} to {USE_CRS}"
+        )
+        community_gardens.gdf = community_gardens.gdf.to_crs(USE_CRS)
 
-    primary_featurelayer.spatial_join(community_gardens)
+    # Check geometry types
+    geom_types = community_gardens.gdf.geometry.geom_type.value_counts()
+    print("\nCommunity gardens geometry types:")
+    print(geom_types)
 
-    # Print the columns to debug and confirm that "site_name" exists
-    print("Columns in primary_featurelayer.gdf:", primary_featurelayer.gdf.columns)
+    # Identify problematic gardens
+    if len(geom_types) > 1:
+        print("\nGardens with non-Point geometries:")
+        non_point_gardens = community_gardens.gdf[
+            community_gardens.gdf.geometry.geom_type != "Point"
+        ]
+        print(f"Total non-Point geometries: {len(non_point_gardens)}")
+        print("\nSample of problematic records:")
+        print(non_point_gardens[["site_name", "geometry"]].head())
 
-    # Create a boolean mask where 'site_name' is not null
-    mask = primary_featurelayer.gdf["site_name"].notnull()
+    # Convert any non-point geometries to points using centroid
+    print("\nConverting non-Point geometries to points using centroids...")
+    community_gardens.gdf.loc[
+        community_gardens.gdf.geometry.geom_type != "Point", "geometry"
+    ] = community_gardens.gdf[
+        community_gardens.gdf.geometry.geom_type != "Point"
+    ].geometry.centroid
+
+    # Verify all geometries are now points
+    if not all(community_gardens.gdf.geometry.geom_type == "Point"):
+        raise ValueError("Failed to convert all geometries to points")
+
+    # Limit the community gardens data to relevant columns
+    community_gardens.gdf = community_gardens.gdf[["site_name", "geometry"]]
 
-    count_dropped = mask.sum()
-    print(f"Number of community gardens being dropped: {count_dropped}")
+    print(f"\nTotal community gardens: {len(community_gardens.gdf)}")
 
-    # Use this mask to drop rows where 'site_name' is not null
-    primary_featurelayer.gdf = primary_featurelayer.gdf.drop(
-        primary_featurelayer.gdf[mask].index
+    # Use 'contains' predicate since we want the parcel that contains each point
+    joined_gdf = primary_featurelayer.gdf.sjoin(
+        community_gardens.gdf, predicate="contains", how="inner"
     )
 
-    # Ensure 'site_name' exists before attempting to drop it
-    if "site_name" in primary_featurelayer.gdf.columns:
-        primary_featurelayer.gdf = primary_featurelayer.gdf.drop(columns=["site_name"])
-    else:
-        print("'site_name' column is missing, cannot drop.")
+    # Count matches per garden
+    matches_per_garden = joined_gdf.groupby("site_name").size()
+    print("\nMatches per garden:")
+    print(f"Min matches: {matches_per_garden.min()}")
+    print(f"Max matches: {matches_per_garden.max()}")
+    print(f"Average matches: {matches_per_garden.mean():.2f}")
+
+    # Print details for gardens with an unusually high number of matches
+    if matches_per_garden.max() > 1:  # arbitrary threshold
+        print("\nGardens with high number of matches:")
+        high_matches = matches_per_garden[matches_per_garden > 1]
+        print(high_matches)
+
+        # Print concise details about properties matching these gardens
+        print("\nSummary of matched properties for high-match gardens:")
+        for garden_name in high_matches.index:
+            matched_properties = joined_gdf[joined_gdf["site_name"] == garden_name]
+            print(f"\nGarden: {garden_name}")
+            print("Matched parcels:")
+            print(
+                matched_properties[["opa_id"]]
+                .drop_duplicates()
+                .head(5)
+                .to_string(index=False)
+            )
+            if len(matched_properties) > 5:
+                print(f"...and {len(matched_properties) - 5} more matches.")
+
+    # Get unique parcels that contain garden points
+    garden_parcels = set(joined_gdf["opa_id"])
+    print(f"\nUnique parcels containing gardens: {len(garden_parcels)}")
+
+    if len(garden_parcels) > len(community_gardens.gdf):
+        print(
+            "\nWARNING: More matching parcels than gardens. This suggests possible data issues."
+        )
+
+    # Update vacant status for parcels containing gardens
+    mask = primary_featurelayer.gdf["opa_id"].isin(garden_parcels)
+    primary_featurelayer.gdf.loc[mask, "vacant"] = False
+
+    print(f"\nTotal parcels updated: {mask.sum()}")
 
     return primary_featurelayer
diff --git a/data/src/new_etl/data_utils/conservatorship.py b/data/src/new_etl/data_utils/conservatorship.py
index fda16a64..2294e3ac 100644
--- a/data/src/new_etl/data_utils/conservatorship.py
+++ b/data/src/new_etl/data_utils/conservatorship.py
@@ -7,6 +7,7 @@ est
 )
 
+
 def conservatorship(primary_featurelayer):
     conservatorships = []
diff --git a/data/src/new_etl/data_utils/dev_probability.py b/data/src/new_etl/data_utils/dev_probability.py
index c3ede1cd..ab36ea31 100644
--- a/data/src/new_etl/data_utils/dev_probability.py
+++ b/data/src/new_etl/data_utils/dev_probability.py
@@ -8,7 +8,6 @@
 from config.config import USE_CRS
 
 
-
 def dev_probability(primary_featurelayer):
     census_bgs_gdf = gpd.read_file(CENSUS_BGS_URL)
     census_bgs_gdf = census_bgs_gdf.to_crs(USE_CRS)
diff --git a/data/src/new_etl/data_utils/dor_parcels.py b/data/src/new_etl/data_utils/dor_parcels.py
index 74d73a24..dca5f902 100644
--- a/data/src/new_etl/data_utils/dor_parcels.py
+++ b/data/src/new_etl/data_utils/dor_parcels.py
@@ -2,70 +2,111 @@
 from ..constants.services import DOR_PARCELS_URL
 import geopandas as gpd
 from config.config import USE_CRS
+from shapely.strtree import STRtree
+
 
 def dor_parcels(primary_featurelayer: FeatureLayer) -> FeatureLayer:
     """
-    Updates the primary feature layer by replacing its geometry column with 
+    Updates the primary feature layer by replacing its geometry column with
     polygon geometries from DOR parcels where intersections occur.
-    
+
     Args:
         primary_featurelayer (FeatureLayer): The primary feature layer to update.
-    
+
     Returns:
         FeatureLayer: The updated primary feature layer with geometries replaced
         by DOR parcel polygons where possible.
     """
     print("Loading DOR properties from GeoJSON...")
+
     # Load and preprocess DOR parcels
     dor_parcels = gpd.read_file(DOR_PARCELS_URL).to_crs(USE_CRS)
     dor_parcels["geometry"] = dor_parcels["geometry"].make_valid()
+    dor_parcels = dor_parcels[
+        dor_parcels["STATUS"] == 1
+    ]  # filter for what I think are only active parcel boundaries
+
     # Filter only valid polygon or multipolygon geometries
-    dor_parcels = dor_parcels[dor_parcels.geometry.type.isin(["Polygon", "MultiPolygon"])]
-    print(f"Number of valid polygon/multipolygon geometries in DOR parcels: {len(dor_parcels)}")
-    
+    dor_parcels = dor_parcels[
+        dor_parcels.geometry.type.isin(["Polygon", "MultiPolygon"])
+    ]
+    print(
+        f"Number of valid polygon/multipolygon geometries in DOR parcels: {len(dor_parcels)}"
+    )
+
     # Ensure the primary feature layer has the same CRS
     primary_featurelayer.gdf = primary_featurelayer.gdf.to_crs(USE_CRS)
-    
+
     # Perform spatial join to identify intersecting polygons
     print("Performing spatial join between points and polygons...")
     spatial_join_result = gpd.sjoin(
         primary_featurelayer.gdf,
         dor_parcels[["geometry"]],  # Only keep geometry column
-        how="left", 
-        predicate="intersects"
+        how="left",
+        predicate="intersects",
     )
-    
+
     # Replace point geometries with polygon geometries where intersections occur
     mask = ~spatial_join_result["index_right"].isna()
     spatial_join_result.loc[mask, "geometry"] = dor_parcels.loc[
-        spatial_join_result.loc[mask, "index_right"], 
-        "geometry"
+        spatial_join_result.loc[mask, "index_right"], "geometry"
     ].values
-    
+
     # Drop spatial join index column
     spatial_join_result.drop(columns=["index_right"], errors="ignore", inplace=True)
-    
+
     # Update primary feature layer
     primary_featurelayer.gdf = gpd.GeoDataFrame(
-        spatial_join_result, 
-        geometry="geometry", 
-        crs=USE_CRS
+        spatial_join_result, geometry="geometry", crs=USE_CRS
     )
-    
+
     # Count match statistics
     total_rows = len(spatial_join_result)
     matched_rows = mask.sum()
     unmatched_rows = total_rows - matched_rows
-    
+
     print(f"Total rows: {total_rows}")
     print(f"Matched rows (with polygons): {matched_rows}")
     print(f"Unmatched rows: {unmatched_rows}")
 
+    # Filter out POINT geometries
+    primary_featurelayer.gdf = primary_featurelayer.gdf[
+        primary_featurelayer.gdf.geometry.type.isin(["Polygon", "MultiPolygon"])
+    ]
+
+    # Dissolve overlapping parcels by opa_id
+    print("Dissolving overlapping parcels by opa_id...")
+    primary_featurelayer.gdf = primary_featurelayer.gdf.dissolve(
+        by="opa_id", as_index=False
+    )
+    print(
+        f"Size of primary feature layer after dissolve: {len(primary_featurelayer.gdf)}"
+    )
+
+    # Create an STRtree for fast spatial indexing of the parcel geometries
+    parcel_tree = STRtree(primary_featurelayer.gdf.geometry)
+
+    # Count candidate overlaps per parcel (bounding-box hits, incl. the parcel itself)
+    overlapping_count = primary_featurelayer.gdf.geometry.apply(
+        lambda geom: len(parcel_tree.query(geom))
+    )
+    print("Number of overlaps per parcel after dissolve:")
+    print(overlapping_count.value_counts())
+
     # Count and drop duplicate opa_ids in the primary feature layer
-    multiple_matches = primary_featurelayer.gdf.duplicated(subset="opa_id", keep=False).sum()
-    print(f"Rows with duplicate opa_id values in the primary feature layer: {multiple_matches}")
-    primary_featurelayer.gdf = primary_featurelayer.gdf[~primary_featurelayer.gdf.duplicated(subset="opa_id", keep=False)]
-    print(f"Updated size of primary feature layer after dropping duplicates: {len(primary_featurelayer.gdf)}")
+    multiple_matches = primary_featurelayer.gdf.duplicated(
+        subset="opa_id", keep=False
+    ).sum()
+    print(
+        f"Rows with duplicate opa_id values in the primary feature layer: {multiple_matches}"
+    )
+
+    # Drop duplicates based on opa_id
+    primary_featurelayer.gdf = primary_featurelayer.gdf.drop_duplicates(
+        subset="opa_id", keep="first"
+    )
+    print(
+        f"Updated size of primary feature layer after dropping duplicates: {len(primary_featurelayer.gdf)}"
+    )
 
-    return primary_featurelayer
\ No newline at end of file
+    return primary_featurelayer
diff --git a/data/src/new_etl/data_utils/ppr_properties.py b/data/src/new_etl/data_utils/ppr_properties.py
index 568ea048..46e3dc87 100644
--- a/data/src/new_etl/data_utils/ppr_properties.py
+++ b/data/src/new_etl/data_utils/ppr_properties.py
@@ -1,17 +1,27 @@
 import io
-
 import geopandas as gpd
 import requests
 from ..classes.featurelayer import FeatureLayer
 from ..constants.services import PPR_PROPERTIES_TO_LOAD
-
 from config.config import USE_CRS
 
 
-def ppr_properties(primary_featurelayer):
+def ppr_properties(primary_featurelayer: FeatureLayer) -> FeatureLayer:
+    """
+    Updates the 'vacant' column in the primary feature layer to ensure PPR properties
+    are marked as not vacant. This prevents PPR properties from being miscategorized
+    as vacant.
+
+    Args:
+        primary_featurelayer (FeatureLayer): The primary feature layer to update.
+
+    Returns:
+        FeatureLayer: The updated primary feature layer.
+    """
     fallback_url = "https://opendata.arcgis.com/datasets/d52445160ab14380a673e5849203eb64_0.geojson"
 
     try:
+        # Load PPR properties from Esri REST URLs
         ppr_properties = FeatureLayer(
             name="PPR Properties",
             esri_rest_urls=PPR_PROPERTIES_TO_LOAD,
@@ -36,21 +46,41 @@ def ppr_properties(primary_featurelayer):
         ppr_properties = FeatureLayer(name="PPR Properties")
         ppr_properties.gdf = ppr_properties_gdf
 
+    # Limit PPR properties to relevant columns and apply CRS
     ppr_properties.gdf = ppr_properties.gdf[["public_name", "geometry"]]
-
     ppr_properties.gdf = ppr_properties.gdf.to_crs(USE_CRS)
 
+    # Perform a spatial join with the primary feature layer
     primary_featurelayer.spatial_join(ppr_properties)
 
+    # Ensure the 'vacant' column exists in the primary feature layer
+    if "vacant" not in primary_featurelayer.gdf.columns:
+        raise ValueError(
+            "The 'vacant' column is missing in the primary feature layer. Ensure it exists before running this function."
+        )
+
+    # Create a mask for rows where PPR properties are identified
     mask = primary_featurelayer.gdf["public_name"].notnull()
-    count_dropped = mask.sum()
-    print(f"Number of PPR properties being dropped: {count_dropped}")
 
+    # Count rows where a PPR property is identified and 'vacant' is currently True
+    count_updated = primary_featurelayer.gdf.loc[
+        mask & primary_featurelayer.gdf["vacant"].eq(True)
+    ].shape[0]
 
-    primary_featurelayer.gdf = primary_featurelayer.gdf.drop(
-        primary_featurelayer.gdf[mask].index
+    # Update the 'vacant' column to False for identified PPR properties
+    primary_featurelayer.gdf.loc[mask, "vacant"] = False
+
+    # Log results
+    print(
+        f"Updated 'vacant' column for PPR properties. 
Total rows updated: {count_updated}" ) - primary_featurelayer.gdf = primary_featurelayer.gdf.drop(columns=["public_name"]) + # Drop the "public_name" column if it exists, as it's no longer needed + if "public_name" in primary_featurelayer.gdf.columns: + primary_featurelayer.gdf = primary_featurelayer.gdf.drop( + columns=["public_name"] + ) + else: + print("'public_name' column is missing, cannot drop.") return primary_featurelayer diff --git a/data/src/new_etl/data_utils/tactical_urbanism.py b/data/src/new_etl/data_utils/tactical_urbanism.py index 63e4b0e6..68ca85e2 100644 --- a/data/src/new_etl/data_utils/tactical_urbanism.py +++ b/data/src/new_etl/data_utils/tactical_urbanism.py @@ -1,7 +1,6 @@ from ..classes.featurelayer import FeatureLayer - def tactical_urbanism(primary_featurelayer: FeatureLayer) -> FeatureLayer: """ Assigns a 'tactical_urbanism' value to each row in the primary feature layer based on specific conditions. From ea35c075ca6f3b03fa31091cee9f23273f28df50 Mon Sep 17 00:00:00 2001 From: nlebovits Date: Thu, 21 Nov 2024 19:35:35 -0500 Subject: [PATCH 18/18] Reset Dockerfile-pg to match staging as changes are outside scope of current PR --- data/Dockerfile-pg | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/data/Dockerfile-pg b/data/Dockerfile-pg index 8fbca869..d4d62d77 100644 --- a/data/Dockerfile-pg +++ b/data/Dockerfile-pg @@ -1,26 +1,26 @@ +# +# NOTE: THIS DOCKERFILE IS GENERATED VIA "make update"! PLEASE DO NOT EDIT IT DIRECTLY. +# + FROM postgres:16-bullseye LABEL maintainer="PostGIS Project - https://postgis.net" \ - org.opencontainers.image.description="PostGIS with PostgreSQL 16" \ + org.opencontainers.image.description="PostGIS 3.4.3+dfsg-2.pgdg110+1 spatial database extension with PostgreSQL 16 bullseye" \ org.opencontainers.image.source="https://github.com/postgis/docker-postgis" ENV POSTGIS_MAJOR 3 +ENV POSTGIS_VERSION 3.4.3+dfsg-2.pgdg110+1 -# Install dependencies and PostGIS RUN apt-get update \ - && apt-get install -y --no-install-recommends \ - gnupg \ - postgresql-common \ - apt-transport-https \ - lsb-release \ - wget \ + && apt-cache showpkg postgresql-$PG_MAJOR-postgis-$POSTGIS_MAJOR \ + && apt-get install -y --no-install-recommends \ + # ca-certificates: for accessing remote raster files; + # fix: https://github.com/postgis/docker-postgis/issues/307 ca-certificates \ - && yes | /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh \ - && apt-get update \ - && apt-get install -y --no-install-recommends \ - postgresql-16-postgis-3 \ - postgresql-16-postgis-3-scripts \ - postgresql-client-16 \ - && rm -rf /var/lib/apt/lists/* + \ + postgresql-$PG_MAJOR-postgis-$POSTGIS_MAJOR=$POSTGIS_VERSION \ + postgresql-$PG_MAJOR-postgis-$POSTGIS_MAJOR-scripts \ + && rm -rf /var/lib/apt/lists/* RUN mkdir -p /docker-entrypoint-initdb.d +
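
Note: the community-gardens and PPR changes in patch 17 share one design choice worth calling out: known-occupied sites are now flagged as not vacant instead of being dropped from the dataset, so downstream joins keep the rows. A minimal sketch of that sjoin-contains flagging pattern (column names follow the diffs above; the inputs are illustrative):

    import geopandas as gpd


    def flag_not_vacant(
        parcels: gpd.GeoDataFrame, sites: gpd.GeoDataFrame, id_col: str = "opa_id"
    ) -> gpd.GeoDataFrame:
        # Inner join keeps only parcels that contain at least one site point;
        # flagging (rather than dropping) preserves the parcel for later stages.
        hits = parcels.sjoin(sites[["geometry"]], predicate="contains", how="inner")
        parcels.loc[parcels[id_col].isin(hits[id_col]), "vacant"] = False
        return parcels
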