From e7c672938fc93890e223daa5ed805f0466b80090 Mon Sep 17 00:00:00 2001 From: "Alexis A." Date: Tue, 30 Jul 2024 15:56:21 +0200 Subject: [PATCH 01/99] feat(mvt): add artificial area vector tiles endpoint --- Pipfile | 1 + Pipfile.lock | 753 ++++++++++++++++++++---------------------- config/middlewares.py | 2 + config/settings.py | 2 + public_data/urls.py | 5 + public_data/views.py | 45 ++- 6 files changed, 406 insertions(+), 402 deletions(-) diff --git a/Pipfile b/Pipfile index fa27e1f8c..351abfdfd 100644 --- a/Pipfile +++ b/Pipfile @@ -70,3 +70,4 @@ sentry-sdk = "*" setuptools = "*" py7zr = "*" dependency-injector = "*" +django-vectortiles = "*" diff --git a/Pipfile.lock b/Pipfile.lock index 68ec55ef7..b8030d1eb 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "d2695e46bd7e953d3504c4c4e82a6943fb55741b1b9ab45541464bed48e411aa" + "sha256": "f01c885be0dc2329c5e8599d5f252f00fe6c3b1fcb8120ee80eed1d9ec059f08" }, "pipfile-spec": 6, "requires": { @@ -66,20 +66,20 @@ }, "boto3": { "hashes": [ - "sha256:23ca8d8f7a30c3bbd989808056b5fc5d68ff5121c02c722c6167b6b1bb7f8726", - "sha256:578bbd5e356005719b6b610d03edff7ea1b0824d078afe62d3fb8bea72f83a87" + "sha256:894b222f7850b870a7ac63d7e378ac36c5c34375da24ddc30e131d9fafe369dc", + "sha256:ad648c89a4935590a69341e5430fc42a021489a22de171ee3fd7bb204f9ef0fa" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==1.34.140" + "version": "==1.34.150" }, "botocore": { "hashes": [ - "sha256:43940d3a67d946ba3301631ba4078476a75f1015d4fb0fb0272d0b754b2cf9de", - "sha256:86302b2226c743b9eec7915a4c6cfaffd338ae03989cd9ee181078ef39d1ab39" + "sha256:4d23387e0f076d87b637a2a35c0ff2b8daca16eace36b63ce27f65630c6b375a", + "sha256:b988d47f4d502df85befce11a48002421e4e6ea4289997b5e0261bac5fa76ce6" ], "markers": "python_version >= '3.8'", - "version": "==1.34.140" + "version": "==1.34.150" }, "brotli": { "hashes": [ @@ -499,12 +499,12 @@ }, "django": { "hashes": [ - 
"sha256:837e3cf1f6c31347a1396a3f6b65688f2b4bb4a11c580dcb628b5afe527b68a5", - "sha256:a17fcba2aad3fc7d46fdb23215095dbbd64e6174bf4589171e732b18b07e426a" + "sha256:3ec32bc2c616ab02834b9cac93143a7dc1cdcd5b822d78ac95fc20a38c534240", + "sha256:fc6919875a6226c7ffcae1a7d51e0f2ceaf6f160393180818f6c95f51b1e7b96" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==4.2.13" + "version": "==4.2.14" }, "django-app-parameter": { "hashes": [ @@ -526,12 +526,12 @@ }, "django-crispy-forms": { "hashes": [ - "sha256:92cb7b7786fb82646bad739343fd17a99a04b0b736a09e34f3b91f26cbca7e4a", - "sha256:a681cffd5af270b3082bda02cf8f81bdb5717ed66d2265e87e3df2f4ccf46277" + "sha256:2db17ae08527201be1273f0df789e5f92819e23dd28fec69cffba7f3762e1a38", + "sha256:efc4c31e5202bbec6af70d383a35e12fc80ea769d464fb0e7fe21768bb138a20" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==2.2" + "version": "==2.3" }, "django-csp": { "hashes": [ @@ -569,12 +569,12 @@ }, "django-import-export": { "hashes": [ - "sha256:13de8d28bf3d7ffc45da5fdf60d53ff70c285827a39eea937f90450fbda0df3d", - "sha256:639f8488bdf155f46d15910220ef984d72fd2f5a8f4f448b49078125f11701d3" + "sha256:16ecc5a9f0df46bde6eb278a3e65ebda0ee1db55656f36440e9fb83f40ab85a3", + "sha256:730ae2443a02b1ba27d8dba078a27ae9123adfcabb78161b4f130843607b3df9" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==4.1.0" + "version": "==4.1.1" }, "django-redis": { "hashes": [ @@ -605,12 +605,20 @@ }, "django-storages": { "hashes": [ - "sha256:31f263389e95ce3a1b902fb5f739a7ed32895f7d8b80179fe7453ecc0dfe102e", - "sha256:95a12836cd998d4c7a4512347322331c662d9114c4344f932f5e9c0fce000608" + "sha256:69aca94d26e6714d14ad63f33d13619e697508ee33ede184e462ed766dc2a73f", + "sha256:d61930acb4a25e3aebebc6addaf946a3b1df31c803a6bf1af2f31c9047febaa3" ], "index": "pypi", "markers": "python_version >= '3.7'", - "version": "==1.14.3" + "version": "==1.14.4" + }, + "django-vectortiles": { + "hashes": [ + 
"sha256:9710492bb77328b202dcbd889b65f9d08807e7dc061a5593d86cfbf1ea545caf" + ], + "index": "pypi", + "markers": "python_version >= '3.6'", + "version": "==0.2.0" }, "djangorestframework": { "hashes": [ @@ -1266,55 +1274,55 @@ }, "numpy": { "hashes": [ - "sha256:04494f6ec467ccb5369d1808570ae55f6ed9b5809d7f035059000a37b8d7e86f", - "sha256:0a43f0974d501842866cc83471bdb0116ba0dffdbaac33ec05e6afed5b615238", - "sha256:0e50842b2295ba8414c8c1d9d957083d5dfe9e16828b37de883f51fc53c4016f", - "sha256:0ec84b9ba0654f3b962802edc91424331f423dcf5d5f926676e0150789cb3d95", - "sha256:17067d097ed036636fa79f6a869ac26df7db1ba22039d962422506640314933a", - "sha256:1cde1753efe513705a0c6d28f5884e22bdc30438bf0085c5c486cdaff40cd67a", - "sha256:1e72728e7501a450288fc8e1f9ebc73d90cfd4671ebbd631f3e7857c39bd16f2", - "sha256:2635dbd200c2d6faf2ef9a0d04f0ecc6b13b3cad54f7c67c61155138835515d2", - "sha256:2ce46fd0b8a0c947ae047d222f7136fc4d55538741373107574271bc00e20e8f", - "sha256:34f003cb88b1ba38cb9a9a4a3161c1604973d7f9d5552c38bc2f04f829536609", - "sha256:354f373279768fa5a584bac997de6a6c9bc535c482592d7a813bb0c09be6c76f", - "sha256:38ecb5b0582cd125f67a629072fed6f83562d9dd04d7e03256c9829bdec027ad", - "sha256:3e8e01233d57639b2e30966c63d36fcea099d17c53bf424d77f088b0f4babd86", - "sha256:3f6bed7f840d44c08ebdb73b1825282b801799e325bcbdfa6bc5c370e5aecc65", - "sha256:4554eb96f0fd263041baf16cf0881b3f5dafae7a59b1049acb9540c4d57bc8cb", - "sha256:46e161722e0f619749d1cd892167039015b2c2817296104487cd03ed4a955995", - "sha256:49d9f7d256fbc804391a7f72d4a617302b1afac1112fac19b6c6cec63fe7fe8a", - "sha256:4d2f62e55a4cd9c58c1d9a1c9edaedcd857a73cb6fda875bf79093f9d9086f85", - "sha256:5f64641b42b2429f56ee08b4f427a4d2daf916ec59686061de751a55aafa22e4", - "sha256:63b92c512d9dbcc37f9d81b123dec99fdb318ba38c8059afc78086fe73820275", - "sha256:6d7696c615765091cc5093f76fd1fa069870304beaccfd58b5dcc69e55ef49c1", - "sha256:79e843d186c8fb1b102bef3e2bc35ef81160ffef3194646a7fdd6a73c6b97196", - 
"sha256:821eedb7165ead9eebdb569986968b541f9908979c2da8a4967ecac4439bae3d", - "sha256:84554fc53daa8f6abf8e8a66e076aff6ece62de68523d9f665f32d2fc50fd66e", - "sha256:8d83bb187fb647643bd56e1ae43f273c7f4dbcdf94550d7938cfc32566756514", - "sha256:903703372d46bce88b6920a0cd86c3ad82dae2dbef157b5fc01b70ea1cfc430f", - "sha256:9416a5c2e92ace094e9f0082c5fd473502c91651fb896bc17690d6fc475128d6", - "sha256:9a1712c015831da583b21c5bfe15e8684137097969c6d22e8316ba66b5baabe4", - "sha256:9c27f0946a3536403efb0e1c28def1ae6730a72cd0d5878db38824855e3afc44", - "sha256:a356364941fb0593bb899a1076b92dfa2029f6f5b8ba88a14fd0984aaf76d0df", - "sha256:a7039a136017eaa92c1848152827e1424701532ca8e8967fe480fe1569dae581", - "sha256:acd3a644e4807e73b4e1867b769fbf1ce8c5d80e7caaef0d90dcdc640dfc9787", - "sha256:ad0c86f3455fbd0de6c31a3056eb822fc939f81b1618f10ff3406971893b62a5", - "sha256:b4c76e3d4c56f145d41b7b6751255feefae92edbc9a61e1758a98204200f30fc", - "sha256:b6f6a8f45d0313db07d6d1d37bd0b112f887e1369758a5419c0370ba915b3871", - "sha256:c5a59996dc61835133b56a32ebe4ef3740ea5bc19b3983ac60cc32be5a665d54", - "sha256:c73aafd1afca80afecb22718f8700b40ac7cab927b8abab3c3e337d70e10e5a2", - "sha256:cee6cc0584f71adefe2c908856ccc98702baf95ff80092e4ca46061538a2ba98", - "sha256:cef04d068f5fb0518a77857953193b6bb94809a806bd0a14983a8f12ada060c9", - "sha256:cf5d1c9e6837f8af9f92b6bd3e86d513cdc11f60fd62185cc49ec7d1aba34864", - "sha256:e61155fae27570692ad1d327e81c6cf27d535a5d7ef97648a17d922224b216de", - "sha256:e7f387600d424f91576af20518334df3d97bc76a300a755f9a8d6e4f5cadd289", - "sha256:ed08d2703b5972ec736451b818c2eb9da80d66c3e84aed1deeb0c345fefe461b", - "sha256:fbd6acc766814ea6443628f4e6751d0da6593dae29c08c0b2606164db026970c", - "sha256:feff59f27338135776f6d4e2ec7aeeac5d5f7a08a83e80869121ef8164b74af9" + "sha256:08458fbf403bff5e2b45f08eda195d4b0c9b35682311da5a5a0a0925b11b9bd8", + "sha256:0fbb536eac80e27a2793ffd787895242b7f18ef792563d742c2d673bfcb75134", + "sha256:12f5d865d60fb9734e60a60f1d5afa6d962d8d4467c120a1c0cda6eb2964437d", 
+ "sha256:15eb4eca47d36ec3f78cde0a3a2ee24cf05ca7396ef808dda2c0ddad7c2bde67", + "sha256:173a00b9995f73b79eb0191129f2455f1e34c203f559dd118636858cc452a1bf", + "sha256:1b902ce0e0a5bb7704556a217c4f63a7974f8f43e090aff03fcf262e0b135e02", + "sha256:1f682ea61a88479d9498bf2091fdcd722b090724b08b31d63e022adc063bad59", + "sha256:1f87fec1f9bc1efd23f4227becff04bd0e979e23ca50cc92ec88b38489db3b55", + "sha256:24a0e1befbfa14615b49ba9659d3d8818a0f4d8a1c5822af8696706fbda7310c", + "sha256:2c3a346ae20cfd80b6cfd3e60dc179963ef2ea58da5ec074fd3d9e7a1e7ba97f", + "sha256:36d3a9405fd7c511804dc56fc32974fa5533bdeb3cd1604d6b8ff1d292b819c4", + "sha256:3fdabe3e2a52bc4eff8dc7a5044342f8bd9f11ef0934fcd3289a788c0eb10018", + "sha256:4127d4303b9ac9f94ca0441138acead39928938660ca58329fe156f84b9f3015", + "sha256:4658c398d65d1b25e1760de3157011a80375da861709abd7cef3bad65d6543f9", + "sha256:485b87235796410c3519a699cfe1faab097e509e90ebb05dcd098db2ae87e7b3", + "sha256:529af13c5f4b7a932fb0e1911d3a75da204eff023ee5e0e79c1751564221a5c8", + "sha256:5a3d94942c331dd4e0e1147f7a8699a4aa47dffc11bf8a1523c12af8b2e91bbe", + "sha256:5daab361be6ddeb299a918a7c0864fa8618af66019138263247af405018b04e1", + "sha256:61728fba1e464f789b11deb78a57805c70b2ed02343560456190d0501ba37b0f", + "sha256:6790654cb13eab303d8402354fabd47472b24635700f631f041bd0b65e37298a", + "sha256:69ff563d43c69b1baba77af455dd0a839df8d25e8590e79c90fcbe1499ebde42", + "sha256:6bf4e6f4a2a2e26655717a1983ef6324f2664d7011f6ef7482e8c0b3d51e82ac", + "sha256:6e4eeb6eb2fced786e32e6d8df9e755ce5be920d17f7ce00bc38fcde8ccdbf9e", + "sha256:72dc22e9ec8f6eaa206deb1b1355eb2e253899d7347f5e2fae5f0af613741d06", + "sha256:75b4e316c5902d8163ef9d423b1c3f2f6252226d1aa5cd8a0a03a7d01ffc6268", + "sha256:7b9853803278db3bdcc6cd5beca37815b133e9e77ff3d4733c247414e78eb8d1", + "sha256:7d6fddc5fe258d3328cd8e3d7d3e02234c5d70e01ebe377a6ab92adb14039cb4", + "sha256:81b0893a39bc5b865b8bf89e9ad7807e16717f19868e9d234bdaf9b1f1393868", + 
"sha256:8efc84f01c1cd7e34b3fb310183e72fcdf55293ee736d679b6d35b35d80bba26", + "sha256:8fae4ebbf95a179c1156fab0b142b74e4ba4204c87bde8d3d8b6f9c34c5825ef", + "sha256:99d0d92a5e3613c33a5f01db206a33f8fdf3d71f2912b0de1739894668b7a93b", + "sha256:9adbd9bb520c866e1bfd7e10e1880a1f7749f1f6e5017686a5fbb9b72cf69f82", + "sha256:a1e01dcaab205fbece13c1410253a9eea1b1c9b61d237b6fa59bcc46e8e89343", + "sha256:a8fc2de81ad835d999113ddf87d1ea2b0f4704cbd947c948d2f5513deafe5a7b", + "sha256:b83e16a5511d1b1f8a88cbabb1a6f6a499f82c062a4251892d9ad5d609863fb7", + "sha256:bb2124fdc6e62baae159ebcfa368708867eb56806804d005860b6007388df171", + "sha256:bfc085b28d62ff4009364e7ca34b80a9a080cbd97c2c0630bb5f7f770dae9414", + "sha256:cbab9fc9c391700e3e1287666dfd82d8666d10e69a6c4a09ab97574c0b7ee0a7", + "sha256:e5eeca8067ad04bc8a2a8731183d51d7cbaac66d86085d5f4766ee6bf19c7f87", + "sha256:e9e81fa9017eaa416c056e5d9e71be93d05e2c3c2ab308d23307a8bc4443c368", + "sha256:ea2326a4dca88e4a274ba3a4405eb6c6467d3ffbd8c7d38632502eaae3820587", + "sha256:eacf3291e263d5a67d8c1a581a8ebbcfd6447204ef58828caf69a5e3e8c75990", + "sha256:ec87f5f8aca726117a1c9b7083e7656a9d0d606eec7299cc067bb83d26f16e0c", + "sha256:f1659887361a7151f89e79b276ed8dff3d75877df906328f14d8bb40bb4f5101", + "sha256:f9cf5ea551aec449206954b075db819f52adc1638d46a6738253a712d553c7b4" ], "index": "pypi", "markers": "python_version >= '3.9'", - "version": "==2.0.0" + "version": "==2.0.1" }, "openpyxl": { "hashes": [ @@ -1979,68 +1987,63 @@ }, "sentry-sdk": { "hashes": [ - "sha256:6051562d2cfa8087bb8b4b8b79dc44690f8a054762a29c07e22588b1f619bfb5", - "sha256:aa4314f877d9cd9add5a0c9ba18e3f27f99f7de835ce36bd150e48a41c7c646f" + "sha256:4ca16e9f5c7c6bc2fb2d5c956219f4926b148e511fffdbbde711dc94f1e0468f", + "sha256:d964710e2dbe015d9dc4ff0ad16225d68c3b36936b742a6fe0504565b760a3b7" ], "index": "pypi", "markers": "python_version >= '3.6'", - "version": "==2.8.0" + "version": "==2.11.0" }, "setuptools": { "hashes": [ - 
"sha256:b8b8060bb426838fbe942479c90296ce976249451118ef566a5a0b7d8b78fb05", - "sha256:bd63e505105011b25c3c11f753f7e3b8465ea739efddaccef8f0efac2137bac1" + "sha256:5a03e1860cf56bb6ef48ce186b0e557fdba433237481a9a625176c2831be15d1", + "sha256:8d243eff56d095e5817f796ede6ae32941278f542e0f941867cc05ae52b162ec" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==70.2.0" + "version": "==72.1.0" }, "shapely": { "hashes": [ - "sha256:011b77153906030b795791f2fdfa2d68f1a8d7e40bce78b029782ade3afe4f2f", - "sha256:03152442d311a5e85ac73b39680dd64a9892fa42bb08fd83b3bab4fe6999bfa0", - "sha256:05ffd6491e9e8958b742b0e2e7c346635033d0a5f1a0ea083547fcc854e5d5cf", - "sha256:0776c92d584f72f1e584d2e43cfc5542c2f3dd19d53f70df0900fda643f4bae6", - "sha256:263bcf0c24d7a57c80991e64ab57cba7a3906e31d2e21b455f493d4aab534aaa", - "sha256:2fbdc1140a7d08faa748256438291394967aa54b40009f54e8d9825e75ef6113", - "sha256:30982f79f21bb0ff7d7d4a4e531e3fcaa39b778584c2ce81a147f95be1cd58c9", - "sha256:31c19a668b5a1eadab82ff070b5a260478ac6ddad3a5b62295095174a8d26398", - "sha256:3f9103abd1678cb1b5f7e8e1af565a652e036844166c91ec031eeb25c5ca8af0", - "sha256:41388321a73ba1a84edd90d86ecc8bfed55e6a1e51882eafb019f45895ec0f65", - "sha256:4310b5494271e18580d61022c0857eb85d30510d88606fa3b8314790df7f367d", - "sha256:464157509ce4efa5ff285c646a38b49f8c5ef8d4b340f722685b09bb033c5ccf", - "sha256:485246fcdb93336105c29a5cfbff8a226949db37b7473c89caa26c9bae52a242", - "sha256:489c19152ec1f0e5c5e525356bcbf7e532f311bff630c9b6bc2db6f04da6a8b9", - "sha256:4f2ab0faf8188b9f99e6a273b24b97662194160cc8ca17cf9d1fb6f18d7fb93f", - "sha256:55a38dcd1cee2f298d8c2ebc60fc7d39f3b4535684a1e9e2f39a80ae88b0cea7", - "sha256:58b0ecc505bbe49a99551eea3f2e8a9b3b24b3edd2a4de1ac0dc17bc75c9ec07", - "sha256:5af4cd0d8cf2912bd95f33586600cac9c4b7c5053a036422b97cfe4728d2eb53", - "sha256:5bbd974193e2cc274312da16b189b38f5f128410f3377721cadb76b1e8ca5328", - "sha256:5c4849916f71dc44e19ed370421518c0d86cf73b26e8656192fcfcda08218fbd", - 
"sha256:5dc736127fac70009b8d309a0eeb74f3e08979e530cf7017f2f507ef62e6cfb8", - "sha256:63f3a80daf4f867bd80f5c97fbe03314348ac1b3b70fb1c0ad255a69e3749879", - "sha256:674d7baf0015a6037d5758496d550fc1946f34bfc89c1bf247cabdc415d7747e", - "sha256:6cd4ccecc5ea5abd06deeaab52fcdba372f649728050c6143cc405ee0c166679", - "sha256:790a168a808bd00ee42786b8ba883307c0e3684ebb292e0e20009588c426da47", - "sha256:7d56ce3e2a6a556b59a288771cf9d091470116867e578bebced8bfc4147fbfd7", - "sha256:841f93a0e31e4c64d62ea570d81c35de0f6cea224568b2430d832967536308e6", - "sha256:8de4578e838a9409b5b134a18ee820730e507b2d21700c14b71a2b0757396acc", - "sha256:92a41d936f7d6743f343be265ace93b7c57f5b231e21b9605716f5a47c2879e7", - "sha256:9831816a5d34d5170aa9ed32a64982c3d6f4332e7ecfe62dc97767e163cb0b17", - "sha256:994c244e004bc3cfbea96257b883c90a86e8cbd76e069718eb4c6b222a56f78b", - "sha256:9dab4c98acfb5fb85f5a20548b5c0abe9b163ad3525ee28822ffecb5c40e724c", - "sha256:b79bbd648664aa6f44ef018474ff958b6b296fed5c2d42db60078de3cffbc8aa", - "sha256:c3e700abf4a37b7b8b90532fa6ed5c38a9bfc777098bc9fbae5ec8e618ac8f30", - "sha256:c52ed79f683f721b69a10fb9e3d940a468203f5054927215586c5d49a072de8d", - "sha256:c75c98380b1ede1cae9a252c6dc247e6279403fae38c77060a5e6186c95073ac", - "sha256:d2b4431f522b277c79c34b65da128029a9955e4481462cbf7ebec23aab61fc58", - "sha256:ddf4a9bfaac643e62702ed662afc36f6abed2a88a21270e891038f9a19bc08fc", - "sha256:de0205cb21ad5ddaef607cda9a3191eadd1e7a62a756ea3a356369675230ac35", - "sha256:ec555c9d0db12d7fd777ba3f8b75044c73e576c720a851667432fabb7057da6c", - "sha256:fb5cdcbbe3080181498931b52a91a21a781a35dcb859da741c0345c6402bf00c" + "sha256:03bd7b5fa5deb44795cc0a503999d10ae9d8a22df54ae8d4a4cd2e8a93466195", + "sha256:06efe39beafde3a18a21dde169d32f315c57da962826a6d7d22630025200c5e6", + "sha256:0f8e71bb9a46814019f6644c4e2560a09d44b80100e46e371578f35eaaa9da1c", + "sha256:1b65365cfbf657604e50d15161ffcc68de5cdb22a601bbf7823540ab4918a98d", + "sha256:1e5cb5ee72f1bc7ace737c9ecd30dc174a5295fae412972d3879bac2e82c8fae", 
+ "sha256:21f64e647a025b61b19585d2247137b3a38a35314ea68c66aaf507a1c03ef6fe", + "sha256:2e119444bc27ca33e786772b81760f2028d930ac55dafe9bc50ef538b794a8e1", + "sha256:2ff9521991ed9e201c2e923da014e766c1aa04771bc93e6fe97c27dcf0d40ace", + "sha256:30e8737983c9d954cd17feb49eb169f02f1da49e24e5171122cf2c2b62d65c95", + "sha256:35110e80070d664781ec7955c7de557456b25727a0257b354830abb759bf8311", + "sha256:3ac7dc1350700c139c956b03d9c3df49a5b34aaf91d024d1510a09717ea39199", + "sha256:401cb794c5067598f50518e5a997e270cd7642c4992645479b915c503866abed", + "sha256:4461509afdb15051e73ab178fae79974387f39c47ab635a7330d7fee02c68a3f", + "sha256:45211276900c4790d6bfc6105cbf1030742da67594ea4161a9ce6812a6721e68", + "sha256:49b299b91557b04acb75e9732645428470825061f871a2edc36b9417d66c1fc5", + "sha256:4c83a36f12ec8dee2066946d98d4d841ab6512a6ed7eb742e026a64854019b5f", + "sha256:5bbfb048a74cf273db9091ff3155d373020852805a37dfc846ab71dde4be93ec", + "sha256:6c6b78c0007a34ce7144f98b7418800e0a6a5d9a762f2244b00ea560525290c9", + "sha256:7545a39c55cad1562be302d74c74586f79e07b592df8ada56b79a209731c0219", + "sha256:798090b426142df2c5258779c1d8d5734ec6942f778dab6c6c30cfe7f3bf64ff", + "sha256:7e8cf5c252fac1ea51b3162be2ec3faddedc82c256a1160fc0e8ddbec81b06d2", + "sha256:7fed9dbfbcfec2682d9a047b9699db8dcc890dfca857ecba872c42185fc9e64e", + "sha256:8203a8b2d44dcb366becbc8c3d553670320e4acf0616c39e218c9561dd738d92", + "sha256:89d34787c44f77a7d37d55ae821f3a784fa33592b9d217a45053a93ade899375", + "sha256:89e640c2cd37378480caf2eeda9a51be64201f01f786d127e78eaeff091ec897", + "sha256:8af6f7260f809c0862741ad08b1b89cb60c130ae30efab62320bbf4ee9cc71fa", + "sha256:93be600cbe2fbaa86c8eb70656369f2f7104cd231f0d6585c7d0aa555d6878b8", + "sha256:9a4492a2b2ccbeaebf181e7310d2dfff4fdd505aef59d6cb0f217607cb042fb3", + "sha256:b5870633f8e684bf6d1ae4df527ddcb6f3895f7b12bced5c13266ac04f47d231", + "sha256:b714a840402cde66fd7b663bb08cacb7211fa4412ea2a209688f671e0d0631fd", + 
"sha256:bff2366bc786bfa6cb353d6b47d0443c570c32776612e527ee47b6df63fcfe32", + "sha256:d5251c28a29012e92de01d2e84f11637eb1d48184ee8f22e2df6c8c578d26760", + "sha256:e91ee179af539100eb520281ba5394919067c6b51824e6ab132ad4b3b3e76dd0", + "sha256:f5456dd522800306ba3faef77c5ba847ec30a0bd73ab087a25e0acdd4db2514f", + "sha256:ff7731fea5face9ec08a861ed351734a79475631b7540ceb0b66fb9732a5f529", + "sha256:ff9e520af0c5a578e174bca3c18713cd47a6c6a15b6cf1f50ac17dc8bb8db6a2" ], "markers": "python_version >= '3.7'", - "version": "==2.0.4" + "version": "==2.0.5" }, "six": { "hashes": [ @@ -2059,11 +2062,11 @@ }, "sqlparse": { "hashes": [ - "sha256:714d0a4932c059d16189f58ef5411ec2287a4360f17cdd0edd2d09d4c5087c93", - "sha256:c204494cd97479d0e39f28c93d46c0b2d5959c7b9ab904762ea6c7af211c8663" + "sha256:773dcbf9a5ab44a090f3441e2180efe2560220203dc2f8c0b0fa141e18b505e4", + "sha256:bb6b4df465655ef332548e24f08e205afc81b9ab86cb1c45657a7ff173a3a00e" ], "markers": "python_version >= '3.8'", - "version": "==0.5.0" + "version": "==0.5.1" }, "static3": { "hashes": [ @@ -2150,6 +2153,14 @@ "markers": "python_version >= '3.7'", "version": "==3.7.1" }, + "appnope": { + "hashes": [ + "sha256:1de3860566df9caf38f01f86f65e0e13e379af54f9e4bee1e66b48f2efffd1ee", + "sha256:502575ee11cd7a28c0205f379b525beefebab9d161b7c964670864014ed7213c" + ], + "markers": "platform_system == 'Darwin'", + "version": "==0.1.4" + }, "argon2-cffi": { "hashes": [ "sha256:879c3e79a2729ce768ebb7d36d4609e3a78a4ca2ec3a9f12286ca057e3d0db08", @@ -2485,61 +2496,61 @@ "toml" ], "hashes": [ - "sha256:018a12985185038a5b2bcafab04ab833a9a0f2c59995b3cec07e10074c78635f", - "sha256:02ff6e898197cc1e9fa375581382b72498eb2e6d5fc0b53f03e496cfee3fac6d", - "sha256:042183de01f8b6d531e10c197f7f0315a61e8d805ab29c5f7b51a01d62782747", - "sha256:1014fbf665fef86cdfd6cb5b7371496ce35e4d2a00cda501cf9f5b9e6fced69f", - "sha256:1137f46adb28e3813dec8c01fefadcb8c614f33576f672962e323b5128d9a68d", - "sha256:16852febd96acd953b0d55fc842ce2dac1710f26729b31c80b940b9afcd9896f", 
- "sha256:2174e7c23e0a454ffe12267a10732c273243b4f2d50d07544a91198f05c48f47", - "sha256:2214ee920787d85db1b6a0bd9da5f8503ccc8fcd5814d90796c2f2493a2f4d2e", - "sha256:3257fdd8e574805f27bb5342b77bc65578e98cbc004a92232106344053f319ba", - "sha256:3684bc2ff328f935981847082ba4fdc950d58906a40eafa93510d1b54c08a66c", - "sha256:3a6612c99081d8d6134005b1354191e103ec9705d7ba2754e848211ac8cacc6b", - "sha256:3d7564cc09dd91b5a6001754a5b3c6ecc4aba6323baf33a12bd751036c998be4", - "sha256:44da56a2589b684813f86d07597fdf8a9c6ce77f58976727329272f5a01f99f7", - "sha256:5013ed890dc917cef2c9f765c4c6a8ae9df983cd60dbb635df8ed9f4ebc9f555", - "sha256:54317c2b806354cbb2dc7ac27e2b93f97096912cc16b18289c5d4e44fc663233", - "sha256:56b4eafa21c6c175b3ede004ca12c653a88b6f922494b023aeb1e836df953ace", - "sha256:581ea96f92bf71a5ec0974001f900db495488434a6928a2ca7f01eee20c23805", - "sha256:5cd64adedf3be66f8ccee418473c2916492d53cbafbfcff851cbec5a8454b136", - "sha256:5df54843b88901fdc2f598ac06737f03d71168fd1175728054c8f5a2739ac3e4", - "sha256:65e528e2e921ba8fd67d9055e6b9f9e34b21ebd6768ae1c1723f4ea6ace1234d", - "sha256:6aae5cce399a0f065da65c7bb1e8abd5c7a3043da9dceb429ebe1b289bc07806", - "sha256:6cfb5a4f556bb51aba274588200a46e4dd6b505fb1a5f8c5ae408222eb416f99", - "sha256:7076b4b3a5f6d2b5d7f1185fde25b1e54eb66e647a1dfef0e2c2bfaf9b4c88c8", - "sha256:73ca8fbc5bc622e54627314c1a6f1dfdd8db69788f3443e752c215f29fa87a0b", - "sha256:79b356f3dd5b26f3ad23b35c75dbdaf1f9e2450b6bcefc6d0825ea0aa3f86ca5", - "sha256:7a892be37ca35eb5019ec85402c3371b0f7cda5ab5056023a7f13da0961e60da", - "sha256:8192794d120167e2a64721d88dbd688584675e86e15d0569599257566dec9bf0", - "sha256:820bc841faa502e727a48311948e0461132a9c8baa42f6b2b84a29ced24cc078", - "sha256:8f894208794b164e6bd4bba61fc98bf6b06be4d390cf2daacfa6eca0a6d2bb4f", - "sha256:a04e990a2a41740b02d6182b498ee9796cf60eefe40cf859b016650147908029", - "sha256:a44963520b069e12789d0faea4e9fdb1e410cdc4aab89d94f7f55cbb7fef0353", - 
"sha256:a6bb74ed465d5fb204b2ec41d79bcd28afccf817de721e8a807d5141c3426638", - "sha256:ab73b35e8d109bffbda9a3e91c64e29fe26e03e49addf5b43d85fc426dde11f9", - "sha256:aea072a941b033813f5e4814541fc265a5c12ed9720daef11ca516aeacd3bd7f", - "sha256:b1ccf5e728ccf83acd313c89f07c22d70d6c375a9c6f339233dcf792094bcbf7", - "sha256:b385d49609f8e9efc885790a5a0e89f2e3ae042cdf12958b6034cc442de428d3", - "sha256:b3d45ff86efb129c599a3b287ae2e44c1e281ae0f9a9bad0edc202179bcc3a2e", - "sha256:b4a474f799456e0eb46d78ab07303286a84a3140e9700b9e154cfebc8f527016", - "sha256:b95c3a8cb0463ba9f77383d0fa8c9194cf91f64445a63fc26fb2327e1e1eb088", - "sha256:c5986ee7ea0795a4095ac4d113cbb3448601efca7f158ec7f7087a6c705304e4", - "sha256:cdd31315fc20868c194130de9ee6bfd99755cc9565edff98ecc12585b90be882", - "sha256:cef4649ec906ea7ea5e9e796e68b987f83fa9a718514fe147f538cfeda76d7a7", - "sha256:d05c16cf4b4c2fc880cb12ba4c9b526e9e5d5bb1d81313d4d732a5b9fe2b9d53", - "sha256:d2e344d6adc8ef81c5a233d3a57b3c7d5181f40e79e05e1c143da143ccb6377d", - "sha256:d45d3cbd94159c468b9b8c5a556e3f6b81a8d1af2a92b77320e887c3e7a5d080", - "sha256:db14f552ac38f10758ad14dd7b983dbab424e731588d300c7db25b6f89e335b5", - "sha256:dbc5958cb471e5a5af41b0ddaea96a37e74ed289535e8deca404811f6cb0bc3d", - "sha256:ddbd2f9713a79e8e7242d7c51f1929611e991d855f414ca9996c20e44a895f7c", - "sha256:e16f3d6b491c48c5ae726308e6ab1e18ee830b4cdd6913f2d7f77354b33f91c8", - "sha256:e2afe743289273209c992075a5a4913e8d007d569a406ffed0bd080ea02b0633", - "sha256:e564c2cf45d2f44a9da56f4e3a26b2236504a496eb4cb0ca7221cd4cc7a9aca9", - "sha256:ed550e7442f278af76d9d65af48069f1fb84c9f745ae249c1a183c1e9d1b025c" + "sha256:0086cd4fc71b7d485ac93ca4239c8f75732c2ae3ba83f6be1c9be59d9e2c6382", + "sha256:01c322ef2bbe15057bc4bf132b525b7e3f7206f071799eb8aa6ad1940bcf5fb1", + "sha256:03cafe82c1b32b770a29fd6de923625ccac3185a54a5e66606da26d105f37dac", + "sha256:044a0985a4f25b335882b0966625270a8d9db3d3409ddc49a4eb00b0ef5e8cee", + "sha256:07ed352205574aad067482e53dd606926afebcb5590653121063fbf4e2175166", 
+ "sha256:0d1b923fc4a40c5832be4f35a5dab0e5ff89cddf83bb4174499e02ea089daf57", + "sha256:0e7b27d04131c46e6894f23a4ae186a6a2207209a05df5b6ad4caee6d54a222c", + "sha256:1fad32ee9b27350687035cb5fdf9145bc9cf0a094a9577d43e909948ebcfa27b", + "sha256:289cc803fa1dc901f84701ac10c9ee873619320f2f9aff38794db4a4a0268d51", + "sha256:3c59105f8d58ce500f348c5b56163a4113a440dad6daa2294b5052a10db866da", + "sha256:46c3d091059ad0b9c59d1034de74a7f36dcfa7f6d3bde782c49deb42438f2450", + "sha256:482855914928c8175735a2a59c8dc5806cf7d8f032e4820d52e845d1f731dca2", + "sha256:49c76cdfa13015c4560702574bad67f0e15ca5a2872c6a125f6327ead2b731dd", + "sha256:4b03741e70fb811d1a9a1d75355cf391f274ed85847f4b78e35459899f57af4d", + "sha256:4bea27c4269234e06f621f3fac3925f56ff34bc14521484b8f66a580aacc2e7d", + "sha256:4d5fae0a22dc86259dee66f2cc6c1d3e490c4a1214d7daa2a93d07491c5c04b6", + "sha256:543ef9179bc55edfd895154a51792b01c017c87af0ebaae092720152e19e42ca", + "sha256:54dece71673b3187c86226c3ca793c5f891f9fc3d8aa183f2e3653da18566169", + "sha256:6379688fb4cfa921ae349c76eb1a9ab26b65f32b03d46bb0eed841fd4cb6afb1", + "sha256:65fa405b837060db569a61ec368b74688f429b32fa47a8929a7a2f9b47183713", + "sha256:6616d1c9bf1e3faea78711ee42a8b972367d82ceae233ec0ac61cc7fec09fa6b", + "sha256:6fe885135c8a479d3e37a7aae61cbd3a0fb2deccb4dda3c25f92a49189f766d6", + "sha256:7221f9ac9dad9492cecab6f676b3eaf9185141539d5c9689d13fd6b0d7de840c", + "sha256:76d5f82213aa78098b9b964ea89de4617e70e0d43e97900c2778a50856dac605", + "sha256:7792f0ab20df8071d669d929c75c97fecfa6bcab82c10ee4adb91c7a54055463", + "sha256:831b476d79408ab6ccfadaaf199906c833f02fdb32c9ab907b1d4aa0713cfa3b", + "sha256:9146579352d7b5f6412735d0f203bbd8d00113a680b66565e205bc605ef81bc6", + "sha256:9cc44bf0315268e253bf563f3560e6c004efe38f76db03a1558274a6e04bf5d5", + "sha256:a73d18625f6a8a1cbb11eadc1d03929f9510f4131879288e3f7922097a429f63", + "sha256:a8659fd33ee9e6ca03950cfdcdf271d645cf681609153f218826dd9805ab585c", + 
"sha256:a94925102c89247530ae1dab7dc02c690942566f22e189cbd53579b0693c0783", + "sha256:ad4567d6c334c46046d1c4c20024de2a1c3abc626817ae21ae3da600f5779b44", + "sha256:b2e16f4cd2bc4d88ba30ca2d3bbf2f21f00f382cf4e1ce3b1ddc96c634bc48ca", + "sha256:bbdf9a72403110a3bdae77948b8011f644571311c2fb35ee15f0f10a8fc082e8", + "sha256:beb08e8508e53a568811016e59f3234d29c2583f6b6e28572f0954a6b4f7e03d", + "sha256:c4cbe651f3904e28f3a55d6f371203049034b4ddbce65a54527a3f189ca3b390", + "sha256:c7b525ab52ce18c57ae232ba6f7010297a87ced82a2383b1afd238849c1ff933", + "sha256:ca5d79cfdae420a1d52bf177de4bc2289c321d6c961ae321503b2ca59c17ae67", + "sha256:cdab02a0a941af190df8782aafc591ef3ad08824f97850b015c8c6a8b3877b0b", + "sha256:d17c6a415d68cfe1091d3296ba5749d3d8696e42c37fca5d4860c5bf7b729f03", + "sha256:d39bd10f0ae453554798b125d2f39884290c480f56e8a02ba7a6ed552005243b", + "sha256:d4b3cd1ca7cd73d229487fa5caca9e4bc1f0bca96526b922d61053ea751fe791", + "sha256:d50a252b23b9b4dfeefc1f663c568a221092cbaded20a05a11665d0dbec9b8fb", + "sha256:da8549d17489cd52f85a9829d0e1d91059359b3c54a26f28bec2c5d369524807", + "sha256:dcd070b5b585b50e6617e8972f3fbbee786afca71b1936ac06257f7e178f00f6", + "sha256:ddaaa91bfc4477d2871442bbf30a125e8fe6b05da8a0015507bfbf4718228ab2", + "sha256:df423f351b162a702c053d5dddc0fc0ef9a9e27ea3f449781ace5f906b664428", + "sha256:dff044f661f59dace805eedb4a7404c573b6ff0cdba4a524141bc63d7be5c7fd", + "sha256:e7e128f85c0b419907d1f38e616c4f1e9f1d1b37a7949f44df9a73d5da5cd53c", + "sha256:ed8d1d1821ba5fc88d4a4f45387b65de52382fa3ef1f0115a4f7a20cdfab0e94", + "sha256:f2501d60d7497fd55e391f423f965bbe9e650e9ffc3c627d5f0ac516026000b8", + "sha256:f7db0b6ae1f96ae41afe626095149ecd1b212b424626175a6633c2999eaad45b" ], "markers": "python_version >= '3.8'", - "version": "==7.5.4" + "version": "==7.6.0" }, "cryptography": { "hashes": [ @@ -2638,21 +2649,21 @@ }, "django": { "hashes": [ - "sha256:837e3cf1f6c31347a1396a3f6b65688f2b4bb4a11c580dcb628b5afe527b68a5", - 
"sha256:a17fcba2aad3fc7d46fdb23215095dbbd64e6174bf4589171e732b18b07e426a" + "sha256:3ec32bc2c616ab02834b9cac93143a7dc1cdcd5b822d78ac95fc20a38c534240", + "sha256:fc6919875a6226c7ffcae1a7d51e0f2ceaf6f160393180818f6c95f51b1e7b96" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==4.2.13" + "version": "==4.2.14" }, "django-debug-toolbar": { "hashes": [ - "sha256:8298ce966b4c8fc71430082dd4739ef2badb5f867734e1973a413c4ab2ea81b7", - "sha256:91425606673ee674d780f7aeedf3595c264eb382dcf41f55c6779577900904c0" + "sha256:36e421cb908c2f0675e07f9f41e3d1d8618dc386392ec82d23bcfcd5d29c7044", + "sha256:3beb671c9ec44ffb817fad2780667f172bd1c067dbcabad6268ce39a81335f45" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==4.4.5" + "version": "==4.4.6" }, "django-extensions": { "hashes": [ @@ -2744,70 +2755,6 @@ "markers": "python_version >= '3.8'", "version": "==1.29.0" }, - "greenlet": { - "hashes": [ - "sha256:01bc7ea167cf943b4c802068e178bbf70ae2e8c080467070d01bfa02f337ee67", - "sha256:0448abc479fab28b00cb472d278828b3ccca164531daab4e970a0458786055d6", - "sha256:086152f8fbc5955df88382e8a75984e2bb1c892ad2e3c80a2508954e52295257", - "sha256:098d86f528c855ead3479afe84b49242e174ed262456c342d70fc7f972bc13c4", - "sha256:149e94a2dd82d19838fe4b2259f1b6b9957d5ba1b25640d2380bea9c5df37676", - "sha256:1551a8195c0d4a68fac7a4325efac0d541b48def35feb49d803674ac32582f61", - "sha256:15d79dd26056573940fcb8c7413d84118086f2ec1a8acdfa854631084393efcc", - "sha256:1996cb9306c8595335bb157d133daf5cf9f693ef413e7673cb07e3e5871379ca", - "sha256:1a7191e42732df52cb5f39d3527217e7ab73cae2cb3694d241e18f53d84ea9a7", - "sha256:1ea188d4f49089fc6fb283845ab18a2518d279c7cd9da1065d7a84e991748728", - "sha256:1f672519db1796ca0d8753f9e78ec02355e862d0998193038c7073045899f305", - "sha256:2516a9957eed41dd8f1ec0c604f1cdc86758b587d964668b5b196a9db5bfcde6", - "sha256:2797aa5aedac23af156bbb5a6aa2cd3427ada2972c828244eb7d1b9255846379", - 
"sha256:2dd6e660effd852586b6a8478a1d244b8dc90ab5b1321751d2ea15deb49ed414", - "sha256:3ddc0f794e6ad661e321caa8d2f0a55ce01213c74722587256fb6566049a8b04", - "sha256:3ed7fb269f15dc662787f4119ec300ad0702fa1b19d2135a37c2c4de6fadfd4a", - "sha256:419b386f84949bf0e7c73e6032e3457b82a787c1ab4a0e43732898a761cc9dbf", - "sha256:43374442353259554ce33599da8b692d5aa96f8976d567d4badf263371fbe491", - "sha256:52f59dd9c96ad2fc0d5724107444f76eb20aaccb675bf825df6435acb7703559", - "sha256:57e8974f23e47dac22b83436bdcf23080ade568ce77df33159e019d161ce1d1e", - "sha256:5b51e85cb5ceda94e79d019ed36b35386e8c37d22f07d6a751cb659b180d5274", - "sha256:649dde7de1a5eceb258f9cb00bdf50e978c9db1b996964cd80703614c86495eb", - "sha256:64d7675ad83578e3fc149b617a444fab8efdafc9385471f868eb5ff83e446b8b", - "sha256:68834da854554926fbedd38c76e60c4a2e3198c6fbed520b106a8986445caaf9", - "sha256:6b66c9c1e7ccabad3a7d037b2bcb740122a7b17a53734b7d72a344ce39882a1b", - "sha256:70fb482fdf2c707765ab5f0b6655e9cfcf3780d8d87355a063547b41177599be", - "sha256:7170375bcc99f1a2fbd9c306f5be8764eaf3ac6b5cb968862cad4c7057756506", - "sha256:73a411ef564e0e097dbe7e866bb2dda0f027e072b04da387282b02c308807405", - "sha256:77457465d89b8263bca14759d7c1684df840b6811b2499838cc5b040a8b5b113", - "sha256:7f362975f2d179f9e26928c5b517524e89dd48530a0202570d55ad6ca5d8a56f", - "sha256:81bb9c6d52e8321f09c3d165b2a78c680506d9af285bfccbad9fb7ad5a5da3e5", - "sha256:881b7db1ebff4ba09aaaeae6aa491daeb226c8150fc20e836ad00041bcb11230", - "sha256:894393ce10ceac937e56ec00bb71c4c2f8209ad516e96033e4b3b1de270e200d", - "sha256:99bf650dc5d69546e076f413a87481ee1d2d09aaaaaca058c9251b6d8c14783f", - "sha256:9da2bd29ed9e4f15955dd1595ad7bc9320308a3b766ef7f837e23ad4b4aac31a", - "sha256:afaff6cf5200befd5cec055b07d1c0a5a06c040fe5ad148abcd11ba6ab9b114e", - "sha256:b1b5667cced97081bf57b8fa1d6bfca67814b0afd38208d52538316e9422fc61", - "sha256:b37eef18ea55f2ffd8f00ff8fe7c8d3818abd3e25fb73fae2ca3b672e333a7a6", - "sha256:b542be2440edc2d48547b5923c408cbe0fc94afb9f18741faa6ae970dbcb9b6d", 
- "sha256:b7dcbe92cc99f08c8dd11f930de4d99ef756c3591a5377d1d9cd7dd5e896da71", - "sha256:b7f009caad047246ed379e1c4dbcb8b020f0a390667ea74d2387be2998f58a22", - "sha256:bba5387a6975598857d86de9eac14210a49d554a77eb8261cc68b7d082f78ce2", - "sha256:c5e1536de2aad7bf62e27baf79225d0d64360d4168cf2e6becb91baf1ed074f3", - "sha256:c5ee858cfe08f34712f548c3c363e807e7186f03ad7a5039ebadb29e8c6be067", - "sha256:c9db1c18f0eaad2f804728c67d6c610778456e3e1cc4ab4bbd5eeb8e6053c6fc", - "sha256:d353cadd6083fdb056bb46ed07e4340b0869c305c8ca54ef9da3421acbdf6881", - "sha256:d46677c85c5ba00a9cb6f7a00b2bfa6f812192d2c9f7d9c4f6a55b60216712f3", - "sha256:d4d1ac74f5c0c0524e4a24335350edad7e5f03b9532da7ea4d3c54d527784f2e", - "sha256:d73a9fe764d77f87f8ec26a0c85144d6a951a6c438dfe50487df5595c6373eac", - "sha256:da70d4d51c8b306bb7a031d5cff6cc25ad253affe89b70352af5f1cb68e74b53", - "sha256:daf3cb43b7cf2ba96d614252ce1684c1bccee6b2183a01328c98d36fcd7d5cb0", - "sha256:dca1e2f3ca00b84a396bc1bce13dd21f680f035314d2379c4160c98153b2059b", - "sha256:dd4f49ae60e10adbc94b45c0b5e6a179acc1736cf7a90160b404076ee283cf83", - "sha256:e1f145462f1fa6e4a4ae3c0f782e580ce44d57c8f2c7aae1b6fa88c0b2efdb41", - "sha256:e3391d1e16e2a5a1507d83e4a8b100f4ee626e8eca43cf2cadb543de69827c4c", - "sha256:fcd2469d6a2cf298f198f0487e0a5b1a47a42ca0fa4dfd1b6862c999f018ebbf", - "sha256:fd096eb7ffef17c456cfa587523c5f92321ae02427ff955bebe9e3c63bc9f0da", - "sha256:fe754d231288e1e64323cfad462fcee8f0288654c10bdf4f603a39ed923bef33" - ], - "markers": "python_version < '3.13' and platform_machine == 'aarch64' or (platform_machine == 'ppc64le' or (platform_machine == 'x86_64' or (platform_machine == 'amd64' or (platform_machine == 'AMD64' or (platform_machine == 'win32' or platform_machine == 'WIN32')))))", - "version": "==3.0.3" - }, "html-tag-names": { "hashes": [ "sha256:04924aca48770f36b5a41c27e4d917062507be05118acb0ba869c97389084297", @@ -2858,12 +2805,12 @@ }, "ipyleaflet": { "hashes": [ - 
"sha256:3454dbc0d360150516ea2a3ab4109503ceb9b7347fa1746a6cafd889075a0ff8", - "sha256:ffb90f67576c22c85438490bb32af1f30de88993bfb5dc167503e3a278252139" + "sha256:7cc9157848baca2e1793b96e79f8bdb1aa7340521d2b7d8a62aa8bc30eab5278", + "sha256:b3b83fe3460e742964c2a5924ea7934365a3749bb75310ce388d45fd751372d2" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==0.19.1" + "version": "==0.19.2" }, "ipython": { "hashes": [ @@ -2938,11 +2885,11 @@ }, "jsonschema": { "hashes": [ - "sha256:5b22d434a45935119af990552c862e5d6d564e8f6601206b305a61fdf661a2b7", - "sha256:ff4cfd6b1367a40e7bc6411caec72effadd3db0bbe5017de188f2d6108335802" + "sha256:d71497fef26351a33265337fa77ffeb82423f3ea21283cd9467bb03999266bc4", + "sha256:fbadb6f8b144a8f8cf9f0b89ba94501d143e50411a1278633f56a7acf7fd5566" ], "markers": "python_version >= '3.8'", - "version": "==4.22.0" + "version": "==4.23.0" }, "jsonschema-specifications": { "hashes": [ @@ -2988,11 +2935,11 @@ }, "jupyter-leaflet": { "hashes": [ - "sha256:8001a7304e9262394b8f896003539438467bed712bb9330dd65785bd9a5f8add", - "sha256:f4c1ab7a8b2c91d01a0940d1bab6543c57e91dca6425789632780eef7d58b266" + "sha256:0d57e15e80c08a4360f0cde0b4c490beddc5d422bb0e9bc1c0b4479d3fb725a6", + "sha256:b09b5ba48b1488cb61da37a6f558347269eb53ff6d64dc1a73e005ffc4420063" ], "markers": "python_version >= '3.8'", - "version": "==0.19.1" + "version": "==0.19.2" }, "jupyter-server": { "hashes": [ @@ -3126,37 +3073,37 @@ }, "mypy": { "hashes": [ - "sha256:0cd62192a4a32b77ceb31272d9e74d23cd88c8060c34d1d3622db3267679a5d9", - "sha256:1b3a2ffce52cc4dbaeee4df762f20a2905aa171ef157b82192f2e2f368eec05d", - "sha256:1f8f492d7db9e3593ef42d4f115f04e556130f2819ad33ab84551403e97dd4c0", - "sha256:2189ff1e39db399f08205e22a797383613ce1cb0cb3b13d8bcf0170e45b96cc3", - "sha256:378c03f53f10bbdd55ca94e46ec3ba255279706a6aacaecac52ad248f98205d3", - "sha256:37fd87cab83f09842653f08de066ee68f1182b9b5282e4634cdb4b407266bade", - 
"sha256:3c4c2992f6ea46ff7fce0072642cfb62af7a2484efe69017ed8b095f7b39ef31", - "sha256:51a46974340baaa4145363b9e051812a2446cf583dfaeba124af966fa44593f7", - "sha256:5bb9cd11c01c8606a9d0b83ffa91d0b236a0e91bc4126d9ba9ce62906ada868e", - "sha256:5cc3ca0a244eb9a5249c7c583ad9a7e881aa5d7b73c35652296ddcdb33b2b9c7", - "sha256:604282c886497645ffb87b8f35a57ec773a4a2721161e709a4422c1636ddde5c", - "sha256:6166a88b15f1759f94a46fa474c7b1b05d134b1b61fca627dd7335454cc9aa6b", - "sha256:6bacf8f3a3d7d849f40ca6caea5c055122efe70e81480c8328ad29c55c69e93e", - "sha256:6be84c06e6abd72f960ba9a71561c14137a583093ffcf9bbfaf5e613d63fa531", - "sha256:701b5f71413f1e9855566a34d6e9d12624e9e0a8818a5704d74d6b0402e66c04", - "sha256:71d8ac0b906354ebda8ef1673e5fde785936ac1f29ff6987c7483cfbd5a4235a", - "sha256:8addf6313777dbb92e9564c5d32ec122bf2c6c39d683ea64de6a1fd98b90fe37", - "sha256:901c89c2d67bba57aaaca91ccdb659aa3a312de67f23b9dfb059727cce2e2e0a", - "sha256:97a131ee36ac37ce9581f4220311247ab6cba896b4395b9c87af0675a13a755f", - "sha256:a1bbb3a6f5ff319d2b9d40b4080d46cd639abe3516d5a62c070cf0114a457d84", - "sha256:a2cbc68cb9e943ac0814c13e2452d2046c2f2b23ff0278e26599224cf164e78d", - "sha256:b8edd4e9bbbc9d7b79502eb9592cab808585516ae1bcc1446eb9122656c6066f", - "sha256:bd6f629b67bb43dc0d9211ee98b96d8dabc97b1ad38b9b25f5e4c4d7569a0c6a", - "sha256:c2ae450d60d7d020d67ab440c6e3fae375809988119817214440033f26ddf7bf", - "sha256:d8681909f7b44d0b7b86e653ca152d6dff0eb5eb41694e163c6092124f8246d7", - "sha256:e36f229acfe250dc660790840916eb49726c928e8ce10fbdf90715090fe4ae02", - "sha256:fe85ed6836165d52ae8b88f99527d3d1b2362e0cb90b005409b8bed90e9059b3" + "sha256:0bea2a0e71c2a375c9fa0ede3d98324214d67b3cbbfcbd55ac8f750f85a414e3", + "sha256:104e9c1620c2675420abd1f6c44bab7dd33cc85aea751c985006e83dcd001095", + "sha256:14f9294528b5f5cf96c721f231c9f5b2733164e02c1c018ed1a0eff8a18005ac", + "sha256:1a5d8d8dd8613a3e2be3eae829ee891b6b2de6302f24766ff06cb2875f5be9c6", + "sha256:1d44c1e44a8be986b54b09f15f2c1a66368eb43861b4e82573026e04c48a9e20", 
+ "sha256:25bcfa75b9b5a5f8d67147a54ea97ed63a653995a82798221cca2a315c0238c1", + "sha256:35ce88b8ed3a759634cb4eb646d002c4cef0a38f20565ee82b5023558eb90c00", + "sha256:56913ec8c7638b0091ef4da6fcc9136896914a9d60d54670a75880c3e5b99ace", + "sha256:65f190a6349dec29c8d1a1cd4aa71284177aee5949e0502e6379b42873eddbe7", + "sha256:6801319fe76c3f3a3833f2b5af7bd2c17bb93c00026a2a1b924e6762f5b19e13", + "sha256:72596a79bbfb195fd41405cffa18210af3811beb91ff946dbcb7368240eed6be", + "sha256:93743608c7348772fdc717af4aeee1997293a1ad04bc0ea6efa15bf65385c538", + "sha256:940bfff7283c267ae6522ef926a7887305945f716a7704d3344d6d07f02df850", + "sha256:96f8dbc2c85046c81bcddc246232d500ad729cb720da4e20fce3b542cab91287", + "sha256:98790025861cb2c3db8c2f5ad10fc8c336ed2a55f4daf1b8b3f877826b6ff2eb", + "sha256:a3824187c99b893f90c845bab405a585d1ced4ff55421fdf5c84cb7710995229", + "sha256:a83ec98ae12d51c252be61521aa5731f5512231d0b738b4cb2498344f0b840cd", + "sha256:becc9111ca572b04e7e77131bc708480cc88a911adf3d0239f974c034b78085c", + "sha256:c1a184c64521dc549324ec6ef7cbaa6b351912be9cb5edb803c2808a0d7e85ac", + "sha256:c7b73a856522417beb78e0fb6d33ef89474e7a622db2653bc1285af36e2e3e3d", + "sha256:cea3d0fb69637944dd321f41bc896e11d0fb0b0aa531d887a6da70f6e7473aba", + "sha256:d2b3d36baac48e40e3064d2901f2fbd2a2d6880ec6ce6358825c85031d7c0d4d", + "sha256:d7b54c27783991399046837df5c7c9d325d921394757d09dbcbf96aee4649fe9", + "sha256:d8e2e43977f0e09f149ea69fd0556623919f816764e26d74da0c8a7b48f3e18a", + "sha256:dbe286303241fea8c2ea5466f6e0e6a046a135a7e7609167b07fd4e7baf151bf", + "sha256:f006e955718ecd8d159cee9932b64fba8f86ee6f7728ca3ac66c3a54b0062abe", + "sha256:f2268d9fcd9686b61ab64f077be7ffbc6fbcdfb4103e5dd0cc5eaab53a8886c2" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==1.10.1" + "version": "==1.11.0" }, "mypy-extensions": { "hashes": [ @@ -3305,21 +3252,21 @@ }, "pre-commit": { "hashes": [ - "sha256:8ca3ad567bc78a4972a3f1a477e94a79d4597e8140a6e0b651c5e33899c3654a", - 
"sha256:fae36fd1d7ad7d6a5a1c0b0d5adb2ed1a3bda5a21bf6c3e5372073d7a11cd4c5" + "sha256:8bb6494d4a20423842e198980c9ecf9f96607a07ea29549e180eef9ae80fe7af", + "sha256:9a90a53bf82fdd8778d58085faf8d83df56e40dfe18f45b19446e26bf1b3a63f" ], "index": "pypi", "markers": "python_version >= '3.9'", - "version": "==3.7.1" + "version": "==3.8.0" }, "prettytable": { "hashes": [ - "sha256:6536efaf0757fdaa7d22e78b3aac3b69ea1b7200538c2c6995d649365bddab92", - "sha256:9665594d137fb08a1117518c25551e0ede1687197cf353a4fdc78d27e1073568" + "sha256:1cbfdeb4bcc73976a778a0fb33cb6d752e75396f16574dcb3e2d6332fd93c76a", + "sha256:29ec6c34260191d42cd4928c28d56adec360ac2b1208a26c7e4f14b90cc8bc84" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==3.10.0" + "version": "==3.10.2" }, "prometheus-client": { "hashes": [ @@ -3370,10 +3317,10 @@ }, "pure-eval": { "hashes": [ - "sha256:01eaab343580944bc56080ebe0a674b39ec44a945e6d09ba7db3cb8cec289350", - "sha256:2b45320af6dfaa1750f543d714b6d1c520a1688dec6fd24d339063ce0aaa9ac3" + "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0", + "sha256:5f4e983f40564c576c7c8635ae88db5956bb2229d7e9237d03b3c0b0190eaf42" ], - "version": "==0.2.2" + "version": "==0.2.3" }, "pycodestyle": { "hashes": [ @@ -3425,12 +3372,12 @@ }, "pytest": { "hashes": [ - "sha256:c434598117762e2bd304e526244f67bf66bbd7b5d6cf22138be51ff661980343", - "sha256:de4bb8104e201939ccdc688b27a89a7be2079b22e2bd2b07f806b6ba71117977" + "sha256:4ba08f9ae7dcf84ded419494d229b48d0903ea6407b030eaec46df5e6a73bba5", + "sha256:c132345d12ce551242c87269de812483f5bcc87cdbb4722e48487ba194f9fdce" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==8.2.2" + "version": "==8.3.2" }, "pytest-cov": { "hashes": [ @@ -3776,108 +3723,112 @@ }, "rpds-py": { "hashes": [ - "sha256:05f3d615099bd9b13ecf2fc9cf2d839ad3f20239c678f461c753e93755d629ee", - "sha256:06d218939e1bf2ca50e6b0ec700ffe755e5216a8230ab3e87c059ebb4ea06afc", - 
"sha256:07f2139741e5deb2c5154a7b9629bc5aa48c766b643c1a6750d16f865a82c5fc", - "sha256:08d74b184f9ab6289b87b19fe6a6d1a97fbfea84b8a3e745e87a5de3029bf944", - "sha256:0abeee75434e2ee2d142d650d1e54ac1f8b01e6e6abdde8ffd6eeac6e9c38e20", - "sha256:154bf5c93d79558b44e5b50cc354aa0459e518e83677791e6adb0b039b7aa6a7", - "sha256:17c6d2155e2423f7e79e3bb18151c686d40db42d8645e7977442170c360194d4", - "sha256:1805d5901779662d599d0e2e4159d8a82c0b05faa86ef9222bf974572286b2b6", - "sha256:19ba472b9606c36716062c023afa2484d1e4220548751bda14f725a7de17b4f6", - "sha256:19e515b78c3fc1039dd7da0a33c28c3154458f947f4dc198d3c72db2b6b5dc93", - "sha256:1d54f74f40b1f7aaa595a02ff42ef38ca654b1469bef7d52867da474243cc633", - "sha256:207c82978115baa1fd8d706d720b4a4d2b0913df1c78c85ba73fe6c5804505f0", - "sha256:2625f03b105328729f9450c8badda34d5243231eef6535f80064d57035738360", - "sha256:27bba383e8c5231cd559affe169ca0b96ec78d39909ffd817f28b166d7ddd4d8", - "sha256:2c3caec4ec5cd1d18e5dd6ae5194d24ed12785212a90b37f5f7f06b8bedd7139", - "sha256:2cc7c1a47f3a63282ab0f422d90ddac4aa3034e39fc66a559ab93041e6505da7", - "sha256:2fc24a329a717f9e2448f8cd1f960f9dac4e45b6224d60734edeb67499bab03a", - "sha256:312fe69b4fe1ffbe76520a7676b1e5ac06ddf7826d764cc10265c3b53f96dbe9", - "sha256:32b7daaa3e9389db3695964ce8e566e3413b0c43e3394c05e4b243a4cd7bef26", - "sha256:338dee44b0cef8b70fd2ef54b4e09bb1b97fc6c3a58fea5db6cc083fd9fc2724", - "sha256:352a88dc7892f1da66b6027af06a2e7e5d53fe05924cc2cfc56495b586a10b72", - "sha256:35b2b771b13eee8729a5049c976197ff58a27a3829c018a04341bcf1ae409b2b", - "sha256:38e14fb4e370885c4ecd734f093a2225ee52dc384b86fa55fe3f74638b2cfb09", - "sha256:3c20f05e8e3d4fc76875fc9cb8cf24b90a63f5a1b4c5b9273f0e8225e169b100", - "sha256:3dd3cd86e1db5aadd334e011eba4e29d37a104b403e8ca24dcd6703c68ca55b3", - "sha256:489bdfe1abd0406eba6b3bb4fdc87c7fa40f1031de073d0cfb744634cc8fa261", - "sha256:48c2faaa8adfacefcbfdb5f2e2e7bdad081e5ace8d182e5f4ade971f128e6bb3", - "sha256:4a98a1f0552b5f227a3d6422dbd61bc6f30db170939bd87ed14f3c339aa6c7c9", 
- "sha256:4adec039b8e2928983f885c53b7cc4cda8965b62b6596501a0308d2703f8af1b", - "sha256:4e0ee01ad8260184db21468a6e1c37afa0529acc12c3a697ee498d3c2c4dcaf3", - "sha256:51584acc5916212e1bf45edd17f3a6b05fe0cbb40482d25e619f824dccb679de", - "sha256:531796fb842b53f2695e94dc338929e9f9dbf473b64710c28af5a160b2a8927d", - "sha256:5463c47c08630007dc0fe99fb480ea4f34a89712410592380425a9b4e1611d8e", - "sha256:5c45a639e93a0c5d4b788b2613bd637468edd62f8f95ebc6fcc303d58ab3f0a8", - "sha256:6031b25fb1b06327b43d841f33842b383beba399884f8228a6bb3df3088485ff", - "sha256:607345bd5912aacc0c5a63d45a1f73fef29e697884f7e861094e443187c02be5", - "sha256:618916f5535784960f3ecf8111581f4ad31d347c3de66d02e728de460a46303c", - "sha256:636a15acc588f70fda1661234761f9ed9ad79ebed3f2125d44be0862708b666e", - "sha256:673fdbbf668dd958eff750e500495ef3f611e2ecc209464f661bc82e9838991e", - "sha256:6afd80f6c79893cfc0574956f78a0add8c76e3696f2d6a15bca2c66c415cf2d4", - "sha256:6b5ff7e1d63a8281654b5e2896d7f08799378e594f09cf3674e832ecaf396ce8", - "sha256:6c4c4c3f878df21faf5fac86eda32671c27889e13570645a9eea0a1abdd50922", - "sha256:6cd8098517c64a85e790657e7b1e509b9fe07487fd358e19431cb120f7d96338", - "sha256:6d1e42d2735d437e7e80bab4d78eb2e459af48c0a46e686ea35f690b93db792d", - "sha256:6e30ac5e329098903262dc5bdd7e2086e0256aa762cc8b744f9e7bf2a427d3f8", - "sha256:70a838f7754483bcdc830444952fd89645569e7452e3226de4a613a4c1793fb2", - "sha256:720edcb916df872d80f80a1cc5ea9058300b97721efda8651efcd938a9c70a72", - "sha256:732672fbc449bab754e0b15356c077cc31566df874964d4801ab14f71951ea80", - "sha256:740884bc62a5e2bbb31e584f5d23b32320fd75d79f916f15a788d527a5e83644", - "sha256:7700936ef9d006b7ef605dc53aa364da2de5a3aa65516a1f3ce73bf82ecfc7ae", - "sha256:7732770412bab81c5a9f6d20aeb60ae943a9b36dcd990d876a773526468e7163", - "sha256:7750569d9526199c5b97e5a9f8d96a13300950d910cf04a861d96f4273d5b104", - "sha256:7f1944ce16401aad1e3f7d312247b3d5de7981f634dc9dfe90da72b87d37887d", - 
"sha256:81c5196a790032e0fc2464c0b4ab95f8610f96f1f2fa3d4deacce6a79852da60", - "sha256:8352f48d511de5f973e4f2f9412736d7dea76c69faa6d36bcf885b50c758ab9a", - "sha256:8927638a4d4137a289e41d0fd631551e89fa346d6dbcfc31ad627557d03ceb6d", - "sha256:8c7672e9fba7425f79019db9945b16e308ed8bc89348c23d955c8c0540da0a07", - "sha256:8d2e182c9ee01135e11e9676e9a62dfad791a7a467738f06726872374a83db49", - "sha256:910e71711d1055b2768181efa0a17537b2622afeb0424116619817007f8a2b10", - "sha256:942695a206a58d2575033ff1e42b12b2aece98d6003c6bc739fbf33d1773b12f", - "sha256:9437ca26784120a279f3137ee080b0e717012c42921eb07861b412340f85bae2", - "sha256:967342e045564cef76dfcf1edb700b1e20838d83b1aa02ab313e6a497cf923b8", - "sha256:998125738de0158f088aef3cb264a34251908dd2e5d9966774fdab7402edfab7", - "sha256:9e6934d70dc50f9f8ea47081ceafdec09245fd9f6032669c3b45705dea096b88", - "sha256:a3d456ff2a6a4d2adcdf3c1c960a36f4fd2fec6e3b4902a42a384d17cf4e7a65", - "sha256:a7b28c5b066bca9a4eb4e2f2663012debe680f097979d880657f00e1c30875a0", - "sha256:a888e8bdb45916234b99da2d859566f1e8a1d2275a801bb8e4a9644e3c7e7909", - "sha256:aa3679e751408d75a0b4d8d26d6647b6d9326f5e35c00a7ccd82b78ef64f65f8", - "sha256:aaa71ee43a703c321906813bb252f69524f02aa05bf4eec85f0c41d5d62d0f4c", - "sha256:b646bf655b135ccf4522ed43d6902af37d3f5dbcf0da66c769a2b3938b9d8184", - "sha256:b906b5f58892813e5ba5c6056d6a5ad08f358ba49f046d910ad992196ea61397", - "sha256:b9bb1f182a97880f6078283b3505a707057c42bf55d8fca604f70dedfdc0772a", - "sha256:bd1105b50ede37461c1d51b9698c4f4be6e13e69a908ab7751e3807985fc0346", - "sha256:bf18932d0003c8c4d51a39f244231986ab23ee057d235a12b2684ea26a353590", - "sha256:c273e795e7a0f1fddd46e1e3cb8be15634c29ae8ff31c196debb620e1edb9333", - "sha256:c69882964516dc143083d3795cb508e806b09fc3800fd0d4cddc1df6c36e76bb", - "sha256:c827576e2fa017a081346dce87d532a5310241648eb3700af9a571a6e9fc7e74", - "sha256:cbfbea39ba64f5e53ae2915de36f130588bba71245b418060ec3330ebf85678e", - "sha256:ce0bb20e3a11bd04461324a6a798af34d503f8d6f1aa3d2aa8901ceaf039176d", 
- "sha256:d0cee71bc618cd93716f3c1bf56653740d2d13ddbd47673efa8bf41435a60daa", - "sha256:d21be4770ff4e08698e1e8e0bce06edb6ea0626e7c8f560bc08222880aca6a6f", - "sha256:d31dea506d718693b6b2cffc0648a8929bdc51c70a311b2770f09611caa10d53", - "sha256:d44607f98caa2961bab4fa3c4309724b185b464cdc3ba6f3d7340bac3ec97cc1", - "sha256:d58ad6317d188c43750cb76e9deacf6051d0f884d87dc6518e0280438648a9ac", - "sha256:d70129cef4a8d979caa37e7fe957202e7eee8ea02c5e16455bc9808a59c6b2f0", - "sha256:d85164315bd68c0806768dc6bb0429c6f95c354f87485ee3593c4f6b14def2bd", - "sha256:d960de62227635d2e61068f42a6cb6aae91a7fe00fca0e3aeed17667c8a34611", - "sha256:dc48b479d540770c811fbd1eb9ba2bb66951863e448efec2e2c102625328e92f", - "sha256:e1735502458621921cee039c47318cb90b51d532c2766593be6207eec53e5c4c", - "sha256:e2be6e9dd4111d5b31ba3b74d17da54a8319d8168890fbaea4b9e5c3de630ae5", - "sha256:e4c39ad2f512b4041343ea3c7894339e4ca7839ac38ca83d68a832fc8b3748ab", - "sha256:ed402d6153c5d519a0faf1bb69898e97fb31613b49da27a84a13935ea9164dfc", - "sha256:ee17cd26b97d537af8f33635ef38be873073d516fd425e80559f4585a7b90c43", - "sha256:f3027be483868c99b4985fda802a57a67fdf30c5d9a50338d9db646d590198da", - "sha256:f5bab211605d91db0e2995a17b5c6ee5edec1270e46223e513eaa20da20076ac", - "sha256:f6f8e3fecca256fefc91bb6765a693d96692459d7d4c644660a9fff32e517843", - "sha256:f7afbfee1157e0f9376c00bb232e80a60e59ed716e3211a80cb8506550671e6e", - "sha256:fa242ac1ff583e4ec7771141606aafc92b361cd90a05c30d93e343a0c2d82a89", - "sha256:fab6ce90574645a0d6c58890e9bcaac8d94dff54fb51c69e5522a7358b80ab64" + "sha256:01227f8b3e6c8961490d869aa65c99653df80d2f0a7fde8c64ebddab2b9b02fd", + "sha256:08ce9c95a0b093b7aec75676b356a27879901488abc27e9d029273d280438505", + "sha256:0b02dd77a2de6e49078c8937aadabe933ceac04b41c5dde5eca13a69f3cf144e", + "sha256:0d4b52811dcbc1aba08fd88d475f75b4f6db0984ba12275d9bed1a04b2cae9b5", + "sha256:13e6d4840897d4e4e6b2aa1443e3a8eca92b0402182aafc5f4ca1f5e24f9270a", + 
"sha256:1a129c02b42d46758c87faeea21a9f574e1c858b9f358b6dd0bbd71d17713175", + "sha256:1a8dfa125b60ec00c7c9baef945bb04abf8ac772d8ebefd79dae2a5f316d7850", + "sha256:1c32e41de995f39b6b315d66c27dea3ef7f7c937c06caab4c6a79a5e09e2c415", + "sha256:1d494887d40dc4dd0d5a71e9d07324e5c09c4383d93942d391727e7a40ff810b", + "sha256:1d4af2eb520d759f48f1073ad3caef997d1bfd910dc34e41261a595d3f038a94", + "sha256:1fb93d3486f793d54a094e2bfd9cd97031f63fcb5bc18faeb3dd4b49a1c06523", + "sha256:24f8ae92c7fae7c28d0fae9b52829235df83f34847aa8160a47eb229d9666c7b", + "sha256:24fc5a84777cb61692d17988989690d6f34f7f95968ac81398d67c0d0994a897", + "sha256:26ab43b6d65d25b1a333c8d1b1c2f8399385ff683a35ab5e274ba7b8bb7dc61c", + "sha256:271accf41b02687cef26367c775ab220372ee0f4925591c6796e7c148c50cab5", + "sha256:2ddd50f18ebc05ec29a0d9271e9dbe93997536da3546677f8ca00b76d477680c", + "sha256:31dd5794837f00b46f4096aa8ccaa5972f73a938982e32ed817bb520c465e520", + "sha256:31e450840f2f27699d014cfc8865cc747184286b26d945bcea6042bb6aa4d26e", + "sha256:32e0db3d6e4f45601b58e4ac75c6f24afbf99818c647cc2066f3e4b192dabb1f", + "sha256:346557f5b1d8fd9966059b7a748fd79ac59f5752cd0e9498d6a40e3ac1c1875f", + "sha256:34bca66e2e3eabc8a19e9afe0d3e77789733c702c7c43cd008e953d5d1463fde", + "sha256:3511f6baf8438326e351097cecd137eb45c5f019944fe0fd0ae2fea2fd26be39", + "sha256:35af5e4d5448fa179fd7fff0bba0fba51f876cd55212f96c8bbcecc5c684ae5c", + "sha256:3837c63dd6918a24de6c526277910e3766d8c2b1627c500b155f3eecad8fad65", + "sha256:39d67896f7235b2c886fb1ee77b1491b77049dcef6fbf0f401e7b4cbed86bbd4", + "sha256:3b823be829407393d84ee56dc849dbe3b31b6a326f388e171555b262e8456cc1", + "sha256:3c73254c256081704dba0a333457e2fb815364018788f9b501efe7c5e0ada401", + "sha256:3ddab996807c6b4227967fe1587febade4e48ac47bb0e2d3e7858bc621b1cace", + "sha256:3e1dc59a5e7bc7f44bd0c048681f5e05356e479c50be4f2c1a7089103f1621d5", + "sha256:4383beb4a29935b8fa28aca8fa84c956bf545cb0c46307b091b8d312a9150e6a", + "sha256:4cc4bc73e53af8e7a42c8fd7923bbe35babacfa7394ae9240b3430b5dcf16b2a", 
+ "sha256:4dd02e29c8cbed21a1875330b07246b71121a1c08e29f0ee3db5b4cfe16980c4", + "sha256:4f580ae79d0b861dfd912494ab9d477bea535bfb4756a2269130b6607a21802e", + "sha256:53dbc35808c6faa2ce3e48571f8f74ef70802218554884787b86a30947842a14", + "sha256:56313be667a837ff1ea3508cebb1ef6681d418fa2913a0635386cf29cff35165", + "sha256:57863d16187995c10fe9cf911b897ed443ac68189179541734502353af33e693", + "sha256:5953391af1405f968eb5701ebbb577ebc5ced8d0041406f9052638bafe52209d", + "sha256:5beffdbe766cfe4fb04f30644d822a1080b5359df7db3a63d30fa928375b2720", + "sha256:5e360188b72f8080fefa3adfdcf3618604cc8173651c9754f189fece068d2a45", + "sha256:5e58b61dcbb483a442c6239c3836696b79f2cd8e7eec11e12155d3f6f2d886d1", + "sha256:69084fd29bfeff14816666c93a466e85414fe6b7d236cfc108a9c11afa6f7301", + "sha256:6d1d7539043b2b31307f2c6c72957a97c839a88b2629a348ebabe5aa8b626d6b", + "sha256:6d8b735c4d162dc7d86a9cf3d717f14b6c73637a1f9cd57fe7e61002d9cb1972", + "sha256:6ea961a674172ed2235d990d7edf85d15d8dfa23ab8575e48306371c070cda67", + "sha256:71157f9db7f6bc6599a852852f3389343bea34315b4e6f109e5cbc97c1fb2963", + "sha256:720f3108fb1bfa32e51db58b832898372eb5891e8472a8093008010911e324c5", + "sha256:74129d5ffc4cde992d89d345f7f7d6758320e5d44a369d74d83493429dad2de5", + "sha256:747251e428406b05fc86fee3904ee19550c4d2d19258cef274e2151f31ae9d38", + "sha256:75130df05aae7a7ac171b3b5b24714cffeabd054ad2ebc18870b3aa4526eba23", + "sha256:7b3661e6d4ba63a094138032c1356d557de5b3ea6fd3cca62a195f623e381c76", + "sha256:7d5c7e32f3ee42f77d8ff1a10384b5cdcc2d37035e2e3320ded909aa192d32c3", + "sha256:8124101e92c56827bebef084ff106e8ea11c743256149a95b9fd860d3a4f331f", + "sha256:81db2e7282cc0487f500d4db203edc57da81acde9e35f061d69ed983228ffe3b", + "sha256:840e18c38098221ea6201f091fc5d4de6128961d2930fbbc96806fb43f69aec1", + "sha256:89cc8921a4a5028d6dd388c399fcd2eef232e7040345af3d5b16c04b91cf3c7e", + "sha256:8b32cd4ab6db50c875001ba4f5a6b30c0f42151aa1fbf9c2e7e3674893fb1dc4", + 
"sha256:8df1c283e57c9cb4d271fdc1875f4a58a143a2d1698eb0d6b7c0d7d5f49c53a1", + "sha256:902cf4739458852fe917104365ec0efbea7d29a15e4276c96a8d33e6ed8ec137", + "sha256:97fbb77eaeb97591efdc654b8b5f3ccc066406ccfb3175b41382f221ecc216e8", + "sha256:9c7042488165f7251dc7894cd533a875d2875af6d3b0e09eda9c4b334627ad1c", + "sha256:9e318e6786b1e750a62f90c6f7fa8b542102bdcf97c7c4de2a48b50b61bd36ec", + "sha256:a9421b23c85f361a133aa7c5e8ec757668f70343f4ed8fdb5a4a14abd5437244", + "sha256:aaf71f95b21f9dc708123335df22e5a2fef6307e3e6f9ed773b2e0938cc4d491", + "sha256:afedc35fe4b9e30ab240b208bb9dc8938cb4afe9187589e8d8d085e1aacb8309", + "sha256:b5e28e56143750808c1c79c70a16519e9bc0a68b623197b96292b21b62d6055c", + "sha256:b82c9514c6d74b89a370c4060bdb80d2299bc6857e462e4a215b4ef7aa7b090e", + "sha256:b8f78398e67a7227aefa95f876481485403eb974b29e9dc38b307bb6eb2315ea", + "sha256:bbda75f245caecff8faa7e32ee94dfaa8312a3367397975527f29654cd17a6ed", + "sha256:bca34e913d27401bda2a6f390d0614049f5a95b3b11cd8eff80fe4ec340a1208", + "sha256:bd04d8cab16cab5b0a9ffc7d10f0779cf1120ab16c3925404428f74a0a43205a", + "sha256:c149a652aeac4902ecff2dd93c3b2681c608bd5208c793c4a99404b3e1afc87c", + "sha256:c2087dbb76a87ec2c619253e021e4fb20d1a72580feeaa6892b0b3d955175a71", + "sha256:c34f751bf67cab69638564eee34023909380ba3e0d8ee7f6fe473079bf93f09b", + "sha256:c6d20c8896c00775e6f62d8373aba32956aa0b850d02b5ec493f486c88e12859", + "sha256:c7af6f7b80f687b33a4cdb0a785a5d4de1fb027a44c9a049d8eb67d5bfe8a687", + "sha256:c7b07959866a6afb019abb9564d8a55046feb7a84506c74a6f197cbcdf8a208e", + "sha256:ca0dda0c5715efe2ab35bb83f813f681ebcd2840d8b1b92bfc6fe3ab382fae4a", + "sha256:cdb7eb3cf3deb3dd9e7b8749323b5d970052711f9e1e9f36364163627f96da58", + "sha256:ce757c7c90d35719b38fa3d4ca55654a76a40716ee299b0865f2de21c146801c", + "sha256:d1fa67ef839bad3815124f5f57e48cd50ff392f4911a9f3cf449d66fa3df62a5", + "sha256:d2dbd8f4990d4788cb122f63bf000357533f34860d269c1a8e90ae362090ff3a", + "sha256:d4ec0046facab83012d821b33cead742a35b54575c4edfb7ed7445f63441835f", 
+ "sha256:dbceedcf4a9329cc665452db1aaf0845b85c666e4885b92ee0cddb1dbf7e052a", + "sha256:dc733d35f861f8d78abfaf54035461e10423422999b360966bf1c443cbc42705", + "sha256:dd635c2c4043222d80d80ca1ac4530a633102a9f2ad12252183bcf338c1b9474", + "sha256:de1f7cd5b6b351e1afd7568bdab94934d656abe273d66cda0ceea43bbc02a0c2", + "sha256:df7c841813f6265e636fe548a49664c77af31ddfa0085515326342a751a6ba51", + "sha256:e0f9d268b19e8f61bf42a1da48276bcd05f7ab5560311f541d22557f8227b866", + "sha256:e2d66eb41ffca6cc3c91d8387509d27ba73ad28371ef90255c50cb51f8953301", + "sha256:e429fc517a1c5e2a70d576077231538a98d59a45dfc552d1ac45a132844e6dfb", + "sha256:e4d2b88efe65544a7d5121b0c3b003ebba92bfede2ea3577ce548b69c5235185", + "sha256:e76c902d229a3aa9d5ceb813e1cbcc69bf5bda44c80d574ff1ac1fa3136dea71", + "sha256:ef07a0a1d254eeb16455d839cef6e8c2ed127f47f014bbda64a58b5482b6c836", + "sha256:f09529d2332264a902688031a83c19de8fda5eb5881e44233286b9c9ec91856d", + "sha256:f0a6d4a93d2a05daec7cb885157c97bbb0be4da739d6f9dfb02e101eb40921cd", + "sha256:f0cf2a0dbb5987da4bd92a7ca727eadb225581dd9681365beba9accbe5308f7d", + "sha256:f2671cb47e50a97f419a02cd1e0c339b31de017b033186358db92f4d8e2e17d8", + "sha256:f35b34a5184d5e0cc360b61664c1c06e866aab077b5a7c538a3e20c8fcdbf90b", + "sha256:f3d73022990ab0c8b172cce57c69fd9a89c24fd473a5e79cbce92df87e3d9c48", + "sha256:f5b8353ea1a4d7dfb59a7f45c04df66ecfd363bb5b35f33b11ea579111d4655f", + "sha256:f809a17cc78bd331e137caa25262b507225854073fd319e987bd216bed911b7c", + "sha256:f9bc4161bd3b970cd6a6fcda70583ad4afd10f2750609fb1f3ca9505050d4ef3", + "sha256:fdf4890cda3b59170009d012fca3294c00140e7f2abe1910e6a730809d0f3f9b" ], "markers": "python_version >= '3.8'", - "version": "==0.18.1" + "version": "==0.19.1" }, "send2trash": { "hashes": [ @@ -3889,12 +3840,12 @@ }, "setuptools": { "hashes": [ - "sha256:b8b8060bb426838fbe942479c90296ce976249451118ef566a5a0b7d8b78fb05", - "sha256:bd63e505105011b25c3c11f753f7e3b8465ea739efddaccef8f0efac2137bac1" + 
"sha256:5a03e1860cf56bb6ef48ce186b0e557fdba433237481a9a625176c2831be15d1", + "sha256:8d243eff56d095e5817f796ede6ae32941278f542e0f941867cc05ae52b162ec" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==70.2.0" + "version": "==72.1.0" }, "six": { "hashes": [ @@ -3977,11 +3928,11 @@ }, "sqlparse": { "hashes": [ - "sha256:714d0a4932c059d16189f58ef5411ec2287a4360f17cdd0edd2d09d4c5087c93", - "sha256:c204494cd97479d0e39f28c93d46c0b2d5959c7b9ab904762ea6c7af211c8663" + "sha256:773dcbf9a5ab44a090f3441e2180efe2560220203dc2f8c0b0fa141e18b505e4", + "sha256:bb6b4df465655ef332548e24f08e205afc81b9ab86cb1c45657a7ff173a3a00e" ], "markers": "python_version >= '3.8'", - "version": "==0.5.0" + "version": "==0.5.1" }, "stack-data": { "hashes": [ @@ -4056,28 +4007,28 @@ }, "types-requests": { "hashes": [ - "sha256:97bac6b54b5bd4cf91d407e62f0932a74821bc2211f22116d9ee1dd643826caf", - "sha256:ed5e8a412fcc39159d6319385c009d642845f250c63902718f605cd90faade31" + "sha256:90c079ff05e549f6bf50e02e910210b98b8ff1ebdd18e19c873cd237737c1358", + "sha256:f754283e152c752e46e70942fa2a146b5bc70393522257bb85bd1ef7e019dcc3" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==2.32.0.20240622" + "version": "==2.32.0.20240712" }, "types-setuptools": { "hashes": [ - "sha256:2f8d28d16ca1607080f9fdf19595bd49c942884b2bbd6529c9b8a9a8fc8db911", - "sha256:6b892d5441c2ed58dd255724516e3df1db54892fb20597599aea66d04c3e4d7f" + "sha256:85ba28e9461bb1be86ebba4db0f1c2408f2b11115b1966334ea9dc464e29303e", + "sha256:a7775376f36e0ff09bcad236bf265777590a66b11623e48c20bfc30f1444ea36" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==70.2.0.20240704" + "version": "==71.1.0.20240726" }, "typing-extensions": { "hashes": [ "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d", "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8" ], - "markers": "python_version < '3.12'", + "markers": "python_version >= '3.8'", "version": 
"==4.12.2" }, "typing-inspect": { diff --git a/config/middlewares.py b/config/middlewares.py index aaaeafc12..ca9e08262 100644 --- a/config/middlewares.py +++ b/config/middlewares.py @@ -51,6 +51,8 @@ def process_request(self, request): def process_response(self, request, response): """Replace nonce placeholder by its true value.""" + if response._csp_exempt: + return response response = super().process_response(request, response) if isinstance(response, HttpResponse): content = response.content.decode("utf-8") diff --git a/config/settings.py b/config/settings.py index 20a077355..a8f6d3734 100644 --- a/config/settings.py +++ b/config/settings.py @@ -262,6 +262,8 @@ "BACKEND": "config.cache_backends.RedisDummyCache", }, } + # to use qgis locally : + X_FRAME_OPTIONS = "ALLOW" else: CACHES = { "default": { diff --git a/public_data/urls.py b/public_data/urls.py index 6216a1d0d..911a165b7 100644 --- a/public_data/urls.py +++ b/public_data/urls.py @@ -10,6 +10,11 @@ path("matrix", views.DisplayMatrix.as_view(), name="matrix"), path("grid", views.grid_view.as_view(), name="grid"), path("search-land", views.SearchLandApiView.as_view({"post": "post"}), name="search-land"), + path( + "ocsge/zones-artificielle-v2///", + views.ArtificialAreaMVTView.as_view(), + name="ArtificialAreaMVTView", + ), ] diff --git a/public_data/views.py b/public_data/views.py index c3c0c1c32..544b41cbc 100644 --- a/public_data/views.py +++ b/public_data/views.py @@ -6,12 +6,13 @@ from django.http import HttpResponse from django.urls import reverse_lazy from django.views.generic import TemplateView -from rest_framework import viewsets +from rest_framework import renderers, viewsets from rest_framework.decorators import action from rest_framework.response import Response from rest_framework.views import APIView from rest_framework.viewsets import GenericViewSet from rest_framework_gis import filters +from vectortiles.postgis.views import MVTView from public_data import models, serializers from 
public_data.models.administration import Land @@ -349,6 +350,48 @@ def get_sql_where(self): return "WHERE o.year = %s" +class MVTRenderer(renderers.BaseRenderer): + media_type = "application/vnd.mapbox-vector-tile" + format = "pbf" + + def render(self, data, accepted_media_type=None, renderer_context=None): + return data + + +class ArtificialAreaMVTView(MVTView, APIView): + model = models.ArtificialArea + vector_tile_layer_name = "artificial_area" # name for data layer in vector tile + vector_tile_fields = ("year", "city") # model fields or queryset annotates to include in tile + vector_tile_content_type = "application/x-protobuf" # if you want to use custom content_type + vector_tile_geom_name = "mpoly" # geom field to consider in qs + renderer_classes = (MVTRenderer,) + accepted_renderer = MVTRenderer + + def get_vector_tile_queryset(self): + year = self.request.GET.get("year") + city = str(self.request.GET.get("city")) + + if not year: + raise ValueError("year parameter must be set") + if city and year: + return models.ArtificialArea.objects.filter(city=city, year=year) + if city: + return models.ArtificialArea.objects.filter(city=city) + if year: + return models.ArtificialArea.objects.filter(year=year) + + def get(self, request, *args, **kwargs): + response = Response( + self.get_tile( + kwargs.get("x"), + kwargs.get("y"), + kwargs.get("z"), + ) + ) + response._csp_exempt = True + return response + + class ArtificialAreaViewSet(OnlyBoundingBoxMixin, ZoomSimplificationMixin, OptimizedMixins, DataViewSet): queryset = models.ArtificialArea.objects.all() serializer_class = serializers.OcsgeDiffSerializer From fad5581efb3f6d31838afe020909f8c6922c8bc2 Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Tue, 30 Jul 2024 16:09:04 +0200 Subject: [PATCH 02/99] fix(tests): remove test depending on s3 --- public_data/models/tests.py | 46 ------------------ public_data/test/__init__.py | 2 - public_data/test/test_build_shapefile.py | 33 ------------- public_data/test/test_cerema.py | 60 ------------------------ public_data/test/test_import_sudocuh.py | 32 ------------- 5 files changed, 173 deletions(-) delete mode 100644 public_data/models/tests.py delete mode 100644 public_data/test/__init__.py delete mode 100644 public_data/test/test_build_shapefile.py delete mode 100644 public_data/test/test_cerema.py delete mode 100644 public_data/test/test_import_sudocuh.py diff --git a/public_data/models/tests.py b/public_data/models/tests.py deleted file mode 100644 index c4e44232c..000000000 --- a/public_data/models/tests.py +++ /dev/null @@ -1,46 +0,0 @@ -from django.core.management import call_command -from django.db.models import F, Sum -from django.test import TestCase - -from public_data.models import Cerema, DataSource - - -class TestCerema(TestCase): - fixtures = ["public_data/models/data_source_fixture.json"] - - def setUp(self) -> None: - call_command( - command_name="load_shapefile", - dataset=DataSource.DatasetChoices.MAJIC, - ) - - def test_cerema_data(self): - with self.subTest("Test national mean value for 2011 to 2021"): - expected_total = 23730.427 - - fields = Cerema.get_art_field( - start=2011, - end=2021, - ) - result = Cerema.objects.aggregate( - national_mean_value_calculated_at_import=Sum("naf11art21") / 10000 / 10, - national_mean_value_from_rows=Sum( - F(fields[0]) - + F(fields[1]) - + F(fields[2]) - + F(fields[3]) - + F(fields[4]) - + F(fields[5]) - + F(fields[6]) - + F(fields[7]) - + F(fields[8]) - + F(fields[9]) - ) - / 10 - / 10000, - ) - - self.assertAlmostEqual(first=result["national_mean_value_from_rows"], second=expected_total, places=3) - self.assertAlmostEqual( - first=result["national_mean_value_calculated_at_import"], 
second=expected_total, places=3 - ) diff --git a/public_data/test/__init__.py b/public_data/test/__init__.py deleted file mode 100644 index c35462249..000000000 --- a/public_data/test/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from .test_cerema import TestCerema # noqa: F401 -from .test_import_sudocuh import TestImportSudocuh # noqa: F401 diff --git a/public_data/test/test_build_shapefile.py b/public_data/test/test_build_shapefile.py deleted file mode 100644 index f91048a6f..000000000 --- a/public_data/test/test_build_shapefile.py +++ /dev/null @@ -1,33 +0,0 @@ -from pathlib import Path - -from django.core.management import call_command -from django.test import TestCase - -from public_data.models import DataSource - - -class TestBuildOcsge(TestCase): - def setUp(self) -> None: - DataSource.objects.all().delete() - call_command("loaddata", "public_data/models/data_source_fixture.json") - - def test_build_ocsge(self): - expected_files = [ - "OCSGE_DIFFERENCE_94_2018_2021_MDA.shp.zip", - "OCSGE_OCCUPATION_DU_SOL_94_2018_MDA.shp.zip", - "OCSGE_OCCUPATION_DU_SOL_94_2021_MDA.shp.zip", - "OCSGE_ZONE_ARTIFICIELLE_94_2018_MDA.shp.zip", - "OCSGE_ZONE_ARTIFICIELLE_94_2021_MDA.shp.zip", - "OCSGE_ZONE_CONSTRUITE_94_2018_MDA.shp.zip", - "OCSGE_ZONE_CONSTRUITE_94_2021_MDA.shp.zip", - ] - - call_command( - command_name="build_shapefile", - productor=DataSource.ProductorChoices.IGN, - dataset=DataSource.DatasetChoices.OCSGE, - land_id="94", - ) - - for file in expected_files: - self.assertTrue(Path(file).exists()) diff --git a/public_data/test/test_cerema.py b/public_data/test/test_cerema.py deleted file mode 100644 index 288deff69..000000000 --- a/public_data/test/test_cerema.py +++ /dev/null @@ -1,60 +0,0 @@ -from django.test import TestCase - -from public_data.models import Cerema - - -class TestCerema(TestCase): - def test_get_art_fields(self): - self.assertListEqual( - Cerema.get_art_field(start=2009, end=2021), - [ - "naf09art10", - "naf10art11", - "naf11art12", - 
"naf12art13", - "naf13art14", - "naf14art15", - "naf15art16", - "naf16art17", - "naf17art18", - "naf18art19", - "naf19art20", - "naf20art21", - "naf21art22", - ], - ) - - def test_list_attr(self): - self.assertListEqual( - Cerema.list_attr(), - [ - "naf09art10", - "naf10art11", - "naf11art12", - "naf12art13", - "naf13art14", - "naf14art15", - "naf15art16", - "naf16art17", - "naf17art18", - "naf18art19", - "naf19art20", - "naf20art21", - "naf21art22", - ], - ) - - def test_too_high_date_raises_error(self): - with self.assertRaises(ValueError): - Cerema.get_art_field(start=2009, end=2023) - - def test_too_low_date_raises_error(self): - with self.assertRaises(ValueError): - Cerema.get_art_field(start=2008, end=2021) - - def test_end_before_start_raises_error(self): - with self.assertRaises(ValueError): - Cerema.get_art_field(start=2021, end=2009) - - def test_same_start_and_end(self): - self.assertListEqual(Cerema.get_art_field(start=2014, end=2014), ["naf14art15"]) diff --git a/public_data/test/test_import_sudocuh.py b/public_data/test/test_import_sudocuh.py deleted file mode 100644 index 42f9d3bc7..000000000 --- a/public_data/test/test_import_sudocuh.py +++ /dev/null @@ -1,32 +0,0 @@ -from datetime import datetime - -from django.core.management import call_command -from django.test import TestCase - -from public_data.management.commands.import_sudocuh import ( - convert_km2_to_ha, - empty_string_to_none, - parse_date, -) -from public_data.models import Sudocuh, SudocuhEpci - - -class TestImportSudocuh(TestCase): - def test_import_sudocuh(self) -> None: - call_command(command_name="import_sudocuh") - expected_count = 34944 - expected_count_epci = 1537 - self.assertEqual(Sudocuh.objects.count(), expected_count) - self.assertEqual(SudocuhEpci.objects.count(), expected_count_epci) - - def test_empty_string_are_parsed_as_none(self): - self.assertIsNone(empty_string_to_none("")) - self.assertIsNone(empty_string_to_none(" ")) - - def test_date_are_parsed_properly(self): - 
self.assertEqual(parse_date("01/01/21"), datetime(2021, 1, 1).date()) - self.assertEqual(parse_date("12/31/99"), datetime(1999, 12, 31).date()) - self.assertEqual(parse_date("1/4/99", "%m/%d/%y"), datetime(1999, 1, 4).date()) - - def test_convert_superficie_to_ha(self): - self.assertEqual(convert_km2_to_ha("1"), 100) From 7121bd5f3d050c702f7fcfb273e9c2953a0f2814 Mon Sep 17 00:00:00 2001 From: "Alexis A." Date: Tue, 6 Aug 2024 10:56:32 +0200 Subject: [PATCH 03/99] temp --- .astro/config.yaml | 2 + .astro/dag_integrity_exceptions.txt | 1 + .astro/test_dag_integrity_default.py | 130 +++++++ .dockerignore | 8 + airflow/.astro/config.yaml | 6 + airflow/.astro/dag_integrity_exceptions.txt | 1 + airflow/.astro/test_dag_integrity_default.py | 130 +++++++ airflow/.dockerignore | 8 + airflow/.gitignore | 11 + airflow/Dockerfile | 4 + airflow/README.md | 48 +++ airflow/dags/admin_express.py | 86 +++++ airflow/dags/ocsge.py | 349 ++++++++++++++++++ airflow/dbt_profile.yml | 12 + airflow/dependencies/container.py | 41 ++ airflow/dependencies/utils.py | 2 + airflow/packages.txt | 11 + airflow/requirements.txt | 10 + airflow/sql/sparte/.gitignore | 4 + airflow/sql/sparte/README.md | 15 + airflow/sql/sparte/analyses/.gitkeep | 0 airflow/sql/sparte/dbt_project.yml | 42 +++ airflow/sql/sparte/macros/.gitkeep | 0 airflow/sql/sparte/macros/is_artificial.sql | 42 +++ airflow/sql/sparte/macros/is_impermeable.sql | 8 + .../admin_express/commune_with_checksum.sql | 4 + .../sql/sparte/models/admin_express/land.sql | 30 ++ .../sparte/models/admin_express/schema.yml | 22 ++ airflow/sql/sparte/models/ocsge/artif/README | 0 .../ocsge/artif/artif_nat_by_surface.sql | 0 .../ocsge/artif/artificial_geom_union.sql | 0 .../artif/artificial_geom_union_dump.sql | 0 .../models/ocsge/artif/artificial_union.sql | 0 .../models/ocsge/artif/clustered_ocsge.sql | 0 .../sparte/models/ocsge/artif/final_artif.sql | 0 .../sparte/models/ocsge/artif/small_built.sql | 0 .../sql/sparte/models/ocsge/difference.sql | 
52 +++ .../sparte/models/ocsge/occupation_du_sol.sql | 26 ++ airflow/sql/sparte/models/ocsge/schema.yml | 131 +++++++ .../sparte/models/ocsge/zone_construite.sql | 9 + airflow/sql/sparte/package-lock.yml | 6 + airflow/sql/sparte/packages.yml | 5 + airflow/sql/sparte/seeds/.gitkeep | 0 airflow/sql/sparte/snapshots/.gitkeep | 0 .../sparte/tests/generic/is_valid_geom.sql | 14 + airflow/tests/dags/test_dag_example.py | 72 ++++ airflow_settings.yaml | 25 ++ tests/dags/test_dag_example.py | 72 ++++ 48 files changed, 1439 insertions(+) create mode 100644 .astro/config.yaml create mode 100644 .astro/dag_integrity_exceptions.txt create mode 100644 .astro/test_dag_integrity_default.py create mode 100644 .dockerignore create mode 100644 airflow/.astro/config.yaml create mode 100644 airflow/.astro/dag_integrity_exceptions.txt create mode 100644 airflow/.astro/test_dag_integrity_default.py create mode 100644 airflow/.dockerignore create mode 100644 airflow/.gitignore create mode 100644 airflow/Dockerfile create mode 100644 airflow/README.md create mode 100644 airflow/dags/admin_express.py create mode 100644 airflow/dags/ocsge.py create mode 100644 airflow/dbt_profile.yml create mode 100644 airflow/dependencies/container.py create mode 100644 airflow/dependencies/utils.py create mode 100644 airflow/packages.txt create mode 100644 airflow/requirements.txt create mode 100644 airflow/sql/sparte/.gitignore create mode 100644 airflow/sql/sparte/README.md create mode 100644 airflow/sql/sparte/analyses/.gitkeep create mode 100644 airflow/sql/sparte/dbt_project.yml create mode 100644 airflow/sql/sparte/macros/.gitkeep create mode 100644 airflow/sql/sparte/macros/is_artificial.sql create mode 100644 airflow/sql/sparte/macros/is_impermeable.sql create mode 100644 airflow/sql/sparte/models/admin_express/commune_with_checksum.sql create mode 100644 airflow/sql/sparte/models/admin_express/land.sql create mode 100644 airflow/sql/sparte/models/admin_express/schema.yml create mode 100644 
airflow/sql/sparte/models/ocsge/artif/README create mode 100644 airflow/sql/sparte/models/ocsge/artif/artif_nat_by_surface.sql create mode 100644 airflow/sql/sparte/models/ocsge/artif/artificial_geom_union.sql create mode 100644 airflow/sql/sparte/models/ocsge/artif/artificial_geom_union_dump.sql create mode 100644 airflow/sql/sparte/models/ocsge/artif/artificial_union.sql create mode 100644 airflow/sql/sparte/models/ocsge/artif/clustered_ocsge.sql create mode 100644 airflow/sql/sparte/models/ocsge/artif/final_artif.sql create mode 100644 airflow/sql/sparte/models/ocsge/artif/small_built.sql create mode 100644 airflow/sql/sparte/models/ocsge/difference.sql create mode 100644 airflow/sql/sparte/models/ocsge/occupation_du_sol.sql create mode 100644 airflow/sql/sparte/models/ocsge/schema.yml create mode 100644 airflow/sql/sparte/models/ocsge/zone_construite.sql create mode 100644 airflow/sql/sparte/package-lock.yml create mode 100644 airflow/sql/sparte/packages.yml create mode 100644 airflow/sql/sparte/seeds/.gitkeep create mode 100644 airflow/sql/sparte/snapshots/.gitkeep create mode 100644 airflow/sql/sparte/tests/generic/is_valid_geom.sql create mode 100644 airflow/tests/dags/test_dag_example.py create mode 100644 airflow_settings.yaml create mode 100644 tests/dags/test_dag_example.py diff --git a/.astro/config.yaml b/.astro/config.yaml new file mode 100644 index 000000000..cf31f7bad --- /dev/null +++ b/.astro/config.yaml @@ -0,0 +1,2 @@ +project: + name: sparte diff --git a/.astro/dag_integrity_exceptions.txt b/.astro/dag_integrity_exceptions.txt new file mode 100644 index 000000000..0d6bd898a --- /dev/null +++ b/.astro/dag_integrity_exceptions.txt @@ -0,0 +1 @@ +# Add dag files to exempt from parse test below. ex: dags/ diff --git a/.astro/test_dag_integrity_default.py b/.astro/test_dag_integrity_default.py new file mode 100644 index 000000000..c287dc631 --- /dev/null +++ b/.astro/test_dag_integrity_default.py @@ -0,0 +1,130 @@ +"""Test the validity of all DAGs. 
**USED BY DEV PARSE COMMAND DO NOT EDIT**""" + +import logging +import os +from contextlib import contextmanager + +import pytest +from airflow.hooks.base import BaseHook +from airflow.models import Connection, DagBag, Variable +from airflow.utils.db import initdb + +# init airflow database +initdb() + +# The following code patches errors caused by missing OS Variables, Airflow Connections, and Airflow Variables + + +# =========== MONKEYPATCH BaseHook.get_connection() =========== +def basehook_get_connection_monkeypatch(key: str, *args, **kwargs): + print(f"Attempted to fetch connection during parse returning an empty Connection object for {key}") + return Connection(key) + + +BaseHook.get_connection = basehook_get_connection_monkeypatch +# # =========== /MONKEYPATCH BASEHOOK.GET_CONNECTION() =========== + + +# =========== MONKEYPATCH OS.GETENV() =========== +def os_getenv_monkeypatch(key: str, *args, **kwargs): + default = None + if args: + default = args[0] # os.getenv should get at most 1 arg after the key + if kwargs: + default = kwargs.get("default", None) # and sometimes kwarg if people are using the sig + + env_value = os.environ.get(key, None) + + if env_value: + return env_value # if the env_value is set, return it + if key == "JENKINS_HOME" and default is None: # fix https://github.com/astronomer/astro-cli/issues/601 + return None + if default: + return default # otherwise return whatever default has been passed + return f"MOCKED_{key.upper()}_VALUE" # if absolutely nothing has been passed - return the mocked value + + +os.getenv = os_getenv_monkeypatch +# # =========== /MONKEYPATCH OS.GETENV() =========== + +# =========== MONKEYPATCH VARIABLE.GET() =========== + + +class magic_dict(dict): + def __init__(self, *args, **kwargs): + self.update(*args, **kwargs) + + def __getitem__(self, key): + return {}.get(key, "MOCKED_KEY_VALUE") + + +_no_default = object() # allow falsey defaults + + +def variable_get_monkeypatch(key: str, default_var=_no_default, 
deserialize_json=False): + print(f"Attempted to get Variable value during parse, returning a mocked value for {key}") + + if default_var is not _no_default: + return default_var + if deserialize_json: + return magic_dict() + return "NON_DEFAULT_MOCKED_VARIABLE_VALUE" + + +Variable.get = variable_get_monkeypatch +# # =========== /MONKEYPATCH VARIABLE.GET() =========== + + +@contextmanager +def suppress_logging(namespace): + """ + Suppress logging within a specific namespace to keep tests "clean" during build + """ + logger = logging.getLogger(namespace) + old_value = logger.disabled + logger.disabled = True + try: + yield + finally: + logger.disabled = old_value + + +def get_import_errors(): + """ + Generate a tuple for import errors in the dag bag, and include DAGs without errors. + """ + with suppress_logging("airflow"): + dag_bag = DagBag(include_examples=False) + + def strip_path_prefix(path): + return os.path.relpath(path, os.environ.get("AIRFLOW_HOME")) + + # Initialize an empty list to store the tuples + result = [] + + # Iterate over the items in import_errors + for k, v in dag_bag.import_errors.items(): + result.append((strip_path_prefix(k), v.strip())) + + # Check if there are DAGs without errors + for file_path in dag_bag.dags: + # Check if the file_path is not in import_errors, meaning no errors + if file_path not in dag_bag.import_errors: + result.append((strip_path_prefix(file_path), "No import errors")) + + return result + + +@pytest.mark.parametrize("rel_path, rv", get_import_errors(), ids=[x[0] for x in get_import_errors()]) +def test_file_imports(rel_path, rv): + """Test for import errors on a file""" + if os.path.exists(".astro/dag_integrity_exceptions.txt"): + with open(".astro/dag_integrity_exceptions.txt", "r") as f: + exceptions = f.readlines() + print(f"Exceptions: {exceptions}") + if (rv != "No import errors") and rel_path not in exceptions: + # If rv is not "No import errors," consider it a failed test + raise Exception(f"{rel_path} failed 
to import with message \n {rv}") + else: + # If rv is "No import errors," consider it a passed test + print(f"{rel_path} passed the import test") diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 000000000..a334663ba --- /dev/null +++ b/.dockerignore @@ -0,0 +1,8 @@ +astro +.git +.env +airflow_settings.yaml +logs/ +.venv +airflow.db +airflow.cfg diff --git a/airflow/.astro/config.yaml b/airflow/.astro/config.yaml new file mode 100644 index 000000000..79aded917 --- /dev/null +++ b/airflow/.astro/config.yaml @@ -0,0 +1,6 @@ +project: + name: airflow +postgres: + port: 5433 +webserver: + port: 9090 diff --git a/airflow/.astro/dag_integrity_exceptions.txt b/airflow/.astro/dag_integrity_exceptions.txt new file mode 100644 index 000000000..0d6bd898a --- /dev/null +++ b/airflow/.astro/dag_integrity_exceptions.txt @@ -0,0 +1 @@ +# Add dag files to exempt from parse test below. ex: dags/ diff --git a/airflow/.astro/test_dag_integrity_default.py b/airflow/.astro/test_dag_integrity_default.py new file mode 100644 index 000000000..c287dc631 --- /dev/null +++ b/airflow/.astro/test_dag_integrity_default.py @@ -0,0 +1,130 @@ +"""Test the validity of all DAGs. 
**USED BY DEV PARSE COMMAND DO NOT EDIT**""" + +import logging +import os +from contextlib import contextmanager + +import pytest +from airflow.hooks.base import BaseHook +from airflow.models import Connection, DagBag, Variable +from airflow.utils.db import initdb + +# init airflow database +initdb() + +# The following code patches errors caused by missing OS Variables, Airflow Connections, and Airflow Variables + + +# =========== MONKEYPATCH BaseHook.get_connection() =========== +def basehook_get_connection_monkeypatch(key: str, *args, **kwargs): + print(f"Attempted to fetch connection during parse returning an empty Connection object for {key}") + return Connection(key) + + +BaseHook.get_connection = basehook_get_connection_monkeypatch +# # =========== /MONKEYPATCH BASEHOOK.GET_CONNECTION() =========== + + +# =========== MONKEYPATCH OS.GETENV() =========== +def os_getenv_monkeypatch(key: str, *args, **kwargs): + default = None + if args: + default = args[0] # os.getenv should get at most 1 arg after the key + if kwargs: + default = kwargs.get("default", None) # and sometimes kwarg if people are using the sig + + env_value = os.environ.get(key, None) + + if env_value: + return env_value # if the env_value is set, return it + if key == "JENKINS_HOME" and default is None: # fix https://github.com/astronomer/astro-cli/issues/601 + return None + if default: + return default # otherwise return whatever default has been passed + return f"MOCKED_{key.upper()}_VALUE" # if absolutely nothing has been passed - return the mocked value + + +os.getenv = os_getenv_monkeypatch +# # =========== /MONKEYPATCH OS.GETENV() =========== + +# =========== MONKEYPATCH VARIABLE.GET() =========== + + +class magic_dict(dict): + def __init__(self, *args, **kwargs): + self.update(*args, **kwargs) + + def __getitem__(self, key): + return {}.get(key, "MOCKED_KEY_VALUE") + + +_no_default = object() # allow falsey defaults + + +def variable_get_monkeypatch(key: str, default_var=_no_default, 
deserialize_json=False): + print(f"Attempted to get Variable value during parse, returning a mocked value for {key}") + + if default_var is not _no_default: + return default_var + if deserialize_json: + return magic_dict() + return "NON_DEFAULT_MOCKED_VARIABLE_VALUE" + + +Variable.get = variable_get_monkeypatch +# # =========== /MONKEYPATCH VARIABLE.GET() =========== + + +@contextmanager +def suppress_logging(namespace): + """ + Suppress logging within a specific namespace to keep tests "clean" during build + """ + logger = logging.getLogger(namespace) + old_value = logger.disabled + logger.disabled = True + try: + yield + finally: + logger.disabled = old_value + + +def get_import_errors(): + """ + Generate a tuple for import errors in the dag bag, and include DAGs without errors. + """ + with suppress_logging("airflow"): + dag_bag = DagBag(include_examples=False) + + def strip_path_prefix(path): + return os.path.relpath(path, os.environ.get("AIRFLOW_HOME")) + + # Initialize an empty list to store the tuples + result = [] + + # Iterate over the items in import_errors + for k, v in dag_bag.import_errors.items(): + result.append((strip_path_prefix(k), v.strip())) + + # Check if there are DAGs without errors + for file_path in dag_bag.dags: + # Check if the file_path is not in import_errors, meaning no errors + if file_path not in dag_bag.import_errors: + result.append((strip_path_prefix(file_path), "No import errors")) + + return result + + +@pytest.mark.parametrize("rel_path, rv", get_import_errors(), ids=[x[0] for x in get_import_errors()]) +def test_file_imports(rel_path, rv): + """Test for import errors on a file""" + if os.path.exists(".astro/dag_integrity_exceptions.txt"): + with open(".astro/dag_integrity_exceptions.txt", "r") as f: + exceptions = f.readlines() + print(f"Exceptions: {exceptions}") + if (rv != "No import errors") and rel_path not in exceptions: + # If rv is not "No import errors," consider it a failed test + raise Exception(f"{rel_path} failed 
to import with message \n {rv}") + else: + # If rv is "No import errors," consider it a passed test + print(f"{rel_path} passed the import test") diff --git a/airflow/.dockerignore b/airflow/.dockerignore new file mode 100644 index 000000000..a334663ba --- /dev/null +++ b/airflow/.dockerignore @@ -0,0 +1,8 @@ +astro +.git +.env +airflow_settings.yaml +logs/ +.venv +airflow.db +airflow.cfg diff --git a/airflow/.gitignore b/airflow/.gitignore new file mode 100644 index 000000000..0e8bcca90 --- /dev/null +++ b/airflow/.gitignore @@ -0,0 +1,11 @@ +.git +.env +.DS_Store +airflow_settings.yaml +__pycache__/ +astro +.venv +airflow-webserver.pid +webserver_config.py +airflow.cfg +airflow.db diff --git a/airflow/Dockerfile b/airflow/Dockerfile new file mode 100644 index 000000000..21e13084a --- /dev/null +++ b/airflow/Dockerfile @@ -0,0 +1,4 @@ +FROM quay.io/astronomer/astro-runtime:11.7.0 + +RUN mkdir /home/astro/.dbt +COPY ./dbt_profile.yml /home/astro/.dbt/profiles.yml diff --git a/airflow/README.md b/airflow/README.md new file mode 100644 index 000000000..699fda7b9 --- /dev/null +++ b/airflow/README.md @@ -0,0 +1,48 @@ +Overview +======== + +Welcome to Astronomer! This project was generated after you ran 'astro dev init' using the Astronomer CLI. This readme describes the contents of the project, as well as how to run Apache Airflow on your local machine. + +Project Contents +================ + +Your Astro project contains the following files and folders: + +- dags: This folder contains the Python files for your Airflow DAGs. By default, this directory includes one example DAG: + - `example_astronauts`: This DAG shows a simple ETL pipeline example that queries the list of astronauts currently in space from the Open Notify API and prints a statement for each astronaut. The DAG uses the TaskFlow API to define tasks in Python, and dynamic task mapping to dynamically print a statement for each astronaut. 
For more on how this DAG works, see our [Getting started tutorial](https://docs.astronomer.io/learn/get-started-with-airflow). +- Dockerfile: This file contains a versioned Astro Runtime Docker image that provides a differentiated Airflow experience. If you want to execute other commands or overrides at runtime, specify them here. +- include: This folder contains any additional files that you want to include as part of your project. It is empty by default. +- packages.txt: Install OS-level packages needed for your project by adding them to this file. It is empty by default. +- requirements.txt: Install Python packages needed for your project by adding them to this file. It is empty by default. +- plugins: Add custom or community plugins for your project to this file. It is empty by default. +- airflow_settings.yaml: Use this local-only file to specify Airflow Connections, Variables, and Pools instead of entering them in the Airflow UI as you develop DAGs in this project. + +Deploy Your Project Locally +=========================== + +1. Start Airflow on your local machine by running 'astro dev start'. + +This command will spin up 4 Docker containers on your machine, each for a different Airflow component: + +- Postgres: Airflow's Metadata Database +- Webserver: The Airflow component responsible for rendering the Airflow UI +- Scheduler: The Airflow component responsible for monitoring and triggering tasks +- Triggerer: The Airflow component responsible for triggering deferred tasks + +2. Verify that all 4 Docker containers were created by running 'docker ps'. + +Note: Running 'astro dev start' will start your project with the Airflow Webserver exposed at port 8080 and Postgres exposed at port 5432. If you already have either of those ports allocated, you can either [stop your existing Docker containers or change the port](https://docs.astronomer.io/astro/test-and-troubleshoot-locally#ports-are-not-available). + +3. 
Access the Airflow UI for your local Airflow project. To do so, go to http://localhost:8080/ and log in with 'admin' for both your Username and Password. + +You should also be able to access your Postgres Database at 'localhost:5432/postgres'. + +Deploy Your Project to Astronomer +================================= + +If you have an Astronomer account, pushing code to a Deployment on Astronomer is simple. For deploying instructions, refer to Astronomer documentation: https://docs.astronomer.io/cloud/deploy-code/ + +Contact +======= + +The Astronomer CLI is maintained with love by the Astronomer team. To report a bug or suggest a change, reach out to our support. diff --git a/airflow/dags/admin_express.py b/airflow/dags/admin_express.py new file mode 100644 index 000000000..cb8717d31 --- /dev/null +++ b/airflow/dags/admin_express.py @@ -0,0 +1,86 @@ +""" +## Astronaut ETL example DAG + +This DAG queries the list of astronauts currently in space from the +Open Notify API and prints each astronaut's name and flying craft. + +There are two tasks, one to get the data from the API and save the results, +and another to print the results. Both tasks are written in Python using +Airflow's TaskFlow API, which allows you to easily turn Python functions into +Airflow tasks, and automatically infer dependencies and pass data. + +The second task uses dynamic task mapping to create a copy of the task for +each Astronaut in the list retrieved from the API. This list will change +depending on how many Astronauts are in space, and the DAG will adjust +accordingly each time it runs. 
+ +For more explanation and getting started instructions, see our Write your +first DAG tutorial: https://docs.astronomer.io/learn/get-started-with-airflow +""" + +import os +import subprocess +from urllib.request import URLopener + +import py7zr +from airflow.decorators import dag, task +from dependencies.container import Container +from pendulum import datetime + +from airflow import Dataset + + +# Define the basic parameters of the DAG, like schedule and start_date +@dag( + start_date=datetime(2024, 1, 1), + schedule="@once", + catchup=False, + doc_md=__doc__, + default_args={"owner": "Alexis Athlani", "retries": 3}, + tags=["Admin Express"], +) +def admin_express(): + admin_express_archive_file = "admin_express.7z" + bucket_name = "airflow-staging" + path_on_bucket = f"{bucket_name}/{admin_express_archive_file}" + + @task.python + def download_admin_express() -> str: + url = "https://data.geopf.fr/telechargement/download/ADMIN-EXPRESS-COG/ADMIN-EXPRESS-COG_3-2__SHP_LAMB93_FXX_2024-02-22/ADMIN-EXPRESS-COG_3-2__SHP_LAMB93_FXX_2024-02-22.7z" # noqa: E501 + + opener = URLopener() + opener.addheader("User-Agent", "Mozilla/5.0") + opener.retrieve(url=url, filename=admin_express_archive_file) + + with open(admin_express_archive_file, "rb") as local_file: + with Container().s3().open(path_on_bucket, "wb") as distant_file: + distant_file.write(local_file.read()) + + @task( + outlets=[ + Dataset("arrondissement"), + Dataset("arrondissement_municipal"), + Dataset("canton"), + Dataset("collectivite_territoriale"), + Dataset("commune"), + Dataset("commune_associee_ou_deleguee"), + Dataset("departement"), + Dataset("epci"), + Dataset("region"), + ] + ) + def ingest_admin_express() -> str: + with Container().s3().open(path_on_bucket, "rb") as f: + py7zr.SevenZipFile(f, mode="r").extractall() + for dirpath, _, filenames in os.walk("."): + for filename in filenames: + if filename.endswith(".shp"): + path = os.path.abspath(os.path.join(dirpath, filename)) + cmd = f'ogr2ogr -f 
"PostgreSQL" "{Container().postgres_conn_str_ogr2ogr()}" -overwrite -lco GEOMETRY_NAME=geom -a_srs EPSG:2154 -nlt MULTIPOLYGON -nlt PROMOTE_TO_MULTI {path} --config PG_USE_COPY YES' # noqa: E501 + subprocess.run(cmd, shell=True, check=True) + + download_admin_express() >> ingest_admin_express() + + +# Instantiate the DAG +admin_express() diff --git a/airflow/dags/ocsge.py b/airflow/dags/ocsge.py new file mode 100644 index 000000000..ed72e2961 --- /dev/null +++ b/airflow/dags/ocsge.py @@ -0,0 +1,349 @@ +""" +## Astronaut ETL example DAG + +This DAG queries the list of astronauts currently in space from the +Open Notify API and prints each astronaut's name and flying craft. + +There are two tasks, one to get the data from the API and save the results, +and another to print the results. Both tasks are written in Python using +Airflow's TaskFlow API, which allows you to easily turn Python functions into +Airflow tasks, and automatically infer dependencies and pass data. + +The second task uses dynamic task mapping to create a copy of the task for +each Astronaut in the list retrieved from the API. This list will change +depending on how many Astronauts are in space, and the DAG will adjust +accordingly each time it runs. + +For more explanation and getting started instructions, see our Write your +first DAG tutorial: https://docs.astronomer.io/learn/get-started-with-airflow +""" + +import cgi +import os +import re +import tempfile + +import py7zr +import requests +from airflow.decorators import dag, task +from airflow.operators.bash import BashOperator +from dependencies.container import Container +from dependencies.utils import multiline_string_to_single_line +from pendulum import datetime + + +def find_years_in_url(url: str) -> list[int]: + results = re.findall(pattern="(\d{4})", string=str(url)) # noqa: W605 + + years = set() + + for result in results: + # check if the year the number is > 2000. 
+ # this is to avoid getting other numbers in the path as years + if str(result).startswith("20"): + years.add(int(result)) + + if not years: + raise ValueError("Years not found in the path") + + return list(sorted(years)) + + +def years_as_string(years: list[int]) -> str: + return "_".join(map(str, years)) + + +def find_departement_in_url(url: str) -> str: + results = re.findall(pattern="D(\d{3})", string=str(url)) # noqa: W605 + + if len(results) > 0: + result = results[0] + + if str(result).startswith("0"): + return str(result).replace("0", "", 1) + + if not result: + raise ValueError("Departement not found in the path") + + return result + + +def ocsge_diff_normalization_sql( + years: list[int], + departement: str, + source_name: str, +) -> str: + fields = { + "cs_new": f"CS_{years[1]}", + "cs_old": f"CS_{years[0]}", + "us_new": f"US_{years[1]}", + "us_old": f"US_{years[0]}", + "year_old": years[0], + "year_new": years[1], + } + + return f""" + SELECT + CreateUUID() AS guid, + {fields['year_old']} AS year_old, + {fields['year_new']} AS year_new, + {fields['cs_new']} AS cs_new, + {fields['cs_old']} AS cs_old, + {fields['us_new']} AS us_new, + {fields['us_old']} AS us_old, + {departement} AS departement, + GEOMETRY as geom + FROM + {source_name} + """ + + +def ocsge_occupation_du_sol_normalization_sql( + years: list[int], + departement: str, + source_name: str, +) -> str: + return f""" SELECT + CreateUUID() AS guid, + ID AS id, + code_cs AS code_cs, + code_us AS code_us, + GEOMETRY AS geom, + {departement} AS departement, + {years[0]} AS year + FROM + {source_name} + """ + + +def ocsge_zone_construite_normalization_sql( + years: list[int], + departement: str, + source_name: str, +) -> str: + return f""" SELECT + CreateUUID() AS guid, + ID AS id, + {years[0]} AS year, + {departement} AS departement, + GEOMETRY AS geom + FROM + {source_name} + """ + + +def get_table_name(shapefile_name: str) -> str: + shapefile_name = shapefile_name.lower() + if "diff" in 
shapefile_name: + return "ocsge_diff" + if "occupation" in shapefile_name: + return "ocsge_occupation_du_sol" + if "zone" in shapefile_name: + return "ocsge_zone_construite" + + return None + + +def get_normalization_sql(table_name: str, source_name: str, years: list[int], departement: str) -> str: + return { + "ocsge_diff": ocsge_diff_normalization_sql, + "ocsge_occupation_du_sol": ocsge_occupation_du_sol_normalization_sql, + "ocsge_zone_construite": ocsge_zone_construite_normalization_sql, + }[table_name](years=years, departement=departement, source_name=source_name) + + +configs = { # noqa: E501 + "94": [ + "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D094_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D094_2021-01-01.7z", # noqa: E501 + "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D094_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D094_2018-01-01.7z", # noqa: E501 + "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D094_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D094_2018-2021.7z", # noqa: E501 + ], + "69": [ + "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D069_2020-01-01/OCS-GE_2-0__SHP_LAMB93_D069_2020-01-01.7z", # noqa: E501 + "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D069_2017-01-01/OCS-GE_2-0__SHP_LAMB93_D069_2017-01-01.7z", # noqa: E501 + "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D069_DIFF_2017-2020/OCS-GE_2-0__SHP_LAMB93_D069_DIFF_2017-2020.7z", # noqa: E501 + ], + "75": [ + "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D075_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D075_2021-01-01.7z", # noqa: E501 + "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D075_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D075_2018-01-01.7z", # noqa: E501 + 
"https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D075_DIFF_2018-2021/OCS-GE_2-0__SHP_LAMB93_D075_DIFF_2018-2021.7z", # noqa: E501 + ], + "92": [ + "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D092_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D092_2021-01-01.7z", # noqa: E501 + "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D092_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D092_2018-01-01.7z", # noqa: E501 + "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D092_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D092_2018-2021.7z", # noqa: E501 + ], + "91": [ + "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D091_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D091_2021-01-01.7z", # noqa: E501 + "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D091_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D091_2018-01-01.7z", # noqa: E501 + "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D091_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D091_2018-2021.7z", # noqa: E501 + ], + "66": [ + "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D066_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D066_2021-01-01.7z", # noqa: E501 + "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D066_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D066_2018-01-01.7z", # noqa: E501 + "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D066_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D066_2018-2021.7z", # noqa: E501 + ], +} + + +for departement_str, urls in configs.items(): # noqa: C901 + dag_id = f"ingest_ocsge_{departement_str}" + + # Define the basic parameters of the DAG, like schedule and start_date + @dag( + dag_id=dag_id, + start_date=datetime(2024, 1, 1), + schedule="@once", + catchup=False, + doc_md=__doc__, + default_args={"owner": "Alexis Athlani", "retries": 3}, + tags=["OCS GE"], + ) + def ocsge(config): + 
bucket_name = "airflow-staging" + config: dict = config.resolve({}) + urls = config.get("urls") + + def download_ocsge(url) -> str: + response = requests.get(url, allow_redirects=True) + + if not response.ok: + raise ValueError(f"Failed to download {url}. Response : {response.content}") + header = response.headers["content-disposition"] + _, params = cgi.parse_header(header) + filename = params.get("filename") + + path_on_bucket = f"{bucket_name}/{os.path.basename(filename)}" + with Container().s3().open(path_on_bucket, "wb") as distant_file: + distant_file.write(response.content) + + return path_on_bucket + + @task.python + def download_ocsge_first_millesime() -> str: + return download_ocsge(urls[0]) + + @task.python + def download_ocsge_second_millesime() -> str: + return download_ocsge(urls[1]) + + @task.python + def download_ocsge_diff() -> str: + return download_ocsge(urls[2]) + + @task.python + def delete_tables_before(): + conn = Container().postgres_conn() + cur = conn.cursor() + + cur.execute("DROP TABLE IF EXISTS ocsge_diff;") + cur.execute("DROP TABLE IF EXISTS ocsge_occupation_du_sol;") + cur.execute("DROP TABLE IF EXISTS ocsge_zone_construite;") + + @task.python(trigger_rule="all_done") + def delete_tables_after(): + conn = Container().postgres_conn() + cur = conn.cursor() + + cur.execute("DROP TABLE IF EXISTS ocsge_diff;") + cur.execute("DROP TABLE IF EXISTS ocsge_occupation_du_sol;") + cur.execute("DROP TABLE IF EXISTS ocsge_zone_construite;") + + @task.python + def ingest_ocsge(paths: list[str]) -> str: + for path in paths: + years = find_years_in_url(path) + print("find_years_in_url", years, path) + departement = find_departement_in_url(path) + + with Container().s3().open(path, "rb") as f: + extract_dir = tempfile.mkdtemp() + py7zr.SevenZipFile(f, mode="r").extractall(path=extract_dir) + + for dirpath, _, filenames in os.walk(extract_dir): + for filename in filenames: + if filename.endswith(".shp"): + path = os.path.abspath(os.path.join(dirpath, 
filename)) + table_name = get_table_name(shapefile_name=filename) + print("get_table_name", table_name) + if not table_name: + continue + sql = multiline_string_to_single_line( + get_normalization_sql( + source_name=os.path.basename(path).replace(".shp", ""), + table_name=table_name, + years=years, + departement=departement, + ) + ) + cmd = [ + "ogr2ogr", + "-dialect", + "SQLITE", + "-f", + '"PostgreSQL"', + f'"{Container().postgres_conn_str_ogr2ogr()}"', + "-overwrite", + "-lco", + "GEOMETRY_NAME=geom", + "-a_srs", + "EPSG:2154", + "-nlt", + "MULTIPOLYGON", + "-nlt", + "PROMOTE_TO_MULTI", + "-nln", + table_name, + path, + "--config", + "PG_USE_COPY", + "YES", + "-sql", + f'"{sql}"', + ] + BashOperator( + task_id=f"ingest_{table_name}", + bash_command=" ".join(cmd), + ).execute(context={}) + + build_dbt = BashOperator( + task_id="build_dbt", + bash_command='cd "${AIRFLOW_HOME}/sql/sparte" && dbt build -s ocsge', + retries=0, + ) + + @task.python + def export_table(): + conn = Container().postgres_conn() + cur = conn.cursor() + + filename = "occupation_du_sol.csv" + temp_file = f"/tmp/{filename}" + temp_archive = f"/tmp/{filename}.7z" + path_on_bucket = f"{bucket_name}/{filename}.7z" + + with open(temp_file, "w") as csv_file: + cur.copy_expert( + "COPY (SELECT * FROM public_ocsge.occupation_du_sol) TO STDOUT WITH CSV HEADER", csv_file + ) + + with py7zr.SevenZipFile(temp_archive, mode="w") as archive: + archive.write(temp_file, filename) + + with open(temp_archive, "rb") as archive: + with Container().s3().open(path_on_bucket, "wb") as f: + f.write(archive.read()) + + paths = [ + download_ocsge_diff(), + download_ocsge_first_millesime(), + download_ocsge_second_millesime(), + ] + + paths >> delete_tables_before() + + ingest_ocsge(paths) >> build_dbt >> export_table() >> delete_tables_after() + + config = {"urls": urls} + + ocsge(config) diff --git a/airflow/dbt_profile.yml b/airflow/dbt_profile.yml new file mode 100644 index 000000000..f39c999a3 --- /dev/null +++ 
b/airflow/dbt_profile.yml @@ -0,0 +1,12 @@ +sparte: + outputs: + dev: + dbname: "{{ env_var('DBT_DB_NAME') }}" + host: "{{ env_var('DBT_DB_HOST') }}" + pass: "{{ env_var('DBT_DB_PASSWORD') }}" + port: "{{ env_var('DBT_DB_PORT') | as_number }}" + schema: "{{ env_var('DBT_DB_SCHEMA') }}" + threads: 1 + type: "postgres" + user: "{{ env_var('DBT_DB_USER') }}" + target: dev diff --git a/airflow/dependencies/container.py b/airflow/dependencies/container.py new file mode 100644 index 000000000..1af770665 --- /dev/null +++ b/airflow/dependencies/container.py @@ -0,0 +1,41 @@ +from os import getenv + +from airflow.hooks.base import BaseHook +from dependency_injector import containers, providers +from psycopg2 import connect +from psycopg2.extensions import connection +from s3fs import S3FileSystem + + +def db_str_for_ogr2ogr(dbname: str, user: str, password: str, host: str, port: int) -> str: + return f"PG:dbname='{dbname}' host='{host}' port='{port}' user='{user}' password='{password}'" + + +class Container(containers.DeclarativeContainer): + s3 = providers.Factory( + provides=S3FileSystem, + key=BaseHook.get_connection("scaleway_airflow_bucket").login, + secret=BaseHook.get_connection("scaleway_airflow_bucket").password, + endpoint_url=BaseHook.get_connection("scaleway_airflow_bucket").extra_dejson.get("endpoint_url"), + client_kwargs={ + "region_name": BaseHook.get_connection("scaleway_airflow_bucket").extra_dejson.get("region_name") + }, + ) + + postgres_conn: connection = providers.Factory( + provides=connect, + dbname=getenv("DBT_DB_NAME"), + user=getenv("DBT_DB_USER"), + password=getenv("DBT_DB_PASSWORD"), + host=getenv("DBT_DB_HOST"), + port=getenv("DBT_DB_PORT"), + ) + + postgres_conn_str_ogr2ogr = providers.Factory( + db_str_for_ogr2ogr, + dbname=getenv("DBT_DB_NAME"), + user=getenv("DBT_DB_USER"), + password=getenv("DBT_DB_PASSWORD"), + host=getenv("DBT_DB_HOST"), + port=getenv("DBT_DB_PORT"), + ) diff --git a/airflow/dependencies/utils.py 
b/airflow/dependencies/utils.py new file mode 100644 index 000000000..ec9aa141e --- /dev/null +++ b/airflow/dependencies/utils.py @@ -0,0 +1,2 @@ +def multiline_string_to_single_line(string: str) -> str: + return string.replace("\n", " ").replace("\r", "") diff --git a/airflow/packages.txt b/airflow/packages.txt new file mode 100644 index 000000000..6b84fcfa2 --- /dev/null +++ b/airflow/packages.txt @@ -0,0 +1,11 @@ +binutils +libproj-dev +gdal-bin +libgdal-dev +libpq-dev +python3-dev +gcc +g++ +wget +p7zip-full +postgresql-client diff --git a/airflow/requirements.txt b/airflow/requirements.txt new file mode 100644 index 000000000..42df29065 --- /dev/null +++ b/airflow/requirements.txt @@ -0,0 +1,10 @@ +# Astro Runtime includes the following pre-installed providers packages: https://docs.astronomer.io/astro/runtime-image-architecture#provider-packages +s3fs==2024.6.1 +dependency-injector==4.41.0 +py7zr==0.21.1 +sqlmesh==0.115.1 +sqlmesh[postgres]==0.115.1 +apache-airflow-providers-postgres==5.11.2 +requests +dbt-core +dbt-postgres diff --git a/airflow/sql/sparte/.gitignore b/airflow/sql/sparte/.gitignore new file mode 100644 index 000000000..49f147cb9 --- /dev/null +++ b/airflow/sql/sparte/.gitignore @@ -0,0 +1,4 @@ + +target/ +dbt_packages/ +logs/ diff --git a/airflow/sql/sparte/README.md b/airflow/sql/sparte/README.md new file mode 100644 index 000000000..7874ac842 --- /dev/null +++ b/airflow/sql/sparte/README.md @@ -0,0 +1,15 @@ +Welcome to your new dbt project! 
+ +### Using the starter project + +Try running the following commands: +- dbt run +- dbt test + + +### Resources: +- Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction) +- Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers +- Join the [chat](https://community.getdbt.com/) on Slack for live discussions and support +- Find [dbt events](https://events.getdbt.com) near you +- Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices diff --git a/airflow/sql/sparte/analyses/.gitkeep b/airflow/sql/sparte/analyses/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/airflow/sql/sparte/dbt_project.yml b/airflow/sql/sparte/dbt_project.yml new file mode 100644 index 000000000..3dd8e2b10 --- /dev/null +++ b/airflow/sql/sparte/dbt_project.yml @@ -0,0 +1,42 @@ + +# Name your project! Project names should contain only lowercase characters +# and underscores. A good package name should reflect your organization's +# name or the intended use of these models +name: 'sparte' +version: '1.0.0' + +# This setting configures which "profile" dbt uses for this project. +profile: 'sparte' + +# These configurations specify where dbt should look for different types of files. +# The `model-paths` config, for example, states that models in this project can be +# found in the "models/" directory. You probably won't need to change these! +model-paths: ["models"] +analysis-paths: ["analyses"] +test-paths: ["tests"] +seed-paths: ["seeds"] +macro-paths: ["macros"] +snapshot-paths: ["snapshots"] + +clean-targets: # directories to be removed by `dbt clean` + - "target" + - "dbt_packages" + + +# Configuring models +# Full documentation: https://docs.getdbt.com/docs/configuring-models + +# In this example config, we tell dbt to build all models in the example/ +# directory as views. 
These settings can be overridden in the individual model +# files using the `{{ config(...) }}` macro. +models: + sparte: + ocsge: + +schema: ocsge + +indexes: + - columns: [geom] + type: gist + + admin_express: + +schema: admin_express + # Config indicated by + and applies to all files under models/example/ diff --git a/airflow/sql/sparte/macros/.gitkeep b/airflow/sql/sparte/macros/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/airflow/sql/sparte/macros/is_artificial.sql b/airflow/sql/sparte/macros/is_artificial.sql new file mode 100644 index 000000000..751f7602c --- /dev/null +++ b/airflow/sql/sparte/macros/is_artificial.sql @@ -0,0 +1,42 @@ + +{% macro is_artificial(code_cs, code_us) %} + (CASE + /* CS 1.1 */ + WHEN {{ code_cs }} = 'CS1.1.1.1' THEN true + WHEN {{ code_cs }} = 'CS1.1.1.2' THEN true + WHEN {{ code_cs }} = 'CS1.1.2.1' AND {{ code_us }} != 'US1.3' THEN true + WHEN {{ code_cs }} = 'CS1.1.2.2' THEN true + + /* CS 2.2 */ + /* CS 2.2.1 */ + WHEN {{ code_cs }} = 'CS2.2.1' AND {{ code_us }} = 'US2' THEN true + WHEN {{ code_cs }} = 'CS2.2.1' AND {{ code_us }} = 'US3' THEN true + WHEN {{ code_cs }} = 'CS2.2.1' AND {{ code_us }} = 'US5' THEN true + WHEN {{ code_cs }} = 'CS2.2.1' AND {{ code_us }} = 'US235' THEN true + WHEN {{ code_cs }} = 'CS2.2.1' AND {{ code_us }} = 'US4.1.1' THEN true + WHEN {{ code_cs }} = 'CS2.2.1' AND {{ code_us }} = 'US4.1.2' THEN true + WHEN {{ code_cs }} = 'CS2.2.1' AND {{ code_us }} = 'US4.1.3' THEN true + WHEN {{ code_cs }} = 'CS2.2.1' AND {{ code_us }} = 'US4.1.4' THEN true + WHEN {{ code_cs }} = 'CS2.2.1' AND {{ code_us }} = 'US4.1.5' THEN true + WHEN {{ code_cs }} = 'CS2.2.1' AND {{ code_us }} = 'US4.2' THEN true + WHEN {{ code_cs }} = 'CS2.2.1' AND {{ code_us }} = 'US4.3' THEN true + WHEN {{ code_cs }} = 'CS2.2.1' AND {{ code_us }} = 'US6.1' THEN true + WHEN {{ code_cs }} = 'CS2.2.1' AND {{ code_us }} = 'US6.2' THEN true + + /* CS 2.2.2 */ + WHEN {{ code_cs }} = 'CS2.2.2' AND {{ code_us }} = 'US2' THEN 
true + WHEN {{ code_cs }} = 'CS2.2.2' AND {{ code_us }} = 'US3' THEN true + WHEN {{ code_cs }} = 'CS2.2.2' AND {{ code_us }} = 'US5' THEN true + WHEN {{ code_cs }} = 'CS2.2.2' AND {{ code_us }} = 'US235' THEN true + WHEN {{ code_cs }} = 'CS2.2.2' AND {{ code_us }} = 'US4.1.1' THEN true + WHEN {{ code_cs }} = 'CS2.2.2' AND {{ code_us }} = 'US4.1.2' THEN true + WHEN {{ code_cs }} = 'CS2.2.2' AND {{ code_us }} = 'US4.1.3' THEN true + WHEN {{ code_cs }} = 'CS2.2.2' AND {{ code_us }} = 'US4.1.4' THEN true + WHEN {{ code_cs }} = 'CS2.2.2' AND {{ code_us }} = 'US4.1.5' THEN true + WHEN {{ code_cs }} = 'CS2.2.2' AND {{ code_us }} = 'US4.2' THEN true + WHEN {{ code_cs }} = 'CS2.2.2' AND {{ code_us }} = 'US4.3' THEN true + WHEN {{ code_cs }} = 'CS2.2.2' AND {{ code_us }} = 'US6.1' THEN true + WHEN {{ code_cs }} = 'CS2.2.2' AND {{ code_us }} = 'US6.2' THEN true + ELSE false + END) +{% endmacro %} diff --git a/airflow/sql/sparte/macros/is_impermeable.sql b/airflow/sql/sparte/macros/is_impermeable.sql new file mode 100644 index 000000000..38a644e30 --- /dev/null +++ b/airflow/sql/sparte/macros/is_impermeable.sql @@ -0,0 +1,8 @@ + +{% macro is_impermeable(code_cs) %} + (CASE + WHEN {{ code_cs }} = 'CS1.1.1.1' THEN true + WHEN {{ code_cs }} = 'CS1.1.1.2' THEN true + ELSE false + END) +{% endmacro %} diff --git a/airflow/sql/sparte/models/admin_express/commune_with_checksum.sql b/airflow/sql/sparte/models/admin_express/commune_with_checksum.sql new file mode 100644 index 000000000..7b171ff42 --- /dev/null +++ b/airflow/sql/sparte/models/admin_express/commune_with_checksum.sql @@ -0,0 +1,4 @@ + +{{ config(materialized='table') }} + +SELECT *, md5(commune::text) FROM {{ source('public', 'commune') }} AS commune diff --git a/airflow/sql/sparte/models/admin_express/land.sql b/airflow/sql/sparte/models/admin_express/land.sql new file mode 100644 index 000000000..1fb4abbe5 --- /dev/null +++ b/airflow/sql/sparte/models/admin_express/land.sql @@ -0,0 +1,30 @@ + +{{ 
config(materialized='view') }} + +SELECT + commune.insee_com AS land_id, + 'COMMUNE' AS land_type, + geom +FROM + {{ source('public', 'commune') }} AS commune +UNION +SELECT + departement.insee_dep AS land_id, + 'DEPARTEMENT' AS land_type, + geom +FROM + {{ source('public', 'departement') }} AS departement +UNION +SELECT + region.insee_reg AS land_id, + 'REGION' AS land_type, + geom +FROM + {{ source('public', 'region') }} AS region +UNION +SELECT + epci.code_siren AS land_id, + 'EPCI' AS land_type, + geom +FROM + {{ source('public', 'epci') }} AS epci diff --git a/airflow/sql/sparte/models/admin_express/schema.yml b/airflow/sql/sparte/models/admin_express/schema.yml new file mode 100644 index 000000000..2a36bd81f --- /dev/null +++ b/airflow/sql/sparte/models/admin_express/schema.yml @@ -0,0 +1,22 @@ + +version: 2 + +models: + - name: commune_with_checksum + - name: land + +sources: + - name: public + tables: + - name: arrondissement + - name: arrondissement_municipal + - name: canton + - name: chflieu_arrondissement_municipal + - name: chflieu_commune + - name: chflieu_commune_associee_ou_deleguee + - name: collectivite_territoriale + - name: commune + - name: commune_associee_ou_deleguee + - name: departement + - name: epci + - name: region diff --git a/airflow/sql/sparte/models/ocsge/artif/README b/airflow/sql/sparte/models/ocsge/artif/README new file mode 100644 index 000000000..e69de29bb diff --git a/airflow/sql/sparte/models/ocsge/artif/artif_nat_by_surface.sql b/airflow/sql/sparte/models/ocsge/artif/artif_nat_by_surface.sql new file mode 100644 index 000000000..e69de29bb diff --git a/airflow/sql/sparte/models/ocsge/artif/artificial_geom_union.sql b/airflow/sql/sparte/models/ocsge/artif/artificial_geom_union.sql new file mode 100644 index 000000000..e69de29bb diff --git a/airflow/sql/sparte/models/ocsge/artif/artificial_geom_union_dump.sql b/airflow/sql/sparte/models/ocsge/artif/artificial_geom_union_dump.sql new file mode 100644 index 000000000..e69de29bb 
diff --git a/airflow/sql/sparte/models/ocsge/artif/artificial_union.sql b/airflow/sql/sparte/models/ocsge/artif/artificial_union.sql new file mode 100644 index 000000000..e69de29bb diff --git a/airflow/sql/sparte/models/ocsge/artif/clustered_ocsge.sql b/airflow/sql/sparte/models/ocsge/artif/clustered_ocsge.sql new file mode 100644 index 000000000..e69de29bb diff --git a/airflow/sql/sparte/models/ocsge/artif/final_artif.sql b/airflow/sql/sparte/models/ocsge/artif/final_artif.sql new file mode 100644 index 000000000..e69de29bb diff --git a/airflow/sql/sparte/models/ocsge/artif/small_built.sql b/airflow/sql/sparte/models/ocsge/artif/small_built.sql new file mode 100644 index 000000000..e69de29bb diff --git a/airflow/sql/sparte/models/ocsge/difference.sql b/airflow/sql/sparte/models/ocsge/difference.sql new file mode 100644 index 000000000..deaef5b93 --- /dev/null +++ b/airflow/sql/sparte/models/ocsge/difference.sql @@ -0,0 +1,52 @@ +-- depends_on: {{ source('public', 'ocsge_occupation_du_sol') }}, {{ source('public', 'ocsge_diff') }}, {{ source('public', 'ocsge_zone_construite') }} + +{{ + config( + materialized='incremental', + post_hook="DELETE FROM {{ this }} WHERE guid NOT IN (SELECT guid FROM {{ source('public', 'ocsge_diff') }})" + ) +}} + +SELECT + *, + CASE + WHEN + old_is_imper = false AND + new_is_imper = true + THEN true + ELSE false + END AS new_is_impermeable, + CASE + WHEN + old_is_imper = true AND + new_is_imper = false + THEN true + ELSE false + END AS new_not_impermeable, + CASE + WHEN + old_is_artif = false AND + new_is_artif = true + THEN true + ELSE false + END AS new_is_artificial, + CASE + WHEN + old_is_artif = true AND + new_is_artif = false THEN true + ELSE false + END AS new_not_artificial +FROM ( + SELECT + *, + ST_Area(geom) AS surface, + {{ is_artificial('cs_old', 'us_old') }} AS old_is_artif, + {{ is_impermeable('cs_old') }} AS old_is_imper, + {{ is_artificial('cs_new', 'us_new') }} AS new_is_artif, + {{ is_impermeable('cs_new') }} AS 
new_is_imper + FROM + {{ source('public', 'ocsge_diff') }} + {% if is_incremental() %} + WHERE guid not in (SELECT guid from {{ this }}) + {% endif %} +) AS foo diff --git a/airflow/sql/sparte/models/ocsge/occupation_du_sol.sql b/airflow/sql/sparte/models/ocsge/occupation_du_sol.sql new file mode 100644 index 000000000..87ff28f0b --- /dev/null +++ b/airflow/sql/sparte/models/ocsge/occupation_du_sol.sql @@ -0,0 +1,26 @@ +-- depends_on: {{ source('public', 'ocsge_occupation_du_sol') }}, {{ source('public', 'ocsge_diff') }}, {{ source('public', 'ocsge_zone_construite') }} + + +{{ + config( + materialized='incremental', + incremental_strategy='delete+insert', + unique_key=['departement','year'], + indexes=[ + {'columns': ['departement','year'], 'type': 'btree'}, + {'columns': ['geom'], 'type': 'gist'} + ] + ) +}} + +SELECT + *, + ST_area(geom) AS surface, + {{ is_impermeable('code_cs') }} as is_impermeable, + {{ is_artificial('code_cs', 'code_us') }} as is_artificial +FROM + {{ source('public', 'ocsge_occupation_du_sol') }} + +{% if is_incremental() %} + WHERE guid not in (SELECT guid from {{ this }}) +{% endif %} diff --git a/airflow/sql/sparte/models/ocsge/schema.yml b/airflow/sql/sparte/models/ocsge/schema.yml new file mode 100644 index 000000000..52243ce9b --- /dev/null +++ b/airflow/sql/sparte/models/ocsge/schema.yml @@ -0,0 +1,131 @@ + +version: 2 + +cs_accepted_values: &cs_accepted_values + values: [ + "CS1.1.1.1", + "CS1.1.1.2", + "CS1.1.2.1", + "CS1.1.2.2", + "CS1.2.1", + "CS1.2.2", + "CS1.2.3", + "CS2.1.1.1", + "CS2.1.1.2", + "CS2.1.1.3", + "CS2.1.2", + "CS2.1.3", + "CS2.2.1", + "CS2.2.2" + ] + +us_accepted_values: &us_accepted_values + values : [ + "US1.1", + "US1.2", + "US1.3", + "US1.4", + "US2", + "US3", + "US5", + "US235", + "US4.1.1", + "US4.1.2", + "US4.1.3", + "US4.1.4", + "US4.1.5", + "US4.2", + "US4.3", + "US6.1", + "US6.2", + "US6.3", + "US6.6" + ] + +difference_test: &difference_test + - name: geom + tests: + - not_null + - unique + - 
is_valid_geom + - name: cs_new + tests: + - not_null + - accepted_values: *cs_accepted_values + - name: cs_old + tests: + - not_null + - accepted_values: *cs_accepted_values + - name: us_new + tests: + - not_null + - accepted_values: *us_accepted_values + - name: us_old + tests: + - not_null + - accepted_values: *us_accepted_values + +occupation_du_sol_test: &occupation_du_sol_test + - name: geom + tests: + - not_null + - unique + - is_valid_geom + - name: departement + tests: + - not_null + - name: year + tests: + - not_null + - name: id + tests: + - unique + - not_null + - name: guid + tests: + - unique + - not_null + - name: code_cs + tests: + - not_null + - accepted_values: *cs_accepted_values + - name: code_us + tests: + - not_null + - accepted_values: *us_accepted_values + + +zone_construite_test: &zone_construite_test + - name: geom + tests: + - not_null + - unique + - is_valid_geom + - name: departement + tests: + - not_null + - name: year + tests: + - not_null + - name: id + tests: + - unique + - not_null + - name: guid + tests: + - unique + - not_null + +models: + - name: zone_construite + - name: occupation_du_sol + - name: difference + +sources: + - name: public + tables: + - name: ocsge_diff + columns: *difference_test + - name: ocsge_occupation_du_sol + columns: *occupation_du_sol_test + - name: ocsge_zone_construite diff --git a/airflow/sql/sparte/models/ocsge/zone_construite.sql b/airflow/sql/sparte/models/ocsge/zone_construite.sql new file mode 100644 index 000000000..522fa231b --- /dev/null +++ b/airflow/sql/sparte/models/ocsge/zone_construite.sql @@ -0,0 +1,9 @@ +-- depends_on: {{ source('public', 'ocsge_occupation_du_sol') }}, {{ source('public', 'ocsge_diff') }}, {{ source('public', 'ocsge_zone_construite') }} + +{{ config(materialized='incremental') }} + +SELECT * FROM + {{ source('public', 'ocsge_zone_construite') }} +{% if is_incremental() %} + WHERE guid not in (SELECT guid from {{ this }}) +{% endif %} diff --git 
a/airflow/sql/sparte/package-lock.yml b/airflow/sql/sparte/package-lock.yml new file mode 100644 index 000000000..5c78013b9 --- /dev/null +++ b/airflow/sql/sparte/package-lock.yml @@ -0,0 +1,6 @@ +packages: + - package: dbt-labs/codegen + version: 0.12.1 + - package: dbt-labs/dbt_utils + version: 1.2.0 +sha1_hash: 37aba29ba147b9afff74716d974b60c54b7f1a1d diff --git a/airflow/sql/sparte/packages.yml b/airflow/sql/sparte/packages.yml new file mode 100644 index 000000000..854f36ad7 --- /dev/null +++ b/airflow/sql/sparte/packages.yml @@ -0,0 +1,5 @@ +packages: + - package: dbt-labs/codegen + version: 0.12.1 + - package: dbt-labs/dbt_utils + version: 1.2.0 diff --git a/airflow/sql/sparte/seeds/.gitkeep b/airflow/sql/sparte/seeds/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/airflow/sql/sparte/snapshots/.gitkeep b/airflow/sql/sparte/snapshots/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/airflow/sql/sparte/tests/generic/is_valid_geom.sql b/airflow/sql/sparte/tests/generic/is_valid_geom.sql new file mode 100644 index 000000000..8b41dd5db --- /dev/null +++ b/airflow/sql/sparte/tests/generic/is_valid_geom.sql @@ -0,0 +1,14 @@ +{% test is_valid_geom(model, column_name) %} + +with validation_errors as ( + + select {{ column_name }} + from {{ model }} + where not ST_IsValid({{ column_name }}) + +) + +select * +from validation_errors + +{% endtest %} diff --git a/airflow/tests/dags/test_dag_example.py b/airflow/tests/dags/test_dag_example.py new file mode 100644 index 000000000..582ba077f --- /dev/null +++ b/airflow/tests/dags/test_dag_example.py @@ -0,0 +1,72 @@ +import logging +import os +from contextlib import contextmanager + +import pytest +from airflow.models import DagBag + + +@contextmanager +def suppress_logging(namespace): + logger = logging.getLogger(namespace) + old_value = logger.disabled + logger.disabled = True + try: + yield + finally: + logger.disabled = old_value + + +def get_import_errors(): + """ + Generate a 
tuple for import errors in the dag bag + """ + with suppress_logging("airflow"): + dag_bag = DagBag(include_examples=False) + + def strip_path_prefix(path): + return os.path.relpath(path, os.environ.get("AIRFLOW_HOME")) + + # prepend "(None,None)" to ensure that a test object is always created even if it's a no op. + return [(None, None)] + [(strip_path_prefix(k), v.strip()) for k, v in dag_bag.import_errors.items()] + + +def get_dags(): + """ + Generate a tuple of dag_id, in the DagBag + """ + with suppress_logging("airflow"): + dag_bag = DagBag(include_examples=False) + + def strip_path_prefix(path): + return os.path.relpath(path, os.environ.get("AIRFLOW_HOME")) + + return [(k, v, strip_path_prefix(v.fileloc)) for k, v in dag_bag.dags.items()] + + +@pytest.mark.parametrize("rel_path,rv", get_import_errors(), ids=[x[0] for x in get_import_errors()]) +def test_file_imports(rel_path, rv): + """Test for import errors on a file""" + if rel_path and rv: + raise Exception(f"{rel_path} failed to import with message \n {rv}") + + +APPROVED_TAGS = {} + + +@pytest.mark.parametrize("dag_id,dag,fileloc", get_dags(), ids=[x[2] for x in get_dags()]) +def test_dag_tags(dag_id, dag, fileloc): + """ + test if a DAG is tagged and if those TAGs are in the approved list + """ + assert dag.tags, f"{dag_id} in {fileloc} has no tags" + if APPROVED_TAGS: + assert not set(dag.tags) - APPROVED_TAGS + + +@pytest.mark.parametrize("dag_id,dag, fileloc", get_dags(), ids=[x[2] for x in get_dags()]) +def test_dag_retries(dag_id, dag, fileloc): + """ + test if a DAG has retries set + """ + assert dag.default_args.get("retries", None) >= 2, f"{dag_id} in {fileloc} must have task retries >= 2." diff --git a/airflow_settings.yaml b/airflow_settings.yaml new file mode 100644 index 000000000..fe542bcf9 --- /dev/null +++ b/airflow_settings.yaml @@ -0,0 +1,25 @@ +# This file allows you to configure Airflow Connections, Pools, and Variables in a single place for local development only. 
+# NOTE: json dicts can be added to the conn_extra field as yaml key value pairs. See the example below. + +# For more information, refer to our docs: https://docs.astronomer.io/develop-project#configure-airflow_settingsyaml-local-development-only +# For questions, reach out to: https://support.astronomer.io +# For issues create an issue ticket here: https://github.com/astronomer/astro-cli/issues + +airflow: + connections: + - conn_id: + conn_type: + conn_host: + conn_schema: + conn_login: + conn_password: + conn_port: + conn_extra: + example_extra_field: example-value + pools: + - pool_name: + pool_slot: + pool_description: + variables: + - variable_name: + variable_value: diff --git a/tests/dags/test_dag_example.py b/tests/dags/test_dag_example.py new file mode 100644 index 000000000..582ba077f --- /dev/null +++ b/tests/dags/test_dag_example.py @@ -0,0 +1,72 @@ +import logging +import os +from contextlib import contextmanager + +import pytest +from airflow.models import DagBag + + +@contextmanager +def suppress_logging(namespace): + logger = logging.getLogger(namespace) + old_value = logger.disabled + logger.disabled = True + try: + yield + finally: + logger.disabled = old_value + + +def get_import_errors(): + """ + Generate a tuple for import errors in the dag bag + """ + with suppress_logging("airflow"): + dag_bag = DagBag(include_examples=False) + + def strip_path_prefix(path): + return os.path.relpath(path, os.environ.get("AIRFLOW_HOME")) + + # prepend "(None,None)" to ensure that a test object is always created even if it's a no op. 
+ return [(None, None)] + [(strip_path_prefix(k), v.strip()) for k, v in dag_bag.import_errors.items()] + + +def get_dags(): + """ + Generate a tuple of dag_id, in the DagBag + """ + with suppress_logging("airflow"): + dag_bag = DagBag(include_examples=False) + + def strip_path_prefix(path): + return os.path.relpath(path, os.environ.get("AIRFLOW_HOME")) + + return [(k, v, strip_path_prefix(v.fileloc)) for k, v in dag_bag.dags.items()] + + +@pytest.mark.parametrize("rel_path,rv", get_import_errors(), ids=[x[0] for x in get_import_errors()]) +def test_file_imports(rel_path, rv): + """Test for import errors on a file""" + if rel_path and rv: + raise Exception(f"{rel_path} failed to import with message \n {rv}") + + +APPROVED_TAGS = {} + + +@pytest.mark.parametrize("dag_id,dag,fileloc", get_dags(), ids=[x[2] for x in get_dags()]) +def test_dag_tags(dag_id, dag, fileloc): + """ + test if a DAG is tagged and if those TAGs are in the approved list + """ + assert dag.tags, f"{dag_id} in {fileloc} has no tags" + if APPROVED_TAGS: + assert not set(dag.tags) - APPROVED_TAGS + + +@pytest.mark.parametrize("dag_id,dag, fileloc", get_dags(), ids=[x[2] for x in get_dags()]) +def test_dag_retries(dag_id, dag, fileloc): + """ + test if a DAG has retries set + """ + assert dag.default_args.get("retries", None) >= 2, f"{dag_id} in {fileloc} must have task retries >= 2." From eed7c1f2c0f007539097f6f9e36fef137efd7b24 Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Tue, 6 Aug 2024 11:32:55 +0200 Subject: [PATCH 04/99] fix(airflow): remove example dag --- airflow/tests/dags/test_dag_example.py | 72 -------------------------- tests/dags/test_dag_example.py | 72 -------------------------- 2 files changed, 144 deletions(-) delete mode 100644 airflow/tests/dags/test_dag_example.py delete mode 100644 tests/dags/test_dag_example.py diff --git a/airflow/tests/dags/test_dag_example.py b/airflow/tests/dags/test_dag_example.py deleted file mode 100644 index 582ba077f..000000000 --- a/airflow/tests/dags/test_dag_example.py +++ /dev/null @@ -1,72 +0,0 @@ -import logging -import os -from contextlib import contextmanager - -import pytest -from airflow.models import DagBag - - -@contextmanager -def suppress_logging(namespace): - logger = logging.getLogger(namespace) - old_value = logger.disabled - logger.disabled = True - try: - yield - finally: - logger.disabled = old_value - - -def get_import_errors(): - """ - Generate a tuple for import errors in the dag bag - """ - with suppress_logging("airflow"): - dag_bag = DagBag(include_examples=False) - - def strip_path_prefix(path): - return os.path.relpath(path, os.environ.get("AIRFLOW_HOME")) - - # prepend "(None,None)" to ensure that a test object is always created even if it's a no op. 
- return [(None, None)] + [(strip_path_prefix(k), v.strip()) for k, v in dag_bag.import_errors.items()] - - -def get_dags(): - """ - Generate a tuple of dag_id, in the DagBag - """ - with suppress_logging("airflow"): - dag_bag = DagBag(include_examples=False) - - def strip_path_prefix(path): - return os.path.relpath(path, os.environ.get("AIRFLOW_HOME")) - - return [(k, v, strip_path_prefix(v.fileloc)) for k, v in dag_bag.dags.items()] - - -@pytest.mark.parametrize("rel_path,rv", get_import_errors(), ids=[x[0] for x in get_import_errors()]) -def test_file_imports(rel_path, rv): - """Test for import errors on a file""" - if rel_path and rv: - raise Exception(f"{rel_path} failed to import with message \n {rv}") - - -APPROVED_TAGS = {} - - -@pytest.mark.parametrize("dag_id,dag,fileloc", get_dags(), ids=[x[2] for x in get_dags()]) -def test_dag_tags(dag_id, dag, fileloc): - """ - test if a DAG is tagged and if those TAGs are in the approved list - """ - assert dag.tags, f"{dag_id} in {fileloc} has no tags" - if APPROVED_TAGS: - assert not set(dag.tags) - APPROVED_TAGS - - -@pytest.mark.parametrize("dag_id,dag, fileloc", get_dags(), ids=[x[2] for x in get_dags()]) -def test_dag_retries(dag_id, dag, fileloc): - """ - test if a DAG has retries set - """ - assert dag.default_args.get("retries", None) >= 2, f"{dag_id} in {fileloc} must have task retries >= 2." 
diff --git a/tests/dags/test_dag_example.py b/tests/dags/test_dag_example.py deleted file mode 100644 index 582ba077f..000000000 --- a/tests/dags/test_dag_example.py +++ /dev/null @@ -1,72 +0,0 @@ -import logging -import os -from contextlib import contextmanager - -import pytest -from airflow.models import DagBag - - -@contextmanager -def suppress_logging(namespace): - logger = logging.getLogger(namespace) - old_value = logger.disabled - logger.disabled = True - try: - yield - finally: - logger.disabled = old_value - - -def get_import_errors(): - """ - Generate a tuple for import errors in the dag bag - """ - with suppress_logging("airflow"): - dag_bag = DagBag(include_examples=False) - - def strip_path_prefix(path): - return os.path.relpath(path, os.environ.get("AIRFLOW_HOME")) - - # prepend "(None,None)" to ensure that a test object is always created even if it's a no op. - return [(None, None)] + [(strip_path_prefix(k), v.strip()) for k, v in dag_bag.import_errors.items()] - - -def get_dags(): - """ - Generate a tuple of dag_id, in the DagBag - """ - with suppress_logging("airflow"): - dag_bag = DagBag(include_examples=False) - - def strip_path_prefix(path): - return os.path.relpath(path, os.environ.get("AIRFLOW_HOME")) - - return [(k, v, strip_path_prefix(v.fileloc)) for k, v in dag_bag.dags.items()] - - -@pytest.mark.parametrize("rel_path,rv", get_import_errors(), ids=[x[0] for x in get_import_errors()]) -def test_file_imports(rel_path, rv): - """Test for import errors on a file""" - if rel_path and rv: - raise Exception(f"{rel_path} failed to import with message \n {rv}") - - -APPROVED_TAGS = {} - - -@pytest.mark.parametrize("dag_id,dag,fileloc", get_dags(), ids=[x[2] for x in get_dags()]) -def test_dag_tags(dag_id, dag, fileloc): - """ - test if a DAG is tagged and if those TAGs are in the approved list - """ - assert dag.tags, f"{dag_id} in {fileloc} has no tags" - if APPROVED_TAGS: - assert not set(dag.tags) - APPROVED_TAGS - - 
-@pytest.mark.parametrize("dag_id,dag, fileloc", get_dags(), ids=[x[2] for x in get_dags()]) -def test_dag_retries(dag_id, dag, fileloc): - """ - test if a DAG has retries set - """ - assert dag.default_args.get("retries", None) >= 2, f"{dag_id} in {fileloc} must have task retries >= 2." From 9f7e14b5e67862d6407d126d1fb23f8ff12952b9 Mon Sep 17 00:00:00 2001 From: "Alexis A." Date: Tue, 6 Aug 2024 11:39:41 +0200 Subject: [PATCH 05/99] feat(airflow): set get_table_name type hinting --- airflow/dags/ocsge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airflow/dags/ocsge.py b/airflow/dags/ocsge.py index ed72e2961..31f08294b 100644 --- a/airflow/dags/ocsge.py +++ b/airflow/dags/ocsge.py @@ -132,7 +132,7 @@ def ocsge_zone_construite_normalization_sql( """ -def get_table_name(shapefile_name: str) -> str: +def get_table_name(shapefile_name: str) -> str | None: shapefile_name = shapefile_name.lower() if "diff" in shapefile_name: return "ocsge_diff" From 65d03d619fdf93516351c48b8a0330caf666d22f Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Tue, 6 Aug 2024 11:40:56 +0200 Subject: [PATCH 06/99] fix(is_artificial): change style of comments --- airflow/sql/sparte/macros/is_artificial.sql | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/airflow/sql/sparte/macros/is_artificial.sql b/airflow/sql/sparte/macros/is_artificial.sql index 751f7602c..e8f2664f8 100644 --- a/airflow/sql/sparte/macros/is_artificial.sql +++ b/airflow/sql/sparte/macros/is_artificial.sql @@ -1,14 +1,14 @@ {% macro is_artificial(code_cs, code_us) %} (CASE - /* CS 1.1 */ + -- CS 1.1 WHEN {{ code_cs }} = 'CS1.1.1.1' THEN true WHEN {{ code_cs }} = 'CS1.1.1.2' THEN true WHEN {{ code_cs }} = 'CS1.1.2.1' AND {{ code_us }} != 'US1.3' THEN true WHEN {{ code_cs }} = 'CS1.1.2.2' THEN true - /* CS 2.2 */ - /* CS 2.2.1 */ + -- CS 2.2 + -- CS 2.2.1 WHEN {{ code_cs }} = 'CS2.2.1' AND {{ code_us }} = 'US2' THEN true WHEN {{ code_cs }} = 'CS2.2.1' AND {{ code_us }} = 'US3' THEN true WHEN {{ code_cs }} = 'CS2.2.1' AND {{ code_us }} = 'US5' THEN true @@ -23,7 +23,7 @@ WHEN {{ code_cs }} = 'CS2.2.1' AND {{ code_us }} = 'US6.1' THEN true WHEN {{ code_cs }} = 'CS2.2.1' AND {{ code_us }} = 'US6.2' THEN true - /* CS 2.2.2 */ + -- CS 2.2.2 WHEN {{ code_cs }} = 'CS2.2.2' AND {{ code_us }} = 'US2' THEN true WHEN {{ code_cs }} = 'CS2.2.2' AND {{ code_us }} = 'US3' THEN true WHEN {{ code_cs }} = 'CS2.2.2' AND {{ code_us }} = 'US5' THEN true From 87e390b3b5de7fdf0dc1e32a325ede85c5ee7c67 Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Tue, 6 Aug 2024 11:42:40 +0200 Subject: [PATCH 07/99] fix(land): remove schema --- .../sql/sparte/models/admin_express/land.sql | 30 ------------------- .../sparte/models/admin_express/schema.yml | 1 - 2 files changed, 31 deletions(-) delete mode 100644 airflow/sql/sparte/models/admin_express/land.sql diff --git a/airflow/sql/sparte/models/admin_express/land.sql b/airflow/sql/sparte/models/admin_express/land.sql deleted file mode 100644 index 1fb4abbe5..000000000 --- a/airflow/sql/sparte/models/admin_express/land.sql +++ /dev/null @@ -1,30 +0,0 @@ - -{{ config(materialized='view') }} - -SELECT - commune.insee_com AS land_id, - 'COMMUNE' AS land_type, - geom -FROM - {{ source('public', 'commune') }} AS commune -UNION -SELECT - departement.insee_dep AS land_id, - 'DEPARTEMENT' AS land_type, - geom -FROM - {{ source('public', 'departement') }} AS departement -UNION -SELECT - region.insee_reg AS land_id, - 'REGION' AS land_type, - geom -FROM - {{ source('public', 'region') }} AS region -UNION -SELECT - epci.code_siren AS land_id, - 'EPCI' AS land_type, - geom -FROM - {{ source('public', 'epci') }} AS epci diff --git a/airflow/sql/sparte/models/admin_express/schema.yml b/airflow/sql/sparte/models/admin_express/schema.yml index 2a36bd81f..92e704cc6 100644 --- a/airflow/sql/sparte/models/admin_express/schema.yml +++ b/airflow/sql/sparte/models/admin_express/schema.yml @@ -3,7 +3,6 @@ version: 2 models: - name: commune_with_checksum - - name: land sources: - name: public From 20ca7575d583376b146b5600538678608729dd3e Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Wed, 7 Aug 2024 18:15:00 +0200 Subject: [PATCH 08/99] temp --- airflow/dags/ocsge.py | 481 ++++++++++-------- airflow/sql/sparte/models/ocsge/artif/README | 0 .../ocsge/artif/artif_nat_by_surface.sql | 0 .../ocsge/artif/artificial_geom_union.sql | 0 .../artif/artificial_geom_union_dump.sql | 0 .../models/ocsge/artif/artificial_union.sql | 0 .../models/ocsge/artif/clustered_ocsge.sql | 0 .../sparte/models/ocsge/artif/small_built.sql | 0 .../sql/sparte/models/ocsge/difference.sql | 35 +- .../sparte/models/ocsge/occupation_du_sol.sql | 30 +- airflow/sql/sparte/models/ocsge/schema.yml | 16 +- .../sparte/models/ocsge/zone_construite.sql | 30 +- 12 files changed, 350 insertions(+), 242 deletions(-) delete mode 100644 airflow/sql/sparte/models/ocsge/artif/README delete mode 100644 airflow/sql/sparte/models/ocsge/artif/artif_nat_by_surface.sql delete mode 100644 airflow/sql/sparte/models/ocsge/artif/artificial_geom_union.sql delete mode 100644 airflow/sql/sparte/models/ocsge/artif/artificial_geom_union_dump.sql delete mode 100644 airflow/sql/sparte/models/ocsge/artif/artificial_union.sql delete mode 100644 airflow/sql/sparte/models/ocsge/artif/clustered_ocsge.sql delete mode 100644 airflow/sql/sparte/models/ocsge/artif/small_built.sql diff --git a/airflow/dags/ocsge.py b/airflow/dags/ocsge.py index 31f08294b..44351fdbe 100644 --- a/airflow/dags/ocsge.py +++ b/airflow/dags/ocsge.py @@ -23,13 +23,14 @@ import re import tempfile +import pendulum import py7zr import requests from airflow.decorators import dag, task +from airflow.models.param import Param from airflow.operators.bash import BashOperator from dependencies.container import Container from dependencies.utils import multiline_string_to_single_line -from pendulum import datetime def find_years_in_url(url: str) -> list[int]: @@ -68,11 +69,7 @@ def find_departement_in_url(url: str) -> str: return result -def ocsge_diff_normalization_sql( - years: list[int], - departement: str, - source_name: str, -) -> str: +def 
ocsge_diff_normalization_sql(years: list[int], departement: str, source_name: str, loaded_date: float) -> str: fields = { "cs_new": f"CS_{years[1]}", "cs_old": f"CS_{years[0]}", @@ -84,7 +81,7 @@ def ocsge_diff_normalization_sql( return f""" SELECT - CreateUUID() AS guid, + {loaded_date} AS loaded_date, {fields['year_old']} AS year_old, {fields['year_new']} AS year_new, {fields['cs_new']} AS cs_new, @@ -102,9 +99,10 @@ def ocsge_occupation_du_sol_normalization_sql( years: list[int], departement: str, source_name: str, + loaded_date: float, ) -> str: return f""" SELECT - CreateUUID() AS guid, + {loaded_date} AS loaded_date, ID AS id, code_cs AS code_cs, code_us AS code_us, @@ -120,9 +118,10 @@ def ocsge_zone_construite_normalization_sql( years: list[int], departement: str, source_name: str, + loaded_date: float, ) -> str: return f""" SELECT - CreateUUID() AS guid, + {loaded_date} AS loaded_date, ID AS id, {years[0]} AS year, {departement} AS departement, @@ -144,206 +143,290 @@ def get_table_name(shapefile_name: str) -> str | None: return None -def get_normalization_sql(table_name: str, source_name: str, years: list[int], departement: str) -> str: +def get_normalization_sql( + table_name: str, + source_name: str, + years: list[int], + departement: str, + loaded_date: float, +) -> str: return { "ocsge_diff": ocsge_diff_normalization_sql, "ocsge_occupation_du_sol": ocsge_occupation_du_sol_normalization_sql, "ocsge_zone_construite": ocsge_zone_construite_normalization_sql, - }[table_name](years=years, departement=departement, source_name=source_name) + }[table_name]( + years=years, + departement=departement, + source_name=source_name, + loaded_date=loaded_date, + ) configs = { # noqa: E501 - "94": [ - "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D094_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D094_2021-01-01.7z", # noqa: E501 - 
"https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D094_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D094_2018-01-01.7z", # noqa: E501 - "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D094_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D094_2018-2021.7z", # noqa: E501 - ], - "69": [ - "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D069_2020-01-01/OCS-GE_2-0__SHP_LAMB93_D069_2020-01-01.7z", # noqa: E501 - "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D069_2017-01-01/OCS-GE_2-0__SHP_LAMB93_D069_2017-01-01.7z", # noqa: E501 - "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D069_DIFF_2017-2020/OCS-GE_2-0__SHP_LAMB93_D069_DIFF_2017-2020.7z", # noqa: E501 - ], - "75": [ - "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D075_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D075_2021-01-01.7z", # noqa: E501 - "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D075_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D075_2018-01-01.7z", # noqa: E501 - "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D075_DIFF_2018-2021/OCS-GE_2-0__SHP_LAMB93_D075_DIFF_2018-2021.7z", # noqa: E501 - ], - "92": [ - "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D092_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D092_2021-01-01.7z", # noqa: E501 - "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D092_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D092_2018-01-01.7z", # noqa: E501 - "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D092_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D092_2018-2021.7z", # noqa: E501 - ], - "91": [ - "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D091_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D091_2021-01-01.7z", # noqa: E501 - 
"https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D091_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D091_2018-01-01.7z", # noqa: E501 - "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D091_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D091_2018-2021.7z", # noqa: E501 - ], - "66": [ - "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D066_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D066_2021-01-01.7z", # noqa: E501 - "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D066_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D066_2018-01-01.7z", # noqa: E501 - "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D066_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D066_2018-2021.7z", # noqa: E501 - ], + "91": { + "occupation_du_sol_et_zone_construite": { + 2018: "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D091_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D091_2018-01-01.7z", # noqa: E501 + 2021: "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D091_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D091_2021-01-01.7z", # noqa: E501 + }, + "difference": { + ( + 2018, + 2021, + ): "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D091_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D091_2018-2021.7z", # noqa: E501 + }, + }, + "92": { + "occupation_du_sol_et_zone_construite": { + 2018: "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D092_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D092_2018-01-01.7z", # noqa: E501 + 2021: "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D092_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D092_2021-01-01.7z", # noqa: E501 + }, + "difference": { + ( + 2018, + 2021, + ): "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D092_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D092_2018-2021.7z", # noqa: E501 + }, + }, + "78": { + "occupation_du_sol_et_zone_construite": 
{ + 2018: "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D078_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D078_2018-01-01.7z", # noqa: E501 + 2021: "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D078_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D078_2021-01-01.7z", # noqa: E501 + }, + "difference": { + ( + 2018, + 2021, + ): "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D078_DIFF_2018-2021/OCS-GE_2-0__SHP_LAMB93_D078_DIFF_2018-2021.7z" # noqa: E501 + }, + }, + "94": { + "occupation_du_sol_et_zone_construite": { + 2018: "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D094_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D094_2018-01-01.7z", # noqa: E501 + 2021: "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D094_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D094_2021-01-01.7z", # noqa: E501 + }, + "difference": { + ( + 2018, + 2021, + ): "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D094_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D094_2018-2021.7z", # noqa: E501 + }, + }, + "75": { + "occupation_du_sol_et_zone_construite": { + 2018: "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D075_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D075_2018-01-01.7z", # noqa: E501 + 2021: "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D075_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D075_2021-01-01.7z", # noqa: E501 + }, + "difference": { + ( + 2018, + 2021, + ): "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D075_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D075_2018-2021.7z", # noqa: E501 + }, + }, } -for departement_str, urls in configs.items(): # noqa: C901 - dag_id = f"ingest_ocsge_{departement_str}" - - # Define the basic parameters of the DAG, like schedule and start_date - @dag( - dag_id=dag_id, - start_date=datetime(2024, 1, 1), - schedule="@once", - catchup=False, - doc_md=__doc__, - 
default_args={"owner": "Alexis Athlani", "retries": 3}, - tags=["OCS GE"], - ) - def ocsge(config): - bucket_name = "airflow-staging" - config: dict = config.resolve({}) - urls = config.get("urls") - - def download_ocsge(url) -> str: - response = requests.get(url, allow_redirects=True) - - if not response.ok: - raise ValueError(f"Failed to download {url}. Response : {response.content}") - header = response.headers["content-disposition"] - _, params = cgi.parse_header(header) - filename = params.get("filename") - - path_on_bucket = f"{bucket_name}/{os.path.basename(filename)}" - with Container().s3().open(path_on_bucket, "wb") as distant_file: - distant_file.write(response.content) - - return path_on_bucket - - @task.python - def download_ocsge_first_millesime() -> str: - return download_ocsge(urls[0]) - - @task.python - def download_ocsge_second_millesime() -> str: - return download_ocsge(urls[1]) - - @task.python - def download_ocsge_diff() -> str: - return download_ocsge(urls[2]) - - @task.python - def delete_tables_before(): - conn = Container().postgres_conn() - cur = conn.cursor() - - cur.execute("DROP TABLE IF EXISTS ocsge_diff;") - cur.execute("DROP TABLE IF EXISTS ocsge_occupation_du_sol;") - cur.execute("DROP TABLE IF EXISTS ocsge_zone_construite;") - - @task.python(trigger_rule="all_done") - def delete_tables_after(): - conn = Container().postgres_conn() - cur = conn.cursor() - - cur.execute("DROP TABLE IF EXISTS ocsge_diff;") - cur.execute("DROP TABLE IF EXISTS ocsge_occupation_du_sol;") - cur.execute("DROP TABLE IF EXISTS ocsge_zone_construite;") - - @task.python - def ingest_ocsge(paths: list[str]) -> str: - for path in paths: - years = find_years_in_url(path) - print("find_years_in_url", years, path) - departement = find_departement_in_url(path) - - with Container().s3().open(path, "rb") as f: - extract_dir = tempfile.mkdtemp() - py7zr.SevenZipFile(f, mode="r").extractall(path=extract_dir) - - for dirpath, _, filenames in os.walk(extract_dir): - for 
filename in filenames: - if filename.endswith(".shp"): - path = os.path.abspath(os.path.join(dirpath, filename)) - table_name = get_table_name(shapefile_name=filename) - print("get_table_name", table_name) - if not table_name: - continue - sql = multiline_string_to_single_line( - get_normalization_sql( - source_name=os.path.basename(path).replace(".shp", ""), - table_name=table_name, - years=years, - departement=departement, - ) - ) - cmd = [ - "ogr2ogr", - "-dialect", - "SQLITE", - "-f", - '"PostgreSQL"', - f'"{Container().postgres_conn_str_ogr2ogr()}"', - "-overwrite", - "-lco", - "GEOMETRY_NAME=geom", - "-a_srs", - "EPSG:2154", - "-nlt", - "MULTIPOLYGON", - "-nlt", - "PROMOTE_TO_MULTI", - "-nln", - table_name, - path, - "--config", - "PG_USE_COPY", - "YES", - "-sql", - f'"{sql}"', - ] - BashOperator( - task_id=f"ingest_{table_name}", - bash_command=" ".join(cmd), - ).execute(context={}) - - build_dbt = BashOperator( - task_id="build_dbt", - bash_command='cd "${AIRFLOW_HOME}/sql/sparte" && dbt build -s ocsge', - retries=0, - ) - - @task.python - def export_table(): - conn = Container().postgres_conn() - cur = conn.cursor() - - filename = "occupation_du_sol.csv" - temp_file = f"/tmp/{filename}" - temp_archive = f"/tmp/{filename}.7z" - path_on_bucket = f"{bucket_name}/{filename}.7z" - - with open(temp_file, "w") as csv_file: - cur.copy_expert( - "COPY (SELECT * FROM public_ocsge.occupation_du_sol) TO STDOUT WITH CSV HEADER", csv_file - ) - - with py7zr.SevenZipFile(temp_archive, mode="w") as archive: - archive.write(temp_file, filename) - - with open(temp_archive, "rb") as archive: - with Container().s3().open(path_on_bucket, "wb") as f: - f.write(archive.read()) - - paths = [ - download_ocsge_diff(), - download_ocsge_first_millesime(), - download_ocsge_second_millesime(), +departement_list = list(configs.keys()) + + +@dag( + dag_id="ingest_ocsge", + start_date=pendulum.datetime(2024, 1, 1), + schedule="@once", + catchup=False, + doc_md=__doc__, + 
default_args={"owner": "Alexis Athlani", "retries": 3}, + tags=["OCS GE"], + params={ + "departement": Param("75", type="string", enum=departement_list), + "years": Param([2018], type="array"), + "dataset": Param( + "occupation_du_sol_et_zone_construite", + type="string", + enum=[ + "occupation_du_sol_et_zone_construite", + "difference", + ], + ), + }, +) +def ocsge(): # noqa: C901 + bucket_name = "airflow-staging" + + @task.python + def get_url_from_config(**context) -> str: + departement = context["params"]["departement"] + years = tuple(map(int, context["params"]["years"])) + dataset = context["params"]["dataset"] + + if len(years) == 1: + years = years[0] + + print(departement, dataset, years) + + url = configs.get(departement, {}).get(dataset, {}).get(years) + + print(url) + return url + + @task.python + def download_ocsge(url) -> str: + response = requests.get(url, allow_redirects=True) + + if not response.ok: + raise ValueError(f"Failed to download {url}. Response : {response.content}") + header = response.headers["content-disposition"] + _, params = cgi.parse_header(header) + filename = params.get("filename") + + path_on_bucket = f"{bucket_name}/{os.path.basename(filename)}" + with Container().s3().open(path_on_bucket, "wb") as distant_file: + distant_file.write(response.content) + + return path_on_bucket + + @task.python + def ingest_ocsge(path, **context) -> int: + loaded_date = int(pendulum.now().timestamp()) + departement = context["params"]["departement"] + years = context["params"]["years"] + print(loaded_date) + with Container().s3().open(path, "rb") as f: + extract_dir = tempfile.mkdtemp() + py7zr.SevenZipFile(f, mode="r").extractall(path=extract_dir) + + for dirpath, _, filenames in os.walk(extract_dir): + for filename in filenames: + if filename.endswith(".shp"): + path = os.path.abspath(os.path.join(dirpath, filename)) + table_name = get_table_name(shapefile_name=filename) + print("get_table_name", table_name) + if not table_name: + continue + 
sql = multiline_string_to_single_line( + get_normalization_sql( + source_name=os.path.basename(path).replace(".shp", ""), + table_name=table_name, + years=years, + departement=departement, + loaded_date=loaded_date, + ) + ) + cmd = [ + "ogr2ogr", + "-dialect", + "SQLITE", + "-f", + '"PostgreSQL"', + f'"{Container().postgres_conn_str_ogr2ogr()}"', + "-append", + "-lco", + "GEOMETRY_NAME=geom", + "-a_srs", + "EPSG:2154", + "-nlt", + "MULTIPOLYGON", + "-nlt", + "PROMOTE_TO_MULTI", + "-nln", + table_name, + path, + "--config", + "PG_USE_COPY", + "YES", + "-sql", + f'"{sql}"', + ] + BashOperator( + task_id=f"ingest_{table_name}", + bash_command=" ".join(cmd), + ).execute(context={}) + + return loaded_date + + @task.bash(retries=0) + def dbt_test_ocsge(**context): + dataset = context["params"]["dataset"] + + if dataset == "occupation_du_sol_et_zone_construite": + selector = "source:sparte.public.ocsge_occupation_du_sol source:sparte.public.ocsge_zone_construite" + elif dataset == "difference": + selector = "source:sparte.public.ocsge_diff" + else: + raise ValueError(f"Unknown dataset {dataset}") + + return 'cd "${AIRFLOW_HOME}/sql/sparte" && dbt test -s ' + selector + + @task.bash(retries=0, trigger_rule="all_success") + def dbt_run_ocsge(**context): + dataset = context["params"]["dataset"] + + if dataset == "occupation_du_sol_et_zone_construite": + selector = "source:sparte.public.ocsge_occupation_du_sol+ source:sparte.public.ocsge_zone_construite+" + elif dataset == "difference": + selector = "source:sparte.public.ocsge_diff+" + else: + raise ValueError(f"Unknown dataset {dataset}") + + return 'cd "${AIRFLOW_HOME}/sql/sparte" && dbt run -s ' + selector + + @task.python(trigger_rule="one_failed") + def rollback_append(loaded_date: float, **context): + dataset = context["params"]["dataset"] + + if dataset == "occupation_du_sol_et_zone_construite": + tables = ["ocsge_occupation_du_sol", "ocsge_zone_construite"] + elif dataset == "difference": + tables = ["ocsge_diff"] + 
else: + raise ValueError(f"Unknown dataset {dataset}") + + conn = Container().postgres_conn() + cur = conn.cursor() + + results = {} + + for table in tables: + print(f"DELETE FROM public.{table} WHERE loaded_date = {loaded_date}") + cur.execute(f"DELETE FROM public.{table} WHERE loaded_date = {loaded_date}") + results[table] = cur.rowcount + + conn.commit() + conn.close() + + return results + + @task.python + def export_table(): + conn = Container().postgres_conn() + cur = conn.cursor() + + filename = "occupation_du_sol.csv" + temp_file = f"/tmp/{filename}" + temp_archive = f"/tmp/{filename}.7z" + path_on_bucket = f"{bucket_name}/{filename}.7z" + + with open(temp_file, "w") as csv_file: + cur.copy_expert("COPY (SELECT * FROM public_ocsge.occupation_du_sol) TO STDOUT WITH CSV HEADER", csv_file) + + with py7zr.SevenZipFile(temp_archive, mode="w") as archive: + archive.write(temp_file, filename) + + with open(temp_archive, "rb") as archive: + with Container().s3().open(path_on_bucket, "wb") as f: + f.write(archive.read()) + + url = get_url_from_config() + path = download_ocsge(url=url) + loaded_date = ingest_ocsge(path=path) + ( + loaded_date + >> dbt_test_ocsge() + >> [ + rollback_append(loaded_date=loaded_date), + dbt_run_ocsge(), ] + ) - paths >> delete_tables_before() - - ingest_ocsge(paths) >> build_dbt >> export_table() >> delete_tables_after() - - config = {"urls": urls} - ocsge(config) +ocsge() diff --git a/airflow/sql/sparte/models/ocsge/artif/README b/airflow/sql/sparte/models/ocsge/artif/README deleted file mode 100644 index e69de29bb..000000000 diff --git a/airflow/sql/sparte/models/ocsge/artif/artif_nat_by_surface.sql b/airflow/sql/sparte/models/ocsge/artif/artif_nat_by_surface.sql deleted file mode 100644 index e69de29bb..000000000 diff --git a/airflow/sql/sparte/models/ocsge/artif/artificial_geom_union.sql b/airflow/sql/sparte/models/ocsge/artif/artificial_geom_union.sql deleted file mode 100644 index e69de29bb..000000000 diff --git 
a/airflow/sql/sparte/models/ocsge/artif/artificial_geom_union_dump.sql b/airflow/sql/sparte/models/ocsge/artif/artificial_geom_union_dump.sql deleted file mode 100644 index e69de29bb..000000000 diff --git a/airflow/sql/sparte/models/ocsge/artif/artificial_union.sql b/airflow/sql/sparte/models/ocsge/artif/artificial_union.sql deleted file mode 100644 index e69de29bb..000000000 diff --git a/airflow/sql/sparte/models/ocsge/artif/clustered_ocsge.sql b/airflow/sql/sparte/models/ocsge/artif/clustered_ocsge.sql deleted file mode 100644 index e69de29bb..000000000 diff --git a/airflow/sql/sparte/models/ocsge/artif/small_built.sql b/airflow/sql/sparte/models/ocsge/artif/small_built.sql deleted file mode 100644 index e69de29bb..000000000 diff --git a/airflow/sql/sparte/models/ocsge/difference.sql b/airflow/sql/sparte/models/ocsge/difference.sql index deaef5b93..da155d990 100644 --- a/airflow/sql/sparte/models/ocsge/difference.sql +++ b/airflow/sql/sparte/models/ocsge/difference.sql @@ -1,12 +1,19 @@ --- depends_on: {{ source('public', 'ocsge_occupation_du_sol') }}, {{ source('public', 'ocsge_diff') }}, {{ source('public', 'ocsge_zone_construite') }} -{{ - config( - materialized='incremental', - post_hook="DELETE FROM {{ this }} WHERE guid NOT IN (SELECT guid FROM {{ source('public', 'ocsge_diff') }})" - ) -}} +{{ config(materialized='table') }} +WITH latest_loaded_date AS ( + SELECT + year_old, + year_new, + departement, + MAX(loaded_date) AS max_loaded_date + FROM + {{ source('public', 'ocsge_diff') }} + GROUP BY + year_old, + year_new, + departement +) SELECT *, CASE @@ -38,15 +45,19 @@ SELECT END AS new_not_artificial FROM ( SELECT - *, + ocsge.*, ST_Area(geom) AS surface, {{ is_artificial('cs_old', 'us_old') }} AS old_is_artif, {{ is_impermeable('cs_old') }} AS old_is_imper, {{ is_artificial('cs_new', 'us_new') }} AS new_is_artif, {{ is_impermeable('cs_new') }} AS new_is_imper FROM - {{ source('public', 'ocsge_diff') }} - {% if is_incremental() %} - WHERE guid not in 
(SELECT guid from {{ this }}) - {% endif %} + {{ source('public', 'ocsge_diff') }} AS ocsge + JOIN + latest_loaded_date AS ld + ON + ocsge.year_old = ld.year_old + AND ocsge.year_new = ld.year_new + AND ocsge.departement = ld.departement + AND ocsge.loaded_date = ld.max_loaded_date ) AS foo diff --git a/airflow/sql/sparte/models/ocsge/occupation_du_sol.sql b/airflow/sql/sparte/models/ocsge/occupation_du_sol.sql index 87ff28f0b..f579f8eb0 100644 --- a/airflow/sql/sparte/models/ocsge/occupation_du_sol.sql +++ b/airflow/sql/sparte/models/ocsge/occupation_du_sol.sql @@ -1,11 +1,8 @@ --- depends_on: {{ source('public', 'ocsge_occupation_du_sol') }}, {{ source('public', 'ocsge_diff') }}, {{ source('public', 'ocsge_zone_construite') }} {{ config( - materialized='incremental', - incremental_strategy='delete+insert', - unique_key=['departement','year'], + materialized='table', indexes=[ {'columns': ['departement','year'], 'type': 'btree'}, {'columns': ['geom'], 'type': 'gist'} @@ -13,14 +10,27 @@ ) }} +WITH latest_loaded_date AS ( + SELECT + year, + departement, + MAX(loaded_date) AS max_loaded_date + FROM + {{ source('public', 'ocsge_occupation_du_sol') }} + GROUP BY + year, + departement +) SELECT - *, + ocsge.*, ST_area(geom) AS surface, {{ is_impermeable('code_cs') }} as is_impermeable, {{ is_artificial('code_cs', 'code_us') }} as is_artificial FROM - {{ source('public', 'ocsge_occupation_du_sol') }} - -{% if is_incremental() %} - WHERE guid not in (SELECT guid from {{ this }}) -{% endif %} + {{ source('public', 'ocsge_occupation_du_sol') }} AS ocsge +JOIN + latest_loaded_date AS ld +ON + ocsge.year = ld.year + AND ocsge.departement = ld.departement + AND ocsge.loaded_date = ld.max_loaded_date diff --git a/airflow/sql/sparte/models/ocsge/schema.yml b/airflow/sql/sparte/models/ocsge/schema.yml index 52243ce9b..663d11d8f 100644 --- a/airflow/sql/sparte/models/ocsge/schema.yml +++ b/airflow/sql/sparte/models/ocsge/schema.yml @@ -46,7 +46,6 @@ difference_test: 
&difference_test - name: geom tests: - not_null - - unique - is_valid_geom - name: cs_new tests: @@ -69,7 +68,6 @@ occupation_du_sol_test: &occupation_du_sol_test - name: geom tests: - not_null - - unique - is_valid_geom - name: departement tests: @@ -79,11 +77,6 @@ occupation_du_sol_test: &occupation_du_sol_test - not_null - name: id tests: - - unique - - not_null - - name: guid - tests: - - unique - not_null - name: code_cs tests: @@ -99,7 +92,6 @@ zone_construite_test: &zone_construite_test - name: geom tests: - not_null - - unique - is_valid_geom - name: departement tests: @@ -109,15 +101,12 @@ zone_construite_test: &zone_construite_test - not_null - name: id tests: - - unique - - not_null - - name: guid - tests: - - unique - not_null + models: - name: zone_construite + - name: zone_artificielle - name: occupation_du_sol - name: difference @@ -129,3 +118,4 @@ sources: - name: ocsge_occupation_du_sol columns: *occupation_du_sol_test - name: ocsge_zone_construite + columns: *zone_construite_test diff --git a/airflow/sql/sparte/models/ocsge/zone_construite.sql b/airflow/sql/sparte/models/ocsge/zone_construite.sql index 522fa231b..7cf375852 100644 --- a/airflow/sql/sparte/models/ocsge/zone_construite.sql +++ b/airflow/sql/sparte/models/ocsge/zone_construite.sql @@ -1,9 +1,23 @@ --- depends_on: {{ source('public', 'ocsge_occupation_du_sol') }}, {{ source('public', 'ocsge_diff') }}, {{ source('public', 'ocsge_zone_construite') }} +{{ config(materialized='table') }} -{{ config(materialized='incremental') }} - -SELECT * FROM - {{ source('public', 'ocsge_zone_construite') }} -{% if is_incremental() %} - WHERE guid not in (SELECT guid from {{ this }}) -{% endif %} +WITH latest_loaded_date AS ( + SELECT + year, + departement, + MAX(loaded_date) AS max_loaded_date + FROM + {{ source('public', 'ocsge_zone_construite') }} + GROUP BY + year, + departement +) +SELECT + ocsge.* +FROM + {{ source('public', 'ocsge_zone_construite') }} as ocsge +JOIN + latest_loaded_date AS ld +ON 
+ ocsge.year = ld.year + AND ocsge.departement = ld.departement + AND ocsge.loaded_date = ld.max_loaded_date From 22429917695c317c831f42b3a088767b82c5f10c Mon Sep 17 00:00:00 2001 From: "Alexis A." Date: Fri, 9 Aug 2024 10:45:27 +0200 Subject: [PATCH 09/99] feat(airflow): add gpu --- airflow/dags/gpu.py | 85 +++++++++++++++++++ airflow/dags/ocsge.py | 18 +++- airflow/dependencies/container.py | 14 +++ airflow/requirements.txt | 2 + airflow/sql/sparte/dbt_project.yml | 7 +- .../sparte/models/admin_express/commune.sql | 28 ++++++ .../admin_express/commune_with_checksum.sql | 4 - .../models/admin_express/departement.sql | 21 +++++ .../sparte/models/admin_express/schema.yml | 2 +- airflow/sql/sparte/models/gpu/schema.yml | 10 +++ .../sparte/models/gpu/zonage_urbanisme.sql | 31 +++++++ .../sparte/models/ocsge/artif/final_artif.sql | 0 .../sql/sparte/models/ocsge/difference.sql | 13 ++- .../models/ocsge/difference_commune.sql | 26 ++++++ .../models/ocsge/final/artificial_area.sql | 19 +++++ .../models/ocsge/final/commune_diff.sql | 18 ++++ .../sparte/models/ocsge/final/commune_sol.sql | 15 ++++ .../sparte/models/ocsge/occupation_du_sol.sql | 11 ++- .../ocsge/occupation_du_sol_commune.sql | 21 +++++ .../occupation_du_sol_zonage_urbanisme.sql | 20 +++++ airflow/sql/sparte/models/ocsge/schema.yml | 1 + .../sparte/models/ocsge/zone_construite.sql | 6 +- 22 files changed, 354 insertions(+), 18 deletions(-) create mode 100644 airflow/dags/gpu.py create mode 100644 airflow/sql/sparte/models/admin_express/commune.sql delete mode 100644 airflow/sql/sparte/models/admin_express/commune_with_checksum.sql create mode 100644 airflow/sql/sparte/models/admin_express/departement.sql create mode 100644 airflow/sql/sparte/models/gpu/schema.yml create mode 100644 airflow/sql/sparte/models/gpu/zonage_urbanisme.sql delete mode 100644 airflow/sql/sparte/models/ocsge/artif/final_artif.sql create mode 100644 airflow/sql/sparte/models/ocsge/difference_commune.sql create mode 100644 
airflow/sql/sparte/models/ocsge/final/artificial_area.sql create mode 100644 airflow/sql/sparte/models/ocsge/final/commune_diff.sql create mode 100644 airflow/sql/sparte/models/ocsge/final/commune_sol.sql create mode 100644 airflow/sql/sparte/models/ocsge/occupation_du_sol_commune.sql create mode 100644 airflow/sql/sparte/models/ocsge/occupation_du_sol_zonage_urbanisme.sql diff --git a/airflow/dags/gpu.py b/airflow/dags/gpu.py new file mode 100644 index 000000000..273b6f28d --- /dev/null +++ b/airflow/dags/gpu.py @@ -0,0 +1,85 @@ +""" +## Astronaut ETL example DAG + +This DAG queries the list of astronauts currently in space from the +Open Notify API and prints each astronaut's name and flying craft. + +There are two tasks, one to get the data from the API and save the results, +and another to print the results. Both tasks are written in Python using +Airflow's TaskFlow API, which allows you to easily turn Python functions into +Airflow tasks, and automatically infer dependencies and pass data. + +The second task uses dynamic task mapping to create a copy of the task for +each Astronaut in the list retrieved from the API. This list will change +depending on how many Astronauts are in space, and the DAG will adjust +accordingly each time it runs. 
+ +For more explanation and getting started instructions, see our Write your +first DAG tutorial: https://docs.astronomer.io/learn/get-started-with-airflow +""" + +from airflow.decorators import dag, task +from airflow.operators.bash import BashOperator +from dependencies.container import Container +from pendulum import datetime + + +# Define the basic parameters of the DAG, like schedule and start_date +@dag( + start_date=datetime(2024, 1, 1), + schedule="@once", + catchup=False, + doc_md=__doc__, + default_args={"owner": "Alexis Athlani", "retries": 3}, + tags=["GPU"], +) +def gpu(): + bucket_name = "airflow-staging" + wfs_du_filename = "wfs_du.gpkg" + + @task.python + def download() -> str: + path_on_bucket = f"{bucket_name}/gpu/{wfs_du_filename}" + with Container.gpu_sftp() as sftp: + sftp.get(f"/pub/export-wfs/latest/gpkg/{wfs_du_filename}", f"/tmp/{wfs_du_filename}") + + Container().s3().put_file(f"/tmp/{wfs_du_filename}", path_on_bucket) + + return path_on_bucket + + @task.python + def ingest(path_on_bucket: str) -> str: + wfs_du_temp = f"/tmp/{wfs_du_filename}" + Container().s3().get_file(path_on_bucket, wfs_du_temp) + cmd = [ + "ogr2ogr", + "-dialect", + "SQLITE", + "-f", + '"PostgreSQL"', + f'"{Container().postgres_conn_str_ogr2ogr()}"', + "-overwrite", + "-lco", + "GEOMETRY_NAME=geom", + "-a_srs", + "EPSG:4236", + "-nlt", + "MULTIPOLYGON", + "-nlt", + "PROMOTE_TO_MULTI", + wfs_du_temp, + "zone_urba", + "--config", + "PG_USE_COPY", + "YES", + ] + BashOperator( + task_id="ingest_gpu", + bash_command=" ".join(cmd), + ).execute(context={}) + + path_on_bucket = download() + ingest(path_on_bucket) + + +gpu() diff --git a/airflow/dags/ocsge.py b/airflow/dags/ocsge.py index 44351fdbe..956e9187c 100644 --- a/airflow/dags/ocsge.py +++ b/airflow/dags/ocsge.py @@ -88,7 +88,7 @@ def ocsge_diff_normalization_sql(years: list[int], departement: str, source_name {fields['cs_old']} AS cs_old, {fields['us_new']} AS us_new, {fields['us_old']} AS us_old, - {departement} AS 
departement, + cast({departement} as text) AS departement, GEOMETRY as geom FROM {source_name} @@ -107,7 +107,7 @@ def ocsge_occupation_du_sol_normalization_sql( code_cs AS code_cs, code_us AS code_us, GEOMETRY AS geom, - {departement} AS departement, + cast({departement} as text) AS departement, {years[0]} AS year FROM {source_name} @@ -124,7 +124,7 @@ def ocsge_zone_construite_normalization_sql( {loaded_date} AS loaded_date, ID AS id, {years[0]} AS year, - {departement} AS departement, + cast({departement} as text) AS departement, GEOMETRY AS geom FROM {source_name} @@ -223,6 +223,18 @@ def get_normalization_sql( ): "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D075_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D075_2018-2021.7z", # noqa: E501 }, }, + "32": { + "occupation_du_sol_et_zone_construite": { + 2016: "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D032_2016-01-01/OCS-GE_2-0__SHP_LAMB93_D032_2016-01-01.7z", # noqa: E501 + 2019: "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D032_2019-01-01/OCS-GE_2-0__SHP_LAMB93_D032_2019-01-01.7z", # noqa: E501 + }, + "difference": { + ( + 2016, + 2019, + ): "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D032_DIFF_2016-2019/OCS-GE_2-0__SHP_LAMB93_D032_DIFF_2016-2019.7z", # noqa: E501 + }, + }, } diff --git a/airflow/dependencies/container.py b/airflow/dependencies/container.py index 1af770665..eeaa0da12 100644 --- a/airflow/dependencies/container.py +++ b/airflow/dependencies/container.py @@ -1,5 +1,6 @@ from os import getenv +import pysftp from airflow.hooks.base import BaseHook from dependency_injector import containers, providers from psycopg2 import connect @@ -39,3 +40,16 @@ class Container(containers.DeclarativeContainer): host=getenv("DBT_DB_HOST"), port=getenv("DBT_DB_PORT"), ) + + cnopts = pysftp.CnOpts() + cnopts.hostkeys = None + + gpu_sftp = providers.Factory( + provides=pysftp.Connection, + 
host=getenv("GPU_SFTP_HOST"), + username=getenv("GPU_SFTP_USER"), + password=getenv("GPU_SFTP_PASSWORD"), + port=int(getenv("GPU_SFTP_PORT")), + default_path="/pub/export-wfs/latest/", + cnopts=cnopts, + ) diff --git a/airflow/requirements.txt b/airflow/requirements.txt index 42df29065..864a6ce38 100644 --- a/airflow/requirements.txt +++ b/airflow/requirements.txt @@ -8,3 +8,5 @@ apache-airflow-providers-postgres==5.11.2 requests dbt-core dbt-postgres +tqdm==4.66.5 +pysftp==0.2.9 diff --git a/airflow/sql/sparte/dbt_project.yml b/airflow/sql/sparte/dbt_project.yml index 3dd8e2b10..18a6fd2f6 100644 --- a/airflow/sql/sparte/dbt_project.yml +++ b/airflow/sql/sparte/dbt_project.yml @@ -33,10 +33,7 @@ models: sparte: ocsge: +schema: ocsge - +indexes: - - columns: [geom] - type: gist - admin_express: +schema: admin_express - # Config indicated by + and applies to all files under models/example/ + gpu: + +schema: gpu diff --git a/airflow/sql/sparte/models/admin_express/commune.sql b/airflow/sql/sparte/models/admin_express/commune.sql new file mode 100644 index 000000000..4ecdeac06 --- /dev/null +++ b/airflow/sql/sparte/models/admin_express/commune.sql @@ -0,0 +1,28 @@ + +{{ + config( + materialized='table', + indexes=[ + {'columns': ['departement'], 'type': 'btree'}, + {'columns': ['code'], 'type': 'btree'}, + {'columns': ['geom'], 'type': 'gist'} + ]) +}} + +SELECT + id, + nom as name, + nom_m as name_uppercase, + insee_com as code, + statut as type, + population as population, + insee_can as canton, + insee_arr as arrondissement, + insee_dep as departement, + insee_reg as region, + siren_epci as epci, + ST_Area(geom) as surface, + md5(commune::text), + geom +FROM + {{ source('public', 'commune') }} as commune diff --git a/airflow/sql/sparte/models/admin_express/commune_with_checksum.sql b/airflow/sql/sparte/models/admin_express/commune_with_checksum.sql deleted file mode 100644 index 7b171ff42..000000000 --- 
a/airflow/sql/sparte/models/admin_express/commune_with_checksum.sql +++ /dev/null @@ -1,4 +0,0 @@ - -{{ config(materialized='table') }} - -SELECT *, md5(commune::text) FROM {{ source('public', 'commune') }} AS commune diff --git a/airflow/sql/sparte/models/admin_express/departement.sql b/airflow/sql/sparte/models/admin_express/departement.sql new file mode 100644 index 000000000..c4093eb6a --- /dev/null +++ b/airflow/sql/sparte/models/admin_express/departement.sql @@ -0,0 +1,21 @@ + +{{ + config( + materialized='table', + indexes=[ + {'columns': ['code'], 'type': 'btree'}, + {'columns': ['geom'], 'type': 'gist'} + ]) +}} + +SELECT + id, + nom as name, + nom_m as name_uppercase, + insee_dep as code, + insee_reg as region, + ST_Area(geom) as surface, + md5(departement::text), + geom +FROM + {{ source('public', 'departement') }} as departement diff --git a/airflow/sql/sparte/models/admin_express/schema.yml b/airflow/sql/sparte/models/admin_express/schema.yml index 92e704cc6..671fae862 100644 --- a/airflow/sql/sparte/models/admin_express/schema.yml +++ b/airflow/sql/sparte/models/admin_express/schema.yml @@ -2,7 +2,7 @@ version: 2 models: - - name: commune_with_checksum + - name: commune sources: - name: public diff --git a/airflow/sql/sparte/models/gpu/schema.yml b/airflow/sql/sparte/models/gpu/schema.yml new file mode 100644 index 000000000..b748b3245 --- /dev/null +++ b/airflow/sql/sparte/models/gpu/schema.yml @@ -0,0 +1,10 @@ + +version: 2 + +models: + - name: zonage_urbanisme + +sources: + - name: public + tables: + - name: zone_urba diff --git a/airflow/sql/sparte/models/gpu/zonage_urbanisme.sql b/airflow/sql/sparte/models/gpu/zonage_urbanisme.sql new file mode 100644 index 000000000..fbdb13e96 --- /dev/null +++ b/airflow/sql/sparte/models/gpu/zonage_urbanisme.sql @@ -0,0 +1,31 @@ + +{{ + config( + materialized='table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'}, + {'columns': ['libelle'], 'type': 'btree'}, + {'columns': ['type_zone'], 'type': 'btree'} + 
]) +}} + + +SELECT + gpu_doc_id, + gpu_status, + gpu_timestamp, + partition, + libelle, + libelong as libelle_long, + typezone as type_zone, + destdomi as destination_dominante, + nomfic as nom_fichier, + urlfic as url_fichier, + insee as commune_code, + datappro as date_approbation, + datvalid as date_validation, + idurba as id_document_urbanisme, + ST_Area(geom) as surface, + geom + FROM + {{ source('public', 'zone_urba') }} diff --git a/airflow/sql/sparte/models/ocsge/artif/final_artif.sql b/airflow/sql/sparte/models/ocsge/artif/final_artif.sql deleted file mode 100644 index e69de29bb..000000000 diff --git a/airflow/sql/sparte/models/ocsge/difference.sql b/airflow/sql/sparte/models/ocsge/difference.sql index da155d990..337350325 100644 --- a/airflow/sql/sparte/models/ocsge/difference.sql +++ b/airflow/sql/sparte/models/ocsge/difference.sql @@ -15,7 +15,15 @@ WITH latest_loaded_date AS ( departement ) SELECT - *, + loaded_date, + year_old, + year_new, + cs_new, + cs_old, + us_new, + us_old, + departement, + surface, CASE WHEN old_is_imper = false AND @@ -42,7 +50,8 @@ SELECT old_is_artif = true AND new_is_artif = false THEN true ELSE false - END AS new_not_artificial + END AS new_not_artificial, + geom FROM ( SELECT ocsge.*, diff --git a/airflow/sql/sparte/models/ocsge/difference_commune.sql b/airflow/sql/sparte/models/ocsge/difference_commune.sql new file mode 100644 index 000000000..aff4e126d --- /dev/null +++ b/airflow/sql/sparte/models/ocsge/difference_commune.sql @@ -0,0 +1,26 @@ +{{ config(materialized='table') }} + +SELECT *, ST_Area(geom) as surface FROM ( + SELECT + commune.code as commune_code, + ocsge.year_old, + ocsge.year_new, + ocsge.departement, + ocsge.new_is_impermeable, + ocsge.new_is_artificial, + ocsge.new_not_impermeable, + ocsge.new_not_artificial, + ocsge.cs_old, + ocsge.us_old, + ocsge.cs_new, + ocsge.us_new, + ST_Intersection(commune.geom, ocsge.geom) AS geom + FROM + {{ ref("commune") }} AS commune + INNER JOIN + {{ ref("difference") }} 
AS ocsge + ON + ocsge.departement = commune.departement + AND + ST_Intersects(commune.geom, ocsge.geom) +) as foo diff --git a/airflow/sql/sparte/models/ocsge/final/artificial_area.sql b/airflow/sql/sparte/models/ocsge/final/artificial_area.sql new file mode 100644 index 000000000..5d06333ce --- /dev/null +++ b/airflow/sql/sparte/models/ocsge/final/artificial_area.sql @@ -0,0 +1,19 @@ +{{ + config( + materialized='table', + tags=['final'], + ) }} + +SELECT *, ST_Area(geom) FROM ( + SELECT + ocsge.departement, + ocsge.commune_code, + ST_Union(geom) AS geom + FROM + {{ ref("occupation_du_sol_commune") }} AS ocsge + WHERE + ocsge.is_artificial = true + GROUP BY + ocsge.commune_code, + ocsge.departement +) as foo diff --git a/airflow/sql/sparte/models/ocsge/final/commune_diff.sql b/airflow/sql/sparte/models/ocsge/final/commune_diff.sql new file mode 100644 index 000000000..cfd19c0ca --- /dev/null +++ b/airflow/sql/sparte/models/ocsge/final/commune_diff.sql @@ -0,0 +1,18 @@ +{{ config(materialized='table') }} + +SELECT + commune_code, + departement, + year_old, + year_new, + sum(CASE WHEN new_is_impermeable THEN surface ELSE 0 END) AS surface_new_is_impermeable, + sum(CASE WHEN new_not_impermeable THEN surface ELSE 0 END) AS surface_new_not_impermeable, + sum(CASE WHEN new_is_artificial THEN surface ELSE 0 END) AS surface_new_is_artificial, + sum(CASE WHEN new_not_artificial THEN surface ELSE 0 END) AS surface_new_not_artificial +FROM + {{ ref('difference_commune') }} +GROUP BY + commune_code, + departement, + year_old, + year_new diff --git a/airflow/sql/sparte/models/ocsge/final/commune_sol.sql b/airflow/sql/sparte/models/ocsge/final/commune_sol.sql new file mode 100644 index 000000000..2f1edcdfe --- /dev/null +++ b/airflow/sql/sparte/models/ocsge/final/commune_sol.sql @@ -0,0 +1,15 @@ +{{ config(materialized='table') }} + +SELECT + sum(surface), + commune_code, + code_cs, + code_us, + departement +FROM + {{ ref('occupation_du_sol_commune') }} +GROUP BY + 
commune_code, + code_cs, + code_us, + departement diff --git a/airflow/sql/sparte/models/ocsge/occupation_du_sol.sql b/airflow/sql/sparte/models/ocsge/occupation_du_sol.sql index f579f8eb0..160279963 100644 --- a/airflow/sql/sparte/models/ocsge/occupation_du_sol.sql +++ b/airflow/sql/sparte/models/ocsge/occupation_du_sol.sql @@ -5,6 +5,7 @@ materialized='table', indexes=[ {'columns': ['departement','year'], 'type': 'btree'}, + {'columns': ['departement'], 'type': 'btree'}, {'columns': ['geom'], 'type': 'gist'} ] ) @@ -22,10 +23,16 @@ WITH latest_loaded_date AS ( departement ) SELECT - ocsge.*, + ocsge.loaded_date, + ocsge.id, + ocsge.code_cs, + ocsge.code_us, + ocsge.departement, + ocsge.year, ST_area(geom) AS surface, {{ is_impermeable('code_cs') }} as is_impermeable, - {{ is_artificial('code_cs', 'code_us') }} as is_artificial + {{ is_artificial('code_cs', 'code_us') }} as is_artificial, + ocsge.geom FROM {{ source('public', 'ocsge_occupation_du_sol') }} AS ocsge JOIN diff --git a/airflow/sql/sparte/models/ocsge/occupation_du_sol_commune.sql b/airflow/sql/sparte/models/ocsge/occupation_du_sol_commune.sql new file mode 100644 index 000000000..89b848ecf --- /dev/null +++ b/airflow/sql/sparte/models/ocsge/occupation_du_sol_commune.sql @@ -0,0 +1,21 @@ +{{ config(materialized='table') }} + +SELECT *, ST_Area(geom) as surface FROM ( + SELECT + commune.code AS commune_code, + ocsge.year, + ocsge.departement, + ocsge.code_cs, + ocsge.code_us, + ocsge.is_artificial, + ocsge.is_impermeable, + ST_Intersection(commune.geom, ocsge.geom) AS geom + FROM + {{ ref("commune") }} AS commune + INNER JOIN + {{ ref("occupation_du_sol") }} AS ocsge + ON + ocsge.departement = commune.departement + AND + ST_Intersects(commune.geom, ocsge.geom) +) as foo diff --git a/airflow/sql/sparte/models/ocsge/occupation_du_sol_zonage_urbanisme.sql b/airflow/sql/sparte/models/ocsge/occupation_du_sol_zonage_urbanisme.sql new file mode 100644 index 000000000..b148397fc --- /dev/null +++ 
b/airflow/sql/sparte/models/ocsge/occupation_du_sol_zonage_urbanisme.sql @@ -0,0 +1,20 @@ +{{ config(materialized='table') }} + +SELECT *, ST_Area(geom) as surface FROM ( + SELECT + zonage.libelle AS zonage_libelle, + ocsge.year, + ocsge.departement, + ocsge.code_cs, + ocsge.code_us, + ocsge.is_artificial, + ocsge.is_impermeable, + ST_Intersection(zonage.geom, ocsge.geom) AS geom + FROM + {{ ref("zonage_urbanisme") }} AS zonage + INNER JOIN + {{ ref("occupation_du_sol") }} AS ocsge + ON + ST_Intersects(zonage.geom, ocsge.geom) + -- TODO: reproject zonage.gome to ocsge.geom srid +) as foo diff --git a/airflow/sql/sparte/models/ocsge/schema.yml b/airflow/sql/sparte/models/ocsge/schema.yml index 663d11d8f..38397ce18 100644 --- a/airflow/sql/sparte/models/ocsge/schema.yml +++ b/airflow/sql/sparte/models/ocsge/schema.yml @@ -109,6 +109,7 @@ models: - name: zone_artificielle - name: occupation_du_sol - name: difference + - name: occupation_du_sol_zonage_urbanisme sources: - name: public diff --git a/airflow/sql/sparte/models/ocsge/zone_construite.sql b/airflow/sql/sparte/models/ocsge/zone_construite.sql index 7cf375852..29646023c 100644 --- a/airflow/sql/sparte/models/ocsge/zone_construite.sql +++ b/airflow/sql/sparte/models/ocsge/zone_construite.sql @@ -12,7 +12,11 @@ WITH latest_loaded_date AS ( departement ) SELECT - ocsge.* + ocsge.loaded_date, + ocsge.id, + ocsge.year, + ocsge.departement, + ocsge.geom FROM {{ source('public', 'ocsge_zone_construite') }} as ocsge JOIN From f011ca47e2a42d2a7a1aad3789f35936b2be8872 Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Mon, 12 Aug 2024 11:09:23 +0200 Subject: [PATCH 10/99] temp --- airflow/Dockerfile | 4 +- airflow/dags/copy_to_prod.py | 49 +++++++++++++++++++ airflow/dbt_profile.yml | 2 +- airflow/dependencies/container.py | 29 +++++++++++ airflow/requirements.txt | 17 +++++-- .../sparte/models/gpu/zonage_urbanisme.sql | 3 +- 6 files changed, 94 insertions(+), 10 deletions(-) create mode 100644 airflow/dags/copy_to_prod.py diff --git a/airflow/Dockerfile b/airflow/Dockerfile index 21e13084a..ddfb754a9 100644 --- a/airflow/Dockerfile +++ b/airflow/Dockerfile @@ -1,4 +1,4 @@ -FROM quay.io/astronomer/astro-runtime:11.7.0 - +FROM quay.io/astronomer/astro-runtime:11.8.0 RUN mkdir /home/astro/.dbt + COPY ./dbt_profile.yml /home/astro/.dbt/profiles.yml diff --git a/airflow/dags/copy_to_prod.py b/airflow/dags/copy_to_prod.py new file mode 100644 index 000000000..8deaddd63 --- /dev/null +++ b/airflow/dags/copy_to_prod.py @@ -0,0 +1,49 @@ +""" +## Astronaut ETL example DAG + +This DAG queries the list of astronauts currently in space from the +Open Notify API and prints each astronaut's name and flying craft. + +There are two tasks, one to get the data from the API and save the results, +and another to print the results. Both tasks are written in Python using +Airflow's TaskFlow API, which allows you to easily turn Python functions into +Airflow tasks, and automatically infer dependencies and pass data. + +The second task uses dynamic task mapping to create a copy of the task for +each Astronaut in the list retrieved from the API. This list will change +depending on how many Astronauts are in space, and the DAG will adjust +accordingly each time it runs. 
+ +For more explanation and getting started instructions, see our Write your +first DAG tutorial: https://docs.astronomer.io/learn/get-started-with-airflow +""" + +from airflow.decorators import dag, task +from dependencies.container import Container +from gdaltools import ogr2ogr +from pendulum import datetime + + +# Define the basic parameters of the DAG, like schedule and start_date +@dag( + start_date=datetime(2024, 1, 1), + schedule="@once", + catchup=False, + doc_md=__doc__, + default_args={"owner": "Alexis Athlani", "retries": 3}, + tags=["GPU"], +) +def parquet_test(): + @task.python + def export() -> str: + ogr = ogr2ogr() + ogr.config_options["PG_USE_COPY"] = "YES" + ogr.layer_creation_options["SPATIAL_INDEX"] = "YES" + ogr.set_input(Container().gdal_dw_conn(), table_name="departement", srs="EPSG:2154") + ogr.set_output(Container().gdal_prod_conn(), table_name="prod_departement", srs="EPSG:4326") + ogr.execute() + + export() + + +parquet_test() diff --git a/airflow/dbt_profile.yml b/airflow/dbt_profile.yml index f39c999a3..920e7a9b3 100644 --- a/airflow/dbt_profile.yml +++ b/airflow/dbt_profile.yml @@ -6,7 +6,7 @@ sparte: pass: "{{ env_var('DBT_DB_PASSWORD') }}" port: "{{ env_var('DBT_DB_PORT') | as_number }}" schema: "{{ env_var('DBT_DB_SCHEMA') }}" - threads: 1 + threads: 2 type: "postgres" user: "{{ env_var('DBT_DB_USER') }}" target: dev diff --git a/airflow/dependencies/container.py b/airflow/dependencies/container.py index eeaa0da12..268fb4308 100644 --- a/airflow/dependencies/container.py +++ b/airflow/dependencies/container.py @@ -1,8 +1,10 @@ from os import getenv import pysftp +import sqlalchemy from airflow.hooks.base import BaseHook from dependency_injector import containers, providers +from gdaltools import PgConnectionString from psycopg2 import connect from psycopg2.extensions import connection from s3fs import S3FileSystem @@ -12,6 +14,10 @@ def db_str_for_ogr2ogr(dbname: str, user: str, password: str, host: str, port: i return 
f"PG:dbname='{dbname}' host='{host}' port='{port}' user='{user}' password='{password}'" +def create_sql_alchemy_conn(url: str) -> sqlalchemy.engine.base.Connection: + return sqlalchemy.create_engine(url) + + class Container(containers.DeclarativeContainer): s3 = providers.Factory( provides=S3FileSystem, @@ -23,6 +29,29 @@ class Container(containers.DeclarativeContainer): }, ) + postgres_conn_sqlalchemy = providers.Factory( + create_sql_alchemy_conn, + url=getenv("AIRFLOW_CONN_DATA_WAREHOUSE"), + ) + + gdal_dw_conn = providers.Factory( + PgConnectionString, + dbname=getenv("DBT_DB_NAME"), + user=getenv("DBT_DB_USER"), + password=getenv("DBT_DB_PASSWORD"), + host=getenv("DBT_DB_HOST"), + port=getenv("DBT_DB_PORT"), + ) + + gdal_prod_conn = providers.Factory( + PgConnectionString, + dbname="alexis_test", + user=getenv("DBT_DB_USER"), + password=getenv("DBT_DB_PASSWORD"), + host=getenv("DBT_DB_HOST"), + port=getenv("DBT_DB_PORT"), + ) + postgres_conn: connection = providers.Factory( provides=connect, dbname=getenv("DBT_DB_NAME"), diff --git a/airflow/requirements.txt b/airflow/requirements.txt index 864a6ce38..6b2c9b0b4 100644 --- a/airflow/requirements.txt +++ b/airflow/requirements.txt @@ -2,11 +2,18 @@ s3fs==2024.6.1 dependency-injector==4.41.0 py7zr==0.21.1 -sqlmesh==0.115.1 -sqlmesh[postgres]==0.115.1 apache-airflow-providers-postgres==5.11.2 -requests -dbt-core -dbt-postgres +requests==2.32.3 +dbt-core==1.8.5 +dbt-postgres==1.8.2 tqdm==4.66.5 pysftp==0.2.9 +pyarrow==17.0.0 +adbc_driver_postgresql==1.1.0 +pandas==2.0.3 +geopandas==1.0.1 +GeoAlchemy2==0.15.2 +SQLAlchemy==1.4.53 +psycopg2==2.9.9 +pygeos +pygdaltools==1.4.2 diff --git a/airflow/sql/sparte/models/gpu/zonage_urbanisme.sql b/airflow/sql/sparte/models/gpu/zonage_urbanisme.sql index fbdb13e96..2e890b858 100644 --- a/airflow/sql/sparte/models/gpu/zonage_urbanisme.sql +++ b/airflow/sql/sparte/models/gpu/zonage_urbanisme.sql @@ -25,7 +25,6 @@ SELECT datappro as date_approbation, datvalid as date_validation, 
idurba as id_document_urbanisme, - ST_Area(geom) as surface, - geom + ST_MakeValid(ST_transform(geom, 2154)) as geom FROM {{ source('public', 'zone_urba') }} From 456bacfd184304851a1baf22a7a82aeb037caf3c Mon Sep 17 00:00:00 2001 From: "Alexis A." Date: Mon, 12 Aug 2024 18:08:24 +0200 Subject: [PATCH 11/99] feat(ingestion): add app deps --- airflow/Dockerfile | 1 - airflow/dags/copy_to_prod.py | 51 +++++++++++----------- airflow/dags/ingest_app_dependencies.py | 56 +++++++++++++++++++++++++ airflow/dependencies/container.py | 12 +++--- 4 files changed, 86 insertions(+), 34 deletions(-) create mode 100644 airflow/dags/ingest_app_dependencies.py diff --git a/airflow/Dockerfile b/airflow/Dockerfile index ddfb754a9..85b090ea7 100644 --- a/airflow/Dockerfile +++ b/airflow/Dockerfile @@ -1,4 +1,3 @@ FROM quay.io/astronomer/astro-runtime:11.8.0 RUN mkdir /home/astro/.dbt - COPY ./dbt_profile.yml /home/astro/.dbt/profiles.yml diff --git a/airflow/dags/copy_to_prod.py b/airflow/dags/copy_to_prod.py index 8deaddd63..6037c4b45 100644 --- a/airflow/dags/copy_to_prod.py +++ b/airflow/dags/copy_to_prod.py @@ -1,23 +1,3 @@ -""" -## Astronaut ETL example DAG - -This DAG queries the list of astronauts currently in space from the -Open Notify API and prints each astronaut's name and flying craft. - -There are two tasks, one to get the data from the API and save the results, -and another to print the results. Both tasks are written in Python using -Airflow's TaskFlow API, which allows you to easily turn Python functions into -Airflow tasks, and automatically infer dependencies and pass data. - -The second task uses dynamic task mapping to create a copy of the task for -each Astronaut in the list retrieved from the API. This list will change -depending on how many Astronauts are in space, and the DAG will adjust -accordingly each time it runs. 
- -For more explanation and getting started instructions, see our Write your -first DAG tutorial: https://docs.astronomer.io/learn/get-started-with-airflow -""" - from airflow.decorators import dag, task from dependencies.container import Container from gdaltools import ogr2ogr @@ -29,21 +9,38 @@ start_date=datetime(2024, 1, 1), schedule="@once", catchup=False, - doc_md=__doc__, default_args={"owner": "Alexis Athlani", "retries": 3}, tags=["GPU"], ) -def parquet_test(): +def copy_to_prod(): @task.python def export() -> str: ogr = ogr2ogr() - ogr.config_options["PG_USE_COPY"] = "YES" - ogr.layer_creation_options["SPATIAL_INDEX"] = "YES" - ogr.set_input(Container().gdal_dw_conn(), table_name="departement", srs="EPSG:2154") - ogr.set_output(Container().gdal_prod_conn(), table_name="prod_departement", srs="EPSG:4326") + ogr.config_options = {"PG_USE_COPY": "YES"} + + source_schema = "public_ocsge" + source_table_name = "occupation_du_sol" + source_sql = f"SELECT * FROM {source_schema}.{source_table_name} WHERE departement = '75'" + + ogr.set_input( + Container().gdal_dw_conn(schema=source_schema), + table_name=source_table_name, + srs="EPSG:2154", + ) + ogr.set_sql(source_sql) + + destination_table_name = "prod_occupation_du_sol" + + ogr.set_output( + Container().gdal_app_conn(), + table_name=destination_table_name, + srs="EPSG:4326", + ) + ogr.set_output_mode(layer_mode=ogr.MODE_LAYER_APPEND) + ogr.execute() export() -parquet_test() +copy_to_prod() diff --git a/airflow/dags/ingest_app_dependencies.py b/airflow/dags/ingest_app_dependencies.py new file mode 100644 index 000000000..5d9bbe702 --- /dev/null +++ b/airflow/dags/ingest_app_dependencies.py @@ -0,0 +1,56 @@ +from airflow.decorators import dag, task +from dependencies.container import Container +from gdaltools import ogr2ogr +from pendulum import datetime + + +def ingest_table(source_table_name: str, destination_table_name: str): + ogr = ogr2ogr() + ogr.config_options = {"PG_USE_COPY": "YES", "OGR_TRUNCATE": 
"NO"} + ogr.set_preserve_fid(True) + + ogr.set_input(Container().gdal_app_conn(), table_name=source_table_name, srs="EPSG:4326") + + ogr.set_output(Container().gdal_dw_conn(), table_name=destination_table_name, srs="EPSG:4326") + ogr.set_output_mode(layer_mode=ogr.MODE_LAYER_OVERWRITE) + + ogr.execute() + + +# Define the basic parameters of the DAG, like schedule and start_date +@dag( + start_date=datetime(2024, 1, 1), + schedule="@once", + catchup=False, + default_args={"owner": "Alexis Athlani", "retries": 3}, + tags=["app"], +) +def ingest_app_dependencies(): + @task.python + def ingest_region(): + ingest_table(source_table_name="public_data_region", destination_table_name="app_region") + + @task.python + def ingest_departement(): + ingest_table(source_table_name="public_data_departement", destination_table_name="app_departement") + + @task.python + def ingest_commune(): + ingest_table(source_table_name="public_data_commune", destination_table_name="app_commune") + + @task.python + def ingest_epci(): + ingest_table(source_table_name="public_data_epci", destination_table_name="app_epci") + + @task.python + def ingest_scot(): + ingest_table(source_table_name="public_data_scot", destination_table_name="app_scot") + + ingest_region() + ingest_departement() + ingest_commune() + ingest_epci() + ingest_scot() + + +ingest_app_dependencies() diff --git a/airflow/dependencies/container.py b/airflow/dependencies/container.py index 268fb4308..46d6df225 100644 --- a/airflow/dependencies/container.py +++ b/airflow/dependencies/container.py @@ -43,13 +43,13 @@ class Container(containers.DeclarativeContainer): port=getenv("DBT_DB_PORT"), ) - gdal_prod_conn = providers.Factory( + gdal_app_conn = providers.Factory( PgConnectionString, - dbname="alexis_test", - user=getenv("DBT_DB_USER"), - password=getenv("DBT_DB_PASSWORD"), - host=getenv("DBT_DB_HOST"), - port=getenv("DBT_DB_PORT"), + dbname=getenv("APP_DB_NAME"), + user=getenv("APP_DB_USER"), + 
password=getenv("APP_DB_PASSWORD"), + host=getenv("APP_DB_HOST"), + port=getenv("APP_DB_PORT"), ) postgres_conn: connection = providers.Factory( From 54952377c9b86f82d0afc51898119e861a5388da Mon Sep 17 00:00:00 2001 From: "Alexis A." Date: Wed, 14 Aug 2024 19:11:09 +0200 Subject: [PATCH 12/99] misc --- airflow/dags/admin_express.py | 2 +- airflow/dags/gpu.py | 23 +- airflow/dags/ingest_app_dependencies.py | 3 - airflow/dags/ocsge.py | 597 ++++++++++-------- airflow/dbt_profile.yml | 2 +- airflow/dependencies/container.py | 35 +- airflow/dependencies/ocsge/delete_in_app.py | 35 + airflow/dependencies/ocsge/delete_in_dw.py | 35 + airflow/dependencies/ocsge/enums.py | 12 + airflow/dependencies/ocsge/normalization.py | 68 ++ airflow/sql/sparte/dbt_project.yml | 2 + .../sparte/models/admin_express/commune.sql | 2 +- .../models/admin_express/departement.sql | 2 +- airflow/sql/sparte/models/app/app_commune.sql | 16 + .../sql/sparte/models/app/app_departement.sql | 12 + airflow/sql/sparte/models/app/app_epci.sql | 9 + airflow/sql/sparte/models/app/app_region.sql | 9 + airflow/sql/sparte/models/app/app_scot.sql | 8 + airflow/sql/sparte/models/app/schema.yml | 18 + .../sparte/models/gpu/zonage_urbanisme.sql | 1 + .../sql/sparte/models/ocsge/difference.sql | 54 +- .../models/ocsge/final/artificial_area.sql | 19 - .../models/ocsge/final/commune_diff.sql | 18 - .../sparte/models/ocsge/final/commune_sol.sql | 15 - .../sparte/models/ocsge/for_app/app_ocsge.sql | 16 + .../models/ocsge/for_app/app_ocsgediff.sql | 20 + .../ocsge/for_app/app_zoneconstruite.sql | 13 + .../ocsge/intersected/artificial_commune | 19 + .../{ => intersected}/difference_commune.sql | 14 +- .../occupation_du_sol_commune.sql | 14 +- .../occupation_du_sol_zonage_urbanisme.sql | 15 +- .../sparte/models/ocsge/occupation_du_sol.sql | 37 +- airflow/sql/sparte/models/ocsge/schema.yml | 9 +- .../sparte/models/ocsge/zone_construite.sql | 36 +- .../sparte/tests/generic/is_valid_geom.sql | 5 +- 35 files changed, 745 
insertions(+), 450 deletions(-) create mode 100644 airflow/dependencies/ocsge/delete_in_app.py create mode 100644 airflow/dependencies/ocsge/delete_in_dw.py create mode 100644 airflow/dependencies/ocsge/enums.py create mode 100644 airflow/dependencies/ocsge/normalization.py create mode 100644 airflow/sql/sparte/models/app/app_commune.sql create mode 100644 airflow/sql/sparte/models/app/app_departement.sql create mode 100644 airflow/sql/sparte/models/app/app_epci.sql create mode 100644 airflow/sql/sparte/models/app/app_region.sql create mode 100644 airflow/sql/sparte/models/app/app_scot.sql create mode 100644 airflow/sql/sparte/models/app/schema.yml delete mode 100644 airflow/sql/sparte/models/ocsge/final/artificial_area.sql delete mode 100644 airflow/sql/sparte/models/ocsge/final/commune_diff.sql delete mode 100644 airflow/sql/sparte/models/ocsge/final/commune_sol.sql create mode 100644 airflow/sql/sparte/models/ocsge/for_app/app_ocsge.sql create mode 100644 airflow/sql/sparte/models/ocsge/for_app/app_ocsgediff.sql create mode 100644 airflow/sql/sparte/models/ocsge/for_app/app_zoneconstruite.sql create mode 100644 airflow/sql/sparte/models/ocsge/intersected/artificial_commune rename airflow/sql/sparte/models/ocsge/{ => intersected}/difference_commune.sql (66%) rename airflow/sql/sparte/models/ocsge/{ => intersected}/occupation_du_sol_commune.sql (61%) rename airflow/sql/sparte/models/ocsge/{ => intersected}/occupation_du_sol_zonage_urbanisme.sql (58%) diff --git a/airflow/dags/admin_express.py b/airflow/dags/admin_express.py index cb8717d31..7bf41317a 100644 --- a/airflow/dags/admin_express.py +++ b/airflow/dags/admin_express.py @@ -76,7 +76,7 @@ def ingest_admin_express() -> str: for filename in filenames: if filename.endswith(".shp"): path = os.path.abspath(os.path.join(dirpath, filename)) - cmd = f'ogr2ogr -f "PostgreSQL" "{Container().postgres_conn_str_ogr2ogr()}" -overwrite -lco GEOMETRY_NAME=geom -a_srs EPSG:2154 -nlt MULTIPOLYGON -nlt PROMOTE_TO_MULTI {path} 
--config PG_USE_COPY YES' # noqa: E501 + cmd = f'ogr2ogr -f "PostgreSQL" "{Container().gdal_dw_conn_str()}" -overwrite -lco GEOMETRY_NAME=geom -a_srs EPSG:2154 -nlt MULTIPOLYGON -nlt PROMOTE_TO_MULTI {path} --config PG_USE_COPY YES' # noqa: E501 subprocess.run(cmd, shell=True, check=True) download_admin_express() >> ingest_admin_express() diff --git a/airflow/dags/gpu.py b/airflow/dags/gpu.py index 273b6f28d..4ff63e3f2 100644 --- a/airflow/dags/gpu.py +++ b/airflow/dags/gpu.py @@ -1,30 +1,9 @@ -""" -## Astronaut ETL example DAG - -This DAG queries the list of astronauts currently in space from the -Open Notify API and prints each astronaut's name and flying craft. - -There are two tasks, one to get the data from the API and save the results, -and another to print the results. Both tasks are written in Python using -Airflow's TaskFlow API, which allows you to easily turn Python functions into -Airflow tasks, and automatically infer dependencies and pass data. - -The second task uses dynamic task mapping to create a copy of the task for -each Astronaut in the list retrieved from the API. This list will change -depending on how many Astronauts are in space, and the DAG will adjust -accordingly each time it runs. 
- -For more explanation and getting started instructions, see our Write your -first DAG tutorial: https://docs.astronomer.io/learn/get-started-with-airflow -""" - from airflow.decorators import dag, task from airflow.operators.bash import BashOperator from dependencies.container import Container from pendulum import datetime -# Define the basic parameters of the DAG, like schedule and start_date @dag( start_date=datetime(2024, 1, 1), schedule="@once", @@ -57,7 +36,7 @@ def ingest(path_on_bucket: str) -> str: "SQLITE", "-f", '"PostgreSQL"', - f'"{Container().postgres_conn_str_ogr2ogr()}"', + f'"{Container().gdal_dw_conn_str()}"', "-overwrite", "-lco", "GEOMETRY_NAME=geom", diff --git a/airflow/dags/ingest_app_dependencies.py b/airflow/dags/ingest_app_dependencies.py index 5d9bbe702..da7c7f6f2 100644 --- a/airflow/dags/ingest_app_dependencies.py +++ b/airflow/dags/ingest_app_dependencies.py @@ -8,12 +8,9 @@ def ingest_table(source_table_name: str, destination_table_name: str): ogr = ogr2ogr() ogr.config_options = {"PG_USE_COPY": "YES", "OGR_TRUNCATE": "NO"} ogr.set_preserve_fid(True) - ogr.set_input(Container().gdal_app_conn(), table_name=source_table_name, srs="EPSG:4326") - ogr.set_output(Container().gdal_dw_conn(), table_name=destination_table_name, srs="EPSG:4326") ogr.set_output_mode(layer_mode=ogr.MODE_LAYER_OVERWRITE) - ogr.execute() diff --git a/airflow/dags/ocsge.py b/airflow/dags/ocsge.py index 956e9187c..edf7f9da4 100644 --- a/airflow/dags/ocsge.py +++ b/airflow/dags/ocsge.py @@ -1,27 +1,7 @@ -""" -## Astronaut ETL example DAG - -This DAG queries the list of astronauts currently in space from the -Open Notify API and prints each astronaut's name and flying craft. - -There are two tasks, one to get the data from the API and save the results, -and another to print the results. 
Both tasks are written in Python using -Airflow's TaskFlow API, which allows you to easily turn Python functions into -Airflow tasks, and automatically infer dependencies and pass data. - -The second task uses dynamic task mapping to create a copy of the task for -each Astronaut in the list retrieved from the API. This list will change -depending on how many Astronauts are in space, and the DAG will adjust -accordingly each time it runs. - -For more explanation and getting started instructions, see our Write your -first DAG tutorial: https://docs.astronomer.io/learn/get-started-with-airflow -""" - import cgi import os -import re import tempfile +from typing import Literal import pendulum import py7zr @@ -30,139 +10,80 @@ from airflow.models.param import Param from airflow.operators.bash import BashOperator from dependencies.container import Container +from dependencies.ocsge.delete_in_app import ( + delete_difference_in_app_sql, + delete_occupation_du_sol_in_app_sql, + delete_zone_construite_in_app_sql, +) +from dependencies.ocsge.delete_in_dw import ( + delete_difference_in_dw_sql, + delete_occupation_du_sol_in_dw_sql, + delete_zone_construite_in_dw_sql, +) +from dependencies.ocsge.enums import DatasetName, SourceName +from dependencies.ocsge.normalization import ( + ocsge_diff_normalization_sql, + ocsge_occupation_du_sol_normalization_sql, + ocsge_zone_construite_normalization_sql, +) from dependencies.utils import multiline_string_to_single_line - - -def find_years_in_url(url: str) -> list[int]: - results = re.findall(pattern="(\d{4})", string=str(url)) # noqa: W605 - - years = set() - - for result in results: - # check if the year the number is > 2000. 
- # this is to avoid getting other numbers in the path as years - if str(result).startswith("20"): - years.add(int(result)) - - if not years: - raise ValueError("Years not found in the path") - - return list(sorted(years)) - - -def years_as_string(years: list[int]) -> str: - return "_".join(map(str, years)) - - -def find_departement_in_url(url: str) -> str: - results = re.findall(pattern="D(\d{3})", string=str(url)) # noqa: W605 - - if len(results) > 0: - result = results[0] - - if str(result).startswith("0"): - return str(result).replace("0", "", 1) - - if not result: - raise ValueError("Departement not found in the path") - - return result - - -def ocsge_diff_normalization_sql(years: list[int], departement: str, source_name: str, loaded_date: float) -> str: - fields = { - "cs_new": f"CS_{years[1]}", - "cs_old": f"CS_{years[0]}", - "us_new": f"US_{years[1]}", - "us_old": f"US_{years[0]}", - "year_old": years[0], - "year_new": years[1], - } - - return f""" - SELECT - {loaded_date} AS loaded_date, - {fields['year_old']} AS year_old, - {fields['year_new']} AS year_new, - {fields['cs_new']} AS cs_new, - {fields['cs_old']} AS cs_old, - {fields['us_new']} AS us_new, - {fields['us_old']} AS us_old, - cast({departement} as text) AS departement, - GEOMETRY as geom - FROM - {source_name} - """ - - -def ocsge_occupation_du_sol_normalization_sql( - years: list[int], - departement: str, - source_name: str, - loaded_date: float, -) -> str: - return f""" SELECT - {loaded_date} AS loaded_date, - ID AS id, - code_cs AS code_cs, - code_us AS code_us, - GEOMETRY AS geom, - cast({departement} as text) AS departement, - {years[0]} AS year - FROM - {source_name} - """ - - -def ocsge_zone_construite_normalization_sql( - years: list[int], - departement: str, - source_name: str, - loaded_date: float, -) -> str: - return f""" SELECT - {loaded_date} AS loaded_date, - ID AS id, - {years[0]} AS year, - cast({departement} as text) AS departement, - GEOMETRY AS geom - FROM - {source_name} - """ 
- - -def get_table_name(shapefile_name: str) -> str | None: - shapefile_name = shapefile_name.lower() - if "diff" in shapefile_name: - return "ocsge_diff" - if "occupation" in shapefile_name: - return "ocsge_occupation_du_sol" - if "zone" in shapefile_name: - return "ocsge_zone_construite" - - return None - - -def get_normalization_sql( - table_name: str, - source_name: str, - years: list[int], - departement: str, - loaded_date: float, -) -> str: - return { - "ocsge_diff": ocsge_diff_normalization_sql, - "ocsge_occupation_du_sol": ocsge_occupation_du_sol_normalization_sql, - "ocsge_zone_construite": ocsge_zone_construite_normalization_sql, - }[table_name]( - years=years, - departement=departement, - source_name=source_name, - loaded_date=loaded_date, - ) - - -configs = { # noqa: E501 +from gdaltools import ogr2ogr + + +def copy_table_from_dw_to_app( + source_sql: str, + destination_table_name: str, +): + ogr = ogr2ogr() + ogr.config_options = {"PG_USE_COPY": "YES"} + ogr.set_input(Container().gdal_dw_conn(schema="public_ocsge")) + ogr.set_sql(source_sql) + ogr.set_output(Container().gdal_app_conn(), table_name=destination_table_name) + ogr.set_output_mode(layer_mode=ogr.MODE_LAYER_APPEND) + ogr.execute() + + +def get_paths_from_directory(directory: str) -> list[tuple[str, str]]: + paths = [] + + for dirpath, _, filenames in os.walk(directory): + for filename in filenames: + path = os.path.abspath(os.path.join(dirpath, filename)) + paths.append( + ( + path, + filename, + ) + ) + + return paths + + +sources = { # noqa: E501 + "38": { + DatasetName.OCCUPATION_DU_SOL_ET_ZONE_CONSTRUITE: { + 2018: "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D038_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D038_2018-01-01.7z", # noqa: E501 + 2021: "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D038_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D038_2021-01-01.7z", # noqa: E501 + }, + DatasetName.DIFFERENCE: { + ( + 2018, + 2021, + ): 
"https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D038_DIFF_2018-2021/OCS-GE_2-0__SHP_LAMB93_D038_DIFF_2018-2021.7z", # noqa: E501 + }, + }, + "69": { + DatasetName.OCCUPATION_DU_SOL_ET_ZONE_CONSTRUITE: { + 2017: "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D069_2017-01-01/OCS-GE_2-0__SHP_LAMB93_D069_2017-01-01.7z", # noqa: E501 + 2020: "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D069_2020-01-01/OCS-GE_2-0__SHP_LAMB93_D069_2020-01-01.7z", # noqa: E501 + }, + DatasetName.DIFFERENCE: { + ( + 2017, + 2020, + ): "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D069_DIFF_2017-2020/OCS-GE_2-0__SHP_LAMB93_D069_DIFF_2017-2020.7z", # noqa: E501 + }, + }, "91": { "occupation_du_sol_et_zone_construite": { 2018: "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D091_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D091_2018-01-01.7z", # noqa: E501 @@ -237,8 +158,151 @@ def get_normalization_sql( }, } +vars = { + SourceName.OCCUPATION_DU_SOL: { + "shapefile_name": "OCCUPATION_SOL", + "dbt_selector": "source:sparte.public.ocsge_occupation_du_sol", + "dbt_selector_staging": "source:sparte.public.ocsge_occupation_du_sol_staging", + "dw_staging": "ocsge_occupation_du_sol_staging", + "dw_source": "ocsge_occupation_du_sol", + "app_table_names": ("public_data_ocsge",), + "normalization_sql": ocsge_occupation_du_sol_normalization_sql, + "delete_on_dwt": delete_occupation_du_sol_in_dw_sql, + "delete_on_app": delete_occupation_du_sol_in_app_sql, + "mapping": { + "public_ocsge.app_ocsge": { + "to_table": "public.public_data_ocsge", + "select": lambda departement, years: f"SELECT * FROM public_ocsge.app_ocsge WHERE departement = '{departement}' AND year = {years[0]}", # noqa: E501 + }, + }, + }, + SourceName.ZONE_CONSTRUITE: { + "shapefile_name": "ZONE_CONSTRUITE", + "dbt_selector": "source:sparte.public.ocsge_zone_construite", + "dbt_selector_staging": 
"source:sparte.public.ocsge_zone_construite_staging", + "dw_staging": "ocsge_zone_construite_staging", + "dw_source": "ocsge_zone_construite", + "app_table_names": ("public_data_zoneconstruite",), + "normalization_sql": ocsge_zone_construite_normalization_sql, + "delete_on_dwt": delete_zone_construite_in_dw_sql, + "delete_on_app": delete_zone_construite_in_app_sql, + "mapping": { + "public_ocsge.app_zoneconstruite": { + "to_table": "public.public_data_zoneconstruite", + "select": lambda departement, years: f"SELECT * FROM public_ocsge.app_zoneconstruite WHERE departement = '{departement}' AND year = {years[0]}", # noqa: E501 + }, + }, + }, + SourceName.DIFFERENCE: { + "shapefile_name": "DIFFERENCE", + "dbt_selector": "source:sparte.public.ocsge_difference", + "dbt_selector_staging": "source:sparte.public.ocsge_difference_staging", + "dw_staging": "ocsge_difference_staging", + "dw_source": "ocsge_difference", + "dw_final_table_name": "app_ocsgediff", + "app_table_names": ("public_data_ocsgediff",), + "normalization_sql": ocsge_diff_normalization_sql, + "delete_on_dwt": delete_difference_in_dw_sql, + "delete_on_app": delete_difference_in_app_sql, + "mapping": { + "public_ocsge.app_ocsgediff": { + "to_table": "public.public_data_ocsgediff", + "select": lambda departement, years: f"SELECT * FROM public_ocsge.app_ocsgediff WHERE departement = '{departement}' AND year_old = {years[0]} AND year_new = {years[1]}", # noqa: E501 + }, + }, + }, +} + +vars_dataset = { + DatasetName.OCCUPATION_DU_SOL_ET_ZONE_CONSTRUITE: [ + vars[SourceName.OCCUPATION_DU_SOL], + vars[SourceName.ZONE_CONSTRUITE], + ], + DatasetName.DIFFERENCE: [ + vars[SourceName.DIFFERENCE], + ], +} + + +def get_source_name_from_shapefile_name(shapefile_name: str) -> SourceName | None: + shapefile_name = shapefile_name.lower() + if "diff" in shapefile_name: + return SourceName.DIFFERENCE + if "occupation" in shapefile_name: + return SourceName.OCCUPATION_DU_SOL + if "zone" in shapefile_name: + return 
SourceName.ZONE_CONSTRUITE + + return None + + +def get_vars_by_shapefile_name(shapefile_name: str) -> dict | None: + source_name = get_source_name_from_shapefile_name(shapefile_name) + if not source_name: + return None -departement_list = list(configs.keys()) + return vars[source_name] + + +def load_shapefile_to_dw( + path: str, + years: list[int], + departement: str, + loaded_date: int, + table_key: str, + mode: Literal["overwrite", "append"] = "append", +): + local_path = "/tmp/ocsge.7z" + Container().s3().get_file(path, local_path) + extract_dir = tempfile.mkdtemp() + py7zr.SevenZipFile(local_path, mode="r").extractall(path=extract_dir) + + for file_path, filename in get_paths_from_directory(extract_dir): + if not file_path.endswith(".shp"): + continue + variables = get_vars_by_shapefile_name(filename) + if not variables: + continue + + sql = multiline_string_to_single_line( + variables["normalization_sql"]( + shapefile_name=filename.split(".")[0], + years=years, + departement=departement, + loaded_date=loaded_date, + ) + ) + table_name = variables[table_key] + + cmd = [ + "ogr2ogr", + "-dialect", + "SQLITE", + "-f", + '"PostgreSQL"', + f'"{Container().gdal_dw_conn_str()}"', + f"-{mode}", + "-lco", + "GEOMETRY_NAME=geom", + "-a_srs", + "EPSG:2154", + "-nlt", + "MULTIPOLYGON", + "-nlt", + "PROMOTE_TO_MULTI", + "-nln", + table_name, + file_path, + "--config", + "PG_USE_COPY", + "YES", + "-sql", + f'"{sql}"', + ] + BashOperator( + task_id=f"ingest_{table_name}", + bash_command=" ".join(cmd), + ).execute(context={}) @dag( @@ -247,17 +311,18 @@ def get_normalization_sql( schedule="@once", catchup=False, doc_md=__doc__, + max_active_runs=1, default_args={"owner": "Alexis Athlani", "retries": 3}, tags=["OCS GE"], params={ - "departement": Param("75", type="string", enum=departement_list), + "departement": Param("75", type="string", enum=list(sources.keys())), "years": Param([2018], type="array"), "dataset": Param( - "occupation_du_sol_et_zone_construite", + 
DatasetName.OCCUPATION_DU_SOL_ET_ZONE_CONSTRUITE, type="string", enum=[ - "occupation_du_sol_et_zone_construite", - "difference", + DatasetName.OCCUPATION_DU_SOL_ET_ZONE_CONSTRUITE, + DatasetName.DIFFERENCE, ], ), }, @@ -265,8 +330,8 @@ def get_normalization_sql( def ocsge(): # noqa: C901 bucket_name = "airflow-staging" - @task.python - def get_url_from_config(**context) -> str: + @task.python() + def get_url(**context) -> str: departement = context["params"]["departement"] years = tuple(map(int, context["params"]["years"])) dataset = context["params"]["dataset"] @@ -274,12 +339,18 @@ def get_url_from_config(**context) -> str: if len(years) == 1: years = years[0] - print(departement, dataset, years) + return sources.get(departement, {}).get(dataset, {}).get(years) - url = configs.get(departement, {}).get(dataset, {}).get(years) + @task.python(retries=0) + def check_url_exists(url) -> dict: + response = requests.head(url) + if not response.ok: + raise ValueError(f"Failed to download {url}. Response : {response.content}") - print(url) - return url + return { + "url": url, + "status_code": response.status_code, + } @task.python def download_ocsge(url) -> str: @@ -287,6 +358,7 @@ def download_ocsge(url) -> str: if not response.ok: raise ValueError(f"Failed to download {url}. 
Response : {response.content}") + header = response.headers["content-disposition"] _, params = cgi.parse_header(header) filename = params.get("filename") @@ -297,147 +369,134 @@ def download_ocsge(url) -> str: return path_on_bucket + @task.python + def ingest_staging(path, **context) -> int: + loaded_date = int(pendulum.now().timestamp()) + departement = context["params"]["departement"] + years = context["params"]["years"] + + load_shapefile_to_dw( + path=path, + years=years, + departement=departement, + loaded_date=loaded_date, + table_key="dw_staging", + mode="overwrite", + ) + + return loaded_date + + @task.bash + def db_test_ocsge_staging(**context): + dataset = context["params"]["dataset"] + dbt_select = " ".join([vars["dbt_selector_staging"] for vars in vars_dataset[dataset]]) + return 'cd "${AIRFLOW_HOME}/sql/sparte" && dbt test -s ' + dbt_select + @task.python def ingest_ocsge(path, **context) -> int: loaded_date = int(pendulum.now().timestamp()) departement = context["params"]["departement"] years = context["params"]["years"] - print(loaded_date) - with Container().s3().open(path, "rb") as f: - extract_dir = tempfile.mkdtemp() - py7zr.SevenZipFile(f, mode="r").extractall(path=extract_dir) - - for dirpath, _, filenames in os.walk(extract_dir): - for filename in filenames: - if filename.endswith(".shp"): - path = os.path.abspath(os.path.join(dirpath, filename)) - table_name = get_table_name(shapefile_name=filename) - print("get_table_name", table_name) - if not table_name: - continue - sql = multiline_string_to_single_line( - get_normalization_sql( - source_name=os.path.basename(path).replace(".shp", ""), - table_name=table_name, - years=years, - departement=departement, - loaded_date=loaded_date, - ) - ) - cmd = [ - "ogr2ogr", - "-dialect", - "SQLITE", - "-f", - '"PostgreSQL"', - f'"{Container().postgres_conn_str_ogr2ogr()}"', - "-append", - "-lco", - "GEOMETRY_NAME=geom", - "-a_srs", - "EPSG:2154", - "-nlt", - "MULTIPOLYGON", - "-nlt", - 
"PROMOTE_TO_MULTI", - "-nln", - table_name, - path, - "--config", - "PG_USE_COPY", - "YES", - "-sql", - f'"{sql}"', - ] - BashOperator( - task_id=f"ingest_{table_name}", - bash_command=" ".join(cmd), - ).execute(context={}) + + load_shapefile_to_dw( + path=path, + years=years, + departement=departement, + loaded_date=loaded_date, + table_key="dw_source", + ) return loaded_date @task.bash(retries=0) def dbt_test_ocsge(**context): dataset = context["params"]["dataset"] - - if dataset == "occupation_du_sol_et_zone_construite": - selector = "source:sparte.public.ocsge_occupation_du_sol source:sparte.public.ocsge_zone_construite" - elif dataset == "difference": - selector = "source:sparte.public.ocsge_diff" - else: - raise ValueError(f"Unknown dataset {dataset}") - - return 'cd "${AIRFLOW_HOME}/sql/sparte" && dbt test -s ' + selector + dbt_select = " ".join([vars["dbt_selector"] for vars in vars_dataset[dataset]]) + return 'cd "${AIRFLOW_HOME}/sql/sparte" && dbt test -s ' + dbt_select @task.bash(retries=0, trigger_rule="all_success") def dbt_run_ocsge(**context): dataset = context["params"]["dataset"] + dbt_select = " ".join([f'{vars["dbt_selector"]}+' for vars in vars_dataset[dataset]]) + return 'cd "${AIRFLOW_HOME}/sql/sparte" && dbt run -s ' + dbt_select - if dataset == "occupation_du_sol_et_zone_construite": - selector = "source:sparte.public.ocsge_occupation_du_sol+ source:sparte.public.ocsge_zone_construite+" - elif dataset == "difference": - selector = "source:sparte.public.ocsge_diff+" - else: - raise ValueError(f"Unknown dataset {dataset}") - - return 'cd "${AIRFLOW_HOME}/sql/sparte" && dbt run -s ' + selector - - @task.python(trigger_rule="one_failed") - def rollback_append(loaded_date: float, **context): + @task.python(trigger_rule="all_success") + def delete_previously_loaded_data_in_dw(**context) -> dict: dataset = context["params"]["dataset"] - - if dataset == "occupation_du_sol_et_zone_construite": - tables = ["ocsge_occupation_du_sol", 
"ocsge_zone_construite"] - elif dataset == "difference": - tables = ["ocsge_diff"] - else: - raise ValueError(f"Unknown dataset {dataset}") - - conn = Container().postgres_conn() + departement = context["params"]["departement"] + years = context["params"]["years"] + conn = Container().psycopg2_dw_conn() cur = conn.cursor() results = {} - for table in tables: - print(f"DELETE FROM public.{table} WHERE loaded_date = {loaded_date}") - cur.execute(f"DELETE FROM public.{table} WHERE loaded_date = {loaded_date}") - results[table] = cur.rowcount + for vars in vars_dataset[dataset]: + cur.execute(vars["delete_on_dwt"](departement, years)) + results[vars["dw_source"]] = cur.rowcount conn.commit() conn.close() return results - @task.python - def export_table(): - conn = Container().postgres_conn() + @task.python(trigger_rule="all_success") + def delete_previously_loaded_data_in_app(**context) -> str: + dataset = context["params"]["dataset"] + departement = context["params"]["departement"] + years = context["params"]["years"] + + conn = Container().psycopg2_app_conn() cur = conn.cursor() - filename = "occupation_du_sol.csv" - temp_file = f"/tmp/{filename}" - temp_archive = f"/tmp/{filename}.7z" - path_on_bucket = f"{bucket_name}/{filename}.7z" + results = {} + + for vars in vars_dataset[dataset]: + cur.execute(vars["delete_on_app"](departement, years)) + results[vars["app_table_names"]] = cur.rowcount + + conn.commit() + conn.close() - with open(temp_file, "w") as csv_file: - cur.copy_expert("COPY (SELECT * FROM public_ocsge.occupation_du_sol) TO STDOUT WITH CSV HEADER", csv_file) + return str(results) - with py7zr.SevenZipFile(temp_archive, mode="w") as archive: - archive.write(temp_file, filename) + @task.python(trigger_rule="all_success") + def load_data_in_app(**context): + dataset = context["params"]["dataset"] + departement = context["params"]["departement"] + years = context["params"]["years"] - with open(temp_archive, "rb") as archive: - with 
Container().s3().open(path_on_bucket, "wb") as f: - f.write(archive.read()) + for vars in vars_dataset[dataset]: + for from_table in vars["mapping"]: + values = vars["mapping"][from_table] + copy_table_from_dw_to_app( + source_sql=values["select"](departement, years), + destination_table_name=values["to_table"], + ) - url = get_url_from_config() + url = get_url() + url_exists = check_url_exists(url=url) path = download_ocsge(url=url) + load_date_staging = ingest_staging(path=path) + delete_dw = delete_previously_loaded_data_in_dw() + test_result_staging = db_test_ocsge_staging() loaded_date = ingest_ocsge(path=path) + test_result = dbt_test_ocsge() + dbt_run_ocsge_result = dbt_run_ocsge() + delete_app = delete_previously_loaded_data_in_app() + load_app = load_data_in_app() + ( - loaded_date - >> dbt_test_ocsge() - >> [ - rollback_append(loaded_date=loaded_date), - dbt_run_ocsge(), - ] + url + >> url_exists + >> path + >> load_date_staging + >> test_result_staging + >> delete_dw + >> loaded_date + >> test_result + >> dbt_run_ocsge_result + >> delete_app + >> load_app ) diff --git a/airflow/dbt_profile.yml b/airflow/dbt_profile.yml index 920e7a9b3..f39c999a3 100644 --- a/airflow/dbt_profile.yml +++ b/airflow/dbt_profile.yml @@ -6,7 +6,7 @@ sparte: pass: "{{ env_var('DBT_DB_PASSWORD') }}" port: "{{ env_var('DBT_DB_PORT') | as_number }}" schema: "{{ env_var('DBT_DB_SCHEMA') }}" - threads: 2 + threads: 1 type: "postgres" user: "{{ env_var('DBT_DB_USER') }}" target: dev diff --git a/airflow/dependencies/container.py b/airflow/dependencies/container.py index 46d6df225..591d336d7 100644 --- a/airflow/dependencies/container.py +++ b/airflow/dependencies/container.py @@ -43,17 +43,8 @@ class Container(containers.DeclarativeContainer): port=getenv("DBT_DB_PORT"), ) - gdal_app_conn = providers.Factory( - PgConnectionString, - dbname=getenv("APP_DB_NAME"), - user=getenv("APP_DB_USER"), - password=getenv("APP_DB_PASSWORD"), - host=getenv("APP_DB_HOST"), - 
port=getenv("APP_DB_PORT"), - ) - - postgres_conn: connection = providers.Factory( - provides=connect, + gdal_dw_conn_str = providers.Factory( + db_str_for_ogr2ogr, dbname=getenv("DBT_DB_NAME"), user=getenv("DBT_DB_USER"), password=getenv("DBT_DB_PASSWORD"), @@ -61,8 +52,8 @@ class Container(containers.DeclarativeContainer): port=getenv("DBT_DB_PORT"), ) - postgres_conn_str_ogr2ogr = providers.Factory( - db_str_for_ogr2ogr, + psycopg2_dw_conn: connection = providers.Factory( + provides=connect, dbname=getenv("DBT_DB_NAME"), user=getenv("DBT_DB_USER"), password=getenv("DBT_DB_PASSWORD"), @@ -70,6 +61,24 @@ class Container(containers.DeclarativeContainer): port=getenv("DBT_DB_PORT"), ) + psycopg2_app_conn: connection = providers.Factory( + provides=connect, + dbname=getenv("APP_DB_NAME"), + user=getenv("APP_DB_USER"), + password=getenv("APP_DB_PASSWORD"), + host=getenv("APP_DB_HOST"), + port=getenv("APP_DB_PORT"), + ) + + gdal_app_conn = providers.Factory( + PgConnectionString, + dbname=getenv("APP_DB_NAME"), + user=getenv("APP_DB_USER"), + password=getenv("APP_DB_PASSWORD"), + host=getenv("APP_DB_HOST"), + port=getenv("APP_DB_PORT"), + ) + cnopts = pysftp.CnOpts() cnopts.hostkeys = None diff --git a/airflow/dependencies/ocsge/delete_in_app.py b/airflow/dependencies/ocsge/delete_in_app.py new file mode 100644 index 000000000..a2057e0e2 --- /dev/null +++ b/airflow/dependencies/ocsge/delete_in_app.py @@ -0,0 +1,35 @@ +from typing import List + + +def delete_occupation_du_sol_in_app_sql( + departement: str, + years: List[str], +) -> str: + return f""" + DELETE FROM public.public_data_ocsge + WHERE departement = '{departement}' + AND year = {years[0]}; + """ + + +def delete_zone_construite_in_app_sql( + departement: str, + years: List[str], +) -> str: + return f""" + DELETE FROM public.public_data_zoneconstruite + WHERE departement = '{departement}' + AND year = {years[0]}; + """ + + +def delete_difference_in_app_sql( + departement: str, + years: List[str], +) -> str: + 
return f""" + DELETE FROM public.public_data_ocsgediff + WHERE departement = '{departement}' + AND year_old = {years[0]} + AND year_new = {years[1]}; + """ diff --git a/airflow/dependencies/ocsge/delete_in_dw.py b/airflow/dependencies/ocsge/delete_in_dw.py new file mode 100644 index 000000000..bc981ad73 --- /dev/null +++ b/airflow/dependencies/ocsge/delete_in_dw.py @@ -0,0 +1,35 @@ +from typing import List + + +def delete_occupation_du_sol_in_dw_sql( + departement: str, + years: List[str], +) -> str: + return f""" + DELETE FROM public.ocsge_occupation_du_sol + WHERE departement = '{departement}' + AND year = {years[0]} + """ + + +def delete_zone_construite_in_dw_sql( + departement: str, + years: List[str], +) -> str: + return f""" + DELETE FROM public.ocsge_zone_construite + WHERE departement = '{departement}' + AND year = {years[0]} + """ + + +def delete_difference_in_dw_sql( + departement: str, + years: List[str], +) -> str: + return f""" + DELETE FROM public.ocsge_difference + WHERE departement = '{departement}' + AND year_old = {years[0]} + AND year_new = {years[1]} + """ diff --git a/airflow/dependencies/ocsge/enums.py b/airflow/dependencies/ocsge/enums.py new file mode 100644 index 000000000..29f441d2e --- /dev/null +++ b/airflow/dependencies/ocsge/enums.py @@ -0,0 +1,12 @@ +from enum import StrEnum + + +class DatasetName(StrEnum): + OCCUPATION_DU_SOL_ET_ZONE_CONSTRUITE = "occupation_du_sol_et_zone_construite" + DIFFERENCE = "difference" + + +class SourceName(StrEnum): + OCCUPATION_DU_SOL = "occupation_du_sol" + ZONE_CONSTRUITE = "zone_construite" + DIFFERENCE = "difference" diff --git a/airflow/dependencies/ocsge/normalization.py b/airflow/dependencies/ocsge/normalization.py new file mode 100644 index 000000000..899e0539a --- /dev/null +++ b/airflow/dependencies/ocsge/normalization.py @@ -0,0 +1,68 @@ +def ocsge_diff_normalization_sql( + years: list[int], + departement: str, + shapefile_name: str, + loaded_date: float, +) -> str: + fields = { + "cs_new": 
f"CS_{years[1]}", + "cs_old": f"CS_{years[0]}", + "us_new": f"US_{years[1]}", + "us_old": f"US_{years[0]}", + "year_old": years[0], + "year_new": years[1], + } + + return f""" + SELECT + {loaded_date} AS loaded_date, + {fields['year_old']} AS year_old, + {fields['year_new']} AS year_new, + {fields['cs_new']} AS cs_new, + {fields['cs_old']} AS cs_old, + {fields['us_new']} AS us_new, + {fields['us_old']} AS us_old, + cast({departement} as text) AS departement, + CreateUUID() as uuid, + GEOMETRY as geom + FROM + {shapefile_name} + """ + + +def ocsge_occupation_du_sol_normalization_sql( + years: list[int], + departement: str, + shapefile_name: str, + loaded_date: float, +) -> str: + return f""" SELECT + {loaded_date} AS loaded_date, + ID AS id, + code_cs AS code_cs, + code_us AS code_us, + GEOMETRY AS geom, + cast({departement} as text) AS departement, + {years[0]} AS year, + CreateUUID() as uuid + FROM + {shapefile_name} + """ + + +def ocsge_zone_construite_normalization_sql( + years: list[int], + departement: str, + shapefile_name: str, + loaded_date: float, +) -> str: + return f""" SELECT + {loaded_date} AS loaded_date, + ID AS id, + {years[0]} AS year, + cast({departement} as text) AS departement, + CreateUUID() as uuid, + GEOMETRY AS geom + FROM + {shapefile_name} + """ diff --git a/airflow/sql/sparte/dbt_project.yml b/airflow/sql/sparte/dbt_project.yml index 18a6fd2f6..21b39585e 100644 --- a/airflow/sql/sparte/dbt_project.yml +++ b/airflow/sql/sparte/dbt_project.yml @@ -31,6 +31,8 @@ clean-targets: # directories to be removed by `dbt clean` # files using the `{{ config(...) }}` macro. 
models: sparte: + app: + +schema: app ocsge: +schema: ocsge admin_express: diff --git a/airflow/sql/sparte/models/admin_express/commune.sql b/airflow/sql/sparte/models/admin_express/commune.sql index 4ecdeac06..c3d18e0ed 100644 --- a/airflow/sql/sparte/models/admin_express/commune.sql +++ b/airflow/sql/sparte/models/admin_express/commune.sql @@ -22,7 +22,7 @@ SELECT insee_reg as region, siren_epci as epci, ST_Area(geom) as surface, - md5(commune::text), + gen_random_uuid() as uuid, geom FROM {{ source('public', 'commune') }} as commune diff --git a/airflow/sql/sparte/models/admin_express/departement.sql b/airflow/sql/sparte/models/admin_express/departement.sql index c4093eb6a..559ca66bb 100644 --- a/airflow/sql/sparte/models/admin_express/departement.sql +++ b/airflow/sql/sparte/models/admin_express/departement.sql @@ -15,7 +15,7 @@ SELECT insee_dep as code, insee_reg as region, ST_Area(geom) as surface, - md5(departement::text), + gen_random_uuid() as uuid, geom FROM {{ source('public', 'departement') }} as departement diff --git a/airflow/sql/sparte/models/app/app_commune.sql b/airflow/sql/sparte/models/app/app_commune.sql new file mode 100644 index 000000000..b84f3a083 --- /dev/null +++ b/airflow/sql/sparte/models/app/app_commune.sql @@ -0,0 +1,16 @@ + +{{ config(materialized='table') }} + +SELECT + id, + insee, + name, + departement_id, + epci_id, + scot_id, + ocsge_available, + first_millesime, + last_millesime, + surface_artif +FROM + {{ source('public', 'app_commune') }} diff --git a/airflow/sql/sparte/models/app/app_departement.sql b/airflow/sql/sparte/models/app/app_departement.sql new file mode 100644 index 000000000..b69dffb04 --- /dev/null +++ b/airflow/sql/sparte/models/app/app_departement.sql @@ -0,0 +1,12 @@ + +{{ config(materialized='table') }} + +SELECT + id, + source_id, + name, + region_id, + is_artif_ready, + ocsge_millesimes +FROM + {{ source('public', 'app_departement') }} diff --git a/airflow/sql/sparte/models/app/app_epci.sql 
b/airflow/sql/sparte/models/app/app_epci.sql new file mode 100644 index 000000000..f7bc6ce73 --- /dev/null +++ b/airflow/sql/sparte/models/app/app_epci.sql @@ -0,0 +1,9 @@ + +{{ config(materialized='table') }} + +SELECT + id, + source_id, + name +FROM + {{ source('public', 'app_epci') }} diff --git a/airflow/sql/sparte/models/app/app_region.sql b/airflow/sql/sparte/models/app/app_region.sql new file mode 100644 index 000000000..0f6207e09 --- /dev/null +++ b/airflow/sql/sparte/models/app/app_region.sql @@ -0,0 +1,9 @@ + +{{ config(materialized='table') }} + +SELECT + id, + source_id, + name +FROM + {{ source('public', 'app_region') }} diff --git a/airflow/sql/sparte/models/app/app_scot.sql b/airflow/sql/sparte/models/app/app_scot.sql new file mode 100644 index 000000000..b8fcc11a9 --- /dev/null +++ b/airflow/sql/sparte/models/app/app_scot.sql @@ -0,0 +1,8 @@ + +{{ config(materialized='table') }} + +SELECT + id, + name +FROM + {{ source('public', 'app_scot') }} diff --git a/airflow/sql/sparte/models/app/schema.yml b/airflow/sql/sparte/models/app/schema.yml new file mode 100644 index 000000000..91961720b --- /dev/null +++ b/airflow/sql/sparte/models/app/schema.yml @@ -0,0 +1,18 @@ + +version: 2 + +models: + - name: app_commune + - name: app_departement + - name: app_region + - name: app_epci + - name: app_scot + +sources: + - name: public + tables: + - name: app_commune + - name: app_departement + - name: app_region + - name: app_epci + - name: app_scot diff --git a/airflow/sql/sparte/models/gpu/zonage_urbanisme.sql b/airflow/sql/sparte/models/gpu/zonage_urbanisme.sql index 2e890b858..b351035b2 100644 --- a/airflow/sql/sparte/models/gpu/zonage_urbanisme.sql +++ b/airflow/sql/sparte/models/gpu/zonage_urbanisme.sql @@ -25,6 +25,7 @@ SELECT datappro as date_approbation, datvalid as date_validation, idurba as id_document_urbanisme, + gen_random_uuid() as uuid, ST_MakeValid(ST_transform(geom, 2154)) as geom FROM {{ source('public', 'zone_urba') }} diff --git 
a/airflow/sql/sparte/models/ocsge/difference.sql b/airflow/sql/sparte/models/ocsge/difference.sql index 337350325..b143b516a 100644 --- a/airflow/sql/sparte/models/ocsge/difference.sql +++ b/airflow/sql/sparte/models/ocsge/difference.sql @@ -1,29 +1,22 @@ -{{ config(materialized='table') }} +{{ + config( + materialized='table', + post_hook="CREATE INDEX ON {{ this }} USING GIST (geom)" + ) +}} -WITH latest_loaded_date AS ( - SELECT - year_old, - year_new, - departement, - MAX(loaded_date) AS max_loaded_date - FROM - {{ source('public', 'ocsge_diff') }} - GROUP BY - year_old, - year_new, - departement -) SELECT - loaded_date, - year_old, - year_new, + foo.loaded_date, + foo.year_old, + foo.year_new, cs_new, cs_old, us_new, us_old, - departement, - surface, + foo.departement, + ST_Area(geom) AS surface, + uuid, CASE WHEN old_is_imper = false AND @@ -54,19 +47,20 @@ SELECT geom FROM ( SELECT - ocsge.*, - ST_Area(geom) AS surface, + ocsge.loaded_date, + ocsge.year_old, + ocsge.year_new, + ocsge.cs_new, + ocsge.cs_old, + ocsge.us_new, + ocsge.us_old, + ocsge.departement, + ST_MakeValid(ocsge.geom) AS geom, {{ is_artificial('cs_old', 'us_old') }} AS old_is_artif, {{ is_impermeable('cs_old') }} AS old_is_imper, {{ is_artificial('cs_new', 'us_new') }} AS new_is_artif, - {{ is_impermeable('cs_new') }} AS new_is_imper + {{ is_impermeable('cs_new') }} AS new_is_imper, + ocsge.uuid FROM - {{ source('public', 'ocsge_diff') }} AS ocsge - JOIN - latest_loaded_date AS ld - ON - ocsge.year_old = ld.year_old - AND ocsge.year_new = ld.year_new - AND ocsge.departement = ld.departement - AND ocsge.loaded_date = ld.max_loaded_date + {{ source('public', 'ocsge_difference') }} AS ocsge ) AS foo diff --git a/airflow/sql/sparte/models/ocsge/final/artificial_area.sql b/airflow/sql/sparte/models/ocsge/final/artificial_area.sql deleted file mode 100644 index 5d06333ce..000000000 --- a/airflow/sql/sparte/models/ocsge/final/artificial_area.sql +++ /dev/null @@ -1,19 +0,0 @@ -{{ - config( - 
materialized='table', - tags=['final'], - ) }} - -SELECT *, ST_Area(geom) FROM ( - SELECT - ocsge.departement, - ocsge.commune_code, - ST_Union(geom) AS geom - FROM - {{ ref("occupation_du_sol_commune") }} AS ocsge - WHERE - ocsge.is_artificial = true - GROUP BY - ocsge.commune_code, - ocsge.departement -) as foo diff --git a/airflow/sql/sparte/models/ocsge/final/commune_diff.sql b/airflow/sql/sparte/models/ocsge/final/commune_diff.sql deleted file mode 100644 index cfd19c0ca..000000000 --- a/airflow/sql/sparte/models/ocsge/final/commune_diff.sql +++ /dev/null @@ -1,18 +0,0 @@ -{{ config(materialized='table') }} - -SELECT - commune_code, - departement, - year_old, - year_new, - sum(CASE WHEN new_is_impermeable THEN surface ELSE 0 END) AS surface_new_is_impermeable, - sum(CASE WHEN new_not_impermeable THEN surface ELSE 0 END) AS surface_new_not_impermeable, - sum(CASE WHEN new_is_artificial THEN surface ELSE 0 END) AS surface_new_is_artificial, - sum(CASE WHEN new_not_artificial THEN surface ELSE 0 END) AS surface_new_not_artificial -FROM - {{ ref('difference_commune') }} -GROUP BY - commune_code, - departement, - year_old, - year_new diff --git a/airflow/sql/sparte/models/ocsge/final/commune_sol.sql b/airflow/sql/sparte/models/ocsge/final/commune_sol.sql deleted file mode 100644 index 2f1edcdfe..000000000 --- a/airflow/sql/sparte/models/ocsge/final/commune_sol.sql +++ /dev/null @@ -1,15 +0,0 @@ -{{ config(materialized='table') }} - -SELECT - sum(surface), - commune_code, - code_cs, - code_us, - departement -FROM - {{ ref('occupation_du_sol_commune') }} -GROUP BY - commune_code, - code_cs, - code_us, - departement diff --git a/airflow/sql/sparte/models/ocsge/for_app/app_ocsge.sql b/airflow/sql/sparte/models/ocsge/for_app/app_ocsge.sql new file mode 100644 index 000000000..9a2d3adb7 --- /dev/null +++ b/airflow/sql/sparte/models/ocsge/for_app/app_ocsge.sql @@ -0,0 +1,16 @@ + +{{ config(materialized='table') }} + +SELECT + code_cs as couverture, + code_us as usage, + 
year, + ST_Transform(geom, 4326) as mpoly, + id as id_source, + is_artificial, + surface, + 2154 as srid_source, + departement, + is_impermeable +FROM + {{ ref("occupation_du_sol") }} diff --git a/airflow/sql/sparte/models/ocsge/for_app/app_ocsgediff.sql b/airflow/sql/sparte/models/ocsge/for_app/app_ocsgediff.sql new file mode 100644 index 000000000..33e092428 --- /dev/null +++ b/airflow/sql/sparte/models/ocsge/for_app/app_ocsgediff.sql @@ -0,0 +1,20 @@ + +{{ config(materialized='table') }} + +SELECT + year_old, + year_new, + cs_new, + cs_old, + us_new, + us_old, + ST_Transform(geom, 4326) as mpoly, + surface, + 2154 as srid_source, + departement, + new_is_artificial as is_new_artif, + new_not_artificial as is_new_natural, + new_is_impermeable as is_new_impermeable, + new_not_impermeable as is_new_not_impermeable +FROM + {{ ref("difference") }} diff --git a/airflow/sql/sparte/models/ocsge/for_app/app_zoneconstruite.sql b/airflow/sql/sparte/models/ocsge/for_app/app_zoneconstruite.sql new file mode 100644 index 000000000..781b8029e --- /dev/null +++ b/airflow/sql/sparte/models/ocsge/for_app/app_zoneconstruite.sql @@ -0,0 +1,13 @@ + +{{ config(materialized='table') }} + +SELECT + id as id_source, + year as millesime, + ST_Transform(geom, 4326) as mpoly, + year, + surface, + 2154 as srid_source, + departement +FROM + {{ ref("zone_construite") }} diff --git a/airflow/sql/sparte/models/ocsge/intersected/artificial_commune b/airflow/sql/sparte/models/ocsge/intersected/artificial_commune new file mode 100644 index 000000000..9444531fe --- /dev/null +++ b/airflow/sql/sparte/models/ocsge/intersected/artificial_commune @@ -0,0 +1,19 @@ +{{ config(materialized='table')}} + +SELECT *, ST_Area(mpoly) as surface FROM ( + SELECT + ocsge.departement, + ocsge.year, + ocsge.commune_code as city, + ARRAY_AGG(ocsge.uuid) AS uuids, + ST_Union(geom) AS mpoly + FROM + {{ ref("occupation_du_sol_commune") }} AS ocsge + WHERE + ocsge.is_artificial = true + GROUP BY + ocsge.commune_code, + 
ocsge.departement, + ocsge.year, + ocsge.loaded_date +) as foo diff --git a/airflow/sql/sparte/models/ocsge/difference_commune.sql b/airflow/sql/sparte/models/ocsge/intersected/difference_commune.sql similarity index 66% rename from airflow/sql/sparte/models/ocsge/difference_commune.sql rename to airflow/sql/sparte/models/ocsge/intersected/difference_commune.sql index aff4e126d..9de9eb2c5 100644 --- a/airflow/sql/sparte/models/ocsge/difference_commune.sql +++ b/airflow/sql/sparte/models/ocsge/intersected/difference_commune.sql @@ -1,8 +1,15 @@ -{{ config(materialized='table') }} +{{ + config( + materialized='incremental', + post_hook='DELETE FROM {{ this }} WHERE uuid not in (SELECT uuid FROM {{ ref("difference") }} )' + + ) +}} SELECT *, ST_Area(geom) as surface FROM ( SELECT commune.code as commune_code, + ocsge.loaded_date, ocsge.year_old, ocsge.year_new, ocsge.departement, @@ -14,6 +21,7 @@ SELECT *, ST_Area(geom) as surface FROM ( ocsge.us_old, ocsge.cs_new, ocsge.us_new, + ocsge.uuid, ST_Intersection(commune.geom, ocsge.geom) AS geom FROM {{ ref("commune") }} AS commune @@ -23,4 +31,8 @@ SELECT *, ST_Area(geom) as surface FROM ( ocsge.departement = commune.departement AND ST_Intersects(commune.geom, ocsge.geom) + + {% if is_incremental() %} + WHERE ocsge.uuid not in (SELECT bar.uuid from {{ this }} as bar) + {% endif %} ) as foo diff --git a/airflow/sql/sparte/models/ocsge/occupation_du_sol_commune.sql b/airflow/sql/sparte/models/ocsge/intersected/occupation_du_sol_commune.sql similarity index 61% rename from airflow/sql/sparte/models/ocsge/occupation_du_sol_commune.sql rename to airflow/sql/sparte/models/ocsge/intersected/occupation_du_sol_commune.sql index 89b848ecf..e1d0901c2 100644 --- a/airflow/sql/sparte/models/ocsge/occupation_du_sol_commune.sql +++ b/airflow/sql/sparte/models/ocsge/intersected/occupation_du_sol_commune.sql @@ -1,8 +1,15 @@ -{{ config(materialized='table') }} +{{ + config( + materialized='incremental', + post_hook='DELETE FROM {{ this 
}} WHERE uuid not in (SELECT uuid FROM {{ ref("occupation_du_sol") }} )' + ) +}} SELECT *, ST_Area(geom) as surface FROM ( SELECT commune.code AS commune_code, + ocsge.uuid, + ocsge.loaded_date, ocsge.year, ocsge.departement, ocsge.code_cs, @@ -18,4 +25,9 @@ SELECT *, ST_Area(geom) as surface FROM ( ocsge.departement = commune.departement AND ST_Intersects(commune.geom, ocsge.geom) + + {% if is_incremental() %} + WHERE ocsge.uuid not in (SELECT foo.uuid from {{ this }} as foo) + {% endif %} + ) as foo diff --git a/airflow/sql/sparte/models/ocsge/occupation_du_sol_zonage_urbanisme.sql b/airflow/sql/sparte/models/ocsge/intersected/occupation_du_sol_zonage_urbanisme.sql similarity index 58% rename from airflow/sql/sparte/models/ocsge/occupation_du_sol_zonage_urbanisme.sql rename to airflow/sql/sparte/models/ocsge/intersected/occupation_du_sol_zonage_urbanisme.sql index b148397fc..b08206f96 100644 --- a/airflow/sql/sparte/models/ocsge/occupation_du_sol_zonage_urbanisme.sql +++ b/airflow/sql/sparte/models/ocsge/intersected/occupation_du_sol_zonage_urbanisme.sql @@ -1,12 +1,19 @@ -{{ config(materialized='table') }} +{{ + config( + materialized='incremental', + post_hook='DELETE FROM {{ this }} WHERE uuid not in (SELECT uuid FROM {{ ref("occupation_du_sol") }} )' + ) +}} SELECT *, ST_Area(geom) as surface FROM ( SELECT zonage.libelle AS zonage_libelle, + ocsge.loaded_date, ocsge.year, ocsge.departement, ocsge.code_cs, ocsge.code_us, + ocsge.uuid, ocsge.is_artificial, ocsge.is_impermeable, ST_Intersection(zonage.geom, ocsge.geom) AS geom @@ -16,5 +23,9 @@ SELECT *, ST_Area(geom) as surface FROM ( {{ ref("occupation_du_sol") }} AS ocsge ON ST_Intersects(zonage.geom, ocsge.geom) - -- TODO: reproject zonage.gome to ocsge.geom srid + + {% if is_incremental() %} + WHERE ocsge.uuid not in (SELECT bar.uuid from {{ this }} as bar) + {% endif %} + ) as foo diff --git a/airflow/sql/sparte/models/ocsge/occupation_du_sol.sql b/airflow/sql/sparte/models/ocsge/occupation_du_sol.sql 
index 160279963..cb15e281c 100644 --- a/airflow/sql/sparte/models/ocsge/occupation_du_sol.sql +++ b/airflow/sql/sparte/models/ocsge/occupation_du_sol.sql @@ -6,38 +6,23 @@ indexes=[ {'columns': ['departement','year'], 'type': 'btree'}, {'columns': ['departement'], 'type': 'btree'}, - {'columns': ['geom'], 'type': 'gist'} - ] + {'columns': ['uuid'], 'type': 'btree'} + ], + post_hook="CREATE INDEX ON {{ this }} USING GIST (geom)" ) }} -WITH latest_loaded_date AS ( - SELECT - year, - departement, - MAX(loaded_date) AS max_loaded_date - FROM - {{ source('public', 'ocsge_occupation_du_sol') }} - GROUP BY - year, - departement -) SELECT - ocsge.loaded_date, - ocsge.id, - ocsge.code_cs, - ocsge.code_us, - ocsge.departement, - ocsge.year, + loaded_date, + id, + code_cs, + code_us, + departement, + year, ST_area(geom) AS surface, {{ is_impermeable('code_cs') }} as is_impermeable, {{ is_artificial('code_cs', 'code_us') }} as is_artificial, - ocsge.geom + uuid, + ST_MakeValid(geom) AS geom FROM {{ source('public', 'ocsge_occupation_du_sol') }} AS ocsge -JOIN - latest_loaded_date AS ld -ON - ocsge.year = ld.year - AND ocsge.departement = ld.departement - AND ocsge.loaded_date = ld.max_loaded_date diff --git a/airflow/sql/sparte/models/ocsge/schema.yml b/airflow/sql/sparte/models/ocsge/schema.yml index 38397ce18..6591b9ce5 100644 --- a/airflow/sql/sparte/models/ocsge/schema.yml +++ b/airflow/sql/sparte/models/ocsge/schema.yml @@ -87,7 +87,6 @@ occupation_du_sol_test: &occupation_du_sol_test - not_null - accepted_values: *us_accepted_values - zone_construite_test: &zone_construite_test - name: geom tests: @@ -114,9 +113,15 @@ models: sources: - name: public tables: - - name: ocsge_diff + - name: ocsge_difference_staging + columns: *difference_test + - name: ocsge_difference columns: *difference_test - name: ocsge_occupation_du_sol columns: *occupation_du_sol_test + - name: ocsge_occupation_du_sol_staging + columns: *occupation_du_sol_test - name: ocsge_zone_construite columns: 
*zone_construite_test + - name: ocsge_zone_construite_staging + columns: *zone_construite_test diff --git a/airflow/sql/sparte/models/ocsge/zone_construite.sql b/airflow/sql/sparte/models/ocsge/zone_construite.sql index 29646023c..b324df420 100644 --- a/airflow/sql/sparte/models/ocsge/zone_construite.sql +++ b/airflow/sql/sparte/models/ocsge/zone_construite.sql @@ -1,27 +1,17 @@ -{{ config(materialized='table') }} +{{ + config( + materialized='table', + post_hook="CREATE INDEX ON {{ this }} USING GIST (geom)" + ) +}} -WITH latest_loaded_date AS ( - SELECT - year, - departement, - MAX(loaded_date) AS max_loaded_date - FROM - {{ source('public', 'ocsge_zone_construite') }} - GROUP BY - year, - departement -) SELECT - ocsge.loaded_date, - ocsge.id, - ocsge.year, - ocsge.departement, - ocsge.geom + loaded_date, + id, + year, + departement, + ST_MakeValid(geom) AS geom, + ST_Area(geom) as surface, + uuid FROM {{ source('public', 'ocsge_zone_construite') }} as ocsge -JOIN - latest_loaded_date AS ld -ON - ocsge.year = ld.year - AND ocsge.departement = ld.departement - AND ocsge.loaded_date = ld.max_loaded_date diff --git a/airflow/sql/sparte/tests/generic/is_valid_geom.sql b/airflow/sql/sparte/tests/generic/is_valid_geom.sql index 8b41dd5db..81a2cd217 100644 --- a/airflow/sql/sparte/tests/generic/is_valid_geom.sql +++ b/airflow/sql/sparte/tests/generic/is_valid_geom.sql @@ -1,5 +1,7 @@ {% test is_valid_geom(model, column_name) %} +{{ config(severity = 'warn') }} + with validation_errors as ( select {{ column_name }} @@ -8,7 +10,6 @@ with validation_errors as ( ) -select * -from validation_errors +select * from validation_errors {% endtest %} From b58c0a6d24ec0b00834f8b178549ac3c5093b5c9 Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Thu, 15 Aug 2024 13:33:49 +0200 Subject: [PATCH 13/99] temp --- airflow/dags/ocsge.py | 29 +- airflow/dependencies/ocsge/normalization.py | 6 +- airflow/requirements.txt | 2 +- .../sql/sparte/models/ocsge/difference.sql | 5 + .../ocsge/for_app/app_artificialarea.sql | 11 + .../models/ocsge/for_app/app_communediff.sql | 25 ++ .../ocsge/intersected/artificial_commune | 19 -- .../ocsge/intersected/artificial_commune.sql | 27 ++ airflow/sql/sparte/models/ocsge/schema.yml | 303 ++++++++++++++---- .../sparte/tests/generic/is_valid_geom.sql | 2 - .../tests/generic/is_valid_geom_warning.sql | 15 + 11 files changed, 338 insertions(+), 106 deletions(-) create mode 100644 airflow/sql/sparte/models/ocsge/for_app/app_artificialarea.sql create mode 100644 airflow/sql/sparte/models/ocsge/for_app/app_communediff.sql delete mode 100644 airflow/sql/sparte/models/ocsge/intersected/artificial_commune create mode 100644 airflow/sql/sparte/models/ocsge/intersected/artificial_commune.sql create mode 100644 airflow/sql/sparte/tests/generic/is_valid_geom_warning.sql diff --git a/airflow/dags/ocsge.py b/airflow/dags/ocsge.py index edf7f9da4..98c533f1b 100644 --- a/airflow/dags/ocsge.py +++ b/airflow/dags/ocsge.py @@ -60,6 +60,18 @@ def get_paths_from_directory(directory: str) -> list[tuple[str, str]]: sources = { # noqa: E501 + "01": { + DatasetName.OCCUPATION_DU_SOL_ET_ZONE_CONSTRUITE: { + 2018: "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D001_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D001_2018-01-01.7z", # noqa: E501 + 2021: "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D001_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D001_2021-01-01.7z", # noqa: E501 + }, + DatasetName.DIFFERENCE: { + ( + 2018, + 2021, + ): "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D001_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D001_2018-2021.7z", # noqa: E501 + }, + }, "38": { DatasetName.OCCUPATION_DU_SOL_ET_ZONE_CONSTRUITE: { 2018: 
"https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D038_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D038_2018-01-01.7z", # noqa: E501 @@ -408,17 +420,11 @@ def ingest_ocsge(path, **context) -> int: return loaded_date - @task.bash(retries=0) - def dbt_test_ocsge(**context): - dataset = context["params"]["dataset"] - dbt_select = " ".join([vars["dbt_selector"] for vars in vars_dataset[dataset]]) - return 'cd "${AIRFLOW_HOME}/sql/sparte" && dbt test -s ' + dbt_select - @task.bash(retries=0, trigger_rule="all_success") def dbt_run_ocsge(**context): dataset = context["params"]["dataset"] dbt_select = " ".join([f'{vars["dbt_selector"]}+' for vars in vars_dataset[dataset]]) - return 'cd "${AIRFLOW_HOME}/sql/sparte" && dbt run -s ' + dbt_select + return 'cd "${AIRFLOW_HOME}/sql/sparte" && dbt build -s ' + dbt_select @task.python(trigger_rule="all_success") def delete_previously_loaded_data_in_dw(**context) -> dict: @@ -431,8 +437,11 @@ def delete_previously_loaded_data_in_dw(**context) -> dict: results = {} for vars in vars_dataset[dataset]: - cur.execute(vars["delete_on_dwt"](departement, years)) - results[vars["dw_source"]] = cur.rowcount + try: + cur.execute(vars["delete_on_dwt"](departement, years)) + results[vars["dw_source"]] = cur.rowcount + except Exception as e: + results[vars["dw_source"]] = str(e) conn.commit() conn.close() @@ -480,7 +489,6 @@ def load_data_in_app(**context): delete_dw = delete_previously_loaded_data_in_dw() test_result_staging = db_test_ocsge_staging() loaded_date = ingest_ocsge(path=path) - test_result = dbt_test_ocsge() dbt_run_ocsge_result = dbt_run_ocsge() delete_app = delete_previously_loaded_data_in_app() load_app = load_data_in_app() @@ -493,7 +501,6 @@ def load_data_in_app(**context): >> test_result_staging >> delete_dw >> loaded_date - >> test_result >> dbt_run_ocsge_result >> delete_app >> load_app diff --git a/airflow/dependencies/ocsge/normalization.py b/airflow/dependencies/ocsge/normalization.py index 
899e0539a..26b5f5360 100644 --- a/airflow/dependencies/ocsge/normalization.py +++ b/airflow/dependencies/ocsge/normalization.py @@ -22,7 +22,7 @@ def ocsge_diff_normalization_sql( {fields['cs_old']} AS cs_old, {fields['us_new']} AS us_new, {fields['us_old']} AS us_old, - cast({departement} as text) AS departement, + '{departement}' AS departement, CreateUUID() as uuid, GEOMETRY as geom FROM @@ -42,7 +42,7 @@ def ocsge_occupation_du_sol_normalization_sql( code_cs AS code_cs, code_us AS code_us, GEOMETRY AS geom, - cast({departement} as text) AS departement, + '{departement}' AS departement, {years[0]} AS year, CreateUUID() as uuid FROM @@ -60,7 +60,7 @@ def ocsge_zone_construite_normalization_sql( {loaded_date} AS loaded_date, ID AS id, {years[0]} AS year, - cast({departement} as text) AS departement, + '{departement}' AS departement, CreateUUID() as uuid, GEOMETRY AS geom FROM diff --git a/airflow/requirements.txt b/airflow/requirements.txt index 6b2c9b0b4..94e86ac9f 100644 --- a/airflow/requirements.txt +++ b/airflow/requirements.txt @@ -4,7 +4,7 @@ dependency-injector==4.41.0 py7zr==0.21.1 apache-airflow-providers-postgres==5.11.2 requests==2.32.3 -dbt-core==1.8.5 +dbt-core==1.8.2 dbt-postgres==1.8.2 tqdm==4.66.5 pysftp==0.2.9 diff --git a/airflow/sql/sparte/models/ocsge/difference.sql b/airflow/sql/sparte/models/ocsge/difference.sql index b143b516a..783f7cf26 100644 --- a/airflow/sql/sparte/models/ocsge/difference.sql +++ b/airflow/sql/sparte/models/ocsge/difference.sql @@ -63,4 +63,9 @@ FROM ( ocsge.uuid FROM {{ source('public', 'ocsge_difference') }} AS ocsge + WHERE + cs_new IS NOT NULL AND + cs_old IS NOT NULL AND + us_new IS NOT NULL AND + us_old IS NOT NULL ) AS foo diff --git a/airflow/sql/sparte/models/ocsge/for_app/app_artificialarea.sql b/airflow/sql/sparte/models/ocsge/for_app/app_artificialarea.sql new file mode 100644 index 000000000..3ba5bfb61 --- /dev/null +++ b/airflow/sql/sparte/models/ocsge/for_app/app_artificialarea.sql @@ -0,0 +1,11 @@ +{{ 
config(materialized='table') }} + +SELECT + year, + surface, + 2154 as srid_source, + departement, + commune_code as city, + ST_Transform(geom, 4326) as mpoly +FROM + {{ ref('artificial_commune') }} diff --git a/airflow/sql/sparte/models/ocsge/for_app/app_communediff.sql b/airflow/sql/sparte/models/ocsge/for_app/app_communediff.sql new file mode 100644 index 000000000..d469512f6 --- /dev/null +++ b/airflow/sql/sparte/models/ocsge/for_app/app_communediff.sql @@ -0,0 +1,25 @@ +{{ config(materialized='table') }} + +SELECT + foo.year_old, + foo.year_new, + foo.new_artif, + foo.new_natural, + app_commune.id as city_id, + foo.new_artif - foo.new_natural AS net_artif +FROM ( + SELECT + year_old, + year_new, + SUM(CASE WHEN new_is_artificial THEN surface ELSE 0 END) AS new_artif, + SUM(CASE WHEN new_not_artificial THEN surface ELSE 0 END) AS new_natural, + commune_code + FROM + {{ ref("difference_commune") }} + GROUP BY + commune_code, + year_old, + year_new +) as foo +LEFT JOIN {{ ref('app_commune') }} AS app_commune +ON app_commune.insee = foo.commune_code diff --git a/airflow/sql/sparte/models/ocsge/intersected/artificial_commune b/airflow/sql/sparte/models/ocsge/intersected/artificial_commune deleted file mode 100644 index 9444531fe..000000000 --- a/airflow/sql/sparte/models/ocsge/intersected/artificial_commune +++ /dev/null @@ -1,19 +0,0 @@ -{{ config(materialized='table')}} - -SELECT *, ST_Area(mpoly) as surface FROM ( - SELECT - ocsge.departement, - ocsge.year, - ocsge.commune_code as city, - ARRAY_AGG(ocsge.uuid) AS uuids, - ST_Union(geom) AS mpoly - FROM - {{ ref("occupation_du_sol_commune") }} AS ocsge - WHERE - ocsge.is_artificial = true - GROUP BY - ocsge.commune_code, - ocsge.departement, - ocsge.year, - ocsge.loaded_date -) as foo diff --git a/airflow/sql/sparte/models/ocsge/intersected/artificial_commune.sql b/airflow/sql/sparte/models/ocsge/intersected/artificial_commune.sql new file mode 100644 index 000000000..29357d43b --- /dev/null +++ 
b/airflow/sql/sparte/models/ocsge/intersected/artificial_commune.sql @@ -0,0 +1,27 @@ +{{ + config( + materialized='incremental', + post_hook="DELETE FROM {{ this }} WHERE NOT uuids <@ (SELECT ARRAY_AGG(uuid) FROM {{ ref('occupation_du_sol') }} )" + ) +}} + +SELECT *, ST_Area(geom) as surface FROM ( + SELECT + ocsge.departement, + ocsge.year, + ocsge.commune_code, + ARRAY_AGG(ocsge.uuid) AS uuids, + ST_Union(geom) as geom + FROM + {{ ref("occupation_du_sol_commune") }} AS ocsge + WHERE + ocsge.is_artificial = true + GROUP BY + ocsge.commune_code, + ocsge.departement, + ocsge.year, + ocsge.loaded_date + {% if is_incremental() %} + HAVING NOT ARRAY_AGG(ocsge.uuid) IN (SELECT uuids FROM {{ this }}) + {% endif %} +) as foo diff --git a/airflow/sql/sparte/models/ocsge/schema.yml b/airflow/sql/sparte/models/ocsge/schema.yml index 6591b9ce5..a25827a5c 100644 --- a/airflow/sql/sparte/models/ocsge/schema.yml +++ b/airflow/sql/sparte/models/ocsge/schema.yml @@ -42,86 +42,249 @@ us_accepted_values: &us_accepted_values "US6.6" ] -difference_test: &difference_test - - name: geom - tests: - - not_null - - is_valid_geom - - name: cs_new - tests: - - not_null - - accepted_values: *cs_accepted_values - - name: cs_old - tests: - - not_null - - accepted_values: *cs_accepted_values - - name: us_new - tests: - - not_null - - accepted_values: *us_accepted_values - - name: us_old - tests: - - not_null - - accepted_values: *us_accepted_values - -occupation_du_sol_test: &occupation_du_sol_test - - name: geom - tests: - - not_null - - is_valid_geom - - name: departement - tests: - - not_null - - name: year - tests: - - not_null - - name: id - tests: - - not_null - - name: code_cs - tests: - - not_null - - accepted_values: *cs_accepted_values - - name: code_us - tests: - - not_null - - accepted_values: *us_accepted_values - -zone_construite_test: &zone_construite_test - - name: geom - tests: - - not_null - - is_valid_geom - - name: departement - tests: - - not_null - - name: year - tests: - 
- not_null - - name: id - tests: - - not_null - +not_null_cs_us_config_staging: ¬_null_cs_us_config_staging + config: + error_if: ">100" + warn_if: ">0" models: + - name: occupation_du_sol_commune + - name: artificial_commune - name: zone_construite - - name: zone_artificielle + columns: + - name: loaded_date + data_tests: + - not_null + - name: id + data_tests: + - not_null + - unique + - name: year + data_tests: + - not_null + - name: departement + data_tests: + - not_null + - relationships: + to: ref('departement') + field: code + - name: geom + data_tests: + - not_null + - is_valid_geom + - name: surface + data_tests: + - not_null + - name: uuid + data_tests: + - not_null + - unique - name: occupation_du_sol + columns: + - name: loaded_date + data_tests: + - not_null + - name: id + data_tests: + - not_null + - name: code_cs + data_tests: + - not_null + - accepted_values: *cs_accepted_values + - name: code_us + data_tests: + - not_null + - accepted_values: *us_accepted_values + - name: departement + data_tests: + - not_null + - relationships: + to: ref('departement') + field: code + - name: year + data_tests: + - not_null + - name: surface + data_tests: + - not_null + - name: is_impermeable + data_tests: + - not_null + - name: is_artificial + data_tests: + - not_null + - name: uuid + data_tests: + - not_null + - unique + - name: geom + data_tests: + - not_null + - is_valid_geom - name: difference - - name: occupation_du_sol_zonage_urbanisme + columns: + - name: loaded_date + data_tests: + - not_null + - name: year_old + data_tests: + - not_null + - name: year_new + data_tests: + - not_null + - name: cs_new + data_tests: + - not_null + - accepted_values: *cs_accepted_values + - name: cs_old + data_tests: + - not_null + - accepted_values: *cs_accepted_values + - name: us_new + data_tests: + - not_null + - accepted_values: *us_accepted_values + - name: us_old + data_tests: + - not_null + - accepted_values: *us_accepted_values + - name: departement + data_tests: + - 
not_null + - relationships: + to: ref('departement') + field: code + - name: surface + data_tests: + - not_null + - name: uuid + data_tests: + - not_null + - unique + - name: new_is_impermeable + data_tests: + - not_null + - name: new_not_impermeable + data_tests: + - not_null + - name: new_is_artificial + data_tests: + - not_null + - name: new_not_artificial + data_tests: + - not_null + - name: geom + data_tests: + - not_null + - is_valid_geom sources: - name: public tables: + - name: ocsge_zone_construite_staging + columns: + - name: loaded_date + data_tests: + - not_null + - name: id + data_tests: + - not_null + - unique + - name: year + data_tests: + - not_null + - name: departement + data_tests: + - not_null + - relationships: + to: ref('departement') + field: code + - name: uuid + data_tests: + - not_null + - unique + - name: geom + data_tests: + - unique + - not_null + - is_valid_geom_warning + - name: ocsge_occupation_du_sol_staging + columns: + - name: loaded_date + data_tests: + - not_null + - name: id + data_tests: + - not_null + - unique + - name: code_cs + data_tests: + - not_null + - accepted_values: *cs_accepted_values + - name: code_us + data_tests: + - not_null + - accepted_values: *us_accepted_values + - name: geom + data_tests: + - unique + - not_null + - is_valid_geom_warning + - name: departement + data_tests: + - not_null + - relationships: + to: ref('departement') + field: code + - name: year + data_tests: + - not_null + - name: uuid + data_tests: + - not_null + - unique - name: ocsge_difference_staging - columns: *difference_test + columns: + - name: loaded_date + data_tests: + - not_null + - name: year_old + data_tests: + - not_null + - name: year_new + data_tests: + - not_null + - name: cs_new + data_tests: + - not_null: *not_null_cs_us_config_staging + - accepted_values: *cs_accepted_values + - name: cs_old + data_tests: + - not_null: *not_null_cs_us_config_staging + - accepted_values: *cs_accepted_values + - name: us_new + data_tests: + 
- not_null: *not_null_cs_us_config_staging + - accepted_values: *us_accepted_values + - name: us_old + data_tests: + - not_null: *not_null_cs_us_config_staging + - accepted_values: *us_accepted_values + - name: departement + data_tests: + - not_null + - relationships: + to: ref('departement') + field: code + - name: uuid + data_tests: + - not_null + - unique + - name: geom + data_tests: + - unique + - not_null + - is_valid_geom_warning - name: ocsge_difference - columns: *difference_test - name: ocsge_occupation_du_sol - columns: *occupation_du_sol_test - - name: ocsge_occupation_du_sol_staging - columns: *occupation_du_sol_test - name: ocsge_zone_construite - columns: *zone_construite_test - - name: ocsge_zone_construite_staging - columns: *zone_construite_test diff --git a/airflow/sql/sparte/tests/generic/is_valid_geom.sql b/airflow/sql/sparte/tests/generic/is_valid_geom.sql index 81a2cd217..7154a83dc 100644 --- a/airflow/sql/sparte/tests/generic/is_valid_geom.sql +++ b/airflow/sql/sparte/tests/generic/is_valid_geom.sql @@ -1,7 +1,5 @@ {% test is_valid_geom(model, column_name) %} -{{ config(severity = 'warn') }} - with validation_errors as ( select {{ column_name }} diff --git a/airflow/sql/sparte/tests/generic/is_valid_geom_warning.sql b/airflow/sql/sparte/tests/generic/is_valid_geom_warning.sql new file mode 100644 index 000000000..3fa579b03 --- /dev/null +++ b/airflow/sql/sparte/tests/generic/is_valid_geom_warning.sql @@ -0,0 +1,15 @@ +{% test is_valid_geom_warning(model, column_name) %} + +{{ config(severity = 'warn') }} + +with validation_errors as ( + + select {{ column_name }} + from {{ model }} + where not ST_IsValid({{ column_name }}) + +) + +select * from validation_errors + +{% endtest %} From 72e118156ad2cd857d402a9e92f826f10493f180 Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Thu, 15 Aug 2024 14:26:32 +0200 Subject: [PATCH 14/99] temp --- airflow/sql/sparte/models/admin_express/schema.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/airflow/sql/sparte/models/admin_express/schema.yml b/airflow/sql/sparte/models/admin_express/schema.yml index 671fae862..19f6eadf1 100644 --- a/airflow/sql/sparte/models/admin_express/schema.yml +++ b/airflow/sql/sparte/models/admin_express/schema.yml @@ -3,6 +3,7 @@ version: 2 models: - name: commune + - name: departement sources: - name: public From 667d3d21c9eb22bd846e5e7a6e3fcea83ab838ba Mon Sep 17 00:00:00 2001 From: "Alexis A." Date: Thu, 15 Aug 2024 18:59:09 +0200 Subject: [PATCH 15/99] temp --- airflow/dags/gpu.py | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/airflow/dags/gpu.py b/airflow/dags/gpu.py index 4ff63e3f2..23f188755 100644 --- a/airflow/dags/gpu.py +++ b/airflow/dags/gpu.py @@ -1,6 +1,7 @@ from airflow.decorators import dag, task from airflow.operators.bash import BashOperator from dependencies.container import Container +from dependencies.utils import multiline_string_to_single_line from pendulum import datetime @@ -30,6 +31,36 @@ def download() -> str: def ingest(path_on_bucket: str) -> str: wfs_du_temp = f"/tmp/{wfs_du_filename}" Container().s3().get_file(path_on_bucket, wfs_du_temp) + sql = """ + SELECT + MD5Checksum( + ST_AsText(geom) || CastToText(gpu_timestamp) + ) AS checksum, + gpu_doc_id, + gpu_status + gpu_timestamp, + partition, + libelle, + libelong, + typezone, + destdomi, + nomfic, + urlfic, + insee, + datappro, + datvalid, + idurba, + idzone, + lib_idzone, + formdomi, + destoui, + destcdt, + destnon, + symbole, + geom + FROM + zone_urba + """ cmd = [ "ogr2ogr", "-dialect", @@ -42,12 +73,15 @@ def ingest(path_on_bucket: str) -> str: "GEOMETRY_NAME=geom", "-a_srs", "EPSG:4236", + "-nln", + "zone_urba", "-nlt", "MULTIPOLYGON", "-nlt", "PROMOTE_TO_MULTI", wfs_du_temp, - "zone_urba", + "-sql", + 
f'"{multiline_string_to_single_line(sql)}"', "--config", "PG_USE_COPY", "YES", From 894f05ebb50423f1abf75ed23d390610a720d341 Mon Sep 17 00:00:00 2001 From: "Alexis A." Date: Sat, 17 Aug 2024 21:23:21 +0200 Subject: [PATCH 16/99] temp --- Pipfile | 1 - Pipfile.lock | 753 ++++++++++++++++++++++-------------------- config/middlewares.py | 2 - config/settings.py | 2 - public_data/urls.py | 5 - public_data/views.py | 45 +-- 6 files changed, 402 insertions(+), 406 deletions(-) diff --git a/Pipfile b/Pipfile index 351abfdfd..fa27e1f8c 100644 --- a/Pipfile +++ b/Pipfile @@ -70,4 +70,3 @@ sentry-sdk = "*" setuptools = "*" py7zr = "*" dependency-injector = "*" -django-vectortiles = "*" diff --git a/Pipfile.lock b/Pipfile.lock index b8030d1eb..68ec55ef7 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "f01c885be0dc2329c5e8599d5f252f00fe6c3b1fcb8120ee80eed1d9ec059f08" + "sha256": "d2695e46bd7e953d3504c4c4e82a6943fb55741b1b9ab45541464bed48e411aa" }, "pipfile-spec": 6, "requires": { @@ -66,20 +66,20 @@ }, "boto3": { "hashes": [ - "sha256:894b222f7850b870a7ac63d7e378ac36c5c34375da24ddc30e131d9fafe369dc", - "sha256:ad648c89a4935590a69341e5430fc42a021489a22de171ee3fd7bb204f9ef0fa" + "sha256:23ca8d8f7a30c3bbd989808056b5fc5d68ff5121c02c722c6167b6b1bb7f8726", + "sha256:578bbd5e356005719b6b610d03edff7ea1b0824d078afe62d3fb8bea72f83a87" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==1.34.150" + "version": "==1.34.140" }, "botocore": { "hashes": [ - "sha256:4d23387e0f076d87b637a2a35c0ff2b8daca16eace36b63ce27f65630c6b375a", - "sha256:b988d47f4d502df85befce11a48002421e4e6ea4289997b5e0261bac5fa76ce6" + "sha256:43940d3a67d946ba3301631ba4078476a75f1015d4fb0fb0272d0b754b2cf9de", + "sha256:86302b2226c743b9eec7915a4c6cfaffd338ae03989cd9ee181078ef39d1ab39" ], "markers": "python_version >= '3.8'", - "version": "==1.34.150" + "version": "==1.34.140" }, "brotli": { "hashes": [ @@ -499,12 +499,12 @@ }, "django": { "hashes": [ - 
"sha256:3ec32bc2c616ab02834b9cac93143a7dc1cdcd5b822d78ac95fc20a38c534240", - "sha256:fc6919875a6226c7ffcae1a7d51e0f2ceaf6f160393180818f6c95f51b1e7b96" + "sha256:837e3cf1f6c31347a1396a3f6b65688f2b4bb4a11c580dcb628b5afe527b68a5", + "sha256:a17fcba2aad3fc7d46fdb23215095dbbd64e6174bf4589171e732b18b07e426a" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==4.2.14" + "version": "==4.2.13" }, "django-app-parameter": { "hashes": [ @@ -526,12 +526,12 @@ }, "django-crispy-forms": { "hashes": [ - "sha256:2db17ae08527201be1273f0df789e5f92819e23dd28fec69cffba7f3762e1a38", - "sha256:efc4c31e5202bbec6af70d383a35e12fc80ea769d464fb0e7fe21768bb138a20" + "sha256:92cb7b7786fb82646bad739343fd17a99a04b0b736a09e34f3b91f26cbca7e4a", + "sha256:a681cffd5af270b3082bda02cf8f81bdb5717ed66d2265e87e3df2f4ccf46277" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==2.3" + "version": "==2.2" }, "django-csp": { "hashes": [ @@ -569,12 +569,12 @@ }, "django-import-export": { "hashes": [ - "sha256:16ecc5a9f0df46bde6eb278a3e65ebda0ee1db55656f36440e9fb83f40ab85a3", - "sha256:730ae2443a02b1ba27d8dba078a27ae9123adfcabb78161b4f130843607b3df9" + "sha256:13de8d28bf3d7ffc45da5fdf60d53ff70c285827a39eea937f90450fbda0df3d", + "sha256:639f8488bdf155f46d15910220ef984d72fd2f5a8f4f448b49078125f11701d3" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==4.1.1" + "version": "==4.1.0" }, "django-redis": { "hashes": [ @@ -605,20 +605,12 @@ }, "django-storages": { "hashes": [ - "sha256:69aca94d26e6714d14ad63f33d13619e697508ee33ede184e462ed766dc2a73f", - "sha256:d61930acb4a25e3aebebc6addaf946a3b1df31c803a6bf1af2f31c9047febaa3" + "sha256:31f263389e95ce3a1b902fb5f739a7ed32895f7d8b80179fe7453ecc0dfe102e", + "sha256:95a12836cd998d4c7a4512347322331c662d9114c4344f932f5e9c0fce000608" ], "index": "pypi", "markers": "python_version >= '3.7'", - "version": "==1.14.4" - }, - "django-vectortiles": { - "hashes": [ - 
"sha256:9710492bb77328b202dcbd889b65f9d08807e7dc061a5593d86cfbf1ea545caf" - ], - "index": "pypi", - "markers": "python_version >= '3.6'", - "version": "==0.2.0" + "version": "==1.14.3" }, "djangorestframework": { "hashes": [ @@ -1274,55 +1266,55 @@ }, "numpy": { "hashes": [ - "sha256:08458fbf403bff5e2b45f08eda195d4b0c9b35682311da5a5a0a0925b11b9bd8", - "sha256:0fbb536eac80e27a2793ffd787895242b7f18ef792563d742c2d673bfcb75134", - "sha256:12f5d865d60fb9734e60a60f1d5afa6d962d8d4467c120a1c0cda6eb2964437d", - "sha256:15eb4eca47d36ec3f78cde0a3a2ee24cf05ca7396ef808dda2c0ddad7c2bde67", - "sha256:173a00b9995f73b79eb0191129f2455f1e34c203f559dd118636858cc452a1bf", - "sha256:1b902ce0e0a5bb7704556a217c4f63a7974f8f43e090aff03fcf262e0b135e02", - "sha256:1f682ea61a88479d9498bf2091fdcd722b090724b08b31d63e022adc063bad59", - "sha256:1f87fec1f9bc1efd23f4227becff04bd0e979e23ca50cc92ec88b38489db3b55", - "sha256:24a0e1befbfa14615b49ba9659d3d8818a0f4d8a1c5822af8696706fbda7310c", - "sha256:2c3a346ae20cfd80b6cfd3e60dc179963ef2ea58da5ec074fd3d9e7a1e7ba97f", - "sha256:36d3a9405fd7c511804dc56fc32974fa5533bdeb3cd1604d6b8ff1d292b819c4", - "sha256:3fdabe3e2a52bc4eff8dc7a5044342f8bd9f11ef0934fcd3289a788c0eb10018", - "sha256:4127d4303b9ac9f94ca0441138acead39928938660ca58329fe156f84b9f3015", - "sha256:4658c398d65d1b25e1760de3157011a80375da861709abd7cef3bad65d6543f9", - "sha256:485b87235796410c3519a699cfe1faab097e509e90ebb05dcd098db2ae87e7b3", - "sha256:529af13c5f4b7a932fb0e1911d3a75da204eff023ee5e0e79c1751564221a5c8", - "sha256:5a3d94942c331dd4e0e1147f7a8699a4aa47dffc11bf8a1523c12af8b2e91bbe", - "sha256:5daab361be6ddeb299a918a7c0864fa8618af66019138263247af405018b04e1", - "sha256:61728fba1e464f789b11deb78a57805c70b2ed02343560456190d0501ba37b0f", - "sha256:6790654cb13eab303d8402354fabd47472b24635700f631f041bd0b65e37298a", - "sha256:69ff563d43c69b1baba77af455dd0a839df8d25e8590e79c90fcbe1499ebde42", - "sha256:6bf4e6f4a2a2e26655717a1983ef6324f2664d7011f6ef7482e8c0b3d51e82ac", - 
"sha256:6e4eeb6eb2fced786e32e6d8df9e755ce5be920d17f7ce00bc38fcde8ccdbf9e", - "sha256:72dc22e9ec8f6eaa206deb1b1355eb2e253899d7347f5e2fae5f0af613741d06", - "sha256:75b4e316c5902d8163ef9d423b1c3f2f6252226d1aa5cd8a0a03a7d01ffc6268", - "sha256:7b9853803278db3bdcc6cd5beca37815b133e9e77ff3d4733c247414e78eb8d1", - "sha256:7d6fddc5fe258d3328cd8e3d7d3e02234c5d70e01ebe377a6ab92adb14039cb4", - "sha256:81b0893a39bc5b865b8bf89e9ad7807e16717f19868e9d234bdaf9b1f1393868", - "sha256:8efc84f01c1cd7e34b3fb310183e72fcdf55293ee736d679b6d35b35d80bba26", - "sha256:8fae4ebbf95a179c1156fab0b142b74e4ba4204c87bde8d3d8b6f9c34c5825ef", - "sha256:99d0d92a5e3613c33a5f01db206a33f8fdf3d71f2912b0de1739894668b7a93b", - "sha256:9adbd9bb520c866e1bfd7e10e1880a1f7749f1f6e5017686a5fbb9b72cf69f82", - "sha256:a1e01dcaab205fbece13c1410253a9eea1b1c9b61d237b6fa59bcc46e8e89343", - "sha256:a8fc2de81ad835d999113ddf87d1ea2b0f4704cbd947c948d2f5513deafe5a7b", - "sha256:b83e16a5511d1b1f8a88cbabb1a6f6a499f82c062a4251892d9ad5d609863fb7", - "sha256:bb2124fdc6e62baae159ebcfa368708867eb56806804d005860b6007388df171", - "sha256:bfc085b28d62ff4009364e7ca34b80a9a080cbd97c2c0630bb5f7f770dae9414", - "sha256:cbab9fc9c391700e3e1287666dfd82d8666d10e69a6c4a09ab97574c0b7ee0a7", - "sha256:e5eeca8067ad04bc8a2a8731183d51d7cbaac66d86085d5f4766ee6bf19c7f87", - "sha256:e9e81fa9017eaa416c056e5d9e71be93d05e2c3c2ab308d23307a8bc4443c368", - "sha256:ea2326a4dca88e4a274ba3a4405eb6c6467d3ffbd8c7d38632502eaae3820587", - "sha256:eacf3291e263d5a67d8c1a581a8ebbcfd6447204ef58828caf69a5e3e8c75990", - "sha256:ec87f5f8aca726117a1c9b7083e7656a9d0d606eec7299cc067bb83d26f16e0c", - "sha256:f1659887361a7151f89e79b276ed8dff3d75877df906328f14d8bb40bb4f5101", - "sha256:f9cf5ea551aec449206954b075db819f52adc1638d46a6738253a712d553c7b4" + "sha256:04494f6ec467ccb5369d1808570ae55f6ed9b5809d7f035059000a37b8d7e86f", + "sha256:0a43f0974d501842866cc83471bdb0116ba0dffdbaac33ec05e6afed5b615238", + "sha256:0e50842b2295ba8414c8c1d9d957083d5dfe9e16828b37de883f51fc53c4016f", 
+ "sha256:0ec84b9ba0654f3b962802edc91424331f423dcf5d5f926676e0150789cb3d95", + "sha256:17067d097ed036636fa79f6a869ac26df7db1ba22039d962422506640314933a", + "sha256:1cde1753efe513705a0c6d28f5884e22bdc30438bf0085c5c486cdaff40cd67a", + "sha256:1e72728e7501a450288fc8e1f9ebc73d90cfd4671ebbd631f3e7857c39bd16f2", + "sha256:2635dbd200c2d6faf2ef9a0d04f0ecc6b13b3cad54f7c67c61155138835515d2", + "sha256:2ce46fd0b8a0c947ae047d222f7136fc4d55538741373107574271bc00e20e8f", + "sha256:34f003cb88b1ba38cb9a9a4a3161c1604973d7f9d5552c38bc2f04f829536609", + "sha256:354f373279768fa5a584bac997de6a6c9bc535c482592d7a813bb0c09be6c76f", + "sha256:38ecb5b0582cd125f67a629072fed6f83562d9dd04d7e03256c9829bdec027ad", + "sha256:3e8e01233d57639b2e30966c63d36fcea099d17c53bf424d77f088b0f4babd86", + "sha256:3f6bed7f840d44c08ebdb73b1825282b801799e325bcbdfa6bc5c370e5aecc65", + "sha256:4554eb96f0fd263041baf16cf0881b3f5dafae7a59b1049acb9540c4d57bc8cb", + "sha256:46e161722e0f619749d1cd892167039015b2c2817296104487cd03ed4a955995", + "sha256:49d9f7d256fbc804391a7f72d4a617302b1afac1112fac19b6c6cec63fe7fe8a", + "sha256:4d2f62e55a4cd9c58c1d9a1c9edaedcd857a73cb6fda875bf79093f9d9086f85", + "sha256:5f64641b42b2429f56ee08b4f427a4d2daf916ec59686061de751a55aafa22e4", + "sha256:63b92c512d9dbcc37f9d81b123dec99fdb318ba38c8059afc78086fe73820275", + "sha256:6d7696c615765091cc5093f76fd1fa069870304beaccfd58b5dcc69e55ef49c1", + "sha256:79e843d186c8fb1b102bef3e2bc35ef81160ffef3194646a7fdd6a73c6b97196", + "sha256:821eedb7165ead9eebdb569986968b541f9908979c2da8a4967ecac4439bae3d", + "sha256:84554fc53daa8f6abf8e8a66e076aff6ece62de68523d9f665f32d2fc50fd66e", + "sha256:8d83bb187fb647643bd56e1ae43f273c7f4dbcdf94550d7938cfc32566756514", + "sha256:903703372d46bce88b6920a0cd86c3ad82dae2dbef157b5fc01b70ea1cfc430f", + "sha256:9416a5c2e92ace094e9f0082c5fd473502c91651fb896bc17690d6fc475128d6", + "sha256:9a1712c015831da583b21c5bfe15e8684137097969c6d22e8316ba66b5baabe4", + 
"sha256:9c27f0946a3536403efb0e1c28def1ae6730a72cd0d5878db38824855e3afc44", + "sha256:a356364941fb0593bb899a1076b92dfa2029f6f5b8ba88a14fd0984aaf76d0df", + "sha256:a7039a136017eaa92c1848152827e1424701532ca8e8967fe480fe1569dae581", + "sha256:acd3a644e4807e73b4e1867b769fbf1ce8c5d80e7caaef0d90dcdc640dfc9787", + "sha256:ad0c86f3455fbd0de6c31a3056eb822fc939f81b1618f10ff3406971893b62a5", + "sha256:b4c76e3d4c56f145d41b7b6751255feefae92edbc9a61e1758a98204200f30fc", + "sha256:b6f6a8f45d0313db07d6d1d37bd0b112f887e1369758a5419c0370ba915b3871", + "sha256:c5a59996dc61835133b56a32ebe4ef3740ea5bc19b3983ac60cc32be5a665d54", + "sha256:c73aafd1afca80afecb22718f8700b40ac7cab927b8abab3c3e337d70e10e5a2", + "sha256:cee6cc0584f71adefe2c908856ccc98702baf95ff80092e4ca46061538a2ba98", + "sha256:cef04d068f5fb0518a77857953193b6bb94809a806bd0a14983a8f12ada060c9", + "sha256:cf5d1c9e6837f8af9f92b6bd3e86d513cdc11f60fd62185cc49ec7d1aba34864", + "sha256:e61155fae27570692ad1d327e81c6cf27d535a5d7ef97648a17d922224b216de", + "sha256:e7f387600d424f91576af20518334df3d97bc76a300a755f9a8d6e4f5cadd289", + "sha256:ed08d2703b5972ec736451b818c2eb9da80d66c3e84aed1deeb0c345fefe461b", + "sha256:fbd6acc766814ea6443628f4e6751d0da6593dae29c08c0b2606164db026970c", + "sha256:feff59f27338135776f6d4e2ec7aeeac5d5f7a08a83e80869121ef8164b74af9" ], "index": "pypi", "markers": "python_version >= '3.9'", - "version": "==2.0.1" + "version": "==2.0.0" }, "openpyxl": { "hashes": [ @@ -1987,63 +1979,68 @@ }, "sentry-sdk": { "hashes": [ - "sha256:4ca16e9f5c7c6bc2fb2d5c956219f4926b148e511fffdbbde711dc94f1e0468f", - "sha256:d964710e2dbe015d9dc4ff0ad16225d68c3b36936b742a6fe0504565b760a3b7" + "sha256:6051562d2cfa8087bb8b4b8b79dc44690f8a054762a29c07e22588b1f619bfb5", + "sha256:aa4314f877d9cd9add5a0c9ba18e3f27f99f7de835ce36bd150e48a41c7c646f" ], "index": "pypi", "markers": "python_version >= '3.6'", - "version": "==2.11.0" + "version": "==2.8.0" }, "setuptools": { "hashes": [ - 
"sha256:5a03e1860cf56bb6ef48ce186b0e557fdba433237481a9a625176c2831be15d1", - "sha256:8d243eff56d095e5817f796ede6ae32941278f542e0f941867cc05ae52b162ec" + "sha256:b8b8060bb426838fbe942479c90296ce976249451118ef566a5a0b7d8b78fb05", + "sha256:bd63e505105011b25c3c11f753f7e3b8465ea739efddaccef8f0efac2137bac1" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==72.1.0" + "version": "==70.2.0" }, "shapely": { "hashes": [ - "sha256:03bd7b5fa5deb44795cc0a503999d10ae9d8a22df54ae8d4a4cd2e8a93466195", - "sha256:06efe39beafde3a18a21dde169d32f315c57da962826a6d7d22630025200c5e6", - "sha256:0f8e71bb9a46814019f6644c4e2560a09d44b80100e46e371578f35eaaa9da1c", - "sha256:1b65365cfbf657604e50d15161ffcc68de5cdb22a601bbf7823540ab4918a98d", - "sha256:1e5cb5ee72f1bc7ace737c9ecd30dc174a5295fae412972d3879bac2e82c8fae", - "sha256:21f64e647a025b61b19585d2247137b3a38a35314ea68c66aaf507a1c03ef6fe", - "sha256:2e119444bc27ca33e786772b81760f2028d930ac55dafe9bc50ef538b794a8e1", - "sha256:2ff9521991ed9e201c2e923da014e766c1aa04771bc93e6fe97c27dcf0d40ace", - "sha256:30e8737983c9d954cd17feb49eb169f02f1da49e24e5171122cf2c2b62d65c95", - "sha256:35110e80070d664781ec7955c7de557456b25727a0257b354830abb759bf8311", - "sha256:3ac7dc1350700c139c956b03d9c3df49a5b34aaf91d024d1510a09717ea39199", - "sha256:401cb794c5067598f50518e5a997e270cd7642c4992645479b915c503866abed", - "sha256:4461509afdb15051e73ab178fae79974387f39c47ab635a7330d7fee02c68a3f", - "sha256:45211276900c4790d6bfc6105cbf1030742da67594ea4161a9ce6812a6721e68", - "sha256:49b299b91557b04acb75e9732645428470825061f871a2edc36b9417d66c1fc5", - "sha256:4c83a36f12ec8dee2066946d98d4d841ab6512a6ed7eb742e026a64854019b5f", - "sha256:5bbfb048a74cf273db9091ff3155d373020852805a37dfc846ab71dde4be93ec", - "sha256:6c6b78c0007a34ce7144f98b7418800e0a6a5d9a762f2244b00ea560525290c9", - "sha256:7545a39c55cad1562be302d74c74586f79e07b592df8ada56b79a209731c0219", - "sha256:798090b426142df2c5258779c1d8d5734ec6942f778dab6c6c30cfe7f3bf64ff", - 
"sha256:7e8cf5c252fac1ea51b3162be2ec3faddedc82c256a1160fc0e8ddbec81b06d2", - "sha256:7fed9dbfbcfec2682d9a047b9699db8dcc890dfca857ecba872c42185fc9e64e", - "sha256:8203a8b2d44dcb366becbc8c3d553670320e4acf0616c39e218c9561dd738d92", - "sha256:89d34787c44f77a7d37d55ae821f3a784fa33592b9d217a45053a93ade899375", - "sha256:89e640c2cd37378480caf2eeda9a51be64201f01f786d127e78eaeff091ec897", - "sha256:8af6f7260f809c0862741ad08b1b89cb60c130ae30efab62320bbf4ee9cc71fa", - "sha256:93be600cbe2fbaa86c8eb70656369f2f7104cd231f0d6585c7d0aa555d6878b8", - "sha256:9a4492a2b2ccbeaebf181e7310d2dfff4fdd505aef59d6cb0f217607cb042fb3", - "sha256:b5870633f8e684bf6d1ae4df527ddcb6f3895f7b12bced5c13266ac04f47d231", - "sha256:b714a840402cde66fd7b663bb08cacb7211fa4412ea2a209688f671e0d0631fd", - "sha256:bff2366bc786bfa6cb353d6b47d0443c570c32776612e527ee47b6df63fcfe32", - "sha256:d5251c28a29012e92de01d2e84f11637eb1d48184ee8f22e2df6c8c578d26760", - "sha256:e91ee179af539100eb520281ba5394919067c6b51824e6ab132ad4b3b3e76dd0", - "sha256:f5456dd522800306ba3faef77c5ba847ec30a0bd73ab087a25e0acdd4db2514f", - "sha256:ff7731fea5face9ec08a861ed351734a79475631b7540ceb0b66fb9732a5f529", - "sha256:ff9e520af0c5a578e174bca3c18713cd47a6c6a15b6cf1f50ac17dc8bb8db6a2" + "sha256:011b77153906030b795791f2fdfa2d68f1a8d7e40bce78b029782ade3afe4f2f", + "sha256:03152442d311a5e85ac73b39680dd64a9892fa42bb08fd83b3bab4fe6999bfa0", + "sha256:05ffd6491e9e8958b742b0e2e7c346635033d0a5f1a0ea083547fcc854e5d5cf", + "sha256:0776c92d584f72f1e584d2e43cfc5542c2f3dd19d53f70df0900fda643f4bae6", + "sha256:263bcf0c24d7a57c80991e64ab57cba7a3906e31d2e21b455f493d4aab534aaa", + "sha256:2fbdc1140a7d08faa748256438291394967aa54b40009f54e8d9825e75ef6113", + "sha256:30982f79f21bb0ff7d7d4a4e531e3fcaa39b778584c2ce81a147f95be1cd58c9", + "sha256:31c19a668b5a1eadab82ff070b5a260478ac6ddad3a5b62295095174a8d26398", + "sha256:3f9103abd1678cb1b5f7e8e1af565a652e036844166c91ec031eeb25c5ca8af0", + "sha256:41388321a73ba1a84edd90d86ecc8bfed55e6a1e51882eafb019f45895ec0f65", 
+ "sha256:4310b5494271e18580d61022c0857eb85d30510d88606fa3b8314790df7f367d", + "sha256:464157509ce4efa5ff285c646a38b49f8c5ef8d4b340f722685b09bb033c5ccf", + "sha256:485246fcdb93336105c29a5cfbff8a226949db37b7473c89caa26c9bae52a242", + "sha256:489c19152ec1f0e5c5e525356bcbf7e532f311bff630c9b6bc2db6f04da6a8b9", + "sha256:4f2ab0faf8188b9f99e6a273b24b97662194160cc8ca17cf9d1fb6f18d7fb93f", + "sha256:55a38dcd1cee2f298d8c2ebc60fc7d39f3b4535684a1e9e2f39a80ae88b0cea7", + "sha256:58b0ecc505bbe49a99551eea3f2e8a9b3b24b3edd2a4de1ac0dc17bc75c9ec07", + "sha256:5af4cd0d8cf2912bd95f33586600cac9c4b7c5053a036422b97cfe4728d2eb53", + "sha256:5bbd974193e2cc274312da16b189b38f5f128410f3377721cadb76b1e8ca5328", + "sha256:5c4849916f71dc44e19ed370421518c0d86cf73b26e8656192fcfcda08218fbd", + "sha256:5dc736127fac70009b8d309a0eeb74f3e08979e530cf7017f2f507ef62e6cfb8", + "sha256:63f3a80daf4f867bd80f5c97fbe03314348ac1b3b70fb1c0ad255a69e3749879", + "sha256:674d7baf0015a6037d5758496d550fc1946f34bfc89c1bf247cabdc415d7747e", + "sha256:6cd4ccecc5ea5abd06deeaab52fcdba372f649728050c6143cc405ee0c166679", + "sha256:790a168a808bd00ee42786b8ba883307c0e3684ebb292e0e20009588c426da47", + "sha256:7d56ce3e2a6a556b59a288771cf9d091470116867e578bebced8bfc4147fbfd7", + "sha256:841f93a0e31e4c64d62ea570d81c35de0f6cea224568b2430d832967536308e6", + "sha256:8de4578e838a9409b5b134a18ee820730e507b2d21700c14b71a2b0757396acc", + "sha256:92a41d936f7d6743f343be265ace93b7c57f5b231e21b9605716f5a47c2879e7", + "sha256:9831816a5d34d5170aa9ed32a64982c3d6f4332e7ecfe62dc97767e163cb0b17", + "sha256:994c244e004bc3cfbea96257b883c90a86e8cbd76e069718eb4c6b222a56f78b", + "sha256:9dab4c98acfb5fb85f5a20548b5c0abe9b163ad3525ee28822ffecb5c40e724c", + "sha256:b79bbd648664aa6f44ef018474ff958b6b296fed5c2d42db60078de3cffbc8aa", + "sha256:c3e700abf4a37b7b8b90532fa6ed5c38a9bfc777098bc9fbae5ec8e618ac8f30", + "sha256:c52ed79f683f721b69a10fb9e3d940a468203f5054927215586c5d49a072de8d", + 
"sha256:c75c98380b1ede1cae9a252c6dc247e6279403fae38c77060a5e6186c95073ac", + "sha256:d2b4431f522b277c79c34b65da128029a9955e4481462cbf7ebec23aab61fc58", + "sha256:ddf4a9bfaac643e62702ed662afc36f6abed2a88a21270e891038f9a19bc08fc", + "sha256:de0205cb21ad5ddaef607cda9a3191eadd1e7a62a756ea3a356369675230ac35", + "sha256:ec555c9d0db12d7fd777ba3f8b75044c73e576c720a851667432fabb7057da6c", + "sha256:fb5cdcbbe3080181498931b52a91a21a781a35dcb859da741c0345c6402bf00c" ], "markers": "python_version >= '3.7'", - "version": "==2.0.5" + "version": "==2.0.4" }, "six": { "hashes": [ @@ -2062,11 +2059,11 @@ }, "sqlparse": { "hashes": [ - "sha256:773dcbf9a5ab44a090f3441e2180efe2560220203dc2f8c0b0fa141e18b505e4", - "sha256:bb6b4df465655ef332548e24f08e205afc81b9ab86cb1c45657a7ff173a3a00e" + "sha256:714d0a4932c059d16189f58ef5411ec2287a4360f17cdd0edd2d09d4c5087c93", + "sha256:c204494cd97479d0e39f28c93d46c0b2d5959c7b9ab904762ea6c7af211c8663" ], "markers": "python_version >= '3.8'", - "version": "==0.5.1" + "version": "==0.5.0" }, "static3": { "hashes": [ @@ -2153,14 +2150,6 @@ "markers": "python_version >= '3.7'", "version": "==3.7.1" }, - "appnope": { - "hashes": [ - "sha256:1de3860566df9caf38f01f86f65e0e13e379af54f9e4bee1e66b48f2efffd1ee", - "sha256:502575ee11cd7a28c0205f379b525beefebab9d161b7c964670864014ed7213c" - ], - "markers": "platform_system == 'Darwin'", - "version": "==0.1.4" - }, "argon2-cffi": { "hashes": [ "sha256:879c3e79a2729ce768ebb7d36d4609e3a78a4ca2ec3a9f12286ca057e3d0db08", @@ -2496,61 +2485,61 @@ "toml" ], "hashes": [ - "sha256:0086cd4fc71b7d485ac93ca4239c8f75732c2ae3ba83f6be1c9be59d9e2c6382", - "sha256:01c322ef2bbe15057bc4bf132b525b7e3f7206f071799eb8aa6ad1940bcf5fb1", - "sha256:03cafe82c1b32b770a29fd6de923625ccac3185a54a5e66606da26d105f37dac", - "sha256:044a0985a4f25b335882b0966625270a8d9db3d3409ddc49a4eb00b0ef5e8cee", - "sha256:07ed352205574aad067482e53dd606926afebcb5590653121063fbf4e2175166", - "sha256:0d1b923fc4a40c5832be4f35a5dab0e5ff89cddf83bb4174499e02ea089daf57", 
- "sha256:0e7b27d04131c46e6894f23a4ae186a6a2207209a05df5b6ad4caee6d54a222c", - "sha256:1fad32ee9b27350687035cb5fdf9145bc9cf0a094a9577d43e909948ebcfa27b", - "sha256:289cc803fa1dc901f84701ac10c9ee873619320f2f9aff38794db4a4a0268d51", - "sha256:3c59105f8d58ce500f348c5b56163a4113a440dad6daa2294b5052a10db866da", - "sha256:46c3d091059ad0b9c59d1034de74a7f36dcfa7f6d3bde782c49deb42438f2450", - "sha256:482855914928c8175735a2a59c8dc5806cf7d8f032e4820d52e845d1f731dca2", - "sha256:49c76cdfa13015c4560702574bad67f0e15ca5a2872c6a125f6327ead2b731dd", - "sha256:4b03741e70fb811d1a9a1d75355cf391f274ed85847f4b78e35459899f57af4d", - "sha256:4bea27c4269234e06f621f3fac3925f56ff34bc14521484b8f66a580aacc2e7d", - "sha256:4d5fae0a22dc86259dee66f2cc6c1d3e490c4a1214d7daa2a93d07491c5c04b6", - "sha256:543ef9179bc55edfd895154a51792b01c017c87af0ebaae092720152e19e42ca", - "sha256:54dece71673b3187c86226c3ca793c5f891f9fc3d8aa183f2e3653da18566169", - "sha256:6379688fb4cfa921ae349c76eb1a9ab26b65f32b03d46bb0eed841fd4cb6afb1", - "sha256:65fa405b837060db569a61ec368b74688f429b32fa47a8929a7a2f9b47183713", - "sha256:6616d1c9bf1e3faea78711ee42a8b972367d82ceae233ec0ac61cc7fec09fa6b", - "sha256:6fe885135c8a479d3e37a7aae61cbd3a0fb2deccb4dda3c25f92a49189f766d6", - "sha256:7221f9ac9dad9492cecab6f676b3eaf9185141539d5c9689d13fd6b0d7de840c", - "sha256:76d5f82213aa78098b9b964ea89de4617e70e0d43e97900c2778a50856dac605", - "sha256:7792f0ab20df8071d669d929c75c97fecfa6bcab82c10ee4adb91c7a54055463", - "sha256:831b476d79408ab6ccfadaaf199906c833f02fdb32c9ab907b1d4aa0713cfa3b", - "sha256:9146579352d7b5f6412735d0f203bbd8d00113a680b66565e205bc605ef81bc6", - "sha256:9cc44bf0315268e253bf563f3560e6c004efe38f76db03a1558274a6e04bf5d5", - "sha256:a73d18625f6a8a1cbb11eadc1d03929f9510f4131879288e3f7922097a429f63", - "sha256:a8659fd33ee9e6ca03950cfdcdf271d645cf681609153f218826dd9805ab585c", - "sha256:a94925102c89247530ae1dab7dc02c690942566f22e189cbd53579b0693c0783", - 
"sha256:ad4567d6c334c46046d1c4c20024de2a1c3abc626817ae21ae3da600f5779b44", - "sha256:b2e16f4cd2bc4d88ba30ca2d3bbf2f21f00f382cf4e1ce3b1ddc96c634bc48ca", - "sha256:bbdf9a72403110a3bdae77948b8011f644571311c2fb35ee15f0f10a8fc082e8", - "sha256:beb08e8508e53a568811016e59f3234d29c2583f6b6e28572f0954a6b4f7e03d", - "sha256:c4cbe651f3904e28f3a55d6f371203049034b4ddbce65a54527a3f189ca3b390", - "sha256:c7b525ab52ce18c57ae232ba6f7010297a87ced82a2383b1afd238849c1ff933", - "sha256:ca5d79cfdae420a1d52bf177de4bc2289c321d6c961ae321503b2ca59c17ae67", - "sha256:cdab02a0a941af190df8782aafc591ef3ad08824f97850b015c8c6a8b3877b0b", - "sha256:d17c6a415d68cfe1091d3296ba5749d3d8696e42c37fca5d4860c5bf7b729f03", - "sha256:d39bd10f0ae453554798b125d2f39884290c480f56e8a02ba7a6ed552005243b", - "sha256:d4b3cd1ca7cd73d229487fa5caca9e4bc1f0bca96526b922d61053ea751fe791", - "sha256:d50a252b23b9b4dfeefc1f663c568a221092cbaded20a05a11665d0dbec9b8fb", - "sha256:da8549d17489cd52f85a9829d0e1d91059359b3c54a26f28bec2c5d369524807", - "sha256:dcd070b5b585b50e6617e8972f3fbbee786afca71b1936ac06257f7e178f00f6", - "sha256:ddaaa91bfc4477d2871442bbf30a125e8fe6b05da8a0015507bfbf4718228ab2", - "sha256:df423f351b162a702c053d5dddc0fc0ef9a9e27ea3f449781ace5f906b664428", - "sha256:dff044f661f59dace805eedb4a7404c573b6ff0cdba4a524141bc63d7be5c7fd", - "sha256:e7e128f85c0b419907d1f38e616c4f1e9f1d1b37a7949f44df9a73d5da5cd53c", - "sha256:ed8d1d1821ba5fc88d4a4f45387b65de52382fa3ef1f0115a4f7a20cdfab0e94", - "sha256:f2501d60d7497fd55e391f423f965bbe9e650e9ffc3c627d5f0ac516026000b8", - "sha256:f7db0b6ae1f96ae41afe626095149ecd1b212b424626175a6633c2999eaad45b" + "sha256:018a12985185038a5b2bcafab04ab833a9a0f2c59995b3cec07e10074c78635f", + "sha256:02ff6e898197cc1e9fa375581382b72498eb2e6d5fc0b53f03e496cfee3fac6d", + "sha256:042183de01f8b6d531e10c197f7f0315a61e8d805ab29c5f7b51a01d62782747", + "sha256:1014fbf665fef86cdfd6cb5b7371496ce35e4d2a00cda501cf9f5b9e6fced69f", + "sha256:1137f46adb28e3813dec8c01fefadcb8c614f33576f672962e323b5128d9a68d", 
+ "sha256:16852febd96acd953b0d55fc842ce2dac1710f26729b31c80b940b9afcd9896f", + "sha256:2174e7c23e0a454ffe12267a10732c273243b4f2d50d07544a91198f05c48f47", + "sha256:2214ee920787d85db1b6a0bd9da5f8503ccc8fcd5814d90796c2f2493a2f4d2e", + "sha256:3257fdd8e574805f27bb5342b77bc65578e98cbc004a92232106344053f319ba", + "sha256:3684bc2ff328f935981847082ba4fdc950d58906a40eafa93510d1b54c08a66c", + "sha256:3a6612c99081d8d6134005b1354191e103ec9705d7ba2754e848211ac8cacc6b", + "sha256:3d7564cc09dd91b5a6001754a5b3c6ecc4aba6323baf33a12bd751036c998be4", + "sha256:44da56a2589b684813f86d07597fdf8a9c6ce77f58976727329272f5a01f99f7", + "sha256:5013ed890dc917cef2c9f765c4c6a8ae9df983cd60dbb635df8ed9f4ebc9f555", + "sha256:54317c2b806354cbb2dc7ac27e2b93f97096912cc16b18289c5d4e44fc663233", + "sha256:56b4eafa21c6c175b3ede004ca12c653a88b6f922494b023aeb1e836df953ace", + "sha256:581ea96f92bf71a5ec0974001f900db495488434a6928a2ca7f01eee20c23805", + "sha256:5cd64adedf3be66f8ccee418473c2916492d53cbafbfcff851cbec5a8454b136", + "sha256:5df54843b88901fdc2f598ac06737f03d71168fd1175728054c8f5a2739ac3e4", + "sha256:65e528e2e921ba8fd67d9055e6b9f9e34b21ebd6768ae1c1723f4ea6ace1234d", + "sha256:6aae5cce399a0f065da65c7bb1e8abd5c7a3043da9dceb429ebe1b289bc07806", + "sha256:6cfb5a4f556bb51aba274588200a46e4dd6b505fb1a5f8c5ae408222eb416f99", + "sha256:7076b4b3a5f6d2b5d7f1185fde25b1e54eb66e647a1dfef0e2c2bfaf9b4c88c8", + "sha256:73ca8fbc5bc622e54627314c1a6f1dfdd8db69788f3443e752c215f29fa87a0b", + "sha256:79b356f3dd5b26f3ad23b35c75dbdaf1f9e2450b6bcefc6d0825ea0aa3f86ca5", + "sha256:7a892be37ca35eb5019ec85402c3371b0f7cda5ab5056023a7f13da0961e60da", + "sha256:8192794d120167e2a64721d88dbd688584675e86e15d0569599257566dec9bf0", + "sha256:820bc841faa502e727a48311948e0461132a9c8baa42f6b2b84a29ced24cc078", + "sha256:8f894208794b164e6bd4bba61fc98bf6b06be4d390cf2daacfa6eca0a6d2bb4f", + "sha256:a04e990a2a41740b02d6182b498ee9796cf60eefe40cf859b016650147908029", + 
"sha256:a44963520b069e12789d0faea4e9fdb1e410cdc4aab89d94f7f55cbb7fef0353", + "sha256:a6bb74ed465d5fb204b2ec41d79bcd28afccf817de721e8a807d5141c3426638", + "sha256:ab73b35e8d109bffbda9a3e91c64e29fe26e03e49addf5b43d85fc426dde11f9", + "sha256:aea072a941b033813f5e4814541fc265a5c12ed9720daef11ca516aeacd3bd7f", + "sha256:b1ccf5e728ccf83acd313c89f07c22d70d6c375a9c6f339233dcf792094bcbf7", + "sha256:b385d49609f8e9efc885790a5a0e89f2e3ae042cdf12958b6034cc442de428d3", + "sha256:b3d45ff86efb129c599a3b287ae2e44c1e281ae0f9a9bad0edc202179bcc3a2e", + "sha256:b4a474f799456e0eb46d78ab07303286a84a3140e9700b9e154cfebc8f527016", + "sha256:b95c3a8cb0463ba9f77383d0fa8c9194cf91f64445a63fc26fb2327e1e1eb088", + "sha256:c5986ee7ea0795a4095ac4d113cbb3448601efca7f158ec7f7087a6c705304e4", + "sha256:cdd31315fc20868c194130de9ee6bfd99755cc9565edff98ecc12585b90be882", + "sha256:cef4649ec906ea7ea5e9e796e68b987f83fa9a718514fe147f538cfeda76d7a7", + "sha256:d05c16cf4b4c2fc880cb12ba4c9b526e9e5d5bb1d81313d4d732a5b9fe2b9d53", + "sha256:d2e344d6adc8ef81c5a233d3a57b3c7d5181f40e79e05e1c143da143ccb6377d", + "sha256:d45d3cbd94159c468b9b8c5a556e3f6b81a8d1af2a92b77320e887c3e7a5d080", + "sha256:db14f552ac38f10758ad14dd7b983dbab424e731588d300c7db25b6f89e335b5", + "sha256:dbc5958cb471e5a5af41b0ddaea96a37e74ed289535e8deca404811f6cb0bc3d", + "sha256:ddbd2f9713a79e8e7242d7c51f1929611e991d855f414ca9996c20e44a895f7c", + "sha256:e16f3d6b491c48c5ae726308e6ab1e18ee830b4cdd6913f2d7f77354b33f91c8", + "sha256:e2afe743289273209c992075a5a4913e8d007d569a406ffed0bd080ea02b0633", + "sha256:e564c2cf45d2f44a9da56f4e3a26b2236504a496eb4cb0ca7221cd4cc7a9aca9", + "sha256:ed550e7442f278af76d9d65af48069f1fb84c9f745ae249c1a183c1e9d1b025c" ], "markers": "python_version >= '3.8'", - "version": "==7.6.0" + "version": "==7.5.4" }, "cryptography": { "hashes": [ @@ -2649,21 +2638,21 @@ }, "django": { "hashes": [ - "sha256:3ec32bc2c616ab02834b9cac93143a7dc1cdcd5b822d78ac95fc20a38c534240", - 
"sha256:fc6919875a6226c7ffcae1a7d51e0f2ceaf6f160393180818f6c95f51b1e7b96" + "sha256:837e3cf1f6c31347a1396a3f6b65688f2b4bb4a11c580dcb628b5afe527b68a5", + "sha256:a17fcba2aad3fc7d46fdb23215095dbbd64e6174bf4589171e732b18b07e426a" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==4.2.14" + "version": "==4.2.13" }, "django-debug-toolbar": { "hashes": [ - "sha256:36e421cb908c2f0675e07f9f41e3d1d8618dc386392ec82d23bcfcd5d29c7044", - "sha256:3beb671c9ec44ffb817fad2780667f172bd1c067dbcabad6268ce39a81335f45" + "sha256:8298ce966b4c8fc71430082dd4739ef2badb5f867734e1973a413c4ab2ea81b7", + "sha256:91425606673ee674d780f7aeedf3595c264eb382dcf41f55c6779577900904c0" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==4.4.6" + "version": "==4.4.5" }, "django-extensions": { "hashes": [ @@ -2755,6 +2744,70 @@ "markers": "python_version >= '3.8'", "version": "==1.29.0" }, + "greenlet": { + "hashes": [ + "sha256:01bc7ea167cf943b4c802068e178bbf70ae2e8c080467070d01bfa02f337ee67", + "sha256:0448abc479fab28b00cb472d278828b3ccca164531daab4e970a0458786055d6", + "sha256:086152f8fbc5955df88382e8a75984e2bb1c892ad2e3c80a2508954e52295257", + "sha256:098d86f528c855ead3479afe84b49242e174ed262456c342d70fc7f972bc13c4", + "sha256:149e94a2dd82d19838fe4b2259f1b6b9957d5ba1b25640d2380bea9c5df37676", + "sha256:1551a8195c0d4a68fac7a4325efac0d541b48def35feb49d803674ac32582f61", + "sha256:15d79dd26056573940fcb8c7413d84118086f2ec1a8acdfa854631084393efcc", + "sha256:1996cb9306c8595335bb157d133daf5cf9f693ef413e7673cb07e3e5871379ca", + "sha256:1a7191e42732df52cb5f39d3527217e7ab73cae2cb3694d241e18f53d84ea9a7", + "sha256:1ea188d4f49089fc6fb283845ab18a2518d279c7cd9da1065d7a84e991748728", + "sha256:1f672519db1796ca0d8753f9e78ec02355e862d0998193038c7073045899f305", + "sha256:2516a9957eed41dd8f1ec0c604f1cdc86758b587d964668b5b196a9db5bfcde6", + "sha256:2797aa5aedac23af156bbb5a6aa2cd3427ada2972c828244eb7d1b9255846379", + 
"sha256:2dd6e660effd852586b6a8478a1d244b8dc90ab5b1321751d2ea15deb49ed414", + "sha256:3ddc0f794e6ad661e321caa8d2f0a55ce01213c74722587256fb6566049a8b04", + "sha256:3ed7fb269f15dc662787f4119ec300ad0702fa1b19d2135a37c2c4de6fadfd4a", + "sha256:419b386f84949bf0e7c73e6032e3457b82a787c1ab4a0e43732898a761cc9dbf", + "sha256:43374442353259554ce33599da8b692d5aa96f8976d567d4badf263371fbe491", + "sha256:52f59dd9c96ad2fc0d5724107444f76eb20aaccb675bf825df6435acb7703559", + "sha256:57e8974f23e47dac22b83436bdcf23080ade568ce77df33159e019d161ce1d1e", + "sha256:5b51e85cb5ceda94e79d019ed36b35386e8c37d22f07d6a751cb659b180d5274", + "sha256:649dde7de1a5eceb258f9cb00bdf50e978c9db1b996964cd80703614c86495eb", + "sha256:64d7675ad83578e3fc149b617a444fab8efdafc9385471f868eb5ff83e446b8b", + "sha256:68834da854554926fbedd38c76e60c4a2e3198c6fbed520b106a8986445caaf9", + "sha256:6b66c9c1e7ccabad3a7d037b2bcb740122a7b17a53734b7d72a344ce39882a1b", + "sha256:70fb482fdf2c707765ab5f0b6655e9cfcf3780d8d87355a063547b41177599be", + "sha256:7170375bcc99f1a2fbd9c306f5be8764eaf3ac6b5cb968862cad4c7057756506", + "sha256:73a411ef564e0e097dbe7e866bb2dda0f027e072b04da387282b02c308807405", + "sha256:77457465d89b8263bca14759d7c1684df840b6811b2499838cc5b040a8b5b113", + "sha256:7f362975f2d179f9e26928c5b517524e89dd48530a0202570d55ad6ca5d8a56f", + "sha256:81bb9c6d52e8321f09c3d165b2a78c680506d9af285bfccbad9fb7ad5a5da3e5", + "sha256:881b7db1ebff4ba09aaaeae6aa491daeb226c8150fc20e836ad00041bcb11230", + "sha256:894393ce10ceac937e56ec00bb71c4c2f8209ad516e96033e4b3b1de270e200d", + "sha256:99bf650dc5d69546e076f413a87481ee1d2d09aaaaaca058c9251b6d8c14783f", + "sha256:9da2bd29ed9e4f15955dd1595ad7bc9320308a3b766ef7f837e23ad4b4aac31a", + "sha256:afaff6cf5200befd5cec055b07d1c0a5a06c040fe5ad148abcd11ba6ab9b114e", + "sha256:b1b5667cced97081bf57b8fa1d6bfca67814b0afd38208d52538316e9422fc61", + "sha256:b37eef18ea55f2ffd8f00ff8fe7c8d3818abd3e25fb73fae2ca3b672e333a7a6", + "sha256:b542be2440edc2d48547b5923c408cbe0fc94afb9f18741faa6ae970dbcb9b6d", 
+ "sha256:b7dcbe92cc99f08c8dd11f930de4d99ef756c3591a5377d1d9cd7dd5e896da71", + "sha256:b7f009caad047246ed379e1c4dbcb8b020f0a390667ea74d2387be2998f58a22", + "sha256:bba5387a6975598857d86de9eac14210a49d554a77eb8261cc68b7d082f78ce2", + "sha256:c5e1536de2aad7bf62e27baf79225d0d64360d4168cf2e6becb91baf1ed074f3", + "sha256:c5ee858cfe08f34712f548c3c363e807e7186f03ad7a5039ebadb29e8c6be067", + "sha256:c9db1c18f0eaad2f804728c67d6c610778456e3e1cc4ab4bbd5eeb8e6053c6fc", + "sha256:d353cadd6083fdb056bb46ed07e4340b0869c305c8ca54ef9da3421acbdf6881", + "sha256:d46677c85c5ba00a9cb6f7a00b2bfa6f812192d2c9f7d9c4f6a55b60216712f3", + "sha256:d4d1ac74f5c0c0524e4a24335350edad7e5f03b9532da7ea4d3c54d527784f2e", + "sha256:d73a9fe764d77f87f8ec26a0c85144d6a951a6c438dfe50487df5595c6373eac", + "sha256:da70d4d51c8b306bb7a031d5cff6cc25ad253affe89b70352af5f1cb68e74b53", + "sha256:daf3cb43b7cf2ba96d614252ce1684c1bccee6b2183a01328c98d36fcd7d5cb0", + "sha256:dca1e2f3ca00b84a396bc1bce13dd21f680f035314d2379c4160c98153b2059b", + "sha256:dd4f49ae60e10adbc94b45c0b5e6a179acc1736cf7a90160b404076ee283cf83", + "sha256:e1f145462f1fa6e4a4ae3c0f782e580ce44d57c8f2c7aae1b6fa88c0b2efdb41", + "sha256:e3391d1e16e2a5a1507d83e4a8b100f4ee626e8eca43cf2cadb543de69827c4c", + "sha256:fcd2469d6a2cf298f198f0487e0a5b1a47a42ca0fa4dfd1b6862c999f018ebbf", + "sha256:fd096eb7ffef17c456cfa587523c5f92321ae02427ff955bebe9e3c63bc9f0da", + "sha256:fe754d231288e1e64323cfad462fcee8f0288654c10bdf4f603a39ed923bef33" + ], + "markers": "python_version < '3.13' and platform_machine == 'aarch64' or (platform_machine == 'ppc64le' or (platform_machine == 'x86_64' or (platform_machine == 'amd64' or (platform_machine == 'AMD64' or (platform_machine == 'win32' or platform_machine == 'WIN32')))))", + "version": "==3.0.3" + }, "html-tag-names": { "hashes": [ "sha256:04924aca48770f36b5a41c27e4d917062507be05118acb0ba869c97389084297", @@ -2805,12 +2858,12 @@ }, "ipyleaflet": { "hashes": [ - 
"sha256:7cc9157848baca2e1793b96e79f8bdb1aa7340521d2b7d8a62aa8bc30eab5278", - "sha256:b3b83fe3460e742964c2a5924ea7934365a3749bb75310ce388d45fd751372d2" + "sha256:3454dbc0d360150516ea2a3ab4109503ceb9b7347fa1746a6cafd889075a0ff8", + "sha256:ffb90f67576c22c85438490bb32af1f30de88993bfb5dc167503e3a278252139" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==0.19.2" + "version": "==0.19.1" }, "ipython": { "hashes": [ @@ -2885,11 +2938,11 @@ }, "jsonschema": { "hashes": [ - "sha256:d71497fef26351a33265337fa77ffeb82423f3ea21283cd9467bb03999266bc4", - "sha256:fbadb6f8b144a8f8cf9f0b89ba94501d143e50411a1278633f56a7acf7fd5566" + "sha256:5b22d434a45935119af990552c862e5d6d564e8f6601206b305a61fdf661a2b7", + "sha256:ff4cfd6b1367a40e7bc6411caec72effadd3db0bbe5017de188f2d6108335802" ], "markers": "python_version >= '3.8'", - "version": "==4.23.0" + "version": "==4.22.0" }, "jsonschema-specifications": { "hashes": [ @@ -2935,11 +2988,11 @@ }, "jupyter-leaflet": { "hashes": [ - "sha256:0d57e15e80c08a4360f0cde0b4c490beddc5d422bb0e9bc1c0b4479d3fb725a6", - "sha256:b09b5ba48b1488cb61da37a6f558347269eb53ff6d64dc1a73e005ffc4420063" + "sha256:8001a7304e9262394b8f896003539438467bed712bb9330dd65785bd9a5f8add", + "sha256:f4c1ab7a8b2c91d01a0940d1bab6543c57e91dca6425789632780eef7d58b266" ], "markers": "python_version >= '3.8'", - "version": "==0.19.2" + "version": "==0.19.1" }, "jupyter-server": { "hashes": [ @@ -3073,37 +3126,37 @@ }, "mypy": { "hashes": [ - "sha256:0bea2a0e71c2a375c9fa0ede3d98324214d67b3cbbfcbd55ac8f750f85a414e3", - "sha256:104e9c1620c2675420abd1f6c44bab7dd33cc85aea751c985006e83dcd001095", - "sha256:14f9294528b5f5cf96c721f231c9f5b2733164e02c1c018ed1a0eff8a18005ac", - "sha256:1a5d8d8dd8613a3e2be3eae829ee891b6b2de6302f24766ff06cb2875f5be9c6", - "sha256:1d44c1e44a8be986b54b09f15f2c1a66368eb43861b4e82573026e04c48a9e20", - "sha256:25bcfa75b9b5a5f8d67147a54ea97ed63a653995a82798221cca2a315c0238c1", - 
"sha256:35ce88b8ed3a759634cb4eb646d002c4cef0a38f20565ee82b5023558eb90c00", - "sha256:56913ec8c7638b0091ef4da6fcc9136896914a9d60d54670a75880c3e5b99ace", - "sha256:65f190a6349dec29c8d1a1cd4aa71284177aee5949e0502e6379b42873eddbe7", - "sha256:6801319fe76c3f3a3833f2b5af7bd2c17bb93c00026a2a1b924e6762f5b19e13", - "sha256:72596a79bbfb195fd41405cffa18210af3811beb91ff946dbcb7368240eed6be", - "sha256:93743608c7348772fdc717af4aeee1997293a1ad04bc0ea6efa15bf65385c538", - "sha256:940bfff7283c267ae6522ef926a7887305945f716a7704d3344d6d07f02df850", - "sha256:96f8dbc2c85046c81bcddc246232d500ad729cb720da4e20fce3b542cab91287", - "sha256:98790025861cb2c3db8c2f5ad10fc8c336ed2a55f4daf1b8b3f877826b6ff2eb", - "sha256:a3824187c99b893f90c845bab405a585d1ced4ff55421fdf5c84cb7710995229", - "sha256:a83ec98ae12d51c252be61521aa5731f5512231d0b738b4cb2498344f0b840cd", - "sha256:becc9111ca572b04e7e77131bc708480cc88a911adf3d0239f974c034b78085c", - "sha256:c1a184c64521dc549324ec6ef7cbaa6b351912be9cb5edb803c2808a0d7e85ac", - "sha256:c7b73a856522417beb78e0fb6d33ef89474e7a622db2653bc1285af36e2e3e3d", - "sha256:cea3d0fb69637944dd321f41bc896e11d0fb0b0aa531d887a6da70f6e7473aba", - "sha256:d2b3d36baac48e40e3064d2901f2fbd2a2d6880ec6ce6358825c85031d7c0d4d", - "sha256:d7b54c27783991399046837df5c7c9d325d921394757d09dbcbf96aee4649fe9", - "sha256:d8e2e43977f0e09f149ea69fd0556623919f816764e26d74da0c8a7b48f3e18a", - "sha256:dbe286303241fea8c2ea5466f6e0e6a046a135a7e7609167b07fd4e7baf151bf", - "sha256:f006e955718ecd8d159cee9932b64fba8f86ee6f7728ca3ac66c3a54b0062abe", - "sha256:f2268d9fcd9686b61ab64f077be7ffbc6fbcdfb4103e5dd0cc5eaab53a8886c2" + "sha256:0cd62192a4a32b77ceb31272d9e74d23cd88c8060c34d1d3622db3267679a5d9", + "sha256:1b3a2ffce52cc4dbaeee4df762f20a2905aa171ef157b82192f2e2f368eec05d", + "sha256:1f8f492d7db9e3593ef42d4f115f04e556130f2819ad33ab84551403e97dd4c0", + "sha256:2189ff1e39db399f08205e22a797383613ce1cb0cb3b13d8bcf0170e45b96cc3", + "sha256:378c03f53f10bbdd55ca94e46ec3ba255279706a6aacaecac52ad248f98205d3", 
+ "sha256:37fd87cab83f09842653f08de066ee68f1182b9b5282e4634cdb4b407266bade", + "sha256:3c4c2992f6ea46ff7fce0072642cfb62af7a2484efe69017ed8b095f7b39ef31", + "sha256:51a46974340baaa4145363b9e051812a2446cf583dfaeba124af966fa44593f7", + "sha256:5bb9cd11c01c8606a9d0b83ffa91d0b236a0e91bc4126d9ba9ce62906ada868e", + "sha256:5cc3ca0a244eb9a5249c7c583ad9a7e881aa5d7b73c35652296ddcdb33b2b9c7", + "sha256:604282c886497645ffb87b8f35a57ec773a4a2721161e709a4422c1636ddde5c", + "sha256:6166a88b15f1759f94a46fa474c7b1b05d134b1b61fca627dd7335454cc9aa6b", + "sha256:6bacf8f3a3d7d849f40ca6caea5c055122efe70e81480c8328ad29c55c69e93e", + "sha256:6be84c06e6abd72f960ba9a71561c14137a583093ffcf9bbfaf5e613d63fa531", + "sha256:701b5f71413f1e9855566a34d6e9d12624e9e0a8818a5704d74d6b0402e66c04", + "sha256:71d8ac0b906354ebda8ef1673e5fde785936ac1f29ff6987c7483cfbd5a4235a", + "sha256:8addf6313777dbb92e9564c5d32ec122bf2c6c39d683ea64de6a1fd98b90fe37", + "sha256:901c89c2d67bba57aaaca91ccdb659aa3a312de67f23b9dfb059727cce2e2e0a", + "sha256:97a131ee36ac37ce9581f4220311247ab6cba896b4395b9c87af0675a13a755f", + "sha256:a1bbb3a6f5ff319d2b9d40b4080d46cd639abe3516d5a62c070cf0114a457d84", + "sha256:a2cbc68cb9e943ac0814c13e2452d2046c2f2b23ff0278e26599224cf164e78d", + "sha256:b8edd4e9bbbc9d7b79502eb9592cab808585516ae1bcc1446eb9122656c6066f", + "sha256:bd6f629b67bb43dc0d9211ee98b96d8dabc97b1ad38b9b25f5e4c4d7569a0c6a", + "sha256:c2ae450d60d7d020d67ab440c6e3fae375809988119817214440033f26ddf7bf", + "sha256:d8681909f7b44d0b7b86e653ca152d6dff0eb5eb41694e163c6092124f8246d7", + "sha256:e36f229acfe250dc660790840916eb49726c928e8ce10fbdf90715090fe4ae02", + "sha256:fe85ed6836165d52ae8b88f99527d3d1b2362e0cb90b005409b8bed90e9059b3" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==1.11.0" + "version": "==1.10.1" }, "mypy-extensions": { "hashes": [ @@ -3252,21 +3305,21 @@ }, "pre-commit": { "hashes": [ - "sha256:8bb6494d4a20423842e198980c9ecf9f96607a07ea29549e180eef9ae80fe7af", - 
"sha256:9a90a53bf82fdd8778d58085faf8d83df56e40dfe18f45b19446e26bf1b3a63f" + "sha256:8ca3ad567bc78a4972a3f1a477e94a79d4597e8140a6e0b651c5e33899c3654a", + "sha256:fae36fd1d7ad7d6a5a1c0b0d5adb2ed1a3bda5a21bf6c3e5372073d7a11cd4c5" ], "index": "pypi", "markers": "python_version >= '3.9'", - "version": "==3.8.0" + "version": "==3.7.1" }, "prettytable": { "hashes": [ - "sha256:1cbfdeb4bcc73976a778a0fb33cb6d752e75396f16574dcb3e2d6332fd93c76a", - "sha256:29ec6c34260191d42cd4928c28d56adec360ac2b1208a26c7e4f14b90cc8bc84" + "sha256:6536efaf0757fdaa7d22e78b3aac3b69ea1b7200538c2c6995d649365bddab92", + "sha256:9665594d137fb08a1117518c25551e0ede1687197cf353a4fdc78d27e1073568" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==3.10.2" + "version": "==3.10.0" }, "prometheus-client": { "hashes": [ @@ -3317,10 +3370,10 @@ }, "pure-eval": { "hashes": [ - "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0", - "sha256:5f4e983f40564c576c7c8635ae88db5956bb2229d7e9237d03b3c0b0190eaf42" + "sha256:01eaab343580944bc56080ebe0a674b39ec44a945e6d09ba7db3cb8cec289350", + "sha256:2b45320af6dfaa1750f543d714b6d1c520a1688dec6fd24d339063ce0aaa9ac3" ], - "version": "==0.2.3" + "version": "==0.2.2" }, "pycodestyle": { "hashes": [ @@ -3372,12 +3425,12 @@ }, "pytest": { "hashes": [ - "sha256:4ba08f9ae7dcf84ded419494d229b48d0903ea6407b030eaec46df5e6a73bba5", - "sha256:c132345d12ce551242c87269de812483f5bcc87cdbb4722e48487ba194f9fdce" + "sha256:c434598117762e2bd304e526244f67bf66bbd7b5d6cf22138be51ff661980343", + "sha256:de4bb8104e201939ccdc688b27a89a7be2079b22e2bd2b07f806b6ba71117977" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==8.3.2" + "version": "==8.2.2" }, "pytest-cov": { "hashes": [ @@ -3723,112 +3776,108 @@ }, "rpds-py": { "hashes": [ - "sha256:01227f8b3e6c8961490d869aa65c99653df80d2f0a7fde8c64ebddab2b9b02fd", - "sha256:08ce9c95a0b093b7aec75676b356a27879901488abc27e9d029273d280438505", - 
"sha256:0b02dd77a2de6e49078c8937aadabe933ceac04b41c5dde5eca13a69f3cf144e", - "sha256:0d4b52811dcbc1aba08fd88d475f75b4f6db0984ba12275d9bed1a04b2cae9b5", - "sha256:13e6d4840897d4e4e6b2aa1443e3a8eca92b0402182aafc5f4ca1f5e24f9270a", - "sha256:1a129c02b42d46758c87faeea21a9f574e1c858b9f358b6dd0bbd71d17713175", - "sha256:1a8dfa125b60ec00c7c9baef945bb04abf8ac772d8ebefd79dae2a5f316d7850", - "sha256:1c32e41de995f39b6b315d66c27dea3ef7f7c937c06caab4c6a79a5e09e2c415", - "sha256:1d494887d40dc4dd0d5a71e9d07324e5c09c4383d93942d391727e7a40ff810b", - "sha256:1d4af2eb520d759f48f1073ad3caef997d1bfd910dc34e41261a595d3f038a94", - "sha256:1fb93d3486f793d54a094e2bfd9cd97031f63fcb5bc18faeb3dd4b49a1c06523", - "sha256:24f8ae92c7fae7c28d0fae9b52829235df83f34847aa8160a47eb229d9666c7b", - "sha256:24fc5a84777cb61692d17988989690d6f34f7f95968ac81398d67c0d0994a897", - "sha256:26ab43b6d65d25b1a333c8d1b1c2f8399385ff683a35ab5e274ba7b8bb7dc61c", - "sha256:271accf41b02687cef26367c775ab220372ee0f4925591c6796e7c148c50cab5", - "sha256:2ddd50f18ebc05ec29a0d9271e9dbe93997536da3546677f8ca00b76d477680c", - "sha256:31dd5794837f00b46f4096aa8ccaa5972f73a938982e32ed817bb520c465e520", - "sha256:31e450840f2f27699d014cfc8865cc747184286b26d945bcea6042bb6aa4d26e", - "sha256:32e0db3d6e4f45601b58e4ac75c6f24afbf99818c647cc2066f3e4b192dabb1f", - "sha256:346557f5b1d8fd9966059b7a748fd79ac59f5752cd0e9498d6a40e3ac1c1875f", - "sha256:34bca66e2e3eabc8a19e9afe0d3e77789733c702c7c43cd008e953d5d1463fde", - "sha256:3511f6baf8438326e351097cecd137eb45c5f019944fe0fd0ae2fea2fd26be39", - "sha256:35af5e4d5448fa179fd7fff0bba0fba51f876cd55212f96c8bbcecc5c684ae5c", - "sha256:3837c63dd6918a24de6c526277910e3766d8c2b1627c500b155f3eecad8fad65", - "sha256:39d67896f7235b2c886fb1ee77b1491b77049dcef6fbf0f401e7b4cbed86bbd4", - "sha256:3b823be829407393d84ee56dc849dbe3b31b6a326f388e171555b262e8456cc1", - "sha256:3c73254c256081704dba0a333457e2fb815364018788f9b501efe7c5e0ada401", - "sha256:3ddab996807c6b4227967fe1587febade4e48ac47bb0e2d3e7858bc621b1cace", 
- "sha256:3e1dc59a5e7bc7f44bd0c048681f5e05356e479c50be4f2c1a7089103f1621d5", - "sha256:4383beb4a29935b8fa28aca8fa84c956bf545cb0c46307b091b8d312a9150e6a", - "sha256:4cc4bc73e53af8e7a42c8fd7923bbe35babacfa7394ae9240b3430b5dcf16b2a", - "sha256:4dd02e29c8cbed21a1875330b07246b71121a1c08e29f0ee3db5b4cfe16980c4", - "sha256:4f580ae79d0b861dfd912494ab9d477bea535bfb4756a2269130b6607a21802e", - "sha256:53dbc35808c6faa2ce3e48571f8f74ef70802218554884787b86a30947842a14", - "sha256:56313be667a837ff1ea3508cebb1ef6681d418fa2913a0635386cf29cff35165", - "sha256:57863d16187995c10fe9cf911b897ed443ac68189179541734502353af33e693", - "sha256:5953391af1405f968eb5701ebbb577ebc5ced8d0041406f9052638bafe52209d", - "sha256:5beffdbe766cfe4fb04f30644d822a1080b5359df7db3a63d30fa928375b2720", - "sha256:5e360188b72f8080fefa3adfdcf3618604cc8173651c9754f189fece068d2a45", - "sha256:5e58b61dcbb483a442c6239c3836696b79f2cd8e7eec11e12155d3f6f2d886d1", - "sha256:69084fd29bfeff14816666c93a466e85414fe6b7d236cfc108a9c11afa6f7301", - "sha256:6d1d7539043b2b31307f2c6c72957a97c839a88b2629a348ebabe5aa8b626d6b", - "sha256:6d8b735c4d162dc7d86a9cf3d717f14b6c73637a1f9cd57fe7e61002d9cb1972", - "sha256:6ea961a674172ed2235d990d7edf85d15d8dfa23ab8575e48306371c070cda67", - "sha256:71157f9db7f6bc6599a852852f3389343bea34315b4e6f109e5cbc97c1fb2963", - "sha256:720f3108fb1bfa32e51db58b832898372eb5891e8472a8093008010911e324c5", - "sha256:74129d5ffc4cde992d89d345f7f7d6758320e5d44a369d74d83493429dad2de5", - "sha256:747251e428406b05fc86fee3904ee19550c4d2d19258cef274e2151f31ae9d38", - "sha256:75130df05aae7a7ac171b3b5b24714cffeabd054ad2ebc18870b3aa4526eba23", - "sha256:7b3661e6d4ba63a094138032c1356d557de5b3ea6fd3cca62a195f623e381c76", - "sha256:7d5c7e32f3ee42f77d8ff1a10384b5cdcc2d37035e2e3320ded909aa192d32c3", - "sha256:8124101e92c56827bebef084ff106e8ea11c743256149a95b9fd860d3a4f331f", - "sha256:81db2e7282cc0487f500d4db203edc57da81acde9e35f061d69ed983228ffe3b", - 
"sha256:840e18c38098221ea6201f091fc5d4de6128961d2930fbbc96806fb43f69aec1", - "sha256:89cc8921a4a5028d6dd388c399fcd2eef232e7040345af3d5b16c04b91cf3c7e", - "sha256:8b32cd4ab6db50c875001ba4f5a6b30c0f42151aa1fbf9c2e7e3674893fb1dc4", - "sha256:8df1c283e57c9cb4d271fdc1875f4a58a143a2d1698eb0d6b7c0d7d5f49c53a1", - "sha256:902cf4739458852fe917104365ec0efbea7d29a15e4276c96a8d33e6ed8ec137", - "sha256:97fbb77eaeb97591efdc654b8b5f3ccc066406ccfb3175b41382f221ecc216e8", - "sha256:9c7042488165f7251dc7894cd533a875d2875af6d3b0e09eda9c4b334627ad1c", - "sha256:9e318e6786b1e750a62f90c6f7fa8b542102bdcf97c7c4de2a48b50b61bd36ec", - "sha256:a9421b23c85f361a133aa7c5e8ec757668f70343f4ed8fdb5a4a14abd5437244", - "sha256:aaf71f95b21f9dc708123335df22e5a2fef6307e3e6f9ed773b2e0938cc4d491", - "sha256:afedc35fe4b9e30ab240b208bb9dc8938cb4afe9187589e8d8d085e1aacb8309", - "sha256:b5e28e56143750808c1c79c70a16519e9bc0a68b623197b96292b21b62d6055c", - "sha256:b82c9514c6d74b89a370c4060bdb80d2299bc6857e462e4a215b4ef7aa7b090e", - "sha256:b8f78398e67a7227aefa95f876481485403eb974b29e9dc38b307bb6eb2315ea", - "sha256:bbda75f245caecff8faa7e32ee94dfaa8312a3367397975527f29654cd17a6ed", - "sha256:bca34e913d27401bda2a6f390d0614049f5a95b3b11cd8eff80fe4ec340a1208", - "sha256:bd04d8cab16cab5b0a9ffc7d10f0779cf1120ab16c3925404428f74a0a43205a", - "sha256:c149a652aeac4902ecff2dd93c3b2681c608bd5208c793c4a99404b3e1afc87c", - "sha256:c2087dbb76a87ec2c619253e021e4fb20d1a72580feeaa6892b0b3d955175a71", - "sha256:c34f751bf67cab69638564eee34023909380ba3e0d8ee7f6fe473079bf93f09b", - "sha256:c6d20c8896c00775e6f62d8373aba32956aa0b850d02b5ec493f486c88e12859", - "sha256:c7af6f7b80f687b33a4cdb0a785a5d4de1fb027a44c9a049d8eb67d5bfe8a687", - "sha256:c7b07959866a6afb019abb9564d8a55046feb7a84506c74a6f197cbcdf8a208e", - "sha256:ca0dda0c5715efe2ab35bb83f813f681ebcd2840d8b1b92bfc6fe3ab382fae4a", - "sha256:cdb7eb3cf3deb3dd9e7b8749323b5d970052711f9e1e9f36364163627f96da58", - "sha256:ce757c7c90d35719b38fa3d4ca55654a76a40716ee299b0865f2de21c146801c", 
- "sha256:d1fa67ef839bad3815124f5f57e48cd50ff392f4911a9f3cf449d66fa3df62a5", - "sha256:d2dbd8f4990d4788cb122f63bf000357533f34860d269c1a8e90ae362090ff3a", - "sha256:d4ec0046facab83012d821b33cead742a35b54575c4edfb7ed7445f63441835f", - "sha256:dbceedcf4a9329cc665452db1aaf0845b85c666e4885b92ee0cddb1dbf7e052a", - "sha256:dc733d35f861f8d78abfaf54035461e10423422999b360966bf1c443cbc42705", - "sha256:dd635c2c4043222d80d80ca1ac4530a633102a9f2ad12252183bcf338c1b9474", - "sha256:de1f7cd5b6b351e1afd7568bdab94934d656abe273d66cda0ceea43bbc02a0c2", - "sha256:df7c841813f6265e636fe548a49664c77af31ddfa0085515326342a751a6ba51", - "sha256:e0f9d268b19e8f61bf42a1da48276bcd05f7ab5560311f541d22557f8227b866", - "sha256:e2d66eb41ffca6cc3c91d8387509d27ba73ad28371ef90255c50cb51f8953301", - "sha256:e429fc517a1c5e2a70d576077231538a98d59a45dfc552d1ac45a132844e6dfb", - "sha256:e4d2b88efe65544a7d5121b0c3b003ebba92bfede2ea3577ce548b69c5235185", - "sha256:e76c902d229a3aa9d5ceb813e1cbcc69bf5bda44c80d574ff1ac1fa3136dea71", - "sha256:ef07a0a1d254eeb16455d839cef6e8c2ed127f47f014bbda64a58b5482b6c836", - "sha256:f09529d2332264a902688031a83c19de8fda5eb5881e44233286b9c9ec91856d", - "sha256:f0a6d4a93d2a05daec7cb885157c97bbb0be4da739d6f9dfb02e101eb40921cd", - "sha256:f0cf2a0dbb5987da4bd92a7ca727eadb225581dd9681365beba9accbe5308f7d", - "sha256:f2671cb47e50a97f419a02cd1e0c339b31de017b033186358db92f4d8e2e17d8", - "sha256:f35b34a5184d5e0cc360b61664c1c06e866aab077b5a7c538a3e20c8fcdbf90b", - "sha256:f3d73022990ab0c8b172cce57c69fd9a89c24fd473a5e79cbce92df87e3d9c48", - "sha256:f5b8353ea1a4d7dfb59a7f45c04df66ecfd363bb5b35f33b11ea579111d4655f", - "sha256:f809a17cc78bd331e137caa25262b507225854073fd319e987bd216bed911b7c", - "sha256:f9bc4161bd3b970cd6a6fcda70583ad4afd10f2750609fb1f3ca9505050d4ef3", - "sha256:fdf4890cda3b59170009d012fca3294c00140e7f2abe1910e6a730809d0f3f9b" + "sha256:05f3d615099bd9b13ecf2fc9cf2d839ad3f20239c678f461c753e93755d629ee", + 
"sha256:06d218939e1bf2ca50e6b0ec700ffe755e5216a8230ab3e87c059ebb4ea06afc", + "sha256:07f2139741e5deb2c5154a7b9629bc5aa48c766b643c1a6750d16f865a82c5fc", + "sha256:08d74b184f9ab6289b87b19fe6a6d1a97fbfea84b8a3e745e87a5de3029bf944", + "sha256:0abeee75434e2ee2d142d650d1e54ac1f8b01e6e6abdde8ffd6eeac6e9c38e20", + "sha256:154bf5c93d79558b44e5b50cc354aa0459e518e83677791e6adb0b039b7aa6a7", + "sha256:17c6d2155e2423f7e79e3bb18151c686d40db42d8645e7977442170c360194d4", + "sha256:1805d5901779662d599d0e2e4159d8a82c0b05faa86ef9222bf974572286b2b6", + "sha256:19ba472b9606c36716062c023afa2484d1e4220548751bda14f725a7de17b4f6", + "sha256:19e515b78c3fc1039dd7da0a33c28c3154458f947f4dc198d3c72db2b6b5dc93", + "sha256:1d54f74f40b1f7aaa595a02ff42ef38ca654b1469bef7d52867da474243cc633", + "sha256:207c82978115baa1fd8d706d720b4a4d2b0913df1c78c85ba73fe6c5804505f0", + "sha256:2625f03b105328729f9450c8badda34d5243231eef6535f80064d57035738360", + "sha256:27bba383e8c5231cd559affe169ca0b96ec78d39909ffd817f28b166d7ddd4d8", + "sha256:2c3caec4ec5cd1d18e5dd6ae5194d24ed12785212a90b37f5f7f06b8bedd7139", + "sha256:2cc7c1a47f3a63282ab0f422d90ddac4aa3034e39fc66a559ab93041e6505da7", + "sha256:2fc24a329a717f9e2448f8cd1f960f9dac4e45b6224d60734edeb67499bab03a", + "sha256:312fe69b4fe1ffbe76520a7676b1e5ac06ddf7826d764cc10265c3b53f96dbe9", + "sha256:32b7daaa3e9389db3695964ce8e566e3413b0c43e3394c05e4b243a4cd7bef26", + "sha256:338dee44b0cef8b70fd2ef54b4e09bb1b97fc6c3a58fea5db6cc083fd9fc2724", + "sha256:352a88dc7892f1da66b6027af06a2e7e5d53fe05924cc2cfc56495b586a10b72", + "sha256:35b2b771b13eee8729a5049c976197ff58a27a3829c018a04341bcf1ae409b2b", + "sha256:38e14fb4e370885c4ecd734f093a2225ee52dc384b86fa55fe3f74638b2cfb09", + "sha256:3c20f05e8e3d4fc76875fc9cb8cf24b90a63f5a1b4c5b9273f0e8225e169b100", + "sha256:3dd3cd86e1db5aadd334e011eba4e29d37a104b403e8ca24dcd6703c68ca55b3", + "sha256:489bdfe1abd0406eba6b3bb4fdc87c7fa40f1031de073d0cfb744634cc8fa261", + "sha256:48c2faaa8adfacefcbfdb5f2e2e7bdad081e5ace8d182e5f4ade971f128e6bb3", 
+ "sha256:4a98a1f0552b5f227a3d6422dbd61bc6f30db170939bd87ed14f3c339aa6c7c9", + "sha256:4adec039b8e2928983f885c53b7cc4cda8965b62b6596501a0308d2703f8af1b", + "sha256:4e0ee01ad8260184db21468a6e1c37afa0529acc12c3a697ee498d3c2c4dcaf3", + "sha256:51584acc5916212e1bf45edd17f3a6b05fe0cbb40482d25e619f824dccb679de", + "sha256:531796fb842b53f2695e94dc338929e9f9dbf473b64710c28af5a160b2a8927d", + "sha256:5463c47c08630007dc0fe99fb480ea4f34a89712410592380425a9b4e1611d8e", + "sha256:5c45a639e93a0c5d4b788b2613bd637468edd62f8f95ebc6fcc303d58ab3f0a8", + "sha256:6031b25fb1b06327b43d841f33842b383beba399884f8228a6bb3df3088485ff", + "sha256:607345bd5912aacc0c5a63d45a1f73fef29e697884f7e861094e443187c02be5", + "sha256:618916f5535784960f3ecf8111581f4ad31d347c3de66d02e728de460a46303c", + "sha256:636a15acc588f70fda1661234761f9ed9ad79ebed3f2125d44be0862708b666e", + "sha256:673fdbbf668dd958eff750e500495ef3f611e2ecc209464f661bc82e9838991e", + "sha256:6afd80f6c79893cfc0574956f78a0add8c76e3696f2d6a15bca2c66c415cf2d4", + "sha256:6b5ff7e1d63a8281654b5e2896d7f08799378e594f09cf3674e832ecaf396ce8", + "sha256:6c4c4c3f878df21faf5fac86eda32671c27889e13570645a9eea0a1abdd50922", + "sha256:6cd8098517c64a85e790657e7b1e509b9fe07487fd358e19431cb120f7d96338", + "sha256:6d1e42d2735d437e7e80bab4d78eb2e459af48c0a46e686ea35f690b93db792d", + "sha256:6e30ac5e329098903262dc5bdd7e2086e0256aa762cc8b744f9e7bf2a427d3f8", + "sha256:70a838f7754483bcdc830444952fd89645569e7452e3226de4a613a4c1793fb2", + "sha256:720edcb916df872d80f80a1cc5ea9058300b97721efda8651efcd938a9c70a72", + "sha256:732672fbc449bab754e0b15356c077cc31566df874964d4801ab14f71951ea80", + "sha256:740884bc62a5e2bbb31e584f5d23b32320fd75d79f916f15a788d527a5e83644", + "sha256:7700936ef9d006b7ef605dc53aa364da2de5a3aa65516a1f3ce73bf82ecfc7ae", + "sha256:7732770412bab81c5a9f6d20aeb60ae943a9b36dcd990d876a773526468e7163", + "sha256:7750569d9526199c5b97e5a9f8d96a13300950d910cf04a861d96f4273d5b104", + 
"sha256:7f1944ce16401aad1e3f7d312247b3d5de7981f634dc9dfe90da72b87d37887d", + "sha256:81c5196a790032e0fc2464c0b4ab95f8610f96f1f2fa3d4deacce6a79852da60", + "sha256:8352f48d511de5f973e4f2f9412736d7dea76c69faa6d36bcf885b50c758ab9a", + "sha256:8927638a4d4137a289e41d0fd631551e89fa346d6dbcfc31ad627557d03ceb6d", + "sha256:8c7672e9fba7425f79019db9945b16e308ed8bc89348c23d955c8c0540da0a07", + "sha256:8d2e182c9ee01135e11e9676e9a62dfad791a7a467738f06726872374a83db49", + "sha256:910e71711d1055b2768181efa0a17537b2622afeb0424116619817007f8a2b10", + "sha256:942695a206a58d2575033ff1e42b12b2aece98d6003c6bc739fbf33d1773b12f", + "sha256:9437ca26784120a279f3137ee080b0e717012c42921eb07861b412340f85bae2", + "sha256:967342e045564cef76dfcf1edb700b1e20838d83b1aa02ab313e6a497cf923b8", + "sha256:998125738de0158f088aef3cb264a34251908dd2e5d9966774fdab7402edfab7", + "sha256:9e6934d70dc50f9f8ea47081ceafdec09245fd9f6032669c3b45705dea096b88", + "sha256:a3d456ff2a6a4d2adcdf3c1c960a36f4fd2fec6e3b4902a42a384d17cf4e7a65", + "sha256:a7b28c5b066bca9a4eb4e2f2663012debe680f097979d880657f00e1c30875a0", + "sha256:a888e8bdb45916234b99da2d859566f1e8a1d2275a801bb8e4a9644e3c7e7909", + "sha256:aa3679e751408d75a0b4d8d26d6647b6d9326f5e35c00a7ccd82b78ef64f65f8", + "sha256:aaa71ee43a703c321906813bb252f69524f02aa05bf4eec85f0c41d5d62d0f4c", + "sha256:b646bf655b135ccf4522ed43d6902af37d3f5dbcf0da66c769a2b3938b9d8184", + "sha256:b906b5f58892813e5ba5c6056d6a5ad08f358ba49f046d910ad992196ea61397", + "sha256:b9bb1f182a97880f6078283b3505a707057c42bf55d8fca604f70dedfdc0772a", + "sha256:bd1105b50ede37461c1d51b9698c4f4be6e13e69a908ab7751e3807985fc0346", + "sha256:bf18932d0003c8c4d51a39f244231986ab23ee057d235a12b2684ea26a353590", + "sha256:c273e795e7a0f1fddd46e1e3cb8be15634c29ae8ff31c196debb620e1edb9333", + "sha256:c69882964516dc143083d3795cb508e806b09fc3800fd0d4cddc1df6c36e76bb", + "sha256:c827576e2fa017a081346dce87d532a5310241648eb3700af9a571a6e9fc7e74", + "sha256:cbfbea39ba64f5e53ae2915de36f130588bba71245b418060ec3330ebf85678e", 
+ "sha256:ce0bb20e3a11bd04461324a6a798af34d503f8d6f1aa3d2aa8901ceaf039176d", + "sha256:d0cee71bc618cd93716f3c1bf56653740d2d13ddbd47673efa8bf41435a60daa", + "sha256:d21be4770ff4e08698e1e8e0bce06edb6ea0626e7c8f560bc08222880aca6a6f", + "sha256:d31dea506d718693b6b2cffc0648a8929bdc51c70a311b2770f09611caa10d53", + "sha256:d44607f98caa2961bab4fa3c4309724b185b464cdc3ba6f3d7340bac3ec97cc1", + "sha256:d58ad6317d188c43750cb76e9deacf6051d0f884d87dc6518e0280438648a9ac", + "sha256:d70129cef4a8d979caa37e7fe957202e7eee8ea02c5e16455bc9808a59c6b2f0", + "sha256:d85164315bd68c0806768dc6bb0429c6f95c354f87485ee3593c4f6b14def2bd", + "sha256:d960de62227635d2e61068f42a6cb6aae91a7fe00fca0e3aeed17667c8a34611", + "sha256:dc48b479d540770c811fbd1eb9ba2bb66951863e448efec2e2c102625328e92f", + "sha256:e1735502458621921cee039c47318cb90b51d532c2766593be6207eec53e5c4c", + "sha256:e2be6e9dd4111d5b31ba3b74d17da54a8319d8168890fbaea4b9e5c3de630ae5", + "sha256:e4c39ad2f512b4041343ea3c7894339e4ca7839ac38ca83d68a832fc8b3748ab", + "sha256:ed402d6153c5d519a0faf1bb69898e97fb31613b49da27a84a13935ea9164dfc", + "sha256:ee17cd26b97d537af8f33635ef38be873073d516fd425e80559f4585a7b90c43", + "sha256:f3027be483868c99b4985fda802a57a67fdf30c5d9a50338d9db646d590198da", + "sha256:f5bab211605d91db0e2995a17b5c6ee5edec1270e46223e513eaa20da20076ac", + "sha256:f6f8e3fecca256fefc91bb6765a693d96692459d7d4c644660a9fff32e517843", + "sha256:f7afbfee1157e0f9376c00bb232e80a60e59ed716e3211a80cb8506550671e6e", + "sha256:fa242ac1ff583e4ec7771141606aafc92b361cd90a05c30d93e343a0c2d82a89", + "sha256:fab6ce90574645a0d6c58890e9bcaac8d94dff54fb51c69e5522a7358b80ab64" ], "markers": "python_version >= '3.8'", - "version": "==0.19.1" + "version": "==0.18.1" }, "send2trash": { "hashes": [ @@ -3840,12 +3889,12 @@ }, "setuptools": { "hashes": [ - "sha256:5a03e1860cf56bb6ef48ce186b0e557fdba433237481a9a625176c2831be15d1", - "sha256:8d243eff56d095e5817f796ede6ae32941278f542e0f941867cc05ae52b162ec" + 
"sha256:b8b8060bb426838fbe942479c90296ce976249451118ef566a5a0b7d8b78fb05", + "sha256:bd63e505105011b25c3c11f753f7e3b8465ea739efddaccef8f0efac2137bac1" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==72.1.0" + "version": "==70.2.0" }, "six": { "hashes": [ @@ -3928,11 +3977,11 @@ }, "sqlparse": { "hashes": [ - "sha256:773dcbf9a5ab44a090f3441e2180efe2560220203dc2f8c0b0fa141e18b505e4", - "sha256:bb6b4df465655ef332548e24f08e205afc81b9ab86cb1c45657a7ff173a3a00e" + "sha256:714d0a4932c059d16189f58ef5411ec2287a4360f17cdd0edd2d09d4c5087c93", + "sha256:c204494cd97479d0e39f28c93d46c0b2d5959c7b9ab904762ea6c7af211c8663" ], "markers": "python_version >= '3.8'", - "version": "==0.5.1" + "version": "==0.5.0" }, "stack-data": { "hashes": [ @@ -4007,28 +4056,28 @@ }, "types-requests": { "hashes": [ - "sha256:90c079ff05e549f6bf50e02e910210b98b8ff1ebdd18e19c873cd237737c1358", - "sha256:f754283e152c752e46e70942fa2a146b5bc70393522257bb85bd1ef7e019dcc3" + "sha256:97bac6b54b5bd4cf91d407e62f0932a74821bc2211f22116d9ee1dd643826caf", + "sha256:ed5e8a412fcc39159d6319385c009d642845f250c63902718f605cd90faade31" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==2.32.0.20240712" + "version": "==2.32.0.20240622" }, "types-setuptools": { "hashes": [ - "sha256:85ba28e9461bb1be86ebba4db0f1c2408f2b11115b1966334ea9dc464e29303e", - "sha256:a7775376f36e0ff09bcad236bf265777590a66b11623e48c20bfc30f1444ea36" + "sha256:2f8d28d16ca1607080f9fdf19595bd49c942884b2bbd6529c9b8a9a8fc8db911", + "sha256:6b892d5441c2ed58dd255724516e3df1db54892fb20597599aea66d04c3e4d7f" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==71.1.0.20240726" + "version": "==70.2.0.20240704" }, "typing-extensions": { "hashes": [ "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d", "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8" ], - "markers": "python_version >= '3.8'", + "markers": "python_version < '3.12'", "version": 
"==4.12.2" }, "typing-inspect": { diff --git a/config/middlewares.py b/config/middlewares.py index ca9e08262..aaaeafc12 100644 --- a/config/middlewares.py +++ b/config/middlewares.py @@ -51,8 +51,6 @@ def process_request(self, request): def process_response(self, request, response): """Replace nonce placeholder by its true value.""" - if response._csp_exempt: - return response response = super().process_response(request, response) if isinstance(response, HttpResponse): content = response.content.decode("utf-8") diff --git a/config/settings.py b/config/settings.py index a8f6d3734..20a077355 100644 --- a/config/settings.py +++ b/config/settings.py @@ -262,8 +262,6 @@ "BACKEND": "config.cache_backends.RedisDummyCache", }, } - # to use qgis locally : - X_FRAME_OPTIONS = "ALLOW" else: CACHES = { "default": { diff --git a/public_data/urls.py b/public_data/urls.py index 911a165b7..6216a1d0d 100644 --- a/public_data/urls.py +++ b/public_data/urls.py @@ -10,11 +10,6 @@ path("matrix", views.DisplayMatrix.as_view(), name="matrix"), path("grid", views.grid_view.as_view(), name="grid"), path("search-land", views.SearchLandApiView.as_view({"post": "post"}), name="search-land"), - path( - "ocsge/zones-artificielle-v2///", - views.ArtificialAreaMVTView.as_view(), - name="ArtificialAreaMVTView", - ), ] diff --git a/public_data/views.py b/public_data/views.py index 544b41cbc..c3c0c1c32 100644 --- a/public_data/views.py +++ b/public_data/views.py @@ -6,13 +6,12 @@ from django.http import HttpResponse from django.urls import reverse_lazy from django.views.generic import TemplateView -from rest_framework import renderers, viewsets +from rest_framework import viewsets from rest_framework.decorators import action from rest_framework.response import Response from rest_framework.views import APIView from rest_framework.viewsets import GenericViewSet from rest_framework_gis import filters -from vectortiles.postgis.views import MVTView from public_data import models, serializers from 
public_data.models.administration import Land @@ -350,48 +349,6 @@ def get_sql_where(self): return "WHERE o.year = %s" -class MVTRenderer(renderers.BaseRenderer): - media_type = "application/vnd.mapbox-vector-tile" - format = "pbf" - - def render(self, data, accepted_media_type=None, renderer_context=None): - return data - - -class ArtificialAreaMVTView(MVTView, APIView): - model = models.ArtificialArea - vector_tile_layer_name = "artificial_area" # name for data layer in vector tile - vector_tile_fields = ("year", "city") # model fields or queryset annotates to include in tile - vector_tile_content_type = "application/x-protobuf" # if you want to use custom content_type - vector_tile_geom_name = "mpoly" # geom field to consider in qs - renderer_classes = (MVTRenderer,) - accepted_renderer = MVTRenderer - - def get_vector_tile_queryset(self): - year = self.request.GET.get("year") - city = str(self.request.GET.get("city")) - - if not year: - raise ValueError("year parameter must be set") - if city and year: - return models.ArtificialArea.objects.filter(city=city, year=year) - if city: - return models.ArtificialArea.objects.filter(city=city) - if year: - return models.ArtificialArea.objects.filter(year=year) - - def get(self, request, *args, **kwargs): - response = Response( - self.get_tile( - kwargs.get("x"), - kwargs.get("y"), - kwargs.get("z"), - ) - ) - response._csp_exempt = True - return response - - class ArtificialAreaViewSet(OnlyBoundingBoxMixin, ZoomSimplificationMixin, OptimizedMixins, DataViewSet): queryset = models.ArtificialArea.objects.all() serializer_class = serializers.OcsgeDiffSerializer From 776e9813e6e6a39b7cee21bf1f0bf23b226fe11a Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Sun, 18 Aug 2024 18:31:16 +0200 Subject: [PATCH 17/99] feat(airflow): prepare zone_urba table for ingestion from airflow --- airflow/dags/gpu.py | 4 +- airflow/dags/ocsge.py | 79 +++++++------------ .../models/gpu/for_app/app_zoneurba.sql | 16 ++++ airflow/sql/sparte/models/gpu/schema.yml | 9 ++- .../sparte/models/gpu/zonage_urbanisme.sql | 55 ++++++++----- .../sql/sparte/models/ocsge/difference.sql | 4 +- .../ocsge/for_app/app_artifareazoneurba.sql | 14 ++++ .../occupation_du_sol_zonage_urbanisme.sql | 21 ++++- .../sparte/models/ocsge/occupation_du_sol.sql | 4 +- .../sparte/models/ocsge/zone_construite.sql | 4 +- project/views/RNUPackagesNoticeView.py | 24 ++++++ .../0188_remove_zoneurba_id_zoneurba_uuid.py | 22 ++++++ .../migrations/0189_remove_zoneurba_gid.py | 16 ++++ ...0_remove_zoneurba_origin_insee_and_more.py | 20 +++++ .../0191_remove_zoneurba_destdomi.py | 16 ++++ ...one_remove_zoneurba_lib_idzone_and_more.py | 28 +++++++ ...a_public_data_insee_3f872f_idx_and_more.py | 20 +++++ .../0194_artifareazoneurba_departement.py | 18 +++++ .../0195_remove_zoneurba_uuid_zoneurba_id.py | 22 ++++++ ...fareazoneurba_unique_zone_year_and_more.py | 28 +++++++ .../0197_artifareazoneurba_zone_urba.py | 21 +++++ public_data/models/gpu.py | 23 +----- 22 files changed, 367 insertions(+), 101 deletions(-) create mode 100644 airflow/sql/sparte/models/gpu/for_app/app_zoneurba.sql create mode 100644 airflow/sql/sparte/models/ocsge/for_app/app_artifareazoneurba.sql create mode 100644 project/views/RNUPackagesNoticeView.py create mode 100644 public_data/migrations/0188_remove_zoneurba_id_zoneurba_uuid.py create mode 100644 public_data/migrations/0189_remove_zoneurba_gid.py create mode 100644 public_data/migrations/0190_remove_zoneurba_origin_insee_and_more.py create mode 100644 public_data/migrations/0191_remove_zoneurba_destdomi.py create mode 100644 public_data/migrations/0192_remove_zoneurba_idzone_remove_zoneurba_lib_idzone_and_more.py create mode 100644 
public_data/migrations/0193_remove_zoneurba_public_data_insee_3f872f_idx_and_more.py create mode 100644 public_data/migrations/0194_artifareazoneurba_departement.py create mode 100644 public_data/migrations/0195_remove_zoneurba_uuid_zoneurba_id.py create mode 100644 public_data/migrations/0196_remove_artifareazoneurba_unique_zone_year_and_more.py create mode 100644 public_data/migrations/0197_artifareazoneurba_zone_urba.py diff --git a/airflow/dags/gpu.py b/airflow/dags/gpu.py index 23f188755..c649a6eb4 100644 --- a/airflow/dags/gpu.py +++ b/airflow/dags/gpu.py @@ -37,7 +37,7 @@ def ingest(path_on_bucket: str) -> str: ST_AsText(geom) || CastToText(gpu_timestamp) ) AS checksum, gpu_doc_id, - gpu_status + gpu_status, gpu_timestamp, partition, libelle, @@ -74,7 +74,7 @@ def ingest(path_on_bucket: str) -> str: "-a_srs", "EPSG:4236", "-nln", - "zone_urba", + "gpu_zone_urba", "-nlt", "MULTIPOLYGON", "-nlt", diff --git a/airflow/dags/ocsge.py b/airflow/dags/ocsge.py index 98c533f1b..957285169 100644 --- a/airflow/dags/ocsge.py +++ b/airflow/dags/ocsge.py @@ -31,15 +31,14 @@ def copy_table_from_dw_to_app( - source_sql: str, - destination_table_name: str, + from_table: str, + to_table: str, ): ogr = ogr2ogr() ogr.config_options = {"PG_USE_COPY": "YES"} - ogr.set_input(Container().gdal_dw_conn(schema="public_ocsge")) - ogr.set_sql(source_sql) - ogr.set_output(Container().gdal_app_conn(), table_name=destination_table_name) - ogr.set_output_mode(layer_mode=ogr.MODE_LAYER_APPEND) + ogr.set_input(Container().gdal_dw_conn(schema="public_ocsge"), table_name=from_table) + ogr.set_output(Container().gdal_app_conn(), table_name=to_table) + ogr.set_output_mode(layer_mode=ogr.MODE_LAYER_OVERWRITE) ogr.execute() @@ -153,7 +152,7 @@ def get_paths_from_directory(directory: str) -> list[tuple[str, str]]: ( 2018, 2021, - ): "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D075_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D075_2018-2021.7z", # noqa: E501 + ): 
"https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D075_DIFF_2018-2021/OCS-GE_2-0__SHP_LAMB93_D075_DIFF_2018-2021.7z", # noqa: E501 }, }, "32": { @@ -181,12 +180,16 @@ def get_paths_from_directory(directory: str) -> list[tuple[str, str]]: "normalization_sql": ocsge_occupation_du_sol_normalization_sql, "delete_on_dwt": delete_occupation_du_sol_in_dw_sql, "delete_on_app": delete_occupation_du_sol_in_app_sql, - "mapping": { - "public_ocsge.app_ocsge": { + "mapping": [ + { + "from_table": "public_ocsge.app_ocsge", "to_table": "public.public_data_ocsge", - "select": lambda departement, years: f"SELECT * FROM public_ocsge.app_ocsge WHERE departement = '{departement}' AND year = {years[0]}", # noqa: E501 }, - }, + { + "from_table": "public_ocsge.app_artificialarea", + "to_table": "public.public_data_artificialarea", + }, + ], }, SourceName.ZONE_CONSTRUITE: { "shapefile_name": "ZONE_CONSTRUITE", @@ -198,12 +201,12 @@ def get_paths_from_directory(directory: str) -> list[tuple[str, str]]: "normalization_sql": ocsge_zone_construite_normalization_sql, "delete_on_dwt": delete_zone_construite_in_dw_sql, "delete_on_app": delete_zone_construite_in_app_sql, - "mapping": { - "public_ocsge.app_zoneconstruite": { + "mapping": [ + { + "from_table": "public_ocsge.app_zoneconstruite", "to_table": "public.public_data_zoneconstruite", - "select": lambda departement, years: f"SELECT * FROM public_ocsge.app_zoneconstruite WHERE departement = '{departement}' AND year = {years[0]}", # noqa: E501 - }, - }, + } + ], }, SourceName.DIFFERENCE: { "shapefile_name": "DIFFERENCE", @@ -216,12 +219,16 @@ def get_paths_from_directory(directory: str) -> list[tuple[str, str]]: "normalization_sql": ocsge_diff_normalization_sql, "delete_on_dwt": delete_difference_in_dw_sql, "delete_on_app": delete_difference_in_app_sql, - "mapping": { - "public_ocsge.app_ocsgediff": { + "mapping": [ + { + "from_table": "public_ocsge.app_ocsgediff", "to_table": "public.public_data_ocsgediff", - 
"select": lambda departement, years: f"SELECT * FROM public_ocsge.app_ocsgediff WHERE departement = '{departement}' AND year_old = {years[0]} AND year_new = {years[1]}", # noqa: E501 }, - }, + { + "from_table": "public_ocsge.app_communediff", + "to_table": "public.public_data_communediff", + }, + ], }, } @@ -448,38 +455,14 @@ def delete_previously_loaded_data_in_dw(**context) -> dict: return results - @task.python(trigger_rule="all_success") - def delete_previously_loaded_data_in_app(**context) -> str: - dataset = context["params"]["dataset"] - departement = context["params"]["departement"] - years = context["params"]["years"] - - conn = Container().psycopg2_app_conn() - cur = conn.cursor() - - results = {} - - for vars in vars_dataset[dataset]: - cur.execute(vars["delete_on_app"](departement, years)) - results[vars["app_table_names"]] = cur.rowcount - - conn.commit() - conn.close() - - return str(results) - @task.python(trigger_rule="all_success") def load_data_in_app(**context): dataset = context["params"]["dataset"] - departement = context["params"]["departement"] - years = context["params"]["years"] - for vars in vars_dataset[dataset]: - for from_table in vars["mapping"]: - values = vars["mapping"][from_table] + for mapping in vars["mapping"]: copy_table_from_dw_to_app( - source_sql=values["select"](departement, years), - destination_table_name=values["to_table"], + from_table=mapping["from_table"], + to_table=mapping["to_table"], ) url = get_url() @@ -490,7 +473,6 @@ def load_data_in_app(**context): test_result_staging = db_test_ocsge_staging() loaded_date = ingest_ocsge(path=path) dbt_run_ocsge_result = dbt_run_ocsge() - delete_app = delete_previously_loaded_data_in_app() load_app = load_data_in_app() ( @@ -502,7 +484,6 @@ def load_data_in_app(**context): >> delete_dw >> loaded_date >> dbt_run_ocsge_result - >> delete_app >> load_app ) diff --git a/airflow/sql/sparte/models/gpu/for_app/app_zoneurba.sql b/airflow/sql/sparte/models/gpu/for_app/app_zoneurba.sql 
new file mode 100644 index 000000000..cb4cbb362 --- /dev/null +++ b/airflow/sql/sparte/models/gpu/for_app/app_zoneurba.sql @@ -0,0 +1,16 @@ +{{ config(materialized='table') }} + +SELECT + checksum as id, + libelle, + libelle_long as libelong, + id_document_urbanisme as idurba, + type_zone as typezone, + partition, + date_approbation::text as datappro, + date_validation::text as datvalid, + surface as area, + ST_Transform(geom, 4326) as mpoly, + 4326 AS srid_source +FROM + {{ ref('zonage_urbanisme') }} diff --git a/airflow/sql/sparte/models/gpu/schema.yml b/airflow/sql/sparte/models/gpu/schema.yml index b748b3245..c2ce33386 100644 --- a/airflow/sql/sparte/models/gpu/schema.yml +++ b/airflow/sql/sparte/models/gpu/schema.yml @@ -3,8 +3,15 @@ version: 2 models: - name: zonage_urbanisme + columns: + - name: gpu_timestamp + data_tests: + - not_null + - name: geom + data_tests: + - unique sources: - name: public tables: - - name: zone_urba + - name: gpu_zone_urba diff --git a/airflow/sql/sparte/models/gpu/zonage_urbanisme.sql b/airflow/sql/sparte/models/gpu/zonage_urbanisme.sql index b351035b2..688de9734 100644 --- a/airflow/sql/sparte/models/gpu/zonage_urbanisme.sql +++ b/airflow/sql/sparte/models/gpu/zonage_urbanisme.sql @@ -5,27 +5,40 @@ indexes=[ {'columns': ['geom'], 'type': 'gist'}, {'columns': ['libelle'], 'type': 'btree'}, - {'columns': ['type_zone'], 'type': 'btree'} + {'columns': ['type_zone'], 'type': 'btree'}, + {'columns': ['checksum'], 'type': 'btree'} ]) }} - -SELECT - gpu_doc_id, - gpu_status, - gpu_timestamp, - partition, - libelle, - libelong as libelle_long, - typezone as type_zone, - destdomi as destination_dominante, - nomfic as nom_fichier, - urlfic as url_fichier, - insee as commune_code, - datappro as date_approbation, - datvalid as date_validation, - idurba as id_document_urbanisme, - gen_random_uuid() as uuid, - ST_MakeValid(ST_transform(geom, 2154)) as geom - FROM - {{ source('public', 'zone_urba') }} +SELECT *, ST_Area(geom) as surface FROM ( + 
SELECT + gpu_doc_id, + gpu_status, + gpu_timestamp::timestamptz as gpu_timestamp, + partition, + libelle, + NULLIF(libelong, '') as libelle_long, + typezone as type_zone, + NULLIF(destdomi, '') as destination_dominante, + nomfic as nom_fichier, + NULLIF(urlfic, '') as url_fichier, + NULLIF(insee, '') as commune_code, + TO_DATE(NULLIF(datappro, ''), 'YYYYMMDD') as date_approbation, + TO_DATE(NULLIF(datvalid, ''), 'YYYYMMDD') as date_validation, + NULLIF(idurba, '') as id_document_urbanisme, + checksum, + row_number() OVER (PARTITION BY geom ORDER BY gpu_timestamp), + CASE + WHEN ST_IsValid(geom) THEN ST_transform(geom, 2154) + ELSE st_multi( + st_collectionextract( + st_makevalid( + ST_transform(geom, 2154) + ), + 3) + ) + END as geom + FROM + {{ source('public', 'gpu_zone_urba') }} +) as foo +WHERE row_number = 1 diff --git a/airflow/sql/sparte/models/ocsge/difference.sql b/airflow/sql/sparte/models/ocsge/difference.sql index 783f7cf26..741c2f0d8 100644 --- a/airflow/sql/sparte/models/ocsge/difference.sql +++ b/airflow/sql/sparte/models/ocsge/difference.sql @@ -7,7 +7,7 @@ }} SELECT - foo.loaded_date, + to_timestamp(foo.loaded_date) as loaded_date, foo.year_old, foo.year_new, cs_new, @@ -60,7 +60,7 @@ FROM ( {{ is_impermeable('cs_old') }} AS old_is_imper, {{ is_artificial('cs_new', 'us_new') }} AS new_is_artif, {{ is_impermeable('cs_new') }} AS new_is_imper, - ocsge.uuid + ocsge.uuid::uuid FROM {{ source('public', 'ocsge_difference') }} AS ocsge WHERE diff --git a/airflow/sql/sparte/models/ocsge/for_app/app_artifareazoneurba.sql b/airflow/sql/sparte/models/ocsge/for_app/app_artifareazoneurba.sql new file mode 100644 index 000000000..7ca3f5bbb --- /dev/null +++ b/airflow/sql/sparte/models/ocsge/for_app/app_artifareazoneurba.sql @@ -0,0 +1,14 @@ +{{ config(materialized='table') }} + +SELECT + zonage_checksum as zone_urba, + year, + max(departement), + ST_Area(ST_Transform(ST_Union(geom), 2154)) as area +FROM + {{ ref('occupation_du_sol_zonage_urbanisme') }} +WHERE + 
is_artificial = true +GROUP BY + zonage_checksum, + year diff --git a/airflow/sql/sparte/models/ocsge/intersected/occupation_du_sol_zonage_urbanisme.sql b/airflow/sql/sparte/models/ocsge/intersected/occupation_du_sol_zonage_urbanisme.sql index b08206f96..d391f9e85 100644 --- a/airflow/sql/sparte/models/ocsge/intersected/occupation_du_sol_zonage_urbanisme.sql +++ b/airflow/sql/sparte/models/ocsge/intersected/occupation_du_sol_zonage_urbanisme.sql @@ -1,14 +1,25 @@ {{ config( materialized='incremental', - post_hook='DELETE FROM {{ this }} WHERE uuid not in (SELECT uuid FROM {{ ref("occupation_du_sol") }} )' + indexes=[ + {'columns': ['departement'], 'type': 'btree'}, + {'columns': ['year'], 'type': 'btree'}, + {'columns': ['uuid'], 'type': 'btree'}, + {'columns': ['zonage_checksum'], 'type': 'btree'} + ], + post_hook=[ + 'DELETE FROM {{ this }} WHERE uuid not in (SELECT uuid FROM {{ ref("occupation_du_sol") }} )', + 'DELETE FROM {{ this }} WHERE zonage_checksum not in (SELECT checksum FROM {{ ref("zonage_urbanisme") }} )' + ] ) }} SELECT *, ST_Area(geom) as surface FROM ( SELECT zonage.libelle AS zonage_libelle, - ocsge.loaded_date, + zonage.checksum AS zonage_checksum, + zonage.gpu_timestamp AS zonage_gpu_timestamp, + ocsge.loaded_date AS ocsge_loaded_date, ocsge.year, ocsge.departement, ocsge.code_cs, @@ -25,7 +36,11 @@ SELECT *, ST_Area(geom) as surface FROM ( ST_Intersects(zonage.geom, ocsge.geom) {% if is_incremental() %} - WHERE ocsge.uuid not in (SELECT bar.uuid from {{ this }} as bar) + WHERE ocsge.loaded_date > + (SELECT max(foo.ocsge_loaded_date) FROM {{ this }} as foo) + OR + zonage.gpu_timestamp > + (SELECT max(bar.zonage_gpu_timestamp) FROM {{ this }} as bar) {% endif %} ) as foo diff --git a/airflow/sql/sparte/models/ocsge/occupation_du_sol.sql b/airflow/sql/sparte/models/ocsge/occupation_du_sol.sql index cb15e281c..e689701d7 100644 --- a/airflow/sql/sparte/models/ocsge/occupation_du_sol.sql +++ b/airflow/sql/sparte/models/ocsge/occupation_du_sol.sql @@ 
-13,7 +13,7 @@ }} SELECT - loaded_date, + to_timestamp(loaded_date) as loaded_date, id, code_cs, code_us, @@ -22,7 +22,7 @@ SELECT ST_area(geom) AS surface, {{ is_impermeable('code_cs') }} as is_impermeable, {{ is_artificial('code_cs', 'code_us') }} as is_artificial, - uuid, + uuid::uuid, ST_MakeValid(geom) AS geom FROM {{ source('public', 'ocsge_occupation_du_sol') }} AS ocsge diff --git a/airflow/sql/sparte/models/ocsge/zone_construite.sql b/airflow/sql/sparte/models/ocsge/zone_construite.sql index b324df420..d162c057d 100644 --- a/airflow/sql/sparte/models/ocsge/zone_construite.sql +++ b/airflow/sql/sparte/models/ocsge/zone_construite.sql @@ -6,12 +6,12 @@ }} SELECT - loaded_date, + to_timestamp(loaded_date) as loaded_date, id, year, departement, ST_MakeValid(geom) AS geom, ST_Area(geom) as surface, - uuid + uuid::uuid FROM {{ source('public', 'ocsge_zone_construite') }} as ocsge diff --git a/project/views/RNUPackagesNoticeView.py b/project/views/RNUPackagesNoticeView.py new file mode 100644 index 000000000..07d8385a7 --- /dev/null +++ b/project/views/RNUPackagesNoticeView.py @@ -0,0 +1,24 @@ +from typing import Any + +from django.db.models.query import QuerySet +from django.views.generic import DetailView, TemplateView + +from project.models import RNUPackage + + +class RNUPackagesNoticeView(TemplateView, DetailView): + template_name = "project/rnu_package_notice.html" + breadcrumbs_title = "__" + pk_url_kwarg = "departement_official_id" + model = RNUPackage + + def get_object(self) -> QuerySet[Any]: + departement_official_id = self.kwargs.get("departement_official_id") + return RNUPackage.objects.get(departement_official_id=departement_official_id) + + def get_context_data(self, **kwargs): + data: RNUPackage = self.get_object() + return { + "object": data, + "communes": data.communes, + } diff --git a/public_data/migrations/0188_remove_zoneurba_id_zoneurba_uuid.py b/public_data/migrations/0188_remove_zoneurba_id_zoneurba_uuid.py new file mode 100644 index 
000000000..80d115b40 --- /dev/null +++ b/public_data/migrations/0188_remove_zoneurba_id_zoneurba_uuid.py @@ -0,0 +1,22 @@ +# Generated by Django 4.2.13 on 2024-08-17 19:25 + +from django.db import migrations, models +import uuid + + +class Migration(migrations.Migration): + dependencies = [ + ("public_data", "0187_auto_20240703_1704"), + ] + + operations = [ + migrations.RemoveField( + model_name="zoneurba", + name="id", + ), + migrations.AddField( + model_name="zoneurba", + name="uuid", + field=models.UUIDField(default=uuid.uuid4, primary_key=True, serialize=False, verbose_name="UUID"), + ), + ] diff --git a/public_data/migrations/0189_remove_zoneurba_gid.py b/public_data/migrations/0189_remove_zoneurba_gid.py new file mode 100644 index 000000000..3467055eb --- /dev/null +++ b/public_data/migrations/0189_remove_zoneurba_gid.py @@ -0,0 +1,16 @@ +# Generated by Django 4.2.13 on 2024-08-17 19:32 + +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("public_data", "0188_remove_zoneurba_id_zoneurba_uuid"), + ] + + operations = [ + migrations.RemoveField( + model_name="zoneurba", + name="gid", + ), + ] diff --git a/public_data/migrations/0190_remove_zoneurba_origin_insee_and_more.py b/public_data/migrations/0190_remove_zoneurba_origin_insee_and_more.py new file mode 100644 index 000000000..11b7304e8 --- /dev/null +++ b/public_data/migrations/0190_remove_zoneurba_origin_insee_and_more.py @@ -0,0 +1,20 @@ +# Generated by Django 4.2.13 on 2024-08-17 19:39 + +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("public_data", "0189_remove_zoneurba_gid"), + ] + + operations = [ + migrations.RemoveField( + model_name="zoneurba", + name="origin_insee", + ), + migrations.RemoveField( + model_name="zoneurba", + name="origin_typezone", + ), + ] diff --git a/public_data/migrations/0191_remove_zoneurba_destdomi.py b/public_data/migrations/0191_remove_zoneurba_destdomi.py new file mode 
100644 index 000000000..7462d3e76 --- /dev/null +++ b/public_data/migrations/0191_remove_zoneurba_destdomi.py @@ -0,0 +1,16 @@ +# Generated by Django 4.2.13 on 2024-08-17 19:42 + +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("public_data", "0190_remove_zoneurba_origin_insee_and_more"), + ] + + operations = [ + migrations.RemoveField( + model_name="zoneurba", + name="destdomi", + ), + ] diff --git a/public_data/migrations/0192_remove_zoneurba_idzone_remove_zoneurba_lib_idzone_and_more.py b/public_data/migrations/0192_remove_zoneurba_idzone_remove_zoneurba_lib_idzone_and_more.py new file mode 100644 index 000000000..b7659b0da --- /dev/null +++ b/public_data/migrations/0192_remove_zoneurba_idzone_remove_zoneurba_lib_idzone_and_more.py @@ -0,0 +1,28 @@ +# Generated by Django 4.2.13 on 2024-08-17 19:45 + +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("public_data", "0191_remove_zoneurba_destdomi"), + ] + + operations = [ + migrations.RemoveField( + model_name="zoneurba", + name="idzone", + ), + migrations.RemoveField( + model_name="zoneurba", + name="lib_idzone", + ), + migrations.RemoveField( + model_name="zoneurba", + name="nomfic", + ), + migrations.RemoveField( + model_name="zoneurba", + name="urlfic", + ), + ] diff --git a/public_data/migrations/0193_remove_zoneurba_public_data_insee_3f872f_idx_and_more.py b/public_data/migrations/0193_remove_zoneurba_public_data_insee_3f872f_idx_and_more.py new file mode 100644 index 000000000..3057cfa06 --- /dev/null +++ b/public_data/migrations/0193_remove_zoneurba_public_data_insee_3f872f_idx_and_more.py @@ -0,0 +1,20 @@ +# Generated by Django 4.2.13 on 2024-08-17 19:50 + +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("public_data", "0192_remove_zoneurba_idzone_remove_zoneurba_lib_idzone_and_more"), + ] + + operations = [ + migrations.RemoveIndex( + 
model_name="zoneurba", + name="public_data_insee_3f872f_idx", + ), + migrations.RemoveField( + model_name="zoneurba", + name="insee", + ), + ] diff --git a/public_data/migrations/0194_artifareazoneurba_departement.py b/public_data/migrations/0194_artifareazoneurba_departement.py new file mode 100644 index 000000000..650ce4945 --- /dev/null +++ b/public_data/migrations/0194_artifareazoneurba_departement.py @@ -0,0 +1,18 @@ +# Generated by Django 4.2.13 on 2024-08-17 19:50 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("public_data", "0193_remove_zoneurba_public_data_insee_3f872f_idx_and_more"), + ] + + operations = [ + migrations.AddField( + model_name="artifareazoneurba", + name="departement", + field=models.CharField(default="", max_length=3, verbose_name="Département"), + preserve_default=False, + ), + ] diff --git a/public_data/migrations/0195_remove_zoneurba_uuid_zoneurba_id.py b/public_data/migrations/0195_remove_zoneurba_uuid_zoneurba_id.py new file mode 100644 index 000000000..bae4902e1 --- /dev/null +++ b/public_data/migrations/0195_remove_zoneurba_uuid_zoneurba_id.py @@ -0,0 +1,22 @@ +# Generated by Django 4.2.13 on 2024-08-17 19:55 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("public_data", "0194_artifareazoneurba_departement"), + ] + + operations = [ + migrations.RemoveField( + model_name="zoneurba", + name="uuid", + ), + migrations.AddField( + model_name="zoneurba", + name="id", + field=models.TextField(default="", primary_key=True, serialize=False, verbose_name="id"), + preserve_default=False, + ), + ] diff --git a/public_data/migrations/0196_remove_artifareazoneurba_unique_zone_year_and_more.py b/public_data/migrations/0196_remove_artifareazoneurba_unique_zone_year_and_more.py new file mode 100644 index 000000000..eadd11a67 --- /dev/null +++ b/public_data/migrations/0196_remove_artifareazoneurba_unique_zone_year_and_more.py 
@@ -0,0 +1,28 @@ +# Generated by Django 4.2.13 on 2024-08-18 12:44 + +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("public_data", "0195_remove_zoneurba_uuid_zoneurba_id"), + ] + + operations = [ + migrations.RemoveConstraint( + model_name="artifareazoneurba", + name="unique_zone_year", + ), + migrations.RemoveIndex( + model_name="artifareazoneurba", + name="public_data_zone_ur_cb8473_idx", + ), + migrations.RemoveIndex( + model_name="artifareazoneurba", + name="public_data_zone_ur_57615b_idx", + ), + migrations.RemoveField( + model_name="artifareazoneurba", + name="zone_urba", + ), + ] diff --git a/public_data/migrations/0197_artifareazoneurba_zone_urba.py b/public_data/migrations/0197_artifareazoneurba_zone_urba.py new file mode 100644 index 000000000..ed5cb061e --- /dev/null +++ b/public_data/migrations/0197_artifareazoneurba_zone_urba.py @@ -0,0 +1,21 @@ +# Generated by Django 4.2.13 on 2024-08-18 12:44 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + dependencies = [ + ("public_data", "0196_remove_artifareazoneurba_unique_zone_year_and_more"), + ] + + operations = [ + migrations.AddField( + model_name="artifareazoneurba", + name="zone_urba", + field=models.ForeignKey( + default="", on_delete=django.db.models.deletion.CASCADE, to="public_data.zoneurba" + ), + preserve_default=False, + ), + ] diff --git a/public_data/models/gpu.py b/public_data/models/gpu.py index 7319d63a0..9bb12c0c6 100644 --- a/public_data/models/gpu.py +++ b/public_data/models/gpu.py @@ -3,7 +3,6 @@ """ from django.contrib.gis.db import models from django.core.validators import MaxValueValidator, MinValueValidator -from django.db.models import UniqueConstraint from public_data.models.enums import SRID from utils.db import IntersectMixin @@ -14,21 +13,15 @@ class ZoneUrbaManager(IntersectMixin, models.Manager): class ZoneUrba(models.Model): - gid = 
models.CharField("gid", max_length=80, blank=True, null=True) + id = models.TextField("id", primary_key=True) libelle = models.CharField("libelle", max_length=80, blank=True, null=True) libelong = models.CharField("libelong", max_length=254, blank=True, null=True) - origin_typezone = models.CharField("typezone", max_length=80, blank=True, null=True) - origin_insee = models.CharField("insee", max_length=80, blank=True, null=True) idurba = models.CharField("idurba", max_length=80, blank=True, null=True) - idzone = models.CharField("idzone", max_length=80, blank=True, null=True) - lib_idzone = models.CharField("lib_idzone", max_length=80, blank=True, null=True) + typezone = models.CharField("typezone", max_length=3, blank=True, null=True) partition = models.CharField("partition", max_length=80, blank=True, null=True) - destdomi = models.CharField("destdomi", max_length=80, blank=True, null=True) - nomfic = models.CharField("nomfic", max_length=80, blank=True, null=True) - urlfic = models.CharField("urlfic", max_length=178, blank=True, null=True) datappro = models.CharField("datappro", max_length=80, blank=True, null=True) datvalid = models.CharField("datvalid", max_length=80, blank=True, null=True) - + area = models.DecimalField("area", max_digits=15, decimal_places=4, blank=True, null=True) mpoly = models.MultiPolygonField(srid=4326) srid_source = models.IntegerField( "SRID", @@ -36,11 +29,6 @@ class ZoneUrba(models.Model): default=SRID.LAMBERT_93, ) - # calulated fields - insee = models.CharField("insee", max_length=10, blank=True, null=True) - area = models.DecimalField("area", max_digits=15, decimal_places=4, blank=True, null=True) - typezone = models.CharField("typezone", max_length=3, blank=True, null=True) - objects = ZoneUrbaManager() def get_color(self): @@ -58,7 +46,6 @@ def __str__(self): class Meta: indexes = [ - models.Index(fields=["insee"]), models.Index(fields=["typezone"]), ] @@ -66,15 +53,13 @@ class Meta: class ArtifAreaZoneUrba(models.Model): 
zone_urba = models.ForeignKey(ZoneUrba, on_delete=models.CASCADE) year = models.IntegerField("Millésime", validators=[MinValueValidator(2000), MaxValueValidator(2050)]) + departement = models.CharField("Département", max_length=3) area = models.DecimalField("Surface artificialisée", max_digits=15, decimal_places=4) def __str__(self): return f"{self.zone_urba_id} {self.year} {self.area}Ha" class Meta: - constraints = [UniqueConstraint(fields=["zone_urba", "year"], name="unique_zone_year")] indexes = [ - models.Index(fields=["zone_urba"]), models.Index(fields=["year"]), - models.Index(fields=["zone_urba", "year"]), ] From 74dca1f3f69972222d863ae3573861218c812b66 Mon Sep 17 00:00:00 2001 From: "Alexis A." Date: Mon, 19 Aug 2024 17:37:20 +0200 Subject: [PATCH 18/99] chore(airflow): remove unneeded commands --- airflow/dags/copy_to_prod.py | 46 --- airflow/dags/gpu.py | 17 +- airflow/dags/ingest_app_dependencies.py | 17 ++ airflow/dags/ocsge.py | 146 ++------- airflow/dependencies/ocsge/sources.json | 83 +++++ .../sparte/models/app/app_couverturesol.sql | 13 + .../models/app/app_couvertureusagematrix.sql | 12 + .../sql/sparte/models/app/app_usagesol.sql | 13 + airflow/sql/sparte/models/app/schema.yml | 6 + .../models/gpu/for_app/app_zoneurba.sql | 2 +- .../ocsge/for_app/app_artifareazoneurba.sql | 6 +- .../ocsge/for_app/app_artificialarea.sql | 2 +- .../models/ocsge/for_app/app_communediff.sql | 4 +- .../models/ocsge/for_app/app_communesol.sql | 64 ++++ .../models/ocsge/for_app/app_ocsgediff.sql | 2 +- .../models/ocsge/for_app/for_app_commune.sql | 68 +++++ .../ocsge/for_app/for_app_departement.sql | 35 +++ .../ocsge/intersected/artificial_commune.sql | 10 +- project/models/project_base.py | 18 +- .../shapefile_builder/BaseShapefileBuilder.py | 98 ------ .../domain/shapefile_builder/__init__.py | 0 .../gdal/GdalShapefileBuilder.py | 29 -- .../gdal/build_consommation_espace.py | 70 ----- .../gdal/build_ocsge_difference.py | 112 ------- 
.../gdal/build_ocsge_occupation_du_sol.py | 77 ----- .../gdal/build_ocsge_zone_artificielle.py | 188 ------------ .../gdal/build_ocsge_zone_construite.py | 54 ---- .../shapefile_builder/gdal/is_artif_case.py | 52 ---- .../gdal/is_impermeable_case.py | 10 - .../infra/shapefile_builder/gdal/tests.py | 81 ----- .../infra/shapefile_builder/gdal/utils.py | 2 - .../commands/build_administrative_layers.py | 232 -------------- .../management/commands/build_commune_data.py | 180 ----------- .../management/commands/build_matrix.py | 72 ----- .../management/commands/build_shapefile.py | 77 ----- .../commands/check_ocsge_validity.py | 70 ----- .../management/commands/evaluate_city_area.py | 18 -- public_data/management/commands/export_gpu.py | 93 ------ .../management/commands/find_urlfic.py | 55 ---- .../commands/fix_is_artif_carriere.py | 44 --- public_data/management/commands/import_gpu.py | 114 ------- public_data/management/commands/load_gpu.py | 155 ---------- public_data/management/commands/load_ocsge.py | 110 ------- .../management/commands/load_shapefile.py | 286 ------------------ public_data/management/commands/mep_7_1.py | 53 ---- .../management/commands/repack_ocsge.py | 149 --------- public_data/management/commands/set_parent.py | 19 -- .../management/commands/setup_departements.py | 63 ---- .../commands/update_administration_layer.py | 118 -------- .../0198_remove_commune_map_color.py | 16 + public_data/models/administration/Commune.py | 1 - public_data/views.py | 1 - 52 files changed, 390 insertions(+), 2873 deletions(-) delete mode 100644 airflow/dags/copy_to_prod.py create mode 100644 airflow/dependencies/ocsge/sources.json create mode 100644 airflow/sql/sparte/models/app/app_couverturesol.sql create mode 100644 airflow/sql/sparte/models/app/app_couvertureusagematrix.sql create mode 100644 airflow/sql/sparte/models/app/app_usagesol.sql create mode 100644 airflow/sql/sparte/models/ocsge/for_app/app_communesol.sql create mode 100644 
airflow/sql/sparte/models/ocsge/for_app/for_app_commune.sql create mode 100644 airflow/sql/sparte/models/ocsge/for_app/for_app_departement.sql delete mode 100644 public_data/domain/shapefile_builder/BaseShapefileBuilder.py delete mode 100644 public_data/domain/shapefile_builder/__init__.py delete mode 100644 public_data/infra/shapefile_builder/gdal/GdalShapefileBuilder.py delete mode 100644 public_data/infra/shapefile_builder/gdal/build_consommation_espace.py delete mode 100644 public_data/infra/shapefile_builder/gdal/build_ocsge_difference.py delete mode 100644 public_data/infra/shapefile_builder/gdal/build_ocsge_occupation_du_sol.py delete mode 100644 public_data/infra/shapefile_builder/gdal/build_ocsge_zone_artificielle.py delete mode 100644 public_data/infra/shapefile_builder/gdal/build_ocsge_zone_construite.py delete mode 100644 public_data/infra/shapefile_builder/gdal/is_artif_case.py delete mode 100644 public_data/infra/shapefile_builder/gdal/is_impermeable_case.py delete mode 100644 public_data/infra/shapefile_builder/gdal/tests.py delete mode 100644 public_data/infra/shapefile_builder/gdal/utils.py delete mode 100644 public_data/management/commands/build_administrative_layers.py delete mode 100644 public_data/management/commands/build_commune_data.py delete mode 100644 public_data/management/commands/build_matrix.py delete mode 100644 public_data/management/commands/build_shapefile.py delete mode 100644 public_data/management/commands/check_ocsge_validity.py delete mode 100644 public_data/management/commands/evaluate_city_area.py delete mode 100644 public_data/management/commands/export_gpu.py delete mode 100644 public_data/management/commands/find_urlfic.py delete mode 100644 public_data/management/commands/fix_is_artif_carriere.py delete mode 100644 public_data/management/commands/import_gpu.py delete mode 100644 public_data/management/commands/load_gpu.py delete mode 100644 public_data/management/commands/load_ocsge.py delete mode 100644 
public_data/management/commands/load_shapefile.py delete mode 100644 public_data/management/commands/mep_7_1.py delete mode 100644 public_data/management/commands/repack_ocsge.py delete mode 100644 public_data/management/commands/set_parent.py delete mode 100644 public_data/management/commands/setup_departements.py delete mode 100644 public_data/management/commands/update_administration_layer.py create mode 100644 public_data/migrations/0198_remove_commune_map_color.py diff --git a/airflow/dags/copy_to_prod.py b/airflow/dags/copy_to_prod.py deleted file mode 100644 index 6037c4b45..000000000 --- a/airflow/dags/copy_to_prod.py +++ /dev/null @@ -1,46 +0,0 @@ -from airflow.decorators import dag, task -from dependencies.container import Container -from gdaltools import ogr2ogr -from pendulum import datetime - - -# Define the basic parameters of the DAG, like schedule and start_date -@dag( - start_date=datetime(2024, 1, 1), - schedule="@once", - catchup=False, - default_args={"owner": "Alexis Athlani", "retries": 3}, - tags=["GPU"], -) -def copy_to_prod(): - @task.python - def export() -> str: - ogr = ogr2ogr() - ogr.config_options = {"PG_USE_COPY": "YES"} - - source_schema = "public_ocsge" - source_table_name = "occupation_du_sol" - source_sql = f"SELECT * FROM {source_schema}.{source_table_name} WHERE departement = '75'" - - ogr.set_input( - Container().gdal_dw_conn(schema=source_schema), - table_name=source_table_name, - srs="EPSG:2154", - ) - ogr.set_sql(source_sql) - - destination_table_name = "prod_occupation_du_sol" - - ogr.set_output( - Container().gdal_app_conn(), - table_name=destination_table_name, - srs="EPSG:4326", - ) - ogr.set_output_mode(layer_mode=ogr.MODE_LAYER_APPEND) - - ogr.execute() - - export() - - -copy_to_prod() diff --git a/airflow/dags/gpu.py b/airflow/dags/gpu.py index c649a6eb4..52a7d96a3 100644 --- a/airflow/dags/gpu.py +++ b/airflow/dags/gpu.py @@ -2,6 +2,7 @@ from airflow.operators.bash import BashOperator from dependencies.container 
import Container from dependencies.utils import multiline_string_to_single_line +from gdaltools import ogr2ogr from pendulum import datetime @@ -33,9 +34,7 @@ def ingest(path_on_bucket: str) -> str: Container().s3().get_file(path_on_bucket, wfs_du_temp) sql = """ SELECT - MD5Checksum( - ST_AsText(geom) || CastToText(gpu_timestamp) - ) AS checksum, + MD5Checksum(ST_AsText(geom)) AS checksum, gpu_doc_id, gpu_status, gpu_timestamp, @@ -91,8 +90,18 @@ def ingest(path_on_bucket: str) -> str: bash_command=" ".join(cmd), ).execute(context={}) + @task.python + def load_to_app(): + ogr = ogr2ogr() + ogr.config_options = {"PG_USE_COPY": "YES", "OGR_TRUNCATE": "NO"} + ogr.set_input(Container().gdal_dw_conn(schema="public_gpu"), table_name="app_zoneurba") + ogr.set_output(Container().gdal_app_conn(), table_name="public_data_zoneurba") + ogr.set_output_mode(layer_mode=ogr.MODE_LAYER_OVERWRITE) + ogr.execute() + path_on_bucket = download() - ingest(path_on_bucket) + ingest_task = ingest(path_on_bucket) + ingest_task >> load_to_app() gpu() diff --git a/airflow/dags/ingest_app_dependencies.py b/airflow/dags/ingest_app_dependencies.py index da7c7f6f2..191a78385 100644 --- a/airflow/dags/ingest_app_dependencies.py +++ b/airflow/dags/ingest_app_dependencies.py @@ -43,11 +43,28 @@ def ingest_epci(): def ingest_scot(): ingest_table(source_table_name="public_data_scot", destination_table_name="app_scot") + @task.python + def ingest_couverturesol(): + ingest_table(source_table_name="public_data_couverturesol", destination_table_name="app_couverturesol") + + @task.python + def ingest_usagesol(): + ingest_table(source_table_name="public_data_usagesol", destination_table_name="app_usagesol") + + @task.python + def ingest_couvertureusagematrix(): + ingest_table( + source_table_name="public_data_couvertureusagematrix", destination_table_name="app_couvertureusagematrix" + ) + ingest_region() ingest_departement() ingest_commune() ingest_epci() ingest_scot() + ingest_couverturesol() + 
ingest_usagesol() + ingest_couvertureusagematrix() ingest_app_dependencies() diff --git a/airflow/dags/ocsge.py b/airflow/dags/ocsge.py index 957285169..ee4d63015 100644 --- a/airflow/dags/ocsge.py +++ b/airflow/dags/ocsge.py @@ -1,4 +1,5 @@ import cgi +import json import os import tempfile from typing import Literal @@ -10,11 +11,6 @@ from airflow.models.param import Param from airflow.operators.bash import BashOperator from dependencies.container import Container -from dependencies.ocsge.delete_in_app import ( - delete_difference_in_app_sql, - delete_occupation_du_sol_in_app_sql, - delete_zone_construite_in_app_sql, -) from dependencies.ocsge.delete_in_dw import ( delete_difference_in_dw_sql, delete_occupation_du_sol_in_dw_sql, @@ -35,7 +31,7 @@ def copy_table_from_dw_to_app( to_table: str, ): ogr = ogr2ogr() - ogr.config_options = {"PG_USE_COPY": "YES"} + ogr.config_options = {"PG_USE_COPY": "YES", "OGR_TRUNCATE": "NO"} ogr.set_input(Container().gdal_dw_conn(schema="public_ocsge"), table_name=from_table) ogr.set_output(Container().gdal_app_conn(), table_name=to_table) ogr.set_output_mode(layer_mode=ogr.MODE_LAYER_OVERWRITE) @@ -58,116 +54,8 @@ def get_paths_from_directory(directory: str) -> list[tuple[str, str]]: return paths -sources = { # noqa: E501 - "01": { - DatasetName.OCCUPATION_DU_SOL_ET_ZONE_CONSTRUITE: { - 2018: "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D001_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D001_2018-01-01.7z", # noqa: E501 - 2021: "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D001_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D001_2021-01-01.7z", # noqa: E501 - }, - DatasetName.DIFFERENCE: { - ( - 2018, - 2021, - ): "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D001_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D001_2018-2021.7z", # noqa: E501 - }, - }, - "38": { - DatasetName.OCCUPATION_DU_SOL_ET_ZONE_CONSTRUITE: { - 2018: 
"https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D038_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D038_2018-01-01.7z", # noqa: E501 - 2021: "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D038_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D038_2021-01-01.7z", # noqa: E501 - }, - DatasetName.DIFFERENCE: { - ( - 2018, - 2021, - ): "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D038_DIFF_2018-2021/OCS-GE_2-0__SHP_LAMB93_D038_DIFF_2018-2021.7z", # noqa: E501 - }, - }, - "69": { - DatasetName.OCCUPATION_DU_SOL_ET_ZONE_CONSTRUITE: { - 2017: "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D069_2017-01-01/OCS-GE_2-0__SHP_LAMB93_D069_2017-01-01.7z", # noqa: E501 - 2020: "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D069_2020-01-01/OCS-GE_2-0__SHP_LAMB93_D069_2020-01-01.7z", # noqa: E501 - }, - DatasetName.DIFFERENCE: { - ( - 2017, - 2020, - ): "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D069_DIFF_2017-2020/OCS-GE_2-0__SHP_LAMB93_D069_DIFF_2017-2020.7z", # noqa: E501 - }, - }, - "91": { - "occupation_du_sol_et_zone_construite": { - 2018: "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D091_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D091_2018-01-01.7z", # noqa: E501 - 2021: "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D091_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D091_2021-01-01.7z", # noqa: E501 - }, - "difference": { - ( - 2018, - 2021, - ): "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D091_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D091_2018-2021.7z", # noqa: E501 - }, - }, - "92": { - "occupation_du_sol_et_zone_construite": { - 2018: "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D092_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D092_2018-01-01.7z", # noqa: E501 - 2021: 
"https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D092_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D092_2021-01-01.7z", # noqa: E501 - }, - "difference": { - ( - 2018, - 2021, - ): "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D092_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D092_2018-2021.7z", # noqa: E501 - }, - }, - "78": { - "occupation_du_sol_et_zone_construite": { - 2018: "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D078_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D078_2018-01-01.7z", # noqa: E501 - 2021: "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D078_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D078_2021-01-01.7z", # noqa: E501 - }, - "difference": { - ( - 2018, - 2021, - ): "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D078_DIFF_2018-2021/OCS-GE_2-0__SHP_LAMB93_D078_DIFF_2018-2021.7z" # noqa: E501 - }, - }, - "94": { - "occupation_du_sol_et_zone_construite": { - 2018: "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D094_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D094_2018-01-01.7z", # noqa: E501 - 2021: "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D094_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D094_2021-01-01.7z", # noqa: E501 - }, - "difference": { - ( - 2018, - 2021, - ): "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D094_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D094_2018-2021.7z", # noqa: E501 - }, - }, - "75": { - "occupation_du_sol_et_zone_construite": { - 2018: "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D075_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D075_2018-01-01.7z", # noqa: E501 - 2021: "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D075_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D075_2021-01-01.7z", # noqa: E501 - }, - "difference": { - ( - 2018, - 2021, - ): 
"https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D075_DIFF_2018-2021/OCS-GE_2-0__SHP_LAMB93_D075_DIFF_2018-2021.7z", # noqa: E501 - }, - }, - "32": { - "occupation_du_sol_et_zone_construite": { - 2016: "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D032_2016-01-01/OCS-GE_2-0__SHP_LAMB93_D032_2016-01-01.7z", # noqa: E501 - 2019: "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D032_2019-01-01/OCS-GE_2-0__SHP_LAMB93_D032_2019-01-01.7z", # noqa: E501 - }, - "difference": { - ( - 2016, - 2019, - ): "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D032_DIFF_2016-2019/OCS-GE_2-0__SHP_LAMB93_D032_DIFF_2016-2019.7z", # noqa: E501 - }, - }, -} +with open("dependencies/ocsge/sources.json", "r") as f: + sources = json.load(f) vars = { SourceName.OCCUPATION_DU_SOL: { @@ -176,10 +64,8 @@ def get_paths_from_directory(directory: str) -> list[tuple[str, str]]: "dbt_selector_staging": "source:sparte.public.ocsge_occupation_du_sol_staging", "dw_staging": "ocsge_occupation_du_sol_staging", "dw_source": "ocsge_occupation_du_sol", - "app_table_names": ("public_data_ocsge",), "normalization_sql": ocsge_occupation_du_sol_normalization_sql, "delete_on_dwt": delete_occupation_du_sol_in_dw_sql, - "delete_on_app": delete_occupation_du_sol_in_app_sql, "mapping": [ { "from_table": "public_ocsge.app_ocsge", @@ -189,6 +75,22 @@ def get_paths_from_directory(directory: str) -> list[tuple[str, str]]: "from_table": "public_ocsge.app_artificialarea", "to_table": "public.public_data_artificialarea", }, + { + "from_table": "public_ocsge.app_artifareazoneurba", + "to_table": "public.public_data_artifareazoneurba", + }, + { + "from_table": "public_ocsge.for_app_commune", + "to_table": "public.public_data_commune", + }, + { + "from_table": "public_ocsge.for_app_departement", + "to_table": "public.public_data_departement", + }, + { + "from_table": "public_ocsge.app_communesol", + "to_table": 
"public.public_data_communesol", + }, ], }, SourceName.ZONE_CONSTRUITE: { @@ -197,10 +99,8 @@ def get_paths_from_directory(directory: str) -> list[tuple[str, str]]: "dbt_selector_staging": "source:sparte.public.ocsge_zone_construite_staging", "dw_staging": "ocsge_zone_construite_staging", "dw_source": "ocsge_zone_construite", - "app_table_names": ("public_data_zoneconstruite",), "normalization_sql": ocsge_zone_construite_normalization_sql, "delete_on_dwt": delete_zone_construite_in_dw_sql, - "delete_on_app": delete_zone_construite_in_app_sql, "mapping": [ { "from_table": "public_ocsge.app_zoneconstruite", @@ -215,10 +115,8 @@ def get_paths_from_directory(directory: str) -> list[tuple[str, str]]: "dw_staging": "ocsge_difference_staging", "dw_source": "ocsge_difference", "dw_final_table_name": "app_ocsgediff", - "app_table_names": ("public_data_ocsgediff",), "normalization_sql": ocsge_diff_normalization_sql, "delete_on_dwt": delete_difference_in_dw_sql, - "delete_on_app": delete_difference_in_app_sql, "mapping": [ { "from_table": "public_ocsge.app_ocsgediff", @@ -352,11 +250,11 @@ def ocsge(): # noqa: C901 @task.python() def get_url(**context) -> str: departement = context["params"]["departement"] - years = tuple(map(int, context["params"]["years"])) + years = "_".join(map(str, context["params"]["years"])) dataset = context["params"]["dataset"] if len(years) == 1: - years = years[0] + years = str(years[0]) return sources.get(departement, {}).get(dataset, {}).get(years) diff --git a/airflow/dependencies/ocsge/sources.json b/airflow/dependencies/ocsge/sources.json new file mode 100644 index 000000000..90b72a51e --- /dev/null +++ b/airflow/dependencies/ocsge/sources.json @@ -0,0 +1,83 @@ +{ + "01": { + "occupation_du_sol_et_zone_construite": { + "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D001_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D001_2018-01-01.7z", + "2021": 
"https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D001_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D001_2021-01-01.7z" + }, + "difference": { + "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D001_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D001_2018-2021.7z" + } + }, + "38": { + "occupation_du_sol_et_zone_construite": { + "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D038_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D038_2018-01-01.7z", + "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D038_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D038_2021-01-01.7z" + }, + "difference": { + "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D038_DIFF_2018-2021/OCS-GE_2-0__SHP_LAMB93_D038_DIFF_2018-2021.7z" + } + }, + "69": { + "occupation_du_sol_et_zone_construite": { + "2017": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D069_2017-01-01/OCS-GE_2-0__SHP_LAMB93_D069_2017-01-01.7z", + "2020": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D069_2020-01-01/OCS-GE_2-0__SHP_LAMB93_D069_2020-01-01.7z" + }, + "difference": { + "2017_2020": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D069_DIFF_2017-2020/OCS-GE_2-0__SHP_LAMB93_D069_DIFF_2017-2020.7z" + } + }, + "91": { + "occupation_du_sol_et_zone_construite": { + "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D091_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D091_2018-01-01.7z", + "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D091_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D091_2021-01-01.7z" + }, + "difference": { + "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D091_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D091_2018-2021.7z" + } + }, + "92": { + "occupation_du_sol_et_zone_construite": { + "2018": 
"https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D092_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D092_2018-01-01.7z", + "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D092_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D092_2021-01-01.7z" + }, + "difference": { + "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D092_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D092_2018-2021.7z" + } + }, + "78": { + "occupation_du_sol_et_zone_construite": { + "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D078_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D078_2018-01-01.7z", + "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D078_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D078_2021-01-01.7z" + }, + "difference": { + "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D078_DIFF_2018-2021/OCS-GE_2-0__SHP_LAMB93_D078_DIFF_2018-2021.7z" + } + }, + "94": { + "occupation_du_sol_et_zone_construite": { + "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D094_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D094_2018-01-01.7z", + "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D094_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D094_2021-01-01.7z" + }, + "difference": { + "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D094_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D094_2018-2021.7z" + } + }, + "75": { + "occupation_du_sol_et_zone_construite": { + "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D075_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D075_2018-01-01.7z", + "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D075_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D075_2021-01-01.7z" + }, + "difference": { + "2018_2021": 
"https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D075_DIFF_2018-2021/OCS-GE_2-0__SHP_LAMB93_D075_DIFF_2018-2021.7z" + } + }, + "32": { + "occupation_du_sol_et_zone_construite": { + "2016": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D032_2016-01-01/OCS-GE_2-0__SHP_LAMB93_D032_2016-01-01.7z", + "2019": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D032_2019-01-01/OCS-GE_2-0__SHP_LAMB93_D032_2019-01-01.7z" + }, + "difference": { + "2016_2019": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D032_DIFF_2016-2019/OCS-GE_2-0__SHP_LAMB93_D032_DIFF_2016-2019.7z" + } + } +} diff --git a/airflow/sql/sparte/models/app/app_couverturesol.sql b/airflow/sql/sparte/models/app/app_couverturesol.sql new file mode 100644 index 000000000..dbc094117 --- /dev/null +++ b/airflow/sql/sparte/models/app/app_couverturesol.sql @@ -0,0 +1,13 @@ +{{ config(materialized='table') }} + +SELECT + id, + code, + label, + parent_id, + code_prefix, + map_color, + label_short, + is_key +FROM + {{ source('public', 'app_couverturesol') }} diff --git a/airflow/sql/sparte/models/app/app_couvertureusagematrix.sql b/airflow/sql/sparte/models/app/app_couvertureusagematrix.sql new file mode 100644 index 000000000..8f18d1961 --- /dev/null +++ b/airflow/sql/sparte/models/app/app_couvertureusagematrix.sql @@ -0,0 +1,12 @@ +{{ config(materialized='table') }} + +SELECT + id, + is_artificial, + is_impermeable, + couverture_id, + usage_id + -- is_natural, + --label, +FROM + {{ source('public', 'app_couvertureusagematrix') }} diff --git a/airflow/sql/sparte/models/app/app_usagesol.sql b/airflow/sql/sparte/models/app/app_usagesol.sql new file mode 100644 index 000000000..e72591f45 --- /dev/null +++ b/airflow/sql/sparte/models/app/app_usagesol.sql @@ -0,0 +1,13 @@ +{{ config(materialized='table') }} + +SELECT + id, + code, + label, + parent_id, + code_prefix, + map_color, + label_short, + is_key +FROM + {{ 
source('public', 'app_usagesol') }} diff --git a/airflow/sql/sparte/models/app/schema.yml b/airflow/sql/sparte/models/app/schema.yml index 91961720b..db186762b 100644 --- a/airflow/sql/sparte/models/app/schema.yml +++ b/airflow/sql/sparte/models/app/schema.yml @@ -7,6 +7,9 @@ models: - name: app_region - name: app_epci - name: app_scot + - name: app_usagesol + - name: app_couverturesol + - name: app_couvertureusagematrix sources: - name: public @@ -16,3 +19,6 @@ sources: - name: app_region - name: app_epci - name: app_scot + - name: app_usagesol + - name: app_couverturesol + - name: app_couvertureusagematrix diff --git a/airflow/sql/sparte/models/gpu/for_app/app_zoneurba.sql b/airflow/sql/sparte/models/gpu/for_app/app_zoneurba.sql index cb4cbb362..edc6d206e 100644 --- a/airflow/sql/sparte/models/gpu/for_app/app_zoneurba.sql +++ b/airflow/sql/sparte/models/gpu/for_app/app_zoneurba.sql @@ -9,7 +9,7 @@ SELECT partition, date_approbation::text as datappro, date_validation::text as datvalid, - surface as area, + surface / 10000 as area, ST_Transform(geom, 4326) as mpoly, 4326 AS srid_source FROM diff --git a/airflow/sql/sparte/models/ocsge/for_app/app_artifareazoneurba.sql b/airflow/sql/sparte/models/ocsge/for_app/app_artifareazoneurba.sql index 7ca3f5bbb..be9604b56 100644 --- a/airflow/sql/sparte/models/ocsge/for_app/app_artifareazoneurba.sql +++ b/airflow/sql/sparte/models/ocsge/for_app/app_artifareazoneurba.sql @@ -1,10 +1,10 @@ {{ config(materialized='table') }} SELECT - zonage_checksum as zone_urba, + zonage_checksum as zone_urba_id, year, - max(departement), - ST_Area(ST_Transform(ST_Union(geom), 2154)) as area + max(departement) as departement, + sum(ST_Area(ST_Transform(geom, 2154))) / 10000 as area FROM {{ ref('occupation_du_sol_zonage_urbanisme') }} WHERE diff --git a/airflow/sql/sparte/models/ocsge/for_app/app_artificialarea.sql b/airflow/sql/sparte/models/ocsge/for_app/app_artificialarea.sql index 3ba5bfb61..3e15d7ca9 100644 --- 
a/airflow/sql/sparte/models/ocsge/for_app/app_artificialarea.sql +++ b/airflow/sql/sparte/models/ocsge/for_app/app_artificialarea.sql @@ -2,7 +2,7 @@ SELECT year, - surface, + surface / 10000, 2154 as srid_source, departement, commune_code as city, diff --git a/airflow/sql/sparte/models/ocsge/for_app/app_communediff.sql b/airflow/sql/sparte/models/ocsge/for_app/app_communediff.sql index d469512f6..a8d7cff90 100644 --- a/airflow/sql/sparte/models/ocsge/for_app/app_communediff.sql +++ b/airflow/sql/sparte/models/ocsge/for_app/app_communediff.sql @@ -11,8 +11,8 @@ FROM ( SELECT year_old, year_new, - SUM(CASE WHEN new_is_artificial THEN surface ELSE 0 END) AS new_artif, - SUM(CASE WHEN new_not_artificial THEN surface ELSE 0 END) AS new_natural, + SUM(CASE WHEN new_is_artificial THEN surface ELSE 0 END) / 10000 AS new_artif, + SUM(CASE WHEN new_not_artificial THEN surface ELSE 0 END) / 10000 AS new_natural, commune_code FROM {{ ref("difference_commune") }} diff --git a/airflow/sql/sparte/models/ocsge/for_app/app_communesol.sql b/airflow/sql/sparte/models/ocsge/for_app/app_communesol.sql new file mode 100644 index 000000000..90605c48a --- /dev/null +++ b/airflow/sql/sparte/models/ocsge/for_app/app_communesol.sql @@ -0,0 +1,64 @@ +{{ config(materialized='table') }} + +with ocsge_with_cs_us_id as ( + SELECT + ocsge.commune_code, + ocsge.code_us, + ocsge.code_cs, + ocsge.surface, + ocsge.year, + ocsge.departement, + app_couverturesol.id as couverture_id, + app_usagesol.id as usage_id + FROM + {{ ref('occupation_du_sol_commune')}} as ocsge + LEFT JOIN + {{ ref("app_couverturesol") }} AS app_couverturesol + ON + app_couverturesol.code_prefix = ocsge.code_cs + LEFT JOIN + {{ ref("app_usagesol") }} AS app_usagesol + ON + app_usagesol.code_prefix = ocsge.code_us +), ocsge_with_matrix as ( + SELECT + ocsge_with_cs_us_id.commune_code, + ocsge_with_cs_us_id.surface, + ocsge_with_cs_us_id.year, + ocsge_with_cs_us_id.departement, + cs_us_matrix.id as matrix_id + FROM + 
ocsge_with_cs_us_id + LEFT JOIN + {{ ref("app_couvertureusagematrix") }} AS cs_us_matrix + ON + cs_us_matrix.couverture_id = ocsge_with_cs_us_id.couverture_id + AND + cs_us_matrix.usage_id = ocsge_with_cs_us_id.usage_id +), ocsge_with_matrix_and_city_id as ( + SELECT + ocsge_with_matrix.commune_code, + ocsge_with_matrix.surface, + ocsge_with_matrix.year, + ocsge_with_matrix.departement, + ocsge_with_matrix.matrix_id, + commune.id as city_id + FROM + ocsge_with_matrix + LEFT JOIN + {{ ref("app_commune") }} AS commune + ON + commune.insee = ocsge_with_matrix.commune_code +) + +SELECT + year, + (sum(surface) / 10000) as surface, + city_id, + matrix_id +FROM + ocsge_with_matrix_and_city_id +GROUP BY + year, + city_id, + matrix_id diff --git a/airflow/sql/sparte/models/ocsge/for_app/app_ocsgediff.sql b/airflow/sql/sparte/models/ocsge/for_app/app_ocsgediff.sql index 33e092428..03553a459 100644 --- a/airflow/sql/sparte/models/ocsge/for_app/app_ocsgediff.sql +++ b/airflow/sql/sparte/models/ocsge/for_app/app_ocsgediff.sql @@ -9,7 +9,7 @@ SELECT us_new, us_old, ST_Transform(geom, 4326) as mpoly, - surface, + surface / 10000 as surface, 2154 as srid_source, departement, new_is_artificial as is_new_artif, diff --git a/airflow/sql/sparte/models/ocsge/for_app/for_app_commune.sql b/airflow/sql/sparte/models/ocsge/for_app/for_app_commune.sql new file mode 100644 index 000000000..b82f7b4fb --- /dev/null +++ b/airflow/sql/sparte/models/ocsge/for_app/for_app_commune.sql @@ -0,0 +1,68 @@ +{{ config(materialized='table') }} + +with artif_commune_partitionned as ( + SELECT + row_number() OVER (PARTITION BY commune_code ORDER BY year DESC) as rn, + * + FROM + {{ ref('artificial_commune') }} + +), latest_year_artif_commune as ( + SELECT + * + FROM + artif_commune_partitionned + WHERE + rn = 1 +), first_and_last_millesimes as ( + SELECT + commune_code, + MIN(year) as first_millesime, + MAX(year) as last_millesime + FROM + {{ ref('occupation_du_sol_commune') }} + GROUP BY + commune_code +) 
+SELECT + commune.id, + commune.insee, + commune.name, + commune.departement_id, + commune.epci_id, + commune.scot_id, + CASE + WHEN + artif_commune.surface IS NOT NULL + THEN true + ELSE commune.ocsge_available + END AS ocsge_available, + millesimes.first_millesime as first_millesime, + millesimes.last_millesime as last_millesime, + COALESCE( + CASE + WHEN + artif_commune.surface IS NOT NULL + THEN artif_commune.surface / 10000 + ELSE + NULL + END, + commune.surface_artif + ) as surface_artif, + admin_express_commune.surface / 10000 as area, + ST_Transform(admin_express_commune.geom, 4326) as mpoly, + 2154 as srid_source +FROM + {{ ref('app_commune') }} as commune +LEFT JOIN + latest_year_artif_commune as artif_commune +ON + commune.insee = artif_commune.commune_code +LEFT JOIN + first_and_last_millesimes as millesimes +ON + commune.insee = millesimes.commune_code +LEFT JOIN + {{ ref('commune') }} as admin_express_commune +ON + commune.insee = admin_express_commune.code diff --git a/airflow/sql/sparte/models/ocsge/for_app/for_app_departement.sql b/airflow/sql/sparte/models/ocsge/for_app/for_app_departement.sql new file mode 100644 index 000000000..d9bba8db2 --- /dev/null +++ b/airflow/sql/sparte/models/ocsge/for_app/for_app_departement.sql @@ -0,0 +1,35 @@ +{{ config(materialized='table') }} + +with millesimes AS ( + SELECT + departement, + ARRAY_AGG(DISTINCT year) as ocsge_millesimes + FROM + {{ ref('occupation_du_sol') }} + GROUP BY + departement +) +SELECT + app_departement.id, + app_departement.source_id, + app_departement.name, + app_departement.region_id, + CASE + WHEN + millesimes.ocsge_millesimes IS NOT NULL + THEN true + ELSE false + END AS is_artif_ready, + millesimes.ocsge_millesimes, + ST_Transform(admin_express_departement.geom, 4326) as mpoly, + 2154 as srid_source +FROM + {{ ref('app_departement') }} as app_departement +LEFT JOIN + {{ ref('departement') }} as admin_express_departement +ON + app_departement.source_id = admin_express_departement.code 
+LEFT JOIN + millesimes +ON + app_departement.source_id = millesimes.departement diff --git a/airflow/sql/sparte/models/ocsge/intersected/artificial_commune.sql b/airflow/sql/sparte/models/ocsge/intersected/artificial_commune.sql index 29357d43b..2f6475357 100644 --- a/airflow/sql/sparte/models/ocsge/intersected/artificial_commune.sql +++ b/airflow/sql/sparte/models/ocsge/intersected/artificial_commune.sql @@ -1,7 +1,7 @@ {{ config( materialized='incremental', - post_hook="DELETE FROM {{ this }} WHERE NOT uuids <@ (SELECT ARRAY_AGG(uuid) FROM {{ ref('occupation_du_sol') }} )" + post_hook="DELETE FROM {{ this }} WHERE loaded_date not in (SELECT loaded_date FROM {{ ref('occupation_du_sol') }} )" ) }} @@ -10,18 +10,20 @@ SELECT *, ST_Area(geom) as surface FROM ( ocsge.departement, ocsge.year, ocsge.commune_code, + ocsge.loaded_date, ARRAY_AGG(ocsge.uuid) AS uuids, ST_Union(geom) as geom FROM {{ ref("occupation_du_sol_commune") }} AS ocsge WHERE ocsge.is_artificial = true + {% if is_incremental() %} + AND ocsge.loaded_date > + (SELECT max(foo.loaded_date) FROM {{ this }} as foo) + {% endif %} GROUP BY ocsge.commune_code, ocsge.departement, ocsge.year, ocsge.loaded_date - {% if is_incremental() %} - HAVING NOT ARRAY_AGG(ocsge.uuid) IN (SELECT uuids FROM {{ this }}) - {% endif %} ) as foo diff --git a/project/models/project_base.py b/project/models/project_base.py index 18f0e48d3..c3b3d323f 100644 --- a/project/models/project_base.py +++ b/project/models/project_base.py @@ -8,7 +8,7 @@ from django.conf import settings from django.contrib.gis.db import models as gis_models from django.contrib.gis.db.models import Extent, Union -from django.contrib.gis.db.models.functions import Area, Centroid +from django.contrib.gis.db.models.functions import Area, Centroid, PointOnSurface from django.contrib.gis.geos import MultiPolygon, Polygon from django.core.cache import cache from django.core.validators import MaxValueValidator, MinValueValidator @@ -567,9 +567,11 @@ def 
has_partial_ocsge_coverage(self) -> bool: def has_no_ocsge_coverage(self) -> bool: return self.ocsge_coverage_status == self.OcsgeCoverageStatus.NO_DATA - @cached_property + @property def has_zonage_urbanisme(self) -> bool: - return ArtifAreaZoneUrba.objects.filter(zone_urba__mpoly__intersects=self.combined_emprise).exists() + has = ArtifAreaZoneUrba.objects.filter(zone_urba__mpoly__intersects=self.combined_emprise).exists() + print("has_zonage_urbanisme", has) + return has def get_ocsge_millesimes(self): """Return all OCS GE millésimes available within project cities and between @@ -1246,8 +1248,15 @@ def get_artif_per_zone_urba_type( last_artif_area (float): artificial area of zone in last year fill_up_rate (float): percentage of zone filled up """ + + zone_urba = ( + ZoneUrba.objects.annotate(pos=PointOnSurface("mpoly")) + .filter(pos__intersects=self.combined_emprise) + .values_list("id", flat=True) + ) + qs = ( - ArtifAreaZoneUrba.objects.filter(zone_urba__in=ZoneUrba.objects.intersect(self.combined_emprise)) + ArtifAreaZoneUrba.objects.filter(zone_urba__in=zone_urba) .filter(year__in=[self.first_year_ocsge, self.last_year_ocsge]) .order_by("zone_urba__typezone", "year") .values("zone_urba__typezone", "year") @@ -1257,6 +1266,7 @@ def get_artif_per_zone_urba_type( nb_zones=Count("zone_urba_id"), ) ) + zone_list = dict() for row in qs: zone_type = row["zone_urba__typezone"] # A, U, AUs... diff --git a/public_data/domain/shapefile_builder/BaseShapefileBuilder.py b/public_data/domain/shapefile_builder/BaseShapefileBuilder.py deleted file mode 100644 index 429177ffc..000000000 --- a/public_data/domain/shapefile_builder/BaseShapefileBuilder.py +++ /dev/null @@ -1,98 +0,0 @@ -from abc import ABC, abstractmethod -from pathlib import Path - -from public_data.models import DataSource - - -class BaseShapefileBuilder(ABC): - def build(self, source: DataSource) -> list[tuple[DataSource, Path]]: - """ - Builds a shapefile from a DataSource. 
- """ - created = [] - - if source.productor == source.ProductorChoices.IGN: - if source.dataset == source.DatasetChoices.OCSGE: - if source.name == source.DataNameChoices.DIFFERENCE: - created.append(self.build_ocsge_difference(source)) - elif source.name == source.DataNameChoices.ZONE_CONSTRUITE: - created.append(self.build_ocsge_zone_construite(source)) - elif source.name == source.DataNameChoices.OCCUPATION_DU_SOL: - created.append(self.build_ocsge_occupation_du_sol(source)) - created.append(self.build_ocsge_zone_artificielle(source)) - elif source.name == source.DataNameChoices.ZONE_ARTIFICIELLE: - created.append(self.build_ocsge_zone_artificielle(source)) - elif source.productor == source.ProductorChoices.CEREMA: - if source.dataset == source.DatasetChoices.MAJIC: - created.append(self.build_consommation_espace(source)) - - if not created: - raise NotImplementedError(f"Building {source} is not implemented") - - return created - - @abstractmethod - def build_ocsge_zone_artificielle(self, source: DataSource) -> tuple[DataSource, Path]: - pass - - @abstractmethod - def build_ocsge_difference(self, source: DataSource) -> tuple[DataSource, Path]: - """ - Creates a new shapefile with the difference between two OCSGE. - Based on the diff shapefile from IGN. - - Output fields: - - YEAR_OLD: Year of the old OCSGE - - YEAR_NEW: Year of the new OCSGE - - CS_NEW: Code of the new coverage - - CS_OLD: Code of the old coverage - - US_NEW: Code of the new usage - - US_OLD: Code of the old usage - - SRID: SRID of the shapefile - - SURFACE: Surface of the polygon in square meters - - DPT: Departement code - - GEOMETRY: Geometry of the polygon - - NEW_ARTIF: 1 if the new coverage is artificial and the old one is not - - NEW_NAT: 1 if the new coverage is natural and the old one is not - """ - - @abstractmethod - def build_ocsge_zone_construite(self, source: DataSource) -> tuple[DataSource, Path]: - """ - Creates a new shapefile with the zone construite from OCSGE. 
- Based on the zone construite shapefile from IGN. - - Expected output fields: - - ID: ID of the polygon. TODO: remove this field as it is not used - - YEAR: Year of the OCSGE - - MILLESIME: Millesime of the OCSGE. This field is duplicated with YEAR. TODO: remove this field - - SRID: SRID of the shapefile - - DPT: Departement code - - SURFACE: Surface of the polygon in square meters - - MPOLY: Geometry of the polygon - """ - - @abstractmethod - def build_ocsge_occupation_du_sol(self, source: DataSource) -> tuple[DataSource, Path]: - """ - Creates a new shapefile with the occupation du sol from OCSGE. - Based on the occupation du sol shapefile from IGN. - - Output fields: - - CODE_CS: Code of the coverage - - CODE_US: Code of the usage - - ID: ID of the polygon. - - GEOMETRY: Geometry of the polygon TODO: renamme MPOLY - - SURFACE: Surface of the polygon in square meters - - DPT: Departement code - - YEAR: Year of the OCSGE - - SRID: SRID of the shapefile - - IS_ARTIF: 1 if the coverage is artificial - """ - - @abstractmethod - def build_consommation_espace(self, source: DataSource) -> tuple[DataSource, Path]: - """ - Creates a new shapefile with the consommation d'espace from MAJIC. - Based on the consommation d'espace shapefile from CEREMA. 
- """ diff --git a/public_data/domain/shapefile_builder/__init__.py b/public_data/domain/shapefile_builder/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/public_data/infra/shapefile_builder/gdal/GdalShapefileBuilder.py b/public_data/infra/shapefile_builder/gdal/GdalShapefileBuilder.py deleted file mode 100644 index bb1bab0c4..000000000 --- a/public_data/infra/shapefile_builder/gdal/GdalShapefileBuilder.py +++ /dev/null @@ -1,29 +0,0 @@ -from pathlib import Path - -from public_data.domain.shapefile_builder.BaseShapefileBuilder import ( - BaseShapefileBuilder, -) -from public_data.models import DataSource - -from .build_consommation_espace import build_consommation_espace -from .build_ocsge_difference import build_ocsge_difference -from .build_ocsge_occupation_du_sol import build_ocsge_occupation_du_sol -from .build_ocsge_zone_artificielle import build_ocsge_zone_artificielle -from .build_ocsge_zone_construite import build_ocsge_zone_construite - - -class GdalShapefileBuilder(BaseShapefileBuilder): - def build_ocsge_difference(self, source: DataSource) -> tuple[DataSource, Path]: - return build_ocsge_difference(source) - - def build_ocsge_zone_construite(self, source: DataSource) -> tuple[DataSource, Path]: - return build_ocsge_zone_construite(source) - - def build_ocsge_occupation_du_sol(self, source: DataSource) -> tuple[DataSource, Path]: - return build_ocsge_occupation_du_sol(source) - - def build_consommation_espace(self, source: DataSource) -> tuple[DataSource, Path]: - return build_consommation_espace(source) - - def build_ocsge_zone_artificielle(self, source: DataSource) -> tuple[DataSource, Path]: - return build_ocsge_zone_artificielle(source) diff --git a/public_data/infra/shapefile_builder/gdal/build_consommation_espace.py b/public_data/infra/shapefile_builder/gdal/build_consommation_espace.py deleted file mode 100644 index 299a6876e..000000000 --- a/public_data/infra/shapefile_builder/gdal/build_consommation_espace.py +++ 
/dev/null @@ -1,70 +0,0 @@ -import subprocess -from pathlib import Path - -from public_data.models import Cerema, DataSource -from public_data.models.enums import SRID -from public_data.shapefile import ShapefileFromSource - -from .utils import multiline_string_to_single_line - - -def build_consommation_espace(source: DataSource) -> tuple[DataSource, Path]: - build_name = source.get_build_name() - - with ShapefileFromSource(source=source) as shapefile_path: - art_fields_11_21 = Cerema.get_art_field( - start=2011, - end=2020, - ) - habitat_fields_11_21 = [field.replace("art", "hab").replace("naf", "art") for field in art_fields_11_21] - activity_fields_11_21 = [field.replace("art", "act").replace("naf", "art") for field in art_fields_11_21] - - sql = f""" - SELECT - *, - '{source.srid}' AS SRID, - CAST(({' + '.join(art_fields_11_21)}) AS FLOAT) AS NAF11ART21, - CAST(({' + '.join(habitat_fields_11_21)}) AS FLOAT) AS ART11HAB21, - CAST(({' + '.join(activity_fields_11_21)}) AS FLOAT) AS ART11ACT21, - {"artcom0923" if source.srid == SRID.LAMBERT_93 else "NULL"} AS ARTCOM0923, - GEOMETRY AS MPOLY - FROM - {Path(source.shapefile_name).stem} - """ - command = [ - "ogr2ogr", - "-dialect SQLITE", - '-f "ESRI Shapefile"', - f'"{build_name}"', - str(shapefile_path.absolute()), - "-nlt MULTIPOLYGON", - "-nlt PROMOTE_TO_MULTI", - f"-nln {source.name}", - f"-a_srs EPSG:{source.srid}", - "-sql", - f'"{multiline_string_to_single_line(sql)}"', - ] - - with open("output.txt", "w") as f: - subprocess.run( - args=" ".join(command), - shell=True, - check=True, - stdout=f, - stderr=f, - ) - - output_source, _ = DataSource.objects.update_or_create( - productor=source.ProductorChoices.MDA, - dataset=source.dataset, - name=source.name, - millesimes=source.millesimes, - official_land_id=source.official_land_id, - defaults={ - "mapping": None, - "path": build_name, - "shapefile_name": source.name + ".shp", - "srid": source.srid, - }, - ) - return output_source, Path(build_name) diff --git 
a/public_data/infra/shapefile_builder/gdal/build_ocsge_difference.py b/public_data/infra/shapefile_builder/gdal/build_ocsge_difference.py deleted file mode 100644 index 01eeaa820..000000000 --- a/public_data/infra/shapefile_builder/gdal/build_ocsge_difference.py +++ /dev/null @@ -1,112 +0,0 @@ -import subprocess -from pathlib import Path - -from public_data.models import DataSource -from public_data.shapefile import ShapefileFromSource - -from .is_artif_case import is_artif_case -from .is_impermeable_case import is_impermeable_case -from .utils import multiline_string_to_single_line - - -def build_ocsge_difference(source: DataSource) -> tuple[DataSource, Path]: - fields = { - "cs_new": f"CS_{source.millesimes[1]}", - "cs_old": f"CS_{source.millesimes[0]}", - "us_new": f"US_{source.millesimes[1]}", - "us_old": f"US_{source.millesimes[0]}", - } - if source.mapping: - fields |= source.mapping - - build_name = source.get_build_name() - - with ShapefileFromSource(source=source) as shapefile_path: - sql = f""" - SELECT - YEAR_OLD AS YEAR_OLD, - YEAR_NEW AS YEAR_NEW, - CS_NEW AS CS_NEW, - CS_OLD AS CS_OLD, - US_NEW AS US_NEW, - US_OLD AS US_OLD, - SRID AS SRID, - SURFACE AS SURFACE, - DPT AS DPT, - GEOMETRY, - CASE - WHEN OLD_IS_IMPER = 0 AND NEW_IS_IMPER = 1 THEN 1 - ELSE 0 - END AS NEW_IMPER, - CASE - WHEN OLD_IS_IMPER = 1 AND NEW_IS_IMPER = 0 THEN 1 - ELSE 0 - END AS NEWNOIMPER, - CASE - WHEN OLD_IS_ARTIF = 0 AND NEW_IS_ARTIF = 1 THEN 1 - ELSE 0 - END AS NEW_ARTIF, - CASE - WHEN OLD_IS_ARTIF = 1 AND NEW_IS_ARTIF = 0 THEN 1 - ELSE 0 - END AS NEW_NAT - FROM ( - SELECT - '{source.millesimes[0]}' AS YEAR_OLD, - '{source.millesimes[1]}' AS YEAR_NEW, - {fields['cs_new']} AS CS_NEW, - {fields['cs_old']} AS CS_OLD, - {fields['us_new']} AS US_NEW, - {fields['us_old']} AS US_OLD, - '{source.srid}' AS SRID, - round(ST_Area(GEOMETRY), 4) AS SURFACE, - {is_artif_case(fields['cs_old'], fields['us_old'])} AS OLD_IS_ARTIF, - {is_impermeable_case(fields['cs_old'])} AS OLD_IS_IMPER, - 
{is_artif_case(fields['cs_new'], fields['us_new'])} AS NEW_IS_ARTIF, - {is_impermeable_case(fields['cs_new'])} AS NEW_IS_IMPER, - '{source.official_land_id}' AS DPT, - GEOMETRY - FROM - {Path(source.shapefile_name).stem} - WHERE - {fields['cs_new']} IS NOT NULL AND - {fields['cs_old']} IS NOT NULL AND - {fields['us_new']} IS NOT NULL AND - {fields['us_old']} IS NOT NULL - ) - """ - - command = [ - "ogr2ogr", - "-dialect SQLITE", - '-f "ESRI Shapefile"', - f'"{build_name}"', - str(shapefile_path.absolute()), - "-nlt MULTIPOLYGON", - "-nlt PROMOTE_TO_MULTI", - f"-nln {source.name}", - f"-a_srs EPSG:{source.srid}", - "-sql", - f'"{multiline_string_to_single_line(sql)}"', - ] - - subprocess.run( - args=" ".join(command), - shell=True, - check=True, - ) - - output_source, _ = DataSource.objects.update_or_create( - productor=source.ProductorChoices.MDA, - dataset=source.dataset, - name=source.name, - millesimes=source.millesimes, - official_land_id=source.official_land_id, - defaults={ - "mapping": None, - "path": build_name, - "shapefile_name": source.name + ".shp", - "srid": source.srid, - }, - ) - return output_source, Path(build_name) diff --git a/public_data/infra/shapefile_builder/gdal/build_ocsge_occupation_du_sol.py b/public_data/infra/shapefile_builder/gdal/build_ocsge_occupation_du_sol.py deleted file mode 100644 index d340e98a4..000000000 --- a/public_data/infra/shapefile_builder/gdal/build_ocsge_occupation_du_sol.py +++ /dev/null @@ -1,77 +0,0 @@ -import subprocess -from pathlib import Path - -from public_data.models import DataSource -from public_data.shapefile import ShapefileFromSource - -from .is_artif_case import is_artif_case -from .is_impermeable_case import is_impermeable_case -from .utils import multiline_string_to_single_line - - -def build_ocsge_occupation_du_sol(source: DataSource) -> tuple[DataSource, Path]: - fields = { - "couverture": "CODE_CS", - "usage": "CODE_US", - } - if source.mapping: - fields |= source.mapping - - build_name = 
source.get_build_name() - - with ShapefileFromSource(source=source) as shapefile_path: - sql = f""" - SELECT - {fields['couverture']} AS CODE_CS, - {fields['usage']} AS CODE_US, - ID AS ID, - GEOMETRY AS MPOLY, - round(ST_Area(GEOMETRY), 4) AS SURFACE, - '{source.official_land_id}' AS DPT, - '{source.millesimes[0]}' AS YEAR, - '{source.srid}' AS SRID, - {is_impermeable_case(fields['couverture'])} AS IS_IMPER, - {is_artif_case(fields['couverture'], fields['usage'])} AS IS_ARTIF - FROM - {Path(source.shapefile_name).stem} - """ - - subprocess.run( - " ".join( - [ - "ogr2ogr", - "-dialect", - "SQLITE", - "-f", - "'ESRI Shapefile'", - f'"{build_name}"', - str(shapefile_path.absolute()), - "-nlt", - "MULTIPOLYGON", - "-nlt", - "PROMOTE_TO_MULTI", - "-nln", - source.name, - f"-a_srs EPSG:{source.srid}", - "-sql", - f'"{multiline_string_to_single_line(sql)}"', - ] - ), - shell=True, - check=True, - ) - - output_source, _ = DataSource.objects.update_or_create( - productor=source.ProductorChoices.MDA, - dataset=source.dataset, - name=source.name, - millesimes=source.millesimes, - official_land_id=source.official_land_id, - defaults={ - "mapping": None, - "path": source.get_build_name(), - "shapefile_name": source.name + ".shp", - "srid": source.srid, - }, - ) - return output_source, Path(build_name) diff --git a/public_data/infra/shapefile_builder/gdal/build_ocsge_zone_artificielle.py b/public_data/infra/shapefile_builder/gdal/build_ocsge_zone_artificielle.py deleted file mode 100644 index 7c5c7dab0..000000000 --- a/public_data/infra/shapefile_builder/gdal/build_ocsge_zone_artificielle.py +++ /dev/null @@ -1,188 +0,0 @@ -import subprocess -from logging import getLogger -from pathlib import Path -from uuid import uuid4 - -from django.conf import settings -from django.db import connection - -from public_data.models import DataSource -from public_data.shapefile import ShapefileFromSource - -from .is_artif_case import is_artif_case - -logger = getLogger(__name__) - - -def 
build_ocsge_zone_artificielle(source: DataSource) -> tuple[DataSource, Path]: - logger.info(f"Building {source}") - unique_string = "_".join( - [ - source.dataset, - DataSource.DataNameChoices.ZONE_ARTIFICIELLE, - source.official_land_id, - str(source.millesimes[0]), - DataSource.ProductorChoices.MDA, - ] - ) - - build_name = unique_string + ".shp.zip" - - fields = { - "couverture": "CODE_CS", - "usage": "CODE_US", - } - - temp_table_occupation_du_sol = f"temp_{uuid4().hex}" - temp_table_artif = f"temp_{uuid4().hex}" - - db = settings.DATABASES["default"] - - with ShapefileFromSource(source=source) as shapefile_path: - command_occupation_du_sol = f'ogr2ogr -f "PostgreSQL" -overwrite "PG:dbname={db["NAME"]} host={db["HOST"]} port={db["PORT"]} user={db["USER"]} password={db["PASSWORD"]}" {shapefile_path.absolute()} -nln {temp_table_occupation_du_sol} -a_srs EPSG:{source.srid} -nlt MULTIPOLYGON -nlt PROMOTE_TO_MULTI -lco GEOMETRY_NAME=mpoly -lco PRECISION=NO --config PG_USE_COPY YES' # noqa: E501 - - with open("error.log", "w") as f: - subprocess.run(args=command_occupation_du_sol, check=True, shell=True, stdout=f, stderr=f) - - sql = f""" - DROP TABLE IF EXISTS ocsge_classified; - DROP TABLE IF EXISTS clustered_ocsge; - DROP TABLE IF EXISTS artif_nat_by_surface; - DROP TABLE IF EXISTS small_built; - DROP TABLE IF EXISTS artificial_union; - DROP TABLE IF EXISTS artificial_geom_union; - DROP TABLE IF EXISTS artificial_geom_union_dump; - CREATE TEMPORARY TABLE ocsge_classified AS - SELECT - *, - {is_artif_case( - code_cs=fields['couverture'], - code_us=fields['usage'], - true_value='TRUE', - false_value='FALSE', - )} AS is_artificial - FROM - {temp_table_occupation_du_sol}; - CREATE INDEX ON ocsge_classified USING GIST (mpoly); - CREATE TEMPORARY TABLE clustered_ocsge AS - SELECT - is_artificial, - ST_UnaryUnion( - unnest( - ST_ClusterIntersecting(mpoly) - ) - ) AS mpoly - FROM - ocsge_classified - GROUP BY - is_artificial; - CREATE INDEX ON clustered_ocsge USING GIST 
(mpoly); - CREATE TEMPORARY TABLE artif_nat_by_surface AS - SELECT - CASE - WHEN ST_Area(mpoly) < 2500 THEN NOT is_artificial - ELSE is_artificial - END AS is_artificial, - mpoly - FROM - clustered_ocsge; - CREATE TEMPORARY TABLE small_built AS - SELECT - is_artificial, - mpoly - FROM - ocsge_classified - WHERE - code_cs = 'CS1.1.1.1' - AND ST_Area(mpoly) < 2500 - AND EXISTS ( - SELECT - mpoly - FROM - artif_nat_by_surface - WHERE - ST_Intersects(mpoly, ocsge_classified.mpoly) AND - is_artificial = FALSE - ); - CREATE TEMPORARY TABLE artificial_union AS - SELECT - is_artificial, - mpoly - FROM - artif_nat_by_surface - WHERE - is_artificial = TRUE - UNION ALL - SELECT - is_artificial, - mpoly - FROM - small_built; - CREATE TEMPORARY TABLE artificial_geom_union AS - SELECT - ST_Union(mpoly) AS mpoly, - is_artificial - FROM - artificial_union - GROUP BY - is_artificial; - CREATE TEMPORARY TABLE artificial_geom_union_dump AS - SELECT - (ST_Dump(mpoly)).geom AS MPOLY, - {source.millesimes[0]} AS YEAR, - {source.official_land_id} AS DPT, - {source.srid} as SRID - - FROM - artificial_geom_union; - CREATE INDEX ON artificial_geom_union_dump USING GIST (MPOLY); - CREATE TABLE {temp_table_artif} AS - SELECT - *, - ST_Area(MPOLY) AS SURFACE - FROM - artificial_geom_union_dump; - """ - - with connection.cursor() as cursor: - cursor.execute(sql=sql) - cursor.connection.commit() - - command = [ - "ogr2ogr", - "-f", - '"ESRI Shapefile"', - f'"{build_name}"', - f'"PG:dbname={db["NAME"]} host={db["HOST"]} port={db["PORT"]} user={db["USER"]} password={db["PASSWORD"]}"', # noqa: E501 - "-nlt MULTIPOLYGON", - "-nlt PROMOTE_TO_MULTI", - "-nln", - f'"{DataSource.DataNameChoices.ZONE_ARTIFICIELLE}"', - f"-a_srs EPSG:{source.srid}", - "-sql", - f'"SELECT * FROM {temp_table_artif}"', - "-progress", - ] - with open("error.log", "w") as f: - subprocess.run(args=" ".join(command), shell=True, check=True, stdout=f, stderr=f) - - with connection.cursor() as cursor: - cursor.execute(sql=f"DROP 
TABLE IF EXISTS {temp_table_occupation_du_sol};") - - with connection.cursor() as cursor: - cursor.execute(sql=f"DROP TABLE IF EXISTS {temp_table_artif};") - - output_source, _ = DataSource.objects.update_or_create( - productor=source.ProductorChoices.MDA, - dataset=source.dataset, - name=DataSource.DataNameChoices.ZONE_ARTIFICIELLE, - millesimes=source.millesimes, - official_land_id=source.official_land_id, - defaults={ - "mapping": None, - "path": build_name, - "shapefile_name": f"{DataSource.DataNameChoices.ZONE_ARTIFICIELLE}.shp", - "srid": source.srid, - }, - ) - return output_source, Path(build_name) diff --git a/public_data/infra/shapefile_builder/gdal/build_ocsge_zone_construite.py b/public_data/infra/shapefile_builder/gdal/build_ocsge_zone_construite.py deleted file mode 100644 index 1c9ec7d18..000000000 --- a/public_data/infra/shapefile_builder/gdal/build_ocsge_zone_construite.py +++ /dev/null @@ -1,54 +0,0 @@ -import subprocess -from pathlib import Path - -from public_data.models import DataSource -from public_data.shapefile import ShapefileFromSource - -from .utils import multiline_string_to_single_line - - -def build_ocsge_zone_construite(source: DataSource) -> tuple[DataSource, Path]: - build_name = source.get_build_name() - - with ShapefileFromSource(source=source) as shapefile_path: - sql = f""" - SELECT - 'NO_ID' AS ID, - '{source.millesimes[0]}' AS YEAR, - '{source.millesimes[0]}' AS MILLESIME, - '{source.srid}' AS SRID, - '{source.official_land_id}' AS DPT, - round(ST_Area(GEOMETRY), 4) AS SURFACE, - GEOMETRY AS MPOLY - FROM - {Path(source.shapefile_name).stem} - """ - command = [ - "ogr2ogr", - "-dialect SQLITE", - '-f "ESRI Shapefile"', - f'"{build_name}"', - str(shapefile_path.absolute()), - "-nlt MULTIPOLYGON", - "-nlt PROMOTE_TO_MULTI", - f"-nln {source.name}", - f"-a_srs EPSG:{source.srid}", - "-sql", - f'"{multiline_string_to_single_line(sql)}"', - ] - subprocess.run(args=" ".join(command), shell=True, check=True) - - output_source, _ = 
DataSource.objects.update_or_create( - productor=source.ProductorChoices.MDA, - dataset=source.dataset, - name=source.name, - millesimes=source.millesimes, - official_land_id=source.official_land_id, - defaults={ - "mapping": None, - "path": build_name, - "shapefile_name": source.name + ".shp", - "srid": source.srid, - }, - ) - return output_source, Path(build_name) diff --git a/public_data/infra/shapefile_builder/gdal/is_artif_case.py b/public_data/infra/shapefile_builder/gdal/is_artif_case.py deleted file mode 100644 index d1ac35c95..000000000 --- a/public_data/infra/shapefile_builder/gdal/is_artif_case.py +++ /dev/null @@ -1,52 +0,0 @@ -def is_artif_case( - code_cs: str, - code_us: str, - true_value=1, - false_value=0, -) -> str: - """ - true_value and false_value are optional parameters - - The default values are for working with sqlite (which does not have a proper boolean type), - but you can change them to work with other databases. - """ - - return f""" CASE - /* CS 1.1 */ - WHEN {code_cs} = 'CS1.1.1.1' THEN {true_value} - WHEN {code_cs} = 'CS1.1.1.2' THEN {true_value} - WHEN {code_cs} = 'CS1.1.2.1' AND {code_us} != 'US1.3' THEN {true_value} - WHEN {code_cs} = 'CS1.1.2.2' THEN {true_value} - - /* CS 2.2 */ - /* CS 2.2.1 */ - WHEN {code_cs} = 'CS2.2.1' AND {code_us} = 'US2' THEN {true_value} - WHEN {code_cs} = 'CS2.2.1' AND {code_us} = 'US3' THEN {true_value} - WHEN {code_cs} = 'CS2.2.1' AND {code_us} = 'US5' THEN {true_value} - WHEN {code_cs} = 'CS2.2.1' AND {code_us} = 'US235' THEN {true_value} - WHEN {code_cs} = 'CS2.2.1' AND {code_us} = 'US4.1.1' THEN {true_value} - WHEN {code_cs} = 'CS2.2.1' AND {code_us} = 'US4.1.2' THEN {true_value} - WHEN {code_cs} = 'CS2.2.1' AND {code_us} = 'US4.1.3' THEN {true_value} - WHEN {code_cs} = 'CS2.2.1' AND {code_us} = 'US4.1.4' THEN {true_value} - WHEN {code_cs} = 'CS2.2.1' AND {code_us} = 'US4.1.5' THEN {true_value} - WHEN {code_cs} = 'CS2.2.1' AND {code_us} = 'US4.2' THEN {true_value} - WHEN {code_cs} = 'CS2.2.1' AND 
{code_us} = 'US4.3' THEN {true_value} - WHEN {code_cs} = 'CS2.2.1' AND {code_us} = 'US6.1' THEN {true_value} - WHEN {code_cs} = 'CS2.2.1' AND {code_us} = 'US6.2' THEN {true_value} - - /* CS 2.2.2 */ - WHEN {code_cs} = 'CS2.2.2' AND {code_us} = 'US2' THEN {true_value} - WHEN {code_cs} = 'CS2.2.2' AND {code_us} = 'US3' THEN {true_value} - WHEN {code_cs} = 'CS2.2.2' AND {code_us} = 'US5' THEN {true_value} - WHEN {code_cs} = 'CS2.2.2' AND {code_us} = 'US235' THEN {true_value} - WHEN {code_cs} = 'CS2.2.2' AND {code_us} = 'US4.1.1' THEN {true_value} - WHEN {code_cs} = 'CS2.2.2' AND {code_us} = 'US4.1.2' THEN {true_value} - WHEN {code_cs} = 'CS2.2.2' AND {code_us} = 'US4.1.3' THEN {true_value} - WHEN {code_cs} = 'CS2.2.2' AND {code_us} = 'US4.1.4' THEN {true_value} - WHEN {code_cs} = 'CS2.2.2' AND {code_us} = 'US4.1.5' THEN {true_value} - WHEN {code_cs} = 'CS2.2.2' AND {code_us} = 'US4.2' THEN {true_value} - WHEN {code_cs} = 'CS2.2.2' AND {code_us} = 'US4.3' THEN {true_value} - WHEN {code_cs} = 'CS2.2.2' AND {code_us} = 'US6.1' THEN {true_value} - WHEN {code_cs} = 'CS2.2.2' AND {code_us} = 'US6.2' THEN {true_value} - ELSE {false_value} - END""" diff --git a/public_data/infra/shapefile_builder/gdal/is_impermeable_case.py b/public_data/infra/shapefile_builder/gdal/is_impermeable_case.py deleted file mode 100644 index 120391afa..000000000 --- a/public_data/infra/shapefile_builder/gdal/is_impermeable_case.py +++ /dev/null @@ -1,10 +0,0 @@ -def is_impermeable_case( - code_cs: str, - true_value=1, - false_value=0, -) -> str: - return f""" CASE - WHEN {code_cs} = 'CS1.1.1.1' THEN {true_value} - WHEN {code_cs} = 'CS1.1.1.2' THEN {true_value} - ELSE {false_value} - END""" diff --git a/public_data/infra/shapefile_builder/gdal/tests.py b/public_data/infra/shapefile_builder/gdal/tests.py deleted file mode 100644 index d5411dcb9..000000000 --- a/public_data/infra/shapefile_builder/gdal/tests.py +++ /dev/null @@ -1,81 +0,0 @@ -from django.db import connection -from django.test import 
TransactionTestCase - -from .is_artif_case import is_artif_case -from .is_impermeable_case import is_impermeable_case - - -class TestGdalShapefileBuilder(TransactionTestCase): - def test_carriere_is_not_artif(self): - couverture = "CS1.1.2.1" # zones à matériaux minéraux - usage = "US1.3" # activité d'extraction - - query = f""" - WITH test_data AS ( - SELECT - '{couverture}' AS code_cs, - '{usage}' AS code_us - ) - SELECT - {is_artif_case( - code_cs="code_cs", - code_us="code_us", - )} - FROM - test_data - """ - - with connection.cursor() as cursor: - cursor.execute(query) - result = cursor.fetchone() - - self.assertEqual(result[0], 0) - - def test_only_zone_baties_and_zones_non_baties_are_impermeable(self): - impermeable_couvertures = [ - "CS1.1.1.1", # Zones bâties - "CS1.1.1.2", # Zones non bâties - ] - - non_impermeable_couvertures = [ - "CS1.1.2.1", # zones à matériaux minéraux - "CS1.1.2.2", # zones à matériaux composites - "CS1.2.1", # sol nuls - "CS1.2.2", # eau - "CS1.2.3", # nevé et glaciers - "CS2.1.1.1", # peuplement de feuillus - "CS2.1.1.2", # peuplement de conifères - "CS2.1.1.3", # peuplement mixte - "CS2.1.2", # formations arbustives et sous-abrisseaux - "CS2.1.3", # autres formations ligneuses - "CS2.2.1", # prairies - "CS2.2.2", # autres formations non ligneuses - ] - - def get_query(couverture): - return f""" - WITH test_data AS ( - SELECT - '{couverture}' AS code_cs - ) - SELECT - {is_impermeable_case( - code_cs="code_cs", - )} - FROM - test_data - """ - - for couverture in impermeable_couvertures: - with connection.cursor() as cursor: - cursor.execute(get_query(couverture)) - result = cursor.fetchone() - - self.assertEqual(result[0], 1) - - for couverture in non_impermeable_couvertures: - with connection.cursor() as cursor: - cursor.execute(get_query(couverture)) - result = cursor.fetchone() - - self.assertEqual(result[0], 0) diff --git a/public_data/infra/shapefile_builder/gdal/utils.py b/public_data/infra/shapefile_builder/gdal/utils.py 
deleted file mode 100644 index ec9aa141e..000000000 --- a/public_data/infra/shapefile_builder/gdal/utils.py +++ /dev/null @@ -1,2 +0,0 @@ -def multiline_string_to_single_line(string: str) -> str: - return string.replace("\n", " ").replace("\r", "") diff --git a/public_data/management/commands/build_administrative_layers.py b/public_data/management/commands/build_administrative_layers.py deleted file mode 100644 index 6d74c154d..000000000 --- a/public_data/management/commands/build_administrative_layers.py +++ /dev/null @@ -1,232 +0,0 @@ -import logging - -from django.contrib.gis.db.models import Union -from django.core.management.base import BaseCommand -from django.core.paginator import Paginator -from django.db.models import QuerySet - -from public_data.models import Cerema, Commune, Departement, Epci, Region -from public_data.models.administration import Scot -from utils.db import fix_poly - -logger = logging.getLogger("management.commands") - - -class Command(BaseCommand): - help = "(caution, it's from scratch) Will load data into Region, Departement and Epci from Cerema data" - - def add_arguments(self, parser): - parser.add_argument( - "--clean", - action="store_true", - help="Clean all data before loading", - ) - parser.add_argument( - "--departements", - nargs="+", - type=int, - help="Select departements to build", - ) - - def handle(self, *args, **options): - """This command is keeped for documentation prupose, do not use it unless to be sure of review everything.""" - - clean = options.get("clean", False) - - logger.info("Recreate region, departement, EPCI and communes referentials") - - if clean: - logger.info("Clean all data") - Region.objects.all().delete() - Departement.objects.all().delete() - Epci.objects.all().delete() - Commune.objects.all().delete() - Scot.objects.all().delete() - - base_qs = Cerema.objects.all() - - if options.get("departements"): - base_qs = base_qs.filter(dept_id__in=options["departements"]) - - self.load_region(base_qs) - 
self.load_departement(base_qs) - self.load_epci(base_qs) - self.load_scot(base_qs) - self.link_epci(base_qs) - self.load_communes(base_qs, table_was_cleaned=clean) - - def load_region(self, base_qs: QuerySet): - logger.info("Loading regions") - - qs = base_qs.values("region_id", "region_name", "srid_source") - qs = qs.annotate(mpoly=Union("mpoly")).order_by("region_name") - - logger.info("%d found regions", len(qs)) - - total_created = 0 - - for data in qs: - _, created = Region.objects.get_or_create( - source_id=data["region_id"], - defaults={ - "name": data["region_name"], - "mpoly": fix_poly(data["mpoly"]), - "srid_source": data["srid_source"], - }, - ) - - if created: - total_created += 1 - - logger.info("%d regions created", total_created) - logger.info("Done loading regions") - - def load_departement(self, base_qs: QuerySet): - logger.info("Loading departements") - - regions = {r.source_id: r for r in Region.objects.all()} - - qs = base_qs.values("region_id", "dept_id", "dept_name", "srid_source") - qs = qs.annotate(mpoly=Union("mpoly")).order_by("dept_id") - - logger.info("%d departements found", len(qs)) - - total_created = 0 - - for data in qs: - _, created = Departement.objects.get_or_create( - source_id=data["dept_id"], - defaults={ - "region": regions[data["region_id"]], - "name": data["dept_name"], - "mpoly": fix_poly(data["mpoly"]), - "srid_source": data["srid_source"], - }, - ) - - if created: - total_created += 1 - - logger.info("%d departements created", total_created) - logger.info("Done loading departements") - - def load_epci(self, base_qs: QuerySet): - logger.info("Loading EPCI") - - qs = base_qs.values("epci_id", "epci_name", "srid_source") - qs = qs.annotate(mpoly=Union("mpoly")).order_by("epci_id") - - logger.info("%d EPCI found", len(qs)) - - total_created = 0 - - for data in qs: - _, created = Epci.objects.get_or_create( - source_id=data["epci_id"], - defaults={ - "name": data["epci_name"], - "mpoly": fix_poly(data["mpoly"]), - 
"srid_source": data["srid_source"], - }, - ) - - if created: - total_created += 1 - - logger.info("%d EPCI created", total_created) - logger.info("Done loading EPCI") - - def load_scot(self, base_qs: QuerySet): - logger.info("Loading SCOT") - - qs = base_qs.values("scot", "srid_source") - qs = qs.annotate(mpoly=Union("mpoly")).order_by("scot") - - logger.info("%d SCoTs found", len(qs)) - - total_created = 0 - - for data in qs: - if data["scot"] is None: - continue - - _, created = Scot.objects.get_or_create( - name=data["scot"], - defaults={ - "mpoly": fix_poly(data["mpoly"]), - }, - ) - - if created: - total_created += 1 - - logger.info("%d SCoTs created", total_created) - # link to region and departement - depts = {d.source_id: d for d in Departement.objects.all()} - regions = {r.source_id: r for r in Region.objects.all()} - links = {} - - for scot_name, dept_id, region_id in ( - Cerema.objects.values_list("scot", "dept_id", "region_id") - .order_by("scot") - .filter(scot__isnull=False) - .distinct() - ): - if scot_name not in links: - links[scot_name] = {"departement_ids": set(), "region_ids": set()} - - links[scot_name]["departement_ids"].add(dept_id) - links[scot_name]["region_ids"].add(region_id) - - for scot_name, data in links.items(): - scot = Scot.objects.get(name=scot_name) - scot.departements.add(*[depts[d] for d in data["departement_ids"]]) - scot.regions.add(*[regions[r] for r in data["region_ids"]]) - - def link_epci(self, base_qs: QuerySet): - logger.info("Link EPCI <-> département") - depts = {d.source_id: d for d in Departement.objects.all()} - epcis = {e.source_id: e for e in Epci.objects.all()} - for epci in epcis.values(): - epci.departements.remove() - links = base_qs.values_list("epci_id", "dept_id").distinct() - logger.info("%d links found", links.count()) - for epci_id, dept_id in links: - epcis[epci_id].departements.add(depts[dept_id]) - logger.info("Done linking") - - def load_communes(self, base_qs: QuerySet, table_was_cleaned: bool): - 
logger.info("Loading Communes") - depts = {d.source_id: d for d in Departement.objects.all()} - epcis = {e.source_id: e for e in Epci.objects.all()} - qs = base_qs.order_by("city_insee") - - logger.info("%d Communes found ", qs.count()) - - should_only_load_missing_communes = not table_was_cleaned - - if should_only_load_missing_communes: - qs = qs.exclude(city_insee__in=Commune.objects.values_list("insee", flat=True)) - - logger.info("%d Communes to load ", qs.count()) - - paginator = Paginator(object_list=qs, per_page=1000) - - for page in paginator.page_range: - Commune.objects.bulk_create( - ( - Commune( - insee=data.city_insee, - name=data.city_name, - departement=depts[data.dept_id], - epci=epcis[data.epci_id], - mpoly=fix_poly(data.mpoly), - area=data.mpoly.transform(data.srid_source, clone=True).area / 10000, - srid_source=data.srid_source, - ) - for data in paginator.page(page) - ) - ) - logger.info("Page %d/%d done", page, paginator.num_pages) - - logger.info("Done loading Communes") diff --git a/public_data/management/commands/build_commune_data.py b/public_data/management/commands/build_commune_data.py deleted file mode 100644 index 4387a1059..000000000 --- a/public_data/management/commands/build_commune_data.py +++ /dev/null @@ -1,180 +0,0 @@ -import logging - -from django.core.management.base import BaseCommand -from django.db import connection -from django.db.models import F, Q - -from public_data.models import ( - Commune, - CommuneDiff, - CommuneSol, - Departement, - Ocsge, - OcsgeDiff, - Region, -) -from utils.db import cast_sum_area - -logger = logging.getLogger("management.commands") - - -class Command(BaseCommand): - help = "Build all data of cities" - - def add_arguments(self, parser): - parser.add_argument( - "--insee", - type=str, - help="insee code of a particular city", - ) - parser.add_argument( - "--departement", - type=str, - help="name of a specific departement", - ) - parser.add_argument( - "--region", - type=str, - help="name of 
region", - ) - parser.add_argument( - "--verbose", - action="store_true", - help="display city processed", - ) - - def handle(self, *args, **options): - logger.info("Start build cities data") - self.verbose = options["verbose"] - if options["insee"]: - self.process_one(options["insee"]) - elif options["departement"]: - self.process_departement(options["departement"]) - elif options["region"]: - self.process_departement(options["region"]) - else: - self.process_all() - logger.info("End building cities data") - - def process_multi(self, queryset): - total = queryset.count() - logger.info("Total cities : %d", total) - for i, city in enumerate(queryset): - self.build_data(city) - if self.verbose: - logger.info("%d/%d - %s (%s)", i + 1, total, city.name, city.insee) - - def process_all(self): - logger.info("Processing all cities") - qs = Commune.objects.all().order_by("insee") - self.process_multi(qs) - - def process_region(self, region_name): - logger.info("Processing a region with name= %s", region_name) - qs = Region.objects.filter(Q(source_id=region_name) | Q(name__icontains=region_name)) - if not qs.exists(): - logger.warning("No region found") - return - region = qs.first() - logger.info("Région: %s (%s)", region.name, region.source_id) - self.process_multi(region.get_cities().order_by("name")) - - def process_departement(self, departement): - qs = Departement.objects.filter(Q(source_id=departement) | Q(name__icontains=departement)) - if not qs.exists(): - logger.warning("No departement found") - return - departement = qs.first() - logger.info("Departement: %s (%s)", departement.name, departement.source_id) - self.process_multi(departement.commune_set.all().order_by("name")) - - def process_one(self, insee): - logger.info("Processing one city with code insee= %s", insee) - qs = Commune.objects.filter(insee=insee) - if not qs.exists(): - logger.warning("Code insee unknown") - return - elif qs.count() > 1: - logger.warning("More than 1 city fetched, should'nt be 
possible -_-'") - return - city = qs.first() - self.build_data(city) - - def __calculate_surface_artif(self, city: Commune): - city.surface_artif = ( - Ocsge.objects.intersect(city.mpoly) - .filter( - is_artificial=True, - year=city.last_millesime, - departement=city.departement.source_id, - ) - .aggregate(surface_artif=cast_sum_area("intersection_area"))["surface_artif"] - ) - - def build_data(self, city: Commune): - if not city.ocsge_available: - logger.info(f"No OCSGE data available for {city.name}. Maybe you forgot to run setup_departements?") - return - - self.__calculate_surface_artif(city) - - city.save() - - self.build_commune_sol(city) - self.build_commune_diff(city) - - def build_commune_sol(self, city: Commune): - CommuneSol.objects.filter(city=city).delete() - with connection.cursor() as cursor: - cursor.execute( - sql=""" - INSERT INTO public_data_communesol ( - city_id, - year, - matrix_id, - surface - ) - SELECT - com.id AS city_id, - o.year, - matrix.id AS matrix_id, - St_Area(ST_Union(ST_Intersection( - ST_Transform(com.mpoly, com.srid_source), - ST_Transform(o.mpoly, o.srid_source))) - ) / 10000 AS surface - FROM - public_data_commune AS com - LEFT JOIN - public_data_ocsge AS o ON - ST_Intersects(com.mpoly, o.mpoly) - LEFT JOIN - public_data_couverturesol AS cs ON - o.couverture = cs.code_prefix - LEFT JOIN - public_data_usagesol AS us ON - o.usage = us.code_prefix - LEFT JOIN - public_data_couvertureusagematrix AS matrix ON - matrix.couverture_id = cs.id AND - matrix.usage_id = us.id - WHERE - com.insee = %s - GROUP BY com.insee, com.id, o.year, o.couverture, o.usage, matrix.id, cs.code_prefix, us.code_prefix - """, - params=[city.insee], - ) - - def build_commune_diff(self, city: Commune): - CommuneDiff.objects.filter(city=city).delete() - qs = ( - OcsgeDiff.objects.intersect(city.mpoly) - .filter(departement=city.departement.source_id) - .values("year_old", "year_new") - .annotate( - new_artif=cast_sum_area("intersection_area", 
filter=Q(is_new_artif=True)), - new_natural=cast_sum_area("intersection_area", filter=Q(is_new_natural=True)), - net_artif=F("new_artif") - F("new_natural"), - ) - ) - - CommuneDiff.objects.bulk_create([CommuneDiff(city=city, **_) for _ in qs]) diff --git a/public_data/management/commands/build_matrix.py b/public_data/management/commands/build_matrix.py deleted file mode 100644 index b25ca05c5..000000000 --- a/public_data/management/commands/build_matrix.py +++ /dev/null @@ -1,72 +0,0 @@ -import logging - -from django.core.management.base import BaseCommand -from django.db.models import Q - -from public_data.models import CouvertureSol, CouvertureUsageMatrix, UsageSol - -logger = logging.getLogger("management.commands") - - -class Command(BaseCommand): - help = "Utilise le décret pour déterminer les zones artificielles" - - def handle(self, *args, **options): - logger.info("Update matrix to comply to décret") - - # add all keys with None - for couv in CouvertureSol.objects.all(): - qs = CouvertureUsageMatrix.objects.filter(couverture=couv, usage=None) - if not qs.exists(): - CouvertureUsageMatrix.objects.create(couverture=couv, usage=None) - - for usage in UsageSol.objects.all(): - qs = CouvertureUsageMatrix.objects.filter(couverture=None, usage=usage) - if not qs.exists(): - CouvertureUsageMatrix.objects.create(couverture=None, usage=usage) - - for couv in CouvertureSol.objects.all(): - for usage in UsageSol.objects.all(): - qs = CouvertureUsageMatrix.objects.filter(couverture=couv, usage=usage) - if not qs.exists(): - CouvertureUsageMatrix.objects.create(couverture=couv, usage=usage) - - qs = CouvertureUsageMatrix.objects.filter(couverture=None, usage=None) - if not qs.exists(): - CouvertureUsageMatrix.objects.create(couverture=None, usage=None) - - # first reinitialize - CouvertureUsageMatrix.objects.all().update( - is_artificial=False, - is_consumed=None, - is_natural=None, - label=CouvertureUsageMatrix.LabelChoices.NONE, - ) - # select artificial - # code_cs = 
["1.1.1.1", "1.1.1.2", "1.1.2.1", "1.1.2.2"] - artificial = CouvertureUsageMatrix.objects.filter( - Q(couverture__code__startswith="1.1.") - | Q( - couverture__code__startswith="2.2.", - usage__code__in=[ - "2", - "3", - "5", - "235", - "4.1.1", - "4.1.2", - "4.1.3", - "4.1.4", - "4.1.5", - "4.2", - "4.3", - "6.1", - "6.2", - ], - ) - ).exclude(couverture__code="1.1.2.1", usage__code="1.3") - artificial.update( - is_artificial=True, - label=CouvertureUsageMatrix.LabelChoices.ARTIFICIAL, - ) - logger.info("End") diff --git a/public_data/management/commands/build_shapefile.py b/public_data/management/commands/build_shapefile.py deleted file mode 100644 index a176386a4..000000000 --- a/public_data/management/commands/build_shapefile.py +++ /dev/null @@ -1,77 +0,0 @@ -from concurrent.futures import ProcessPoolExecutor -from logging import getLogger -from pathlib import Path -from typing import Any - -from django import setup as django_setup -from django.conf import settings -from django.core.management.base import BaseCommand - -from public_data.infra.shapefile_builder.gdal.GdalShapefileBuilder import ( - GdalShapefileBuilder, -) -from public_data.models import DataSource -from public_data.storages import DataStorage - -logger = getLogger("management.commands") - - -def upload_file_to_s3(path: Path): - logger.info(f"Uploading {path.name} to S3") - - with open(path, "b+r") as f: - storage = DataStorage() - storage.save(path.name, f) - - logger.info(f"Uploaded {path.name} to S3") - - -class Command(BaseCommand): - help = "Build shapefile" - - def add_arguments(self, parser): - parser.add_argument("--productor", type=str, required=True, choices=DataSource.ProductorChoices.values) - parser.add_argument("--dataset", type=str, required=True, choices=DataSource.DatasetChoices.values) - parser.add_argument("--name", type=str, choices=DataSource.DataNameChoices.values) - parser.add_argument("--year", type=int) - parser.add_argument("--parallel", action="store_true", help="Run 
the build in parallel", default=False) - parser.add_argument( - "--land_id", - type=str, - help="Departement etc ...", - choices=set([source.official_land_id for source in DataSource.objects.all()]), - ) - parser.add_argument("--upload", action="store_true", help="Upload the shapefile to S3", default=False) - - def get_sources_queryset(self, options): - sources = DataSource.objects.filter( - dataset=options.get("dataset"), - productor=options.get("productor"), - ) - if options.get("year"): - sources = sources.filter(millesimes__contains=[options.get("year")]) - if options.get("land_id"): - sources = sources.filter(official_land_id=options.get("land_id")) - if options.get("name"): - sources = sources.filter(name=options.get("name")) - return sources - - def handle(self, *args: Any, **options: Any) -> str | None: - if settings.ENVIRONMENT == "production": - logger.error("This command cannot be run in production") - return - - builder = GdalShapefileBuilder() - - if options.get("parallel"): - with ProcessPoolExecutor(max_workers=5, initializer=django_setup) as executor: - for source in self.get_sources_queryset(options).all(): - executor.submit(builder.build, source) - else: - # Running sequentially might be useful for debugging and necesary for - # building the most complex shapefiles - for source in self.get_sources_queryset(options).all(): - for built in builder.build(source): - if options.get("upload"): - _, path = built - upload_file_to_s3(path) diff --git a/public_data/management/commands/check_ocsge_validity.py b/public_data/management/commands/check_ocsge_validity.py deleted file mode 100644 index 7fd700265..000000000 --- a/public_data/management/commands/check_ocsge_validity.py +++ /dev/null @@ -1,70 +0,0 @@ -import logging -from concurrent.futures import ProcessPoolExecutor - -import geopandas -from django import setup as django_setup -from django.conf import settings -from django.core.management.base import BaseCommand - -from public_data.models import 
DataSource -from public_data.shapefile import ShapefileFromSource - -logger = logging.getLogger("management.commands") - - -def check_unique_fields_are_unique(source: DataSource, df: geopandas.GeoDataFrame): - fields = { - DataSource.DataNameChoices.OCCUPATION_DU_SOL: ["ID"], - DataSource.DataNameChoices.ZONE_CONSTRUITE: ["ID"], - DataSource.DataNameChoices.DIFFERENCE: [], - }[source.name] - - errors = [] - - for field in fields: - if not df[field].is_unique: - with open("errors_ocsge.txt", "+a") as f: - f.write( - f"D0{source.official_land_id} - {source.millesimes_string()} - {source.name} - {field} is not unique\n" # noqa: E501 - ) - return errors - - -def check_source_validity(source: DataSource) -> bool: - with ShapefileFromSource(source) as shapefile_path: - df = geopandas.read_file(shapefile_path) - check_unique_fields_are_unique(source, df) - - -class Command(BaseCommand): - help = """ - Iterate over all OCSGE sources and check if a series of conditions are met. - Note that the data must already be in S3, and a Datasource created for each source. - The output of the command is a file named errors_ocsge.txt in the current directory. - - This command is intended to be used in local environnement only. 
- """ - - def add_arguments(self, parser): - parser.add_argument("--land_id", type=str) - - def get_sources_queryset(self, departement=None): - sources = DataSource.objects.filter( - productor=DataSource.ProductorChoices.IGN, - dataset=DataSource.DatasetChoices.OCSGE, - ) - - if departement: - sources = sources.filter(official_land_id=departement) - - return sources - - def handle(self, *args, **options): - if settings.ENVIRONMENT != "local": - raise Exception("This command can only be run in local environnement") - - sources = self.get_sources_queryset(departement=options["land_id"]) - - with ProcessPoolExecutor(max_workers=5, initializer=django_setup) as executor: - for source in sources: - executor.submit(check_source_validity, source) diff --git a/public_data/management/commands/evaluate_city_area.py b/public_data/management/commands/evaluate_city_area.py deleted file mode 100644 index 51db48192..000000000 --- a/public_data/management/commands/evaluate_city_area.py +++ /dev/null @@ -1,18 +0,0 @@ -import logging - -from django.contrib.gis.db.models.functions import Area -from django.core.management.base import BaseCommand - -from public_data.models import Commune -from utils.db import DynamicSRIDTransform - -logger = logging.getLogger("management.commands") - - -class Command(BaseCommand): - help = "Evaluate city area" - - def handle(self, *args, **options): - logger.info("Start evaluation of city area") - Commune.objects.all().update(area=Area(DynamicSRIDTransform("mpoly", "srid_source")) / 10000) - logger.info("End evaluation of city area") diff --git a/public_data/management/commands/export_gpu.py b/public_data/management/commands/export_gpu.py deleted file mode 100644 index 7f71a293b..000000000 --- a/public_data/management/commands/export_gpu.py +++ /dev/null @@ -1,93 +0,0 @@ -import base64 -import codecs -import csv -import io -import logging - -from django.core.management.base import BaseCommand -from django.db.models import Q - -from public_data.models 
import Departement, ZoneUrba -from public_data.storages import DataStorage - -logger = logging.getLogger("management.commands") - - -FIELD_NAMES = [ - "gid", - "libelle", - "libelong", - "typezone", - "insee", - "idurba", - "idzone", - "lib_idzone", - "partition", - "destdomi", - "nomfic", - "urlfic", - "datappro", - "datvalid", - "mpoly", -] - - -def to_b64_utf8(words: str) -> str: - if words: - words = str(words) - return base64.b64encode(words.encode("utf-8")).decode("utf-8") - return "" - - -class Command(BaseCommand): - help = "Load all data from OCS GE" - - def add_arguments(self, parser): - parser.add_argument( - "--dept", - type=str, - help="departement you want to export", - ) - - def handle(self, *args, **options): - logger.info("Export GPU to csv on s3") - - self.storage = DataStorage() - - qs = Departement.objects.all().order_by("name") - - dept_param = options["dept"] - if dept_param: - qs = Departement.objects.filter(Q(source_id=dept_param) | Q(name=dept_param)) - if not qs.exists(): - raise ValueError(f"{dept_param} is not a valid departement") - - logger.info("Total departement to process: %d", qs.count()) - - for dept in qs: - self.process_one(dept) - - logger.info("End exporting GPU") - - def process_one(self, dept: Departement) -> None: - logger.info("Departement processed: %s", dept.name) - - bcontent = io.BytesIO() - StreamWriter = codecs.getwriter("utf-8") - stream = StreamWriter(bcontent) - writer = csv.writer(stream, delimiter=";", quotechar='"', quoting=csv.QUOTE_ALL) - - writer.writerow(FIELD_NAMES) - - qs = ZoneUrba.objects.intersect(dept.mpoly) - total = qs.count() - logger.info("Exporting %d zones", total) - for row in qs.values_list(*FIELD_NAMES): - writer.writerow(map(to_b64_utf8, row)) - - filename = f"GPU/{dept.source_id}_{dept.name}.csv" - bcontent.seek(0) - logger.info("Writing file to S3") - - final_name = self.storage.save(filename, bcontent) - logger.info("File created: %s", final_name) diff --git 
a/public_data/management/commands/find_urlfic.py b/public_data/management/commands/find_urlfic.py deleted file mode 100644 index ec1818fa3..000000000 --- a/public_data/management/commands/find_urlfic.py +++ /dev/null @@ -1,55 +0,0 @@ -import asyncio -from concurrent.futures import ThreadPoolExecutor - -import requests -from django.core.management.base import BaseCommand - -from public_data.models.gpu import ZoneUrba - - -class Command(BaseCommand): - help = "Vérifie si une URL renvoie un fichier PDF" - - # @sync_to_async - def is_pdf(self, url): - try: - response = requests.head(url, timeout=5) - content_type = response.headers.get("Content-Type") - if content_type and "application/pdf" in content_type.lower(): - return url - except requests.exceptions.RequestException: - pass - return None - - async def main(self, *args, **options): - items = ( - ZoneUrba.objects.filter(typezone="Ah", urlfic__startswith="h") - .order_by("urlfic") - .values("urlfic") - .distinct() - ) - print(items.count()) - for item in items: - self.stdout.write(item["urlfic"]) - if await self.is_pdf(item["urlfic"]): - self.stdout.write(item["urlfic"], ending="\n") - - def handle(self, *args, **options): - urls = [ - _["urlfic"] - for _ in ( - ZoneUrba.objects.filter(typezone="Nh", urlfic__startswith="h") - .order_by("urlfic") - .values("urlfic") - .distinct() - ) - ] - # Convertir la coroutine en appel synchrone - with ThreadPoolExecutor() as executor: - loop = asyncio.get_event_loop() - tasks = [loop.run_in_executor(executor, self.is_pdf, url) for url in urls] - results = loop.run_until_complete(asyncio.gather(*tasks)) - - for result in results: - if result: - print(result) diff --git a/public_data/management/commands/fix_is_artif_carriere.py b/public_data/management/commands/fix_is_artif_carriere.py deleted file mode 100644 index bd99edd7f..000000000 --- a/public_data/management/commands/fix_is_artif_carriere.py +++ /dev/null @@ -1,44 +0,0 @@ -import logging - -import celery -from 
django.core.management import call_command -from django.core.management.base import BaseCommand - -from public_data.models import Commune, Ocsge -from public_data.tasks import calculate_commune_artificial_area - -logger = logging.getLogger(__name__) - - -class Command(BaseCommand): - help = "Fix is_artif_carriere" - - def handle(self, *args, **options): - call_command(command_name="load_shapefile", dataset="OCSGE", name="DIFFERENCE") - - ocsge_with_carriere = Ocsge.objects.filter( - couverture="CS1.1.2.1", # zones à matériaux minéraux, - usage="US1.3", # activité d'extraction - ) - - ocsge_with_carriere.update( - is_artificial=False, - ) - - communes = Commune.objects.filter( - ocsge_available=True, - ) - - celery_tasks = [] - - for commune in communes: - ocsge_with_carriere_on_commune = ocsge_with_carriere.filter( - mpoly__intersects=commune.mpoly, - ) - - if ocsge_with_carriere_on_commune.exists(): - logger.info(f"Commune {commune.insee} has carriere") - celery_tasks.append(calculate_commune_artificial_area.si(commune.insee)) - logger.info(f"Found {len(celery_tasks)} communes with carriere") - - celery.group(*celery_tasks).apply_async(queue="long") diff --git a/public_data/management/commands/import_gpu.py b/public_data/management/commands/import_gpu.py deleted file mode 100644 index 8235e5375..000000000 --- a/public_data/management/commands/import_gpu.py +++ /dev/null @@ -1,114 +0,0 @@ -import base64 -import logging - -from django.contrib.gis.geos import GEOSGeometry -from django.core.management.base import BaseCommand -from django.db.models import Q - -from public_data.management.commands.load_gpu import ZoneUrbaFrance -from public_data.models import Departement, ZoneUrba -from public_data.storages import DataStorage - -logger = logging.getLogger("management.commands") - - -FIELD_NAMES = [ - "gid", - "libelle", - "libelong", - "typezone", - "insee", - "idurba", - "idzone", - "lib_idzone", - "partition", - "destdomi", - "nomfic", - "urlfic", - "datappro", - 
"datvalid", - # "mpoly", -] - - -def from_b64_to_str(base64_unicode): - base64_bytes = base64_unicode.encode("utf-8") - str_bytes = base64.b64decode(base64_bytes) - return str_bytes.decode("utf-8") - - -class MissingFile(Exception): - pass - - -class Command(BaseCommand): - help = "Use file on S3 to import GPU data." - - def add_arguments(self, parser): - parser.add_argument( - "--departement", - type=str, - help="departement you want to export", - ) - - def handle(self, *args, **options): - logger.info("Import GPU from S3") - - self.storage = DataStorage() - - qs = Departement.objects.all().order_by("name") - - dept_param = options["departement"] - if dept_param: - qs = qs.filter(Q(source_id=dept_param) | Q(name__icontains=dept_param)) - if not qs.exists(): - raise ValueError(f"{dept_param} is not a valid departement") - - for dept in qs: - try: - self.process_one(dept) - except MissingFile: - logger.warning("Missing file for departement %s", dept.name) - - logger.info("End importing GPU") - - def process_one(self, dept: Departement) -> None: - logger.info("Departement processed: %s", dept.name) - - filename = f"GPU/{dept.source_id}_{dept.name}.csv" - if not self.storage.exists(filename): - raise MissingFile("File does not exist, have you exported it first ?") - - logger.info("Delete previous data from this departement") - zones = ZoneUrba.objects.intersect(dept.mpoly) - zones.delete() - - logger.info("Read data from file") - s3_file = self.storage.open(filename, "r") - - zones = [] - for i, line in enumerate(s3_file): - if i == 0: - continue - line_b64 = line.strip()[1:-1].split('";"') - line_data = list(map(from_b64_to_str, line_b64)) - line_dict = dict(zip(FIELD_NAMES, line_data[:-1])) - try: - line_dict["mpoly"] = GEOSGeometry(line_data[-1]) - except ValueError as exc: - logger.error("Error while parsing geometry: %s", exc) - logger.error("line: %s", line) - logger.error("line_b64: %s", line_b64) - logger.error("line_data: %s", line_data) - 
logger.error("line_dict: %s", line_dict) - raise exc - zones.append(ZoneUrba(**line_dict)) - if i % 1000 == 0: - logger.info("Bulk create 1000 lines.") - ZoneUrba.objects.bulk_create(zones) - zones = [] - ZoneUrba.objects.bulk_create(zones) - logger.info("Imported %i lines", i) - - logger.info("Start calculating fields") - ZoneUrbaFrance.calculate_fields() diff --git a/public_data/management/commands/load_gpu.py b/public_data/management/commands/load_gpu.py deleted file mode 100644 index 895ffc59e..000000000 --- a/public_data/management/commands/load_gpu.py +++ /dev/null @@ -1,155 +0,0 @@ -import logging -from pathlib import Path - -from django.contrib.gis.db.models.functions import Area -from django.core.management.base import BaseCommand -from django.db import connection -from django.db.models import DecimalField, F -from django.db.models.functions import Cast - -from public_data.models import ZoneUrba -from public_data.models.mixins import AutoLoadMixin -from utils.db import DynamicSRIDTransform - -logger = logging.getLogger("management.commands") - - -# ############## -# ZoneUrba France entière -# ############## - - -class ZoneUrbaFrance(AutoLoadMixin, ZoneUrba): - """ - Zone urbaines France entière - Données récupérées sur le FTP: sftp-public.ign.fr - Date de téléchargement : mai 2023 - """ - - class Meta: - proxy = True - - shape_file_path = Path("public_data/GPU/zone_urba.shp") - mapping = { - "gid": "gid", - "partition": "partition", - "libelle": "libelle", - "libelong": "libelong", - "origin_typezone": "typezone", - "destdomi": "destdomi", - "nomfic": "nomfic", - "urlfic": "urlfic", - "origin_insee": "insee", - "datappro": "datappro", - "datvalid": "datvalid", - "idurba": "idurba", - "idzone": "idzone", - "lib_idzone": "lib_idzone", - "mpoly": "MULTIPOLYGON", - } - - @classmethod - def clean_data(cls): - """Clean data before loading""" - cls.objects.all().delete() - - def save(self, *args, **kwargs): - return super().save(*args, **kwargs) - - @classmethod 
- def calculate_fields(cls): - """Override if you need to calculate some fields after loading data. - By default, it will calculate label for couverture and usage if couverture_field - and usage_field are set with the name of the field containing code (cs.2.1.3) - """ - # TODO : insee, surface, type_zone - logger.info("Calculate fields") - logger.info("Make mpoly valid") - make_valid_mpoly_query = ( - "UPDATE public_data_zoneurba pdz " - "SET mpoly = ST_Multi(ST_CollectionExtract(ST_MakeValid(mpoly), 3)) " - "WHERE ST_IsValid(mpoly) IS FALSE " - " AND ST_IsValid(ST_MakeValid(mpoly))" - ) - with connection.cursor() as cursor: - cursor.execute(make_valid_mpoly_query) - logger.info("Evaluate area") - - cls.objects.filter(area__isnull=True).update( - area=Cast( - Area(DynamicSRIDTransform("mpoly", "srid_source")) / 10000, - DecimalField(max_digits=15, decimal_places=4), - ) - ) - - logger.info("Clean typezone") - cls.objects.update(origin_typezone=F("typezone")) - cls.objects.filter(typezone__in=["Nh", "Nd"]).update(typezone="N") - cls.objects.filter(typezone="Ah").update(typezone="A") - logger.info("Fill up table ZoneUrbaArtificialArea") - artif_area_query = """ - INSERT INTO public_data_artifareazoneurba (zone_urba_id, year, area) - SELECT - pdz.id, - pdo.year, - ST_Area(ST_Transform(ST_Union(ST_Intersection(ST_MakeValid(pdo.mpoly), pdz.mpoly)), pdz.srid_source)) - / 10000 - AS artificial_area - FROM - public_data_zoneurba pdz - LEFT JOIN - public_data_artifareazoneurba pda - ON pda.zone_urba_id = pdz.id - INNER JOIN - public_data_ocsge pdo - ON ST_Intersects(pdo.mpoly, pdz.mpoly) AND is_artificial = true - WHERE pda.id is null - GROUP BY pdz.id, pdo.year; - """ - with connection.cursor() as cursor: - cursor.execute(artif_area_query) - - -class Command(BaseCommand): - help = "Load all data from OCS GE" - - def add_arguments(self, parser): - parser.add_argument( - "--truncate", - action="store_true", - help="if you want to completly restart tables including id, not 
compatible " "with --item", - ) - parser.add_argument( - "--verbose", - action="store_true", - help="increase output", - ) - parser.add_argument( - "--local-file", - type=str, - help="Use local file instead of s3", - ) - - def handle(self, *args, **options): - logger.info("Load GPU") - self.verbose = options["verbose"] - logger.info("Full load") - if options["truncate"]: - self.truncate() - self.load() - logger.info("End loading GPU") - - def truncate(self): - logger.info("Truncate ZoneUrbaFrance") - ZoneUrbaFrance.truncate() - - def load(self): - logger.info("Load data for: ZoneUrbaFrance") - - ZoneUrbaFrance.load( - verbose=self.verbose, - encoding="latin1", - silent=True, - ) - - logger.info("End loading ZoneUrbaFrance") diff --git a/public_data/management/commands/load_ocsge.py b/public_data/management/commands/load_ocsge.py deleted file mode 100644 index c7f77e886..000000000 --- a/public_data/management/commands/load_ocsge.py +++ /dev/null @@ -1,110 +0,0 @@ -import logging -from typing import Callable, Dict, Tuple - -from django.core.management.base import BaseCommand -from django.db.models import Q - -from public_data import loaders -from public_data.factories import LayerMapperFactory -from public_data.models import DataSource, Departement - -logger = logging.getLogger("management.commands") - - -class OcsgeFactory(LayerMapperFactory): - def get_class_properties(self, module_name: str) -> Dict[str, int]: - properties = super().get_class_properties(module_name) - properties |= {"_departement": Departement.objects.get(source_id=self.data_source.official_land_id)} - if self.data_source.name == DataSource.DataNameChoices.DIFFERENCE: - properties |= { - "_year_old": min(self.data_source.millesimes), - "_year_new": max(self.data_source.millesimes), - } - else: - properties |= {"_year": self.data_source.millesimes[0]} - return properties - - def get_base_class(self) -> Tuple[Callable]: - base_class = None - if self.data_source.name == 
DataSource.DataNameChoices.DIFFERENCE: - base_class = loaders.AutoOcsgeDiff - elif self.data_source.name == DataSource.DataNameChoices.OCCUPATION_DU_SOL: - base_class = loaders.AutoOcsge - elif self.data_source.name == DataSource.DataNameChoices.ZONE_CONSTRUITE: - base_class = loaders.AutoZoneConstruite - return (base_class,) - - -class Command(BaseCommand): - def get_queryset(self): - return DataSource.objects.filter( - productor=DataSource.ProductorChoices.IGN, - dataset=DataSource.DatasetChoices.OCSGE, - ) - - def add_arguments(self, parser): - parser.add_argument( - "--departement", - type=str, - help="Departement name", - ) - parser.add_argument( - "--year-range", - type=str, - help="Year range", - ) - parser.add_argument( - "--layer-type", - type=str, - help="Layer type.", - ) - parser.add_argument( - "--all", - action="store_true", - help="Load all data", - ) - - parser.add_argument( - "--list", - action="store_true", - help="List available data", - ) - - def handle(self, *args, **options): - if not options: - raise ValueError("You must provide at least one option, or use --all to load all data") - - if options.get("list"): - for source in self.get_queryset(): - print(source) - return - - sources = self.get_queryset() - - if options.get("departement"): - departement_param = options.get("departement") - departement_queryset = Departement.objects.filter( - Q(source_id=departement_param) | Q(name__icontains=departement_param) - ) - - if not departement_queryset: - raise ValueError(f"{departement_param} is not a valid departement") - - departement = departement_queryset.first() - - sources = sources.filter(official_land_id=departement.source_id) - - if options.get("year-range"): - year_range = options.get("year-range").split(",") - sources = sources.filter(millesimes__overlap=year_range) - - if options.get("layer-type"): - sources = sources.filter(name__icontains=options.get("layer-type")) - - if not sources: - raise ValueError("No data sources found") - - for 
source in sources: - layer_mapper_proxy_class = OcsgeFactory(source).get_layer_mapper_proxy_class(module_name=__name__) - logger.info("Process %s", layer_mapper_proxy_class.__name__) - layer_mapper_proxy_class.load() diff --git a/public_data/management/commands/load_shapefile.py b/public_data/management/commands/load_shapefile.py deleted file mode 100644 index 0571de580..000000000 --- a/public_data/management/commands/load_shapefile.py +++ /dev/null @@ -1,286 +0,0 @@ -import logging -import subprocess -from pathlib import Path - -from django.conf import settings -from django.core.management.base import BaseCommand - -from public_data.models import Cerema, DataSource, Ocsge, OcsgeDiff, ZoneConstruite -from public_data.shapefile import ShapefileFromSource - -logger = logging.getLogger("management.commands") - -source_to_table_map = { - DataSource.DatasetChoices.OCSGE: { - DataSource.DataNameChoices.OCCUPATION_DU_SOL: Ocsge._meta.db_table, - DataSource.DataNameChoices.DIFFERENCE: OcsgeDiff._meta.db_table, - DataSource.DataNameChoices.ZONE_CONSTRUITE: ZoneConstruite._meta.db_table, - }, - DataSource.DatasetChoices.MAJIC: { - DataSource.DataNameChoices.CONSOMMATION_ESPACE: Cerema._meta.db_table, - }, -} - -field_mapping = { - DataSource.DatasetChoices.OCSGE: { - DataSource.DataNameChoices.OCCUPATION_DU_SOL: { - "id_source": "ID", - "couverture": "CODE_CS", - "usage": "CODE_US", - "year": "YEAR", - "srid_source": "SRID", - "is_artificial": "IS_ARTIF", - "departement": "DPT", - "surface": "SURFACE", - "mpoly": "GEOMETRY", - "is_impermeable": "IS_IMPER", - }, - DataSource.DataNameChoices.DIFFERENCE: { - "year_old": "YEAR_OLD", - "year_new": "YEAR_NEW", - "cs_new": "CS_NEW", - "cs_old": "CS_OLD", - "us_new": "US_NEW", - "us_old": "US_OLD", - "srid_source": "SRID", - "surface": "SURFACE", - "is_new_artif": "NEW_ARTIF", - "is_new_natural": "NEW_NAT", - "departement": "DPT", - "mpoly": "GEOMETRY", - "is_new_impermeable": "NEW_IMPER", - "is_new_not_impermeable": "NEWNOIMPER", - 
}, - DataSource.DataNameChoices.ZONE_CONSTRUITE: { - "id_source": "ID", - "year": "YEAR", - "millesime": "MILLESIME", - "srid_source": "SRID", - "departement": "DPT", - "surface": "SURFACE", - "mpoly": "GEOMETRY", - }, - DataSource.DataNameChoices.ZONE_ARTIFICIELLE: { - "year": "YEAR", - "departement": "DPT", - "surface": "SURFACE", - "srid_source": "SRID", - "mpoly": "GEOMETRY", - }, - }, - DataSource.DatasetChoices.MAJIC: { - DataSource.DataNameChoices.CONSOMMATION_ESPACE: { - "city_insee": "idcom", - "city_name": "idcomtxt", - "region_id": "idreg", - "region_name": "idregtxt", - "dept_id": "iddep", - "dept_name": "iddeptxt", - "epci_id": "epci23", - "epci_name": "epci23txt", - "scot": "scot", - "naf09art10": "naf09art10", - "art09act10": "art09act10", - "art09hab10": "art09hab10", - "art09mix10": "art09mix10", - "art09rou10": "art09rou10", - "art09fer10": "art09fer10", - "art09inc10": "art09inc10", - "naf10art11": "naf10art11", - "art10act11": "art10act11", - "art10hab11": "art10hab11", - "art10mix11": "art10mix11", - "art10rou11": "art10rou11", - "art10fer11": "art10fer11", - "art10inc11": "art10inc11", - "naf11art12": "naf11art12", - "art11act12": "art11act12", - "art11hab12": "art11hab12", - "art11mix12": "art11mix12", - "art11rou12": "art11rou12", - "art11fer12": "art11fer12", - "art11inc12": "art11inc12", - "naf12art13": "naf12art13", - "art12act13": "art12act13", - "art12hab13": "art12hab13", - "art12mix13": "art12mix13", - "art12rou13": "art12rou13", - "art12fer13": "art12fer13", - "art12inc13": "art12inc13", - "naf13art14": "naf13art14", - "art13act14": "art13act14", - "art13hab14": "art13hab14", - "art13mix14": "art13mix14", - "art13rou14": "art13rou14", - "art13fer14": "art13fer14", - "art13inc14": "art13inc14", - "naf14art15": "naf14art15", - "art14act15": "art14act15", - "art14hab15": "art14hab15", - "art14mix15": "art14mix15", - "art14rou15": "art14rou15", - "art14fer15": "art14fer15", - "art14inc15": "art14inc15", - "naf15art16": "naf15art16", - 
"art15act16": "art15act16", - "art15hab16": "art15hab16", - "art15mix16": "art15mix16", - "art15rou16": "art15rou16", - "art15fer16": "art15fer16", - "art15inc16": "art15inc16", - "naf16art17": "naf16art17", - "art16act17": "art16act17", - "art16hab17": "art16hab17", - "art16mix17": "art16mix17", - "art16rou17": "art16rou17", - "art16fer17": "art16fer17", - "art16inc17": "art16inc17", - "naf17art18": "naf17art18", - "art17act18": "art17act18", - "art17hab18": "art17hab18", - "art17mix18": "art17mix18", - "art17rou18": "art17rou18", - "art17fer18": "art17fer18", - "art17inc18": "art17inc18", - "naf18art19": "naf18art19", - "art18act19": "art18act19", - "art18hab19": "art18hab19", - "art18mix19": "art18mix19", - "art18rou19": "art18rou19", - "art18fer19": "art18fer19", - "art18inc19": "art18inc19", - "naf19art20": "naf19art20", - "art19act20": "art19act20", - "art19hab20": "art19hab20", - "art19mix20": "art19mix20", - "art19rou20": "art19rou20", - "art19fer20": "art19fer20", - "art19inc20": "art19inc20", - "naf20art21": "naf20art21", - "art20act21": "art20act21", - "art20hab21": "art20hab21", - "art20mix21": "art20mix21", - "art20rou21": "art20rou21", - "art20fer21": "art20fer21", - "art20inc21": "art20inc21", - "naf21art22": "naf21art22", - "art21act22": "art21act22", - "art21hab22": "art21hab22", - "art21mix22": "art21mix22", - "art21rou22": "art21rou22", - "art21fer22": "art21fer22", - "art21inc22": "art21inc22", - "naf22art23": "naf22art23", - "art22act23": "art22act23", - "art22hab23": "art22hab23", - "art22mix23": "art22mix23", - "art22rou23": "art22rou23", - "art22fer23": "art22fer23", - "art22inc23": "art22inc23", - "naf09art23": "naf09art23", - "art09act23": "art09act23", - "art09hab23": "art09hab23", - "art09mix23": "art09mix23", - "art09rou23": "art09rou23", - "art09fer23": "art09fer23", - "art09inc23": "art09inc23", - "artcom0923": "artcom0923", - "aav2020": "aav2020", - "aav2020txt": "aav2020txt", - "aav2020_ty": "aav2020_ty", - "pop14": "pop14", - 
"pop20": "pop20", - "pop1420": "pop1420", - "men14": "men14", - "men20": "men20", - "men1420": "men1420", - "emp14": "emp14", - "emp20": "emp20", - "emp1420": "emp1420", - "mepart1420": "mepart1420", - "menhab1420": "menhab1420", - "artpop1420": "artpop1420", - "surfcom23": "surfcom202", - "naf11art21": "NAF11ART21", - "art11hab21": "ART11HAB21", - "art11act21": "ART11ACT21", - "mpoly": "GEOMETRY", - "srid_source": "SRID", - }, - }, -} - - -class Command(BaseCommand): - def add_arguments(self, parser): - possible_sources = DataSource.objects.filter(productor=DataSource.ProductorChoices.MDA) - - names = [name for dataset in source_to_table_map for name in source_to_table_map[dataset]] - datasets = source_to_table_map.keys() - land_ids = set([source.official_land_id for source in possible_sources]) - - parser.add_argument("--dataset", type=str, required=True, choices=datasets) - parser.add_argument("--land_id", type=str, choices=land_ids) - parser.add_argument("--name", type=str, choices=names) - - def get_sources_queryset(self, options): - sources = DataSource.objects.filter( - dataset=options.get("dataset"), - productor=DataSource.ProductorChoices.MDA, - ) - if options.get("land_id"): - sources = sources.filter(official_land_id=options.get("land_id")) - if options.get("millesimes"): - sources = sources.filter(millesimes__overlap=options.get("millesimes")) - if options.get("name"): - sources = sources.filter(name=options.get("name")) - - return sources - - def load_shapefile_to_db( - self, - shapefile_path: Path, - source: DataSource, - ): - db = settings.DATABASES["default"] - - destination_table_name = source_to_table_map[source.dataset][source.name] - mapping = field_mapping[source.dataset][source.name] - - command = [ - "ogr2ogr", - "-dialect", - "SQLITE", - "-f", - '"PostgreSQL"', - f'"PG:dbname={db["NAME"]} host={db["HOST"]} port={db["PORT"]} user={db["USER"]} password={db["PASSWORD"]}"', # noqa: E501 - str(shapefile_path), - "-s_srs", - f"EPSG:{source.srid}", 
- "-t_srs", - "EPSG:4326", - "--config", - "PG_USE_COPY", - "YES", - "-nlt", - "PROMOTE_TO_MULTI", - "-nln", - destination_table_name, - "-append", - "-sql", - f'"SELECT {", ".join([f"{value} AS {key}" for key, value in mapping.items()])} FROM {source.name}"', - ] - subprocess.run(" ".join(command), check=True, shell=True) - - def handle(self, *args, **options) -> None: - for source in self.get_sources_queryset(options): - with ShapefileFromSource(source=source) as shapefile_path: - logger.info("Deleting previously loaded data") - deleted_count, _ = source.delete_loaded_data() - logger.info(f"Deleted {deleted_count} previously loaded features") - logger.info("Loading shapefile to db") - self.load_shapefile_to_db( - shapefile_path=shapefile_path, - source=source, - ) - logger.info("Loaded shapefile to db") diff --git a/public_data/management/commands/mep_7_1.py b/public_data/management/commands/mep_7_1.py deleted file mode 100644 index dd887ed6b..000000000 --- a/public_data/management/commands/mep_7_1.py +++ /dev/null @@ -1,53 +0,0 @@ -import logging - -import celery -from django.core.management import call_command -from django.core.management.base import BaseCommand - -from public_data.models import Commune, DataSource, Departement -from public_data.tasks import calculate_data_for_commune - -logger = logging.getLogger("management.commands") - - -class Command(BaseCommand): - help = "Dedicated to load data for 5.2 deployment" - - def load_departement(self, departement: Departement): - call_command( - command_name="load_shapefile", - dataset=DataSource.DatasetChoices.OCSGE, - land_id=departement.source_id, - ) - call_command( - command_name="update_project_ocsge", - departements=[departement.source_id], - ) - - def handle(self, *args, **options): - logger.info("Start mep_71") - - call_command("maintenance", on=True) - - logger.info("Initialize data sources") - DataSource.objects.all().delete() - call_command("loaddata", "public_data/models/data_source_fixture.json") 
- - logger.info("Load new OCS GE") - call_command("setup_departements") - - departements_source_ids = ["33"] - - celery_tasks = [] - - for source_id in departements_source_ids: - departement = Departement.objects.get(source_id=source_id) - self.load_departement(departement) - - for commune in Commune.objects.filter(departement=departement): - celery_tasks.append(calculate_data_for_commune.si(commune.insee)) - - celery.group(*celery_tasks).apply_async(queue="long") - - call_command("maintenance", off=True) - logger.info("End mep_71") diff --git a/public_data/management/commands/repack_ocsge.py b/public_data/management/commands/repack_ocsge.py deleted file mode 100644 index 75bf47197..000000000 --- a/public_data/management/commands/repack_ocsge.py +++ /dev/null @@ -1,149 +0,0 @@ -import logging -import re -import subprocess - -from django.conf import settings -from django.core.management.base import BaseCommand, CommandParser - -from public_data.models import DataSource -from public_data.shapefile import ShapefileFromURL - -logger = logging.getLogger("management.commands") - - -def find_years_in_url(url: str, count=1) -> list[int]: - results = re.findall(pattern="(\d{4})", string=str(url)) # noqa: W605 - - years = set() - - for result in results: - # check if the year the number is > 2000. 
- # this is to avoid getting other numbers in the path as years - if str(result).startswith("20"): - years.add(int(result)) - - if len(years) != count: - raise ValueError("Years count does not match the expected count") - - if not years: - raise ValueError("Years not found in the path") - - return list(sorted(years)) - - -def find_departement_in_url(url: str) -> str: - results = re.findall(pattern="D(\d{3})", string=str(url)) # noqa: W605 - - if len(results) > 0: - result = results[0] - - if str(result).startswith("0"): - return str(result).replace("0", "", 1) - - if not result: - raise ValueError("Departement not found in the path") - - return result - - -def process_url(url: str) -> list[DataSource]: - sources = [] - if "DIFF" in url: - sources.append( - DataSource( - dataset=DataSource.DatasetChoices.OCSGE, - name=DataSource.DataNameChoices.DIFFERENCE, - productor=DataSource.ProductorChoices.IGN, - source_url=url, - srid=2154, - ) - ) - else: - sources += [ - DataSource( - dataset=DataSource.DatasetChoices.OCSGE, - name=DataSource.DataNameChoices.OCCUPATION_DU_SOL, - productor=DataSource.ProductorChoices.IGN, - source_url=url, - srid=2154, - ), - DataSource( - dataset=DataSource.DatasetChoices.OCSGE, - name=DataSource.DataNameChoices.ZONE_CONSTRUITE, - productor=DataSource.ProductorChoices.IGN, - source_url=url, - srid=2154, - ), - ] - - logger.info(f"Processing sources : {sources}") - - for source in sources: - shapefile_name_pattern = { - DataSource.DataNameChoices.OCCUPATION_DU_SOL: "OCCUPATION_SOL.shp", - DataSource.DataNameChoices.ZONE_CONSTRUITE: "ZONE_CONSTRUITE.shp", - DataSource.DataNameChoices.DIFFERENCE: "*.shp", - }[source.name] - - with ShapefileFromURL(url=url, shapefile_name=shapefile_name_pattern) as shapefile_path: - source.millesimes = find_years_in_url( - url=url, count=2 if source.name == DataSource.DataNameChoices.DIFFERENCE else 1 - ) - - logger.info(f"Years found : {source.millesimes}") - - source.official_land_id = 
find_departement_in_url(url=url) - - logger.info(f"Departement found : {source.official_land_id}") - - shapefile_name = shapefile_path.name - - target_name = f"{source.official_land_id}_{source.name}_{'_'.join(map(str, source.millesimes))}_{source.productor}_REPACKED.shp.zip" # noqa: E501 - - logger.info(f"Target name : {target_name}") - - command = f'ogr2ogr -f "ESRI Shapefile" "{target_name}" "{shapefile_path}"' - - subprocess.run(args=command, check=True, shell=True) - - source.path = target_name - source.shapefile_name = shapefile_name - - existing_source = DataSource.objects.filter( - dataset=source.dataset, - name=source.name, - productor=source.productor, - official_land_id=source.official_land_id, - millesimes=source.millesimes, - ).first() - - if existing_source: - existing_source.delete() - source.save() - else: - source.save() - - return sources - - -class Command(BaseCommand): - help = """ - Take a list of URLS as input, and for each URL, download the zip, extract the shapefiles, - and repackage them in a new zip file with a new name. - This effectively separates DIFFERENCE, OCCUPATION_DU_SOL and ZONE_CONSTRUITE shapefiles - into their own zip files, and create the corresponding DataSource objects in the database. - Note that the command does not upload the new zip files to S3. - This command is intended to be used in local environment only. 
- """ - - def add_arguments(self, parser: CommandParser) -> None: - parser.add_argument("--urls", nargs="+", type=str) - - def handle(self, *args, **options): - if settings.ENVIRONMENT != "local": - raise Exception("This command can only be run in local environment") - - urls = options.get("urls", []) - - for url in urls: - process_url(url) diff --git a/public_data/management/commands/set_parent.py b/public_data/management/commands/set_parent.py deleted file mode 100644 index cf5347248..000000000 --- a/public_data/management/commands/set_parent.py +++ /dev/null @@ -1,19 +0,0 @@ -import logging - -from django.core.management.base import BaseCommand - -from public_data.models import CouvertureSol, UsageSol - -logging.basicConfig(level=logging.INFO) - - -class Command(BaseCommand): - help = "This will reevaluate parent fields of all instances of Couverture and Usage" - - def handle(self, *args, **options): - logging.info("Re-evaluate CouvertureSol parents of all instances") - for couv in CouvertureSol.objects.all(): - couv.set_parent() - logging.info("Re-evaluate UsageSol parents of all instances") - for usage in UsageSol.objects.all(): - usage.set_parent() diff --git a/public_data/management/commands/setup_departements.py b/public_data/management/commands/setup_departements.py deleted file mode 100644 index 4840df1e5..000000000 --- a/public_data/management/commands/setup_departements.py +++ /dev/null @@ -1,63 +0,0 @@ -import logging - -from django.core.management.base import BaseCommand - -from public_data.models import Commune, DataSource, Departement - -logger = logging.getLogger("management.commands") - - -class Command(BaseCommand): - help = """Setup departements and communes OCSGE properties - - Reset departements and communes OCSGE properties - - Compute OCSGE millesimes for departements - - Compute first and last millesimes for communes - """ - - def reset_departements_and_communes(self): - Departement.objects.all().update( - is_artif_ready=False, - 
ocsge_millesimes=None, - ) - Commune.objects.all().update( - first_millesime=None, - last_millesime=None, - ocsge_available=False, - ) - - def handle(self, *args, **options): - logger.info("Start setup departements OCSGE") - self.reset_departements_and_communes() - logger.info("Departements and communes OCSGE properties resetted") - - sources = DataSource.objects.filter( - productor=DataSource.ProductorChoices.MDA, - dataset=DataSource.DatasetChoices.OCSGE, - ) - - logger.info(f"{sources.count()} OCSGE sources found") - - departements_with_ocsge = Departement.objects.filter( - source_id__in=sources.values_list("official_land_id", flat=True) - ).distinct() - - logger.info(f"{departements_with_ocsge.count()} departement with OCSGE found") - - for departement in departements_with_ocsge: - millesimes = set() - for source in sources.filter(official_land_id=departement.source_id): - millesimes.update(source.millesimes) - - departement.ocsge_millesimes = sorted(list(millesimes)) - departement.is_artif_ready = True - departement.save() - - Commune.objects.filter(departement=departement).update( - first_millesime=min(departement.ocsge_millesimes), - last_millesime=max(departement.ocsge_millesimes), - ocsge_available=True, - ) - - logger.info(f"Done {departement.name}: {departement.ocsge_millesimes}") - - logger.info(msg="End setup departements OCSGE") diff --git a/public_data/management/commands/update_administration_layer.py b/public_data/management/commands/update_administration_layer.py deleted file mode 100644 index 8893932a5..000000000 --- a/public_data/management/commands/update_administration_layer.py +++ /dev/null @@ -1,118 +0,0 @@ -import logging - -from django.contrib.gis.db.models import Union -from django.core.management.base import BaseCommand - -from project.models.project_base import ProjectCommune -from public_data.models import Cerema, Commune, Epci -from public_data.models.administration import Departement, Region, Scot -from utils.commands import 
PrintProgress -from utils.db import fix_poly - -logger = logging.getLogger("management.commands") - - -class Command(BaseCommand): - help = "Update administration level with CEREMA's data" - - def handle(self, *args, **options): - PrintProgress.logger = logger - logger.info(self.help) - self.update_region() - self.update_departement() - self.update_scot() - self.update_epci() - self.update_commune() - self.delete_commune() - - def update_region(self): - region_list = Cerema.objects.all().values("region_id", "region_name").annotate(geom=Union("mpoly")) - for region in PrintProgress(region_list, title="looping on regions", step=4): - Region.objects.filter(source_id=region["region_id"]).update( - name=region["region_name"], - mpoly=fix_poly(region["geom"]), - ) - - def update_departement(self): - dept_list = Cerema.objects.all().values("dept_id", "dept_name").annotate(geom=Union("mpoly")) - for dept in PrintProgress(dept_list, title="looping on departements"): - Departement.objects.filter(source_id=dept["dept_id"]).update( - name=dept["dept_name"], - mpoly=fix_poly(dept["geom"]), - ) - - def update_scot(self): - Commune.objects.all().update(scot=None) - Scot.objects.all().delete() - new_scot_list = ( - Cerema.objects.all() - .exclude(scot__isnull=True) - .exclude(scot="") - .values("scot") - .annotate(geom=Union("mpoly")) - ) - for new_scot in PrintProgress(new_scot_list, title="looping on SCoT"): - scot = Scot.objects.create( - name=new_scot["scot"], - mpoly=fix_poly(new_scot["geom"]), - ) - scot.departements.set( - Departement.objects.filter( - source_id__in=Cerema.objects.filter(scot=new_scot["scot"]) - .values_list("dept_id", flat=True) - .distinct() - ) - ) - scot.regions.set( - Region.objects.filter( - source_id__in=Cerema.objects.filter(scot=new_scot["scot"]) - .values_list("region_id", flat=True) - .distinct() - ) - ) - - def update_epci(self): - new_epci_list = Cerema.objects.all().values("epci_id", "epci_name").annotate(geom=Union("mpoly")) - old_epci = 
{e.source_id: e for e in Epci.objects.all()} - for new_epci in PrintProgress(new_epci_list, title="looping on EPCI"): - epci = old_epci.pop(new_epci["epci_id"], None) - if epci: - epci.name = new_epci["epci_name"] - epci.mpoly = fix_poly(new_epci["geom"]) - epci.save() - else: - epci = Epci.objects.create( - source_id=new_epci["epci_id"], - name=new_epci["epci_name"], - mpoly=fix_poly(new_epci["geom"]), - ) - epci.departements.set(Departement.objects.intersect(new_epci["geom"])) - for epci in old_epci.values(): - epci.commune_set.all().update(epci=None) - epci.delete() - - def update_commune(self): - cerema_city_list = Cerema.objects.all() - for cerema_city in PrintProgress(cerema_city_list, title="looping on communes", step=1000): - epci = Epci.objects.get(source_id=cerema_city.epci_id) - scot = Scot.objects.get(name=cerema_city.scot) if cerema_city.scot else None - try: - commune = Commune.objects.get(insee=cerema_city.city_insee) - commune.epci = epci - commune.scot = scot - commune.mpoly = cerema_city.mpoly - commune.save() - except Commune.DoesNotExist: - commune = Commune.objects.create( - insee=cerema_city.city_insee, - name=cerema_city.city_name, - departement=Departement.objects.get(source_id=cerema_city.dept_id), - epci=epci, - scot=scot, - mpoly=cerema_city.mpoly, - ) - - def delete_commune(self): - to_delete = Commune.objects.exclude(insee__in=Cerema.objects.all().values_list("city_insee", flat=True)) - ProjectCommune.objects.filter(commune__in=to_delete).delete() - to_delete.delete() diff --git a/public_data/migrations/0198_remove_commune_map_color.py b/public_data/migrations/0198_remove_commune_map_color.py new file mode 100644 index 000000000..4d62d8701 --- /dev/null +++ b/public_data/migrations/0198_remove_commune_map_color.py @@ -0,0 +1,16 @@ +# Generated by Django 4.2.13 on 2024-08-19 09:06 + +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("public_data", "0197_artifareazoneurba_zone_urba"), + ] + 
+ operations = [ + migrations.RemoveField( + model_name="commune", + name="map_color", + ), + ] diff --git a/public_data/models/administration/Commune.py b/public_data/models/administration/Commune.py index c40201fb6..068881bba 100644 --- a/public_data/models/administration/Commune.py +++ b/public_data/models/administration/Commune.py @@ -27,7 +27,6 @@ class Commune(DataColorationMixin, LandMixin, GetDataFromCeremaMixin, models.Mod objects = IntersectManager() # Calculated fields - map_color = models.CharField("Couleur d'affichage", max_length=30, null=True, blank=True) first_millesime = models.IntegerField( "Premier millésime disponible", validators=[MinValueValidator(2000), MaxValueValidator(2050)], diff --git a/public_data/views.py b/public_data/views.py index c3c0c1c32..f168e562e 100644 --- a/public_data/views.py +++ b/public_data/views.py @@ -401,7 +401,6 @@ class ZoneUrbaViewSet(OnlyBoundingBoxMixin, ZoomSimplificationMixin, OptimizedMi "o.libelle": "libelle", "o.libelong": "libelong", "o.typezone": "typezone", - "o.urlfic": "urlfic", "o.datappro": "datappro", "o.datvalid": "datvalid", "ST_AsEWKT((ST_MaximumInscribedCircle(o.mpoly)).center)": "label_center", From 1b882391aeef02b08f796354db1eaa2a62fcdf99 Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Tue, 20 Aug 2024 11:33:07 +0200 Subject: [PATCH 19/99] feat(airflow): separate building and loading for ocsge --- ...min_express.py => ingest_admin_express.py} | 6 +- airflow/dags/{gpu.py => ingest_gpu.py} | 13 +-- airflow/dags/ocsge.py | 78 ++-------------- airflow/dags/update_app.py | 93 +++++++++++++++++++ airflow/dependencies/ocsge/delete_in_app.py | 35 ------- airflow/dependencies/ocsge/sources.json | 18 ++++ airflow/sql/sparte/models/app/app_commune.sql | 8 +- .../sparte/models/app/app_couverturesol.sql | 7 +- .../models/app/app_couvertureusagematrix.sql | 7 +- .../sql/sparte/models/app/app_departement.sql | 8 +- airflow/sql/sparte/models/app/app_epci.sql | 8 +- airflow/sql/sparte/models/app/app_region.sql | 8 +- airflow/sql/sparte/models/app/app_scot.sql | 8 +- .../sql/sparte/models/app/app_usagesol.sql | 7 +- ...{app_zoneurba.sql => for_app_zoneurba.sql} | 7 +- ...urba.sql => for_app_artifareazoneurba.sql} | 7 +- ...ialarea.sql => for_app_artificialarea.sql} | 7 +- .../models/ocsge/for_app/for_app_commune.sql | 7 +- ...ommunediff.sql => for_app_communediff.sql} | 7 +- ..._communesol.sql => for_app_communesol.sql} | 7 +- .../ocsge/for_app/for_app_departement.sql | 7 +- .../{app_ocsge.sql => for_app_ocsge.sql} | 8 +- ...pp_ocsgediff.sql => for_app_ocsgediff.sql} | 10 +- ...struite.sql => for_app_zoneconstruite.sql} | 8 +- 24 files changed, 235 insertions(+), 144 deletions(-) rename airflow/dags/{admin_express.py => ingest_admin_express.py} (93%) rename airflow/dags/{gpu.py => ingest_gpu.py} (83%) create mode 100644 airflow/dags/update_app.py delete mode 100644 airflow/dependencies/ocsge/delete_in_app.py rename airflow/sql/sparte/models/gpu/for_app/{app_zoneurba.sql => for_app_zoneurba.sql} (79%) rename airflow/sql/sparte/models/ocsge/for_app/{app_artifareazoneurba.sql => for_app_artifareazoneurba.sql} (75%) rename airflow/sql/sparte/models/ocsge/for_app/{app_artificialarea.sql => for_app_artificialarea.sql} (66%) rename 
airflow/sql/sparte/models/ocsge/for_app/{app_communediff.sql => for_app_communediff.sql} (87%) rename airflow/sql/sparte/models/ocsge/for_app/{app_communesol.sql => for_app_communesol.sql} (94%) rename airflow/sql/sparte/models/ocsge/for_app/{app_ocsge.sql => for_app_ocsge.sql} (73%) rename airflow/sql/sparte/models/ocsge/for_app/{app_ocsgediff.sql => for_app_ocsgediff.sql} (78%) rename airflow/sql/sparte/models/ocsge/for_app/{app_zoneconstruite.sql => for_app_zoneconstruite.sql} (67%) diff --git a/airflow/dags/admin_express.py b/airflow/dags/ingest_admin_express.py similarity index 93% rename from airflow/dags/admin_express.py rename to airflow/dags/ingest_admin_express.py index 7bf41317a..941f001e0 100644 --- a/airflow/dags/admin_express.py +++ b/airflow/dags/ingest_admin_express.py @@ -79,7 +79,11 @@ def ingest_admin_express() -> str: cmd = f'ogr2ogr -f "PostgreSQL" "{Container().gdal_dw_conn_str()}" -overwrite -lco GEOMETRY_NAME=geom -a_srs EPSG:2154 -nlt MULTIPOLYGON -nlt PROMOTE_TO_MULTI {path} --config PG_USE_COPY YES' # noqa: E501 subprocess.run(cmd, shell=True, check=True) - download_admin_express() >> ingest_admin_express() + @task.bash(retries=0, trigger_rule="all_success") + def dbt_run(**context): + return 'cd "${AIRFLOW_HOME}/sql/sparte" && dbt run -s admin_express' + + download_admin_express() >> ingest_admin_express() >> dbt_run() # Instantiate the DAG diff --git a/airflow/dags/gpu.py b/airflow/dags/ingest_gpu.py similarity index 83% rename from airflow/dags/gpu.py rename to airflow/dags/ingest_gpu.py index 52a7d96a3..d6e9007d2 100644 --- a/airflow/dags/gpu.py +++ b/airflow/dags/ingest_gpu.py @@ -2,7 +2,6 @@ from airflow.operators.bash import BashOperator from dependencies.container import Container from dependencies.utils import multiline_string_to_single_line -from gdaltools import ogr2ogr from pendulum import datetime @@ -90,18 +89,8 @@ def ingest(path_on_bucket: str) -> str: bash_command=" ".join(cmd), ).execute(context={}) - @task.python - def 
load_to_app(): - ogr = ogr2ogr() - ogr.config_options = {"PG_USE_COPY": "YES", "OGR_TRUNCATE": "NO"} - ogr.set_input(Container().gdal_dw_conn(schema="public_gpu"), table_name="app_zoneurba") - ogr.set_output(Container().gdal_app_conn(), table_name="public_data_zoneurba") - ogr.set_output_mode(layer_mode=ogr.MODE_LAYER_OVERWRITE) - ogr.execute() - path_on_bucket = download() - ingest_task = ingest(path_on_bucket) - ingest_task >> load_to_app() + ingest(path_on_bucket) gpu() diff --git a/airflow/dags/ocsge.py b/airflow/dags/ocsge.py index ee4d63015..3e234b718 100644 --- a/airflow/dags/ocsge.py +++ b/airflow/dags/ocsge.py @@ -23,19 +23,6 @@ ocsge_zone_construite_normalization_sql, ) from dependencies.utils import multiline_string_to_single_line -from gdaltools import ogr2ogr - - -def copy_table_from_dw_to_app( - from_table: str, - to_table: str, -): - ogr = ogr2ogr() - ogr.config_options = {"PG_USE_COPY": "YES", "OGR_TRUNCATE": "NO"} - ogr.set_input(Container().gdal_dw_conn(schema="public_ocsge"), table_name=from_table) - ogr.set_output(Container().gdal_app_conn(), table_name=to_table) - ogr.set_output_mode(layer_mode=ogr.MODE_LAYER_OVERWRITE) - ogr.execute() def get_paths_from_directory(directory: str) -> list[tuple[str, str]]: @@ -66,32 +53,6 @@ def get_paths_from_directory(directory: str) -> list[tuple[str, str]]: "dw_source": "ocsge_occupation_du_sol", "normalization_sql": ocsge_occupation_du_sol_normalization_sql, "delete_on_dwt": delete_occupation_du_sol_in_dw_sql, - "mapping": [ - { - "from_table": "public_ocsge.app_ocsge", - "to_table": "public.public_data_ocsge", - }, - { - "from_table": "public_ocsge.app_artificialarea", - "to_table": "public.public_data_artificialarea", - }, - { - "from_table": "public_ocsge.app_artifareazoneurba", - "to_table": "public.public_data_artifareazoneurba", - }, - { - "from_table": "public_ocsge.for_app_commune", - "to_table": "public.public_data_commune", - }, - { - "from_table": "public_ocsge.for_app_departement", - "to_table": 
"public.public_data_departement", - }, - { - "from_table": "public_ocsge.app_communesol", - "to_table": "public.public_data_communesol", - }, - ], }, SourceName.ZONE_CONSTRUITE: { "shapefile_name": "ZONE_CONSTRUITE", @@ -101,12 +62,6 @@ def get_paths_from_directory(directory: str) -> list[tuple[str, str]]: "dw_source": "ocsge_zone_construite", "normalization_sql": ocsge_zone_construite_normalization_sql, "delete_on_dwt": delete_zone_construite_in_dw_sql, - "mapping": [ - { - "from_table": "public_ocsge.app_zoneconstruite", - "to_table": "public.public_data_zoneconstruite", - } - ], }, SourceName.DIFFERENCE: { "shapefile_name": "DIFFERENCE", @@ -117,16 +72,6 @@ def get_paths_from_directory(directory: str) -> list[tuple[str, str]]: "dw_final_table_name": "app_ocsgediff", "normalization_sql": ocsge_diff_normalization_sql, "delete_on_dwt": delete_difference_in_dw_sql, - "mapping": [ - { - "from_table": "public_ocsge.app_ocsgediff", - "to_table": "public.public_data_ocsgediff", - }, - { - "from_table": "public_ocsge.app_communediff", - "to_table": "public.public_data_communediff", - }, - ], }, } @@ -242,6 +187,7 @@ def load_shapefile_to_dw( DatasetName.DIFFERENCE, ], ), + "refresh_source": Param(False, type="boolean"), }, ) def ocsge(): # noqa: C901 @@ -270,8 +216,14 @@ def check_url_exists(url) -> dict: } @task.python - def download_ocsge(url) -> str: - response = requests.get(url, allow_redirects=True) + def download_ocsge(url, **context) -> str: + if not context["params"]["refresh_source"]: + filename = url.split("/")[-1] + path_on_bucket = f"{bucket_name}/{filename}" + if Container().s3().exists(path_on_bucket): + return path_on_bucket + + response = requests.get(url) if not response.ok: raise ValueError(f"Failed to download {url}. 
Response : {response.content}") @@ -353,16 +305,6 @@ def delete_previously_loaded_data_in_dw(**context) -> dict: return results - @task.python(trigger_rule="all_success") - def load_data_in_app(**context): - dataset = context["params"]["dataset"] - for vars in vars_dataset[dataset]: - for mapping in vars["mapping"]: - copy_table_from_dw_to_app( - from_table=mapping["from_table"], - to_table=mapping["to_table"], - ) - url = get_url() url_exists = check_url_exists(url=url) path = download_ocsge(url=url) @@ -371,7 +313,6 @@ def load_data_in_app(**context): test_result_staging = db_test_ocsge_staging() loaded_date = ingest_ocsge(path=path) dbt_run_ocsge_result = dbt_run_ocsge() - load_app = load_data_in_app() ( url @@ -382,7 +323,6 @@ def load_data_in_app(**context): >> delete_dw >> loaded_date >> dbt_run_ocsge_result - >> load_app ) diff --git a/airflow/dags/update_app.py b/airflow/dags/update_app.py new file mode 100644 index 000000000..4aa059a7d --- /dev/null +++ b/airflow/dags/update_app.py @@ -0,0 +1,93 @@ +from airflow.decorators import dag, task +from airflow.models.param import Param +from dependencies.container import Container +from gdaltools import ogr2ogr +from pendulum import datetime + + +def copy_table_from_dw_to_app( + from_table: str, + to_table: str, +): + ogr = ogr2ogr() + ogr.config_options = {"PG_USE_COPY": "YES", "OGR_TRUNCATE": "NO"} + ogr.set_input(Container().gdal_dw_conn(), table_name=from_table) + ogr.set_output(Container().gdal_app_conn(), table_name=to_table) + ogr.set_output_mode(layer_mode=ogr.MODE_LAYER_OVERWRITE) + ogr.execute() + + +mapping = [ + { + "from_table": "public_ocsge.for_app_ocsge", + "to_table": "public.public_data_ocsge", + }, + { + "from_table": "public_ocsge.for_app_artificialarea", + "to_table": "public.public_data_artificialarea", + }, + { + "from_table": "public_ocsge.for_app_artifareazoneurba", + "to_table": "public.public_data_artifareazoneurba", + }, + { + "from_table": "public_ocsge.for_app_commune", + "to_table": 
"public.public_data_commune", + }, + { + "from_table": "public_ocsge.for_app_departement", + "to_table": "public.public_data_departement", + }, + { + "from_table": "public_ocsge.for_app_communesol", + "to_table": "public.public_data_communesol", + }, + { + "from_table": "public_ocsge.for_app_ocsgediff", + "to_table": "public.public_data_ocsgediff", + }, + { + "from_table": "public_ocsge.for_app_communediff", + "to_table": "public.public_data_communediff", + }, + { + "from_table": "public_gpu.for_app_zoneurba", + "to_table": "public.public_data_zoneurba", + }, + { + "from_table": "public_ocsge.for_app_zoneconstruite", + "to_table": "public.public_data_zoneconstruite", + }, +] + + +params = {map["to_table"]: Param(True) for map in mapping} + + +# Define the basic parameters of the DAG, like schedule and start_date +@dag( + start_date=datetime(2024, 1, 1), + schedule="@once", + catchup=False, + doc_md=__doc__, + default_args={"owner": "Alexis Athlani", "retries": 3}, + tags=["Admin Express"], + params=params, +) +def update_app(): + tasks = [] + for map in mapping: + to_table_str = map["to_table"].split(".")[1] + + @task.python(task_id=f"copy_{to_table_str}", retries=0) + def copy_table(from_table=map["from_table"], to_table=map["to_table"], **context): + if context["params"][to_table]: + copy_table_from_dw_to_app(from_table, to_table) + else: + print(f"Skipping {to_table_str}") + + tasks.append(copy_table()) + + +# Instantiate the DAG +update_app() diff --git a/airflow/dependencies/ocsge/delete_in_app.py b/airflow/dependencies/ocsge/delete_in_app.py deleted file mode 100644 index a2057e0e2..000000000 --- a/airflow/dependencies/ocsge/delete_in_app.py +++ /dev/null @@ -1,35 +0,0 @@ -from typing import List - - -def delete_occupation_du_sol_in_app_sql( - departement: str, - years: List[str], -) -> str: - return f""" - DELETE FROM public.public_data_ocsge - WHERE departement = '{departement}' - AND year = {years[0]}; - """ - - -def delete_zone_construite_in_app_sql( - 
departement: str, - years: List[str], -) -> str: - return f""" - DELETE FROM public.public_data_zoneconstruite - WHERE departement = '{departement}' - AND year = {years[0]}; - """ - - -def delete_difference_in_app_sql( - departement: str, - years: List[str], -) -> str: - return f""" - DELETE FROM public.public_data_ocsgediff - WHERE departement = '{departement}' - AND year_old = {years[0]} - AND year_new = {years[1]}; - """ diff --git a/airflow/dependencies/ocsge/sources.json b/airflow/dependencies/ocsge/sources.json index 90b72a51e..ccc1123ec 100644 --- a/airflow/dependencies/ocsge/sources.json +++ b/airflow/dependencies/ocsge/sources.json @@ -79,5 +79,23 @@ "difference": { "2016_2019": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D032_DIFF_2016-2019/OCS-GE_2-0__SHP_LAMB93_D032_DIFF_2016-2019.7z" } + }, + "37": { + "occupation_du_sol_et_zone_construite": { + "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D037_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D037_2018-01-01.7z", + "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D037_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D037_2021-01-01.7z" + }, + "difference": { + "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D037_DIFF_2018-2021/OCS-GE_2-0__SHP_LAMB93_D037_DIFF_2018-2021.7z" + } + }, + "29": { + "occupation_du_sol_et_zone_construite": { + "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D029_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D029_2018-01-01.7z", + "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D029_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D029_2021-01-01.7z" + }, + "difference": { + "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D029_DIFF_2018-2021/OCS-GE_2-0__SHP_LAMB93_D029_DIFF_2018-2021.7z" + } } } diff --git a/airflow/sql/sparte/models/app/app_commune.sql 
b/airflow/sql/sparte/models/app/app_commune.sql index b84f3a083..c7e5d934d 100644 --- a/airflow/sql/sparte/models/app/app_commune.sql +++ b/airflow/sql/sparte/models/app/app_commune.sql @@ -1,5 +1,9 @@ - -{{ config(materialized='table') }} +{{ + config( + materialized='table', + docs={'node_color': '#D70040'} + ) +}} SELECT id, diff --git a/airflow/sql/sparte/models/app/app_couverturesol.sql b/airflow/sql/sparte/models/app/app_couverturesol.sql index dbc094117..de76041d9 100644 --- a/airflow/sql/sparte/models/app/app_couverturesol.sql +++ b/airflow/sql/sparte/models/app/app_couverturesol.sql @@ -1,4 +1,9 @@ -{{ config(materialized='table') }} +{{ + config( + materialized='table', + docs={'node_color': '#D70040'} + ) +}} SELECT id, diff --git a/airflow/sql/sparte/models/app/app_couvertureusagematrix.sql b/airflow/sql/sparte/models/app/app_couvertureusagematrix.sql index 8f18d1961..d73a96a7f 100644 --- a/airflow/sql/sparte/models/app/app_couvertureusagematrix.sql +++ b/airflow/sql/sparte/models/app/app_couvertureusagematrix.sql @@ -1,4 +1,9 @@ -{{ config(materialized='table') }} +{{ + config( + materialized='table', + docs={'node_color': '#D70040'} + ) +}} SELECT id, diff --git a/airflow/sql/sparte/models/app/app_departement.sql b/airflow/sql/sparte/models/app/app_departement.sql index b69dffb04..8605775d9 100644 --- a/airflow/sql/sparte/models/app/app_departement.sql +++ b/airflow/sql/sparte/models/app/app_departement.sql @@ -1,5 +1,9 @@ - -{{ config(materialized='table') }} +{{ + config( + materialized='table', + docs={'node_color': '#D70040'} + ) +}} SELECT id, diff --git a/airflow/sql/sparte/models/app/app_epci.sql b/airflow/sql/sparte/models/app/app_epci.sql index f7bc6ce73..d8bb4c50f 100644 --- a/airflow/sql/sparte/models/app/app_epci.sql +++ b/airflow/sql/sparte/models/app/app_epci.sql @@ -1,5 +1,9 @@ - -{{ config(materialized='table') }} +{{ + config( + materialized='table', + docs={'node_color': '#D70040'} + ) +}} SELECT id, diff --git 
a/airflow/sql/sparte/models/app/app_region.sql b/airflow/sql/sparte/models/app/app_region.sql index 0f6207e09..262002a21 100644 --- a/airflow/sql/sparte/models/app/app_region.sql +++ b/airflow/sql/sparte/models/app/app_region.sql @@ -1,5 +1,9 @@ - -{{ config(materialized='table') }} +{{ + config( + materialized='table', + docs={'node_color': '#D70040'} + ) +}} SELECT id, diff --git a/airflow/sql/sparte/models/app/app_scot.sql b/airflow/sql/sparte/models/app/app_scot.sql index b8fcc11a9..747af03e8 100644 --- a/airflow/sql/sparte/models/app/app_scot.sql +++ b/airflow/sql/sparte/models/app/app_scot.sql @@ -1,5 +1,9 @@ - -{{ config(materialized='table') }} +{{ + config( + materialized='table', + docs={'node_color': '#D70040'} + ) +}} SELECT id, diff --git a/airflow/sql/sparte/models/app/app_usagesol.sql b/airflow/sql/sparte/models/app/app_usagesol.sql index e72591f45..d67ad3f5b 100644 --- a/airflow/sql/sparte/models/app/app_usagesol.sql +++ b/airflow/sql/sparte/models/app/app_usagesol.sql @@ -1,4 +1,9 @@ -{{ config(materialized='table') }} +{{ + config( + materialized='table', + docs={'node_color': '#D70040'} + ) +}} SELECT id, diff --git a/airflow/sql/sparte/models/gpu/for_app/app_zoneurba.sql b/airflow/sql/sparte/models/gpu/for_app/for_app_zoneurba.sql similarity index 79% rename from airflow/sql/sparte/models/gpu/for_app/app_zoneurba.sql rename to airflow/sql/sparte/models/gpu/for_app/for_app_zoneurba.sql index edc6d206e..57530a7f1 100644 --- a/airflow/sql/sparte/models/gpu/for_app/app_zoneurba.sql +++ b/airflow/sql/sparte/models/gpu/for_app/for_app_zoneurba.sql @@ -1,4 +1,9 @@ -{{ config(materialized='table') }} +{{ + config( + materialized='table', + docs={'node_color': 'purple'} + ) +}} SELECT checksum as id, diff --git a/airflow/sql/sparte/models/ocsge/for_app/app_artifareazoneurba.sql b/airflow/sql/sparte/models/ocsge/for_app/for_app_artifareazoneurba.sql similarity index 75% rename from airflow/sql/sparte/models/ocsge/for_app/app_artifareazoneurba.sql rename 
to airflow/sql/sparte/models/ocsge/for_app/for_app_artifareazoneurba.sql index be9604b56..df84d17ce 100644 --- a/airflow/sql/sparte/models/ocsge/for_app/app_artifareazoneurba.sql +++ b/airflow/sql/sparte/models/ocsge/for_app/for_app_artifareazoneurba.sql @@ -1,4 +1,9 @@ -{{ config(materialized='table') }} +{{ + config( + materialized='table', + docs={'node_color': 'purple'} + ) +}} SELECT zonage_checksum as zone_urba_id, diff --git a/airflow/sql/sparte/models/ocsge/for_app/app_artificialarea.sql b/airflow/sql/sparte/models/ocsge/for_app/for_app_artificialarea.sql similarity index 66% rename from airflow/sql/sparte/models/ocsge/for_app/app_artificialarea.sql rename to airflow/sql/sparte/models/ocsge/for_app/for_app_artificialarea.sql index 3e15d7ca9..ee548e638 100644 --- a/airflow/sql/sparte/models/ocsge/for_app/app_artificialarea.sql +++ b/airflow/sql/sparte/models/ocsge/for_app/for_app_artificialarea.sql @@ -1,4 +1,9 @@ -{{ config(materialized='table') }} +{{ + config( + materialized='table', + docs={'node_color': 'purple'} + ) +}} SELECT year, diff --git a/airflow/sql/sparte/models/ocsge/for_app/for_app_commune.sql b/airflow/sql/sparte/models/ocsge/for_app/for_app_commune.sql index b82f7b4fb..bad88598f 100644 --- a/airflow/sql/sparte/models/ocsge/for_app/for_app_commune.sql +++ b/airflow/sql/sparte/models/ocsge/for_app/for_app_commune.sql @@ -1,4 +1,9 @@ -{{ config(materialized='table') }} +{{ + config( + materialized='table', + docs={'node_color': 'purple'} + ) +}} with artif_commune_partitionned as ( SELECT diff --git a/airflow/sql/sparte/models/ocsge/for_app/app_communediff.sql b/airflow/sql/sparte/models/ocsge/for_app/for_app_communediff.sql similarity index 87% rename from airflow/sql/sparte/models/ocsge/for_app/app_communediff.sql rename to airflow/sql/sparte/models/ocsge/for_app/for_app_communediff.sql index a8d7cff90..31c0aefee 100644 --- a/airflow/sql/sparte/models/ocsge/for_app/app_communediff.sql +++ 
b/airflow/sql/sparte/models/ocsge/for_app/for_app_communediff.sql @@ -1,4 +1,9 @@ -{{ config(materialized='table') }} +{{ + config( + materialized='table', + docs={'node_color': 'purple'} + ) +}} SELECT foo.year_old, diff --git a/airflow/sql/sparte/models/ocsge/for_app/app_communesol.sql b/airflow/sql/sparte/models/ocsge/for_app/for_app_communesol.sql similarity index 94% rename from airflow/sql/sparte/models/ocsge/for_app/app_communesol.sql rename to airflow/sql/sparte/models/ocsge/for_app/for_app_communesol.sql index 90605c48a..f3cfa82fb 100644 --- a/airflow/sql/sparte/models/ocsge/for_app/app_communesol.sql +++ b/airflow/sql/sparte/models/ocsge/for_app/for_app_communesol.sql @@ -1,4 +1,9 @@ -{{ config(materialized='table') }} +{{ + config( + materialized='table', + docs={'node_color': 'purple'} + ) +}} with ocsge_with_cs_us_id as ( SELECT diff --git a/airflow/sql/sparte/models/ocsge/for_app/for_app_departement.sql b/airflow/sql/sparte/models/ocsge/for_app/for_app_departement.sql index d9bba8db2..4e062157c 100644 --- a/airflow/sql/sparte/models/ocsge/for_app/for_app_departement.sql +++ b/airflow/sql/sparte/models/ocsge/for_app/for_app_departement.sql @@ -1,4 +1,9 @@ -{{ config(materialized='table') }} +{{ + config( + materialized='table', + docs={'node_color': 'purple'} + ) +}} with millesimes AS ( SELECT diff --git a/airflow/sql/sparte/models/ocsge/for_app/app_ocsge.sql b/airflow/sql/sparte/models/ocsge/for_app/for_app_ocsge.sql similarity index 73% rename from airflow/sql/sparte/models/ocsge/for_app/app_ocsge.sql rename to airflow/sql/sparte/models/ocsge/for_app/for_app_ocsge.sql index 9a2d3adb7..ef191611d 100644 --- a/airflow/sql/sparte/models/ocsge/for_app/app_ocsge.sql +++ b/airflow/sql/sparte/models/ocsge/for_app/for_app_ocsge.sql @@ -1,5 +1,9 @@ - -{{ config(materialized='table') }} +{{ + config( + materialized='table', + docs={'node_color': 'purple'} + ) +}} SELECT code_cs as couverture, diff --git 
a/airflow/sql/sparte/models/ocsge/for_app/app_ocsgediff.sql b/airflow/sql/sparte/models/ocsge/for_app/for_app_ocsgediff.sql similarity index 78% rename from airflow/sql/sparte/models/ocsge/for_app/app_ocsgediff.sql rename to airflow/sql/sparte/models/ocsge/for_app/for_app_ocsgediff.sql index 03553a459..6ae29f214 100644 --- a/airflow/sql/sparte/models/ocsge/for_app/app_ocsgediff.sql +++ b/airflow/sql/sparte/models/ocsge/for_app/for_app_ocsgediff.sql @@ -1,5 +1,9 @@ - -{{ config(materialized='table') }} +{{ + config( + materialized='table', + docs={'node_color': 'purple'} + ) +}} SELECT year_old, @@ -9,7 +13,7 @@ SELECT us_new, us_old, ST_Transform(geom, 4326) as mpoly, - surface / 10000 as surface, + surface, 2154 as srid_source, departement, new_is_artificial as is_new_artif, diff --git a/airflow/sql/sparte/models/ocsge/for_app/app_zoneconstruite.sql b/airflow/sql/sparte/models/ocsge/for_app/for_app_zoneconstruite.sql similarity index 67% rename from airflow/sql/sparte/models/ocsge/for_app/app_zoneconstruite.sql rename to airflow/sql/sparte/models/ocsge/for_app/for_app_zoneconstruite.sql index 781b8029e..4558a6d0d 100644 --- a/airflow/sql/sparte/models/ocsge/for_app/app_zoneconstruite.sql +++ b/airflow/sql/sparte/models/ocsge/for_app/for_app_zoneconstruite.sql @@ -1,5 +1,9 @@ - -{{ config(materialized='table') }} +{{ + config( + materialized='table', + docs={'node_color': 'purple'} + ) +}} SELECT id as id_source, From ad4321b7c88385dcc9ade3573cfaab30018aa69b Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Tue, 20 Aug 2024 12:44:32 +0200 Subject: [PATCH 20/99] feat(airflow): add mattermost integration --- .../diff_ocsge_download_page_to_mattermost.py | 61 +++++++++++++++++++ airflow/dags/ingest_admin_express.py | 45 ++------------ airflow/dags/ingest_app_dependencies.py | 3 +- airflow/dags/ingest_gpu.py | 4 +- airflow/dags/ocsge.py | 17 ++++++ airflow/dags/update_app.py | 2 +- airflow/dependencies/container.py | 8 +++ airflow/dependencies/mattermost.py | 18 ++++++ airflow/requirements.txt | 1 + 9 files changed, 114 insertions(+), 45 deletions(-) create mode 100644 airflow/dags/diff_ocsge_download_page_to_mattermost.py create mode 100644 airflow/dependencies/mattermost.py diff --git a/airflow/dags/diff_ocsge_download_page_to_mattermost.py b/airflow/dags/diff_ocsge_download_page_to_mattermost.py new file mode 100644 index 000000000..a46cc297c --- /dev/null +++ b/airflow/dags/diff_ocsge_download_page_to_mattermost.py @@ -0,0 +1,61 @@ +import difflib + +import requests +from airflow.decorators import dag, task +from bs4 import BeautifulSoup +from dependencies.container import Container +from pendulum import datetime + + +# Define the basic parameters of the DAG, like schedule and start_date +@dag( + start_date=datetime(2024, 1, 1), + schedule="0 10 * * *", + catchup=False, + doc_md=__doc__, + default_args={"owner": "Alexis Athlani", "retries": 3}, + tags=["App"], +) +def diff_ocsge_download_page_to_mattermost(): + @task.python + def diff(): + url = "https://geoservices.ign.fr/ocsge#telechargement" + selector = "#block-ignpro-content > div > article > div.container > div:nth-child(2) > div > div > div.field--items > div:nth-child(2)" # noqa: E501 + s3_path = "airflow-staging/download_page_ocsge.txt" + local_path = "download_page_ocsge.txt" + + if Container().s3().exists(s3_path): + Container().s3().get_file(s3_path, local_path) + with open(local_path, "r") as f: + previous_txt = f.read() + else: + previous_txt = "" + + new_html = requests.get(url).text + new_soup = 
BeautifulSoup(new_html, features="html.parser") + new_txt = new_soup.select(selector)[0].text.strip() + + diff = difflib.unified_diff(previous_txt.splitlines(), new_txt.splitlines()) + + with open(local_path, "w") as f: + f.write(new_txt) + + Container().s3().put_file(local_path, s3_path) + + diff_str = "\n".join(diff) + + if diff_str: + markdown_message = "\n".join( + [ + "```", + diff_str, + "```", + ] + ) + Container().mattermost().send(markdown_message) + + diff() + + +# Instantiate the DAG +diff_ocsge_download_page_to_mattermost() diff --git a/airflow/dags/ingest_admin_express.py b/airflow/dags/ingest_admin_express.py index 941f001e0..184e499d6 100644 --- a/airflow/dags/ingest_admin_express.py +++ b/airflow/dags/ingest_admin_express.py @@ -1,23 +1,3 @@ -""" -## Astronaut ETL example DAG - -This DAG queries the list of astronauts currently in space from the -Open Notify API and prints each astronaut's name and flying craft. - -There are two tasks, one to get the data from the API and save the results, -and another to print the results. Both tasks are written in Python using -Airflow's TaskFlow API, which allows you to easily turn Python functions into -Airflow tasks, and automatically infer dependencies and pass data. - -The second task uses dynamic task mapping to create a copy of the task for -each Astronaut in the list retrieved from the API. This list will change -depending on how many Astronauts are in space, and the DAG will adjust -accordingly each time it runs. 
- -For more explanation and getting started instructions, see our Write your -first DAG tutorial: https://docs.astronomer.io/learn/get-started-with-airflow -""" - import os import subprocess from urllib.request import URLopener @@ -27,10 +7,7 @@ from dependencies.container import Container from pendulum import datetime -from airflow import Dataset - -# Define the basic parameters of the DAG, like schedule and start_date @dag( start_date=datetime(2024, 1, 1), schedule="@once", @@ -39,7 +16,7 @@ default_args={"owner": "Alexis Athlani", "retries": 3}, tags=["Admin Express"], ) -def admin_express(): +def ingest_admin_express(): admin_express_archive_file = "admin_express.7z" bucket_name = "airflow-staging" path_on_bucket = f"{bucket_name}/{admin_express_archive_file}" @@ -56,20 +33,8 @@ def download_admin_express() -> str: with Container().s3().open(path_on_bucket, "wb") as distant_file: distant_file.write(local_file.read()) - @task( - outlets=[ - Dataset("arrondissement"), - Dataset("arrondissement_municipal"), - Dataset("canton"), - Dataset("collectivite_territoriale"), - Dataset("commune"), - Dataset("commune_associee_ou_deleguee"), - Dataset("departement"), - Dataset("epci"), - Dataset("region"), - ] - ) - def ingest_admin_express() -> str: + @task.python + def ingest() -> str: with Container().s3().open(path_on_bucket, "rb") as f: py7zr.SevenZipFile(f, mode="r").extractall() for dirpath, _, filenames in os.walk("."): @@ -83,8 +48,8 @@ def ingest_admin_express() -> str: def dbt_run(**context): return 'cd "${AIRFLOW_HOME}/sql/sparte" && dbt run -s admin_express' - download_admin_express() >> ingest_admin_express() >> dbt_run() + download_admin_express() >> ingest() >> dbt_run() # Instantiate the DAG -admin_express() +ingest_admin_express() diff --git a/airflow/dags/ingest_app_dependencies.py b/airflow/dags/ingest_app_dependencies.py index 191a78385..c002d95f7 100644 --- a/airflow/dags/ingest_app_dependencies.py +++ b/airflow/dags/ingest_app_dependencies.py @@ -14,13 
+14,12 @@ def ingest_table(source_table_name: str, destination_table_name: str): ogr.execute() -# Define the basic parameters of the DAG, like schedule and start_date @dag( start_date=datetime(2024, 1, 1), schedule="@once", catchup=False, default_args={"owner": "Alexis Athlani", "retries": 3}, - tags=["app"], + tags=["App"], ) def ingest_app_dependencies(): @task.python diff --git a/airflow/dags/ingest_gpu.py b/airflow/dags/ingest_gpu.py index d6e9007d2..0f963663a 100644 --- a/airflow/dags/ingest_gpu.py +++ b/airflow/dags/ingest_gpu.py @@ -13,7 +13,7 @@ default_args={"owner": "Alexis Athlani", "retries": 3}, tags=["GPU"], ) -def gpu(): +def ingest_gpu(): bucket_name = "airflow-staging" wfs_du_filename = "wfs_du.gpkg" @@ -93,4 +93,4 @@ def ingest(path_on_bucket: str) -> str: ingest(path_on_bucket) -gpu() +ingest_gpu() diff --git a/airflow/dags/ocsge.py b/airflow/dags/ocsge.py index 3e234b718..f91c20c57 100644 --- a/airflow/dags/ocsge.py +++ b/airflow/dags/ocsge.py @@ -305,6 +305,21 @@ def delete_previously_loaded_data_in_dw(**context) -> dict: return results + @task.python + def log_to_mattermost(**context): + refresh_source = "Oui" if context["params"]["refresh_source"] else "Non" + if not context["params"]["refresh_source"]: + refresh_source += " (le fichier a été téléchargé depuis le bucket)" + years = ", ".join(context["params"]["years"]) + message = f""" +### Calcul de données OCS GE terminé +- Jeu de donnée : {context["params"]["dataset"]} +- Departement : {context["params"]["departement"]} +- Année(s) : {years} +- Téléchargé : {refresh_source} +""" + Container().mattermost().send(message) + url = get_url() url_exists = check_url_exists(url=url) path = download_ocsge(url=url) @@ -313,6 +328,7 @@ def delete_previously_loaded_data_in_dw(**context) -> dict: test_result_staging = db_test_ocsge_staging() loaded_date = ingest_ocsge(path=path) dbt_run_ocsge_result = dbt_run_ocsge() + log = log_to_mattermost() ( url @@ -323,6 +339,7 @@ def 
delete_previously_loaded_data_in_dw(**context) -> dict: >> delete_dw >> loaded_date >> dbt_run_ocsge_result + >> log ) diff --git a/airflow/dags/update_app.py b/airflow/dags/update_app.py index 4aa059a7d..cefc336bd 100644 --- a/airflow/dags/update_app.py +++ b/airflow/dags/update_app.py @@ -71,7 +71,7 @@ def copy_table_from_dw_to_app( catchup=False, doc_md=__doc__, default_args={"owner": "Alexis Athlani", "retries": 3}, - tags=["Admin Express"], + tags=["App"], params=params, ) def update_app(): diff --git a/airflow/dependencies/container.py b/airflow/dependencies/container.py index 591d336d7..05d47440c 100644 --- a/airflow/dependencies/container.py +++ b/airflow/dependencies/container.py @@ -9,6 +9,8 @@ from psycopg2.extensions import connection from s3fs import S3FileSystem +from .mattermost import Mattermost + def db_str_for_ogr2ogr(dbname: str, user: str, password: str, host: str, port: int) -> str: return f"PG:dbname='{dbname}' host='{host}' port='{port}' user='{user}' password='{password}'" @@ -91,3 +93,9 @@ class Container(containers.DeclarativeContainer): default_path="/pub/export-wfs/latest/", cnopts=cnopts, ) + + mattermost = providers.Factory( + Mattermost, + mattermost_webhook_url=getenv("MATTERMOST_WEBHOOK_URL"), + channel=getenv("MATTERMOST_CHANNEL"), + ) diff --git a/airflow/dependencies/mattermost.py b/airflow/dependencies/mattermost.py new file mode 100644 index 000000000..e42ad131b --- /dev/null +++ b/airflow/dependencies/mattermost.py @@ -0,0 +1,18 @@ +import requests + + +class Mattermost: + def __init__( + self, + channel: str, + mattermost_webhook_url: str, + ): + self.url = mattermost_webhook_url + self.channel = channel + + def send(self, msg: str) -> requests.Response: + data = { + "text": msg, + "channel": self.channel, + } + return requests.post(self.url, json=data) diff --git a/airflow/requirements.txt b/airflow/requirements.txt index 94e86ac9f..f9befed9f 100644 --- a/airflow/requirements.txt +++ b/airflow/requirements.txt @@ -17,3 +17,4 
@@ SQLAlchemy==1.4.53 psycopg2==2.9.9 pygeos pygdaltools==1.4.2 +beautifulsoup4 From f27997db3f7aff3bb4dcb3a07e4381646e1e377f Mon Sep 17 00:00:00 2001 From: "Alexis A." Date: Thu, 22 Aug 2024 12:16:31 +0200 Subject: [PATCH 21/99] feat(airflow): add gpu --- airflow/dags/update_app.py | 2 + .../delete_from_this_where_field_not_in.sql | 10 +++ .../models/gpu/for_app/for_app_zoneurba.sql | 2 +- .../for_app/for_app_artifareazoneurba.sql | 2 +- .../ocsge/for_app/for_app_artificialarea.sql | 3 +- .../models/ocsge/for_app/for_app_commune.sql | 2 +- .../ocsge/for_app/for_app_communesol.sql | 3 + .../ocsge/for_app/for_app_departement.sql | 7 +- .../models/ocsge/for_app/for_app_ocsge.sql | 1 + .../ocsge/for_app/for_app_ocsgediff.sql | 1 + .../ocsge/for_app/for_app_zoneconstruite.sql | 1 + .../ocsge/intersected/artificial_commune.sql | 34 +++++++--- .../ocsge/intersected/difference_commune.sql | 33 +++++++-- .../intersected/occupation_du_sol_commune.sql | 31 +++++++-- .../occupation_du_sol_zonage_urbanisme.sql | 26 +++++-- project/models/project_base.py | 6 +- project/tasks/project.py | 7 +- .../project/partials/artif_zone_urba.html | 6 +- project/urls.py | 2 +- project/views/report.py | 12 ++-- ...lter_artifareazoneurba_options_and_more.py | 68 +++++++++++++++++++ .../0188_remove_zoneurba_id_zoneurba_uuid.py | 22 ------ .../migrations/0189_remove_zoneurba_gid.py | 16 ----- ...0_remove_zoneurba_origin_insee_and_more.py | 20 ------ .../0191_remove_zoneurba_destdomi.py | 16 ----- ...one_remove_zoneurba_lib_idzone_and_more.py | 28 -------- ...a_public_data_insee_3f872f_idx_and_more.py | 20 ------ .../0194_artifareazoneurba_departement.py | 18 ----- .../0195_remove_zoneurba_uuid_zoneurba_id.py | 22 ------ ...fareazoneurba_unique_zone_year_and_more.py | 28 -------- .../0197_artifareazoneurba_zone_urba.py | 21 ------ .../0198_remove_commune_map_color.py | 16 ----- .../models/administration/CommuneDiff.py | 5 +- .../models/administration/CommuneSol.py | 7 +- public_data/models/gpu.py 
| 19 +++--- public_data/models/ocsge.py | 18 +++-- public_data/views.py | 2 +- 37 files changed, 233 insertions(+), 304 deletions(-) create mode 100644 airflow/sql/sparte/macros/delete_from_this_where_field_not_in.sql create mode 100644 public_data/migrations/0188_alter_artifareazoneurba_options_and_more.py delete mode 100644 public_data/migrations/0188_remove_zoneurba_id_zoneurba_uuid.py delete mode 100644 public_data/migrations/0189_remove_zoneurba_gid.py delete mode 100644 public_data/migrations/0190_remove_zoneurba_origin_insee_and_more.py delete mode 100644 public_data/migrations/0191_remove_zoneurba_destdomi.py delete mode 100644 public_data/migrations/0192_remove_zoneurba_idzone_remove_zoneurba_lib_idzone_and_more.py delete mode 100644 public_data/migrations/0193_remove_zoneurba_public_data_insee_3f872f_idx_and_more.py delete mode 100644 public_data/migrations/0194_artifareazoneurba_departement.py delete mode 100644 public_data/migrations/0195_remove_zoneurba_uuid_zoneurba_id.py delete mode 100644 public_data/migrations/0196_remove_artifareazoneurba_unique_zone_year_and_more.py delete mode 100644 public_data/migrations/0197_artifareazoneurba_zone_urba.py delete mode 100644 public_data/migrations/0198_remove_commune_map_color.py diff --git a/airflow/dags/update_app.py b/airflow/dags/update_app.py index cefc336bd..9e9ac68b8 100644 --- a/airflow/dags/update_app.py +++ b/airflow/dags/update_app.py @@ -12,6 +12,8 @@ def copy_table_from_dw_to_app( ogr = ogr2ogr() ogr.config_options = {"PG_USE_COPY": "YES", "OGR_TRUNCATE": "NO"} ogr.set_input(Container().gdal_dw_conn(), table_name=from_table) + # the option below will an id column to the table only if it does not exist + ogr.layer_creation_options = {"FID": "id"} ogr.set_output(Container().gdal_app_conn(), table_name=to_table) ogr.set_output_mode(layer_mode=ogr.MODE_LAYER_OVERWRITE) ogr.execute() diff --git a/airflow/sql/sparte/macros/delete_from_this_where_field_not_in.sql 
b/airflow/sql/sparte/macros/delete_from_this_where_field_not_in.sql new file mode 100644 index 000000000..6a3891f8a --- /dev/null +++ b/airflow/sql/sparte/macros/delete_from_this_where_field_not_in.sql @@ -0,0 +1,10 @@ +{% macro delete_from_this_where_field_not_in ( + this_field, + table, + that_field +) %} + {% if not that_field %} + {% set that_field = this_field %} + {% endif %} + DELETE FROM {{ this }} WHERE {{ this_field }} not in (SELECT {{ that_field }} FROM {{ ref(table) }} ) +{% endmacro %} diff --git a/airflow/sql/sparte/models/gpu/for_app/for_app_zoneurba.sql b/airflow/sql/sparte/models/gpu/for_app/for_app_zoneurba.sql index 57530a7f1..9e74d66b9 100644 --- a/airflow/sql/sparte/models/gpu/for_app/for_app_zoneurba.sql +++ b/airflow/sql/sparte/models/gpu/for_app/for_app_zoneurba.sql @@ -6,7 +6,7 @@ }} SELECT - checksum as id, + checksum, libelle, libelle_long as libelong, id_document_urbanisme as idurba, diff --git a/airflow/sql/sparte/models/ocsge/for_app/for_app_artifareazoneurba.sql b/airflow/sql/sparte/models/ocsge/for_app/for_app_artifareazoneurba.sql index df84d17ce..ebba8830c 100644 --- a/airflow/sql/sparte/models/ocsge/for_app/for_app_artifareazoneurba.sql +++ b/airflow/sql/sparte/models/ocsge/for_app/for_app_artifareazoneurba.sql @@ -6,7 +6,7 @@ }} SELECT - zonage_checksum as zone_urba_id, + zonage_checksum as zone_urba, year, max(departement) as departement, sum(ST_Area(ST_Transform(geom, 2154))) / 10000 as area diff --git a/airflow/sql/sparte/models/ocsge/for_app/for_app_artificialarea.sql b/airflow/sql/sparte/models/ocsge/for_app/for_app_artificialarea.sql index ee548e638..9f2c81aaf 100644 --- a/airflow/sql/sparte/models/ocsge/for_app/for_app_artificialarea.sql +++ b/airflow/sql/sparte/models/ocsge/for_app/for_app_artificialarea.sql @@ -6,8 +6,9 @@ }} SELECT + commune_year_id, year, - surface / 10000, + surface / 10000 as surface, 2154 as srid_source, departement, commune_code as city, diff --git 
a/airflow/sql/sparte/models/ocsge/for_app/for_app_commune.sql b/airflow/sql/sparte/models/ocsge/for_app/for_app_commune.sql index bad88598f..abef47fd8 100644 --- a/airflow/sql/sparte/models/ocsge/for_app/for_app_commune.sql +++ b/airflow/sql/sparte/models/ocsge/for_app/for_app_commune.sql @@ -43,7 +43,7 @@ SELECT ELSE commune.ocsge_available END AS ocsge_available, millesimes.first_millesime as first_millesime, - millesimes.last_millesime as last_millesime, + millesimes.last_millesime as last_millesime, COALESCE( CASE WHEN diff --git a/airflow/sql/sparte/models/ocsge/for_app/for_app_communesol.sql b/airflow/sql/sparte/models/ocsge/for_app/for_app_communesol.sql index f3cfa82fb..24cd76738 100644 --- a/airflow/sql/sparte/models/ocsge/for_app/for_app_communesol.sql +++ b/airflow/sql/sparte/models/ocsge/for_app/for_app_communesol.sql @@ -7,6 +7,7 @@ with ocsge_with_cs_us_id as ( SELECT + ocsge.ocsge_commune_id as ocsge_commune_id, ocsge.commune_code, ocsge.code_us, ocsge.code_cs, @@ -27,6 +28,7 @@ with ocsge_with_cs_us_id as ( app_usagesol.code_prefix = ocsge.code_us ), ocsge_with_matrix as ( SELECT + ocsge_with_cs_us_id.ocsge_commune_id, ocsge_with_cs_us_id.commune_code, ocsge_with_cs_us_id.surface, ocsge_with_cs_us_id.year, @@ -42,6 +44,7 @@ with ocsge_with_cs_us_id as ( cs_us_matrix.usage_id = ocsge_with_cs_us_id.usage_id ), ocsge_with_matrix_and_city_id as ( SELECT + ocsge_with_matrix.ocsge_commune_id, ocsge_with_matrix.commune_code, ocsge_with_matrix.surface, ocsge_with_matrix.year, diff --git a/airflow/sql/sparte/models/ocsge/for_app/for_app_departement.sql b/airflow/sql/sparte/models/ocsge/for_app/for_app_departement.sql index 4e062157c..fc3d7573e 100644 --- a/airflow/sql/sparte/models/ocsge/for_app/for_app_departement.sql +++ b/airflow/sql/sparte/models/ocsge/for_app/for_app_departement.sql @@ -19,12 +19,7 @@ SELECT app_departement.source_id, app_departement.name, app_departement.region_id, - CASE - WHEN - millesimes.ocsge_millesimes IS NOT NULL - THEN true - 
ELSE false - END AS is_artif_ready, + array_length(millesimes.ocsge_millesimes, 1) > 1 AS is_artif_ready, millesimes.ocsge_millesimes, ST_Transform(admin_express_departement.geom, 4326) as mpoly, 2154 as srid_source diff --git a/airflow/sql/sparte/models/ocsge/for_app/for_app_ocsge.sql b/airflow/sql/sparte/models/ocsge/for_app/for_app_ocsge.sql index ef191611d..c284cf363 100644 --- a/airflow/sql/sparte/models/ocsge/for_app/for_app_ocsge.sql +++ b/airflow/sql/sparte/models/ocsge/for_app/for_app_ocsge.sql @@ -6,6 +6,7 @@ }} SELECT + uuid, code_cs as couverture, code_us as usage, year, diff --git a/airflow/sql/sparte/models/ocsge/for_app/for_app_ocsgediff.sql b/airflow/sql/sparte/models/ocsge/for_app/for_app_ocsgediff.sql index 6ae29f214..4005d56dc 100644 --- a/airflow/sql/sparte/models/ocsge/for_app/for_app_ocsgediff.sql +++ b/airflow/sql/sparte/models/ocsge/for_app/for_app_ocsgediff.sql @@ -6,6 +6,7 @@ }} SELECT + uuid, year_old, year_new, cs_new, diff --git a/airflow/sql/sparte/models/ocsge/for_app/for_app_zoneconstruite.sql b/airflow/sql/sparte/models/ocsge/for_app/for_app_zoneconstruite.sql index 4558a6d0d..b9353eb96 100644 --- a/airflow/sql/sparte/models/ocsge/for_app/for_app_zoneconstruite.sql +++ b/airflow/sql/sparte/models/ocsge/for_app/for_app_zoneconstruite.sql @@ -6,6 +6,7 @@ }} SELECT + uuid, id as id_source, year as millesime, ST_Transform(geom, 4326) as mpoly, diff --git a/airflow/sql/sparte/models/ocsge/intersected/artificial_commune.sql b/airflow/sql/sparte/models/ocsge/intersected/artificial_commune.sql index 2f6475357..3b128581e 100644 --- a/airflow/sql/sparte/models/ocsge/intersected/artificial_commune.sql +++ b/airflow/sql/sparte/models/ocsge/intersected/artificial_commune.sql @@ -1,29 +1,45 @@ {{ config( materialized='incremental', - post_hook="DELETE FROM {{ this }} WHERE loaded_date not in (SELECT loaded_date FROM {{ ref('occupation_du_sol') }} )" + post_hook="{{ delete_from_this_where_field_not_in('ocsge_loaded_date', 'occupation_du_sol', 
'loaded_date') }}" ) }} -SELECT *, ST_Area(geom) as surface FROM ( +/* + +Cette requête retourne une géométrie par commune et par année regroupant +toutes les surfaces artificielles du territoire. + +*/ + +with artificial_commune_without_surface as ( SELECT + concat(ocsge.commune_code::text, '_', ocsge.year::text) as commune_year_id, -- surrogate key + + ocsge.commune_code, + ocsge.ocsge_loaded_date, + ocsge.departement, ocsge.year, - ocsge.commune_code, - ocsge.loaded_date, - ARRAY_AGG(ocsge.uuid) AS uuids, ST_Union(geom) as geom FROM {{ ref("occupation_du_sol_commune") }} AS ocsge WHERE ocsge.is_artificial = true + {% if is_incremental() %} - AND ocsge.loaded_date > - (SELECT max(foo.loaded_date) FROM {{ this }} as foo) + AND ocsge.ocsge_loaded_date > + (SELECT max(foo.ocsge_loaded_date) FROM {{ this }} as foo) {% endif %} + GROUP BY ocsge.commune_code, ocsge.departement, ocsge.year, - ocsge.loaded_date -) as foo + ocsge.ocsge_loaded_date +) +SELECT + *, + ST_Area(geom) as surface +FROM + artificial_commune_without_surface diff --git a/airflow/sql/sparte/models/ocsge/intersected/difference_commune.sql b/airflow/sql/sparte/models/ocsge/intersected/difference_commune.sql index 9de9eb2c5..27bc2da7a 100644 --- a/airflow/sql/sparte/models/ocsge/intersected/difference_commune.sql +++ b/airflow/sql/sparte/models/ocsge/intersected/difference_commune.sql @@ -1,15 +1,29 @@ {{ config( materialized='incremental', - post_hook='DELETE FROM {{ this }} WHERE uuid not in (SELECT uuid FROM {{ ref("difference") }} )' - + post_hook="{{ delete_from_this_where_field_not_in('ocsge_loaded_date', 'difference', 'loaded_date') }}" ) }} -SELECT *, ST_Area(geom) as surface FROM ( +/* + +Cette requête découpe les objets OCS GE de différence par commune. + +Dans le cas où un objet OCS GE est découpé par plusieurs communes, il sera dupliqué, mais +la surface totale de l'objet sera conservée. 
+ +*/ + + +with difference_commune_without_surface as ( SELECT + concat(ocsge.uuid::text, '_', commune.code::text) as ocsge_commune_id, -- surrogate key + -- les attributs spécifiques aux communes sont préfixés par commune_ commune.code as commune_code, - ocsge.loaded_date, + -- les attributs spécifiques aux objets OCS GE sont préfixés par ocsge_ + ocsge.loaded_date as ocsge_loaded_date, + ocsge.uuid as ocsge_uuid, + -- les attributs communs aux deux tables sont sans préfixe ocsge.year_old, ocsge.year_new, ocsge.departement, @@ -21,7 +35,6 @@ SELECT *, ST_Area(geom) as surface FROM ( ocsge.us_old, ocsge.cs_new, ocsge.us_new, - ocsge.uuid, ST_Intersection(commune.geom, ocsge.geom) AS geom FROM {{ ref("commune") }} AS commune @@ -33,6 +46,12 @@ SELECT *, ST_Area(geom) as surface FROM ( ST_Intersects(commune.geom, ocsge.geom) {% if is_incremental() %} - WHERE ocsge.uuid not in (SELECT bar.uuid from {{ this }} as bar) + WHERE ocsge.uuid not in (SELECT bar.ocsge_uuid from {{ this }} as bar) {% endif %} -) as foo +) + +SELECT + *, + ST_Area(geom) as surface +FROM + difference_commune_without_surface diff --git a/airflow/sql/sparte/models/ocsge/intersected/occupation_du_sol_commune.sql b/airflow/sql/sparte/models/ocsge/intersected/occupation_du_sol_commune.sql index e1d0901c2..5a3b82aaf 100644 --- a/airflow/sql/sparte/models/ocsge/intersected/occupation_du_sol_commune.sql +++ b/airflow/sql/sparte/models/ocsge/intersected/occupation_du_sol_commune.sql @@ -1,15 +1,29 @@ {{ config( materialized='incremental', - post_hook='DELETE FROM {{ this }} WHERE uuid not in (SELECT uuid FROM {{ ref("occupation_du_sol") }} )' + post_hook="{{ delete_from_this_where_field_not_in('ocsge_loaded_date', 'occupation_du_sol', 'loaded_date') }}" ) }} -SELECT *, ST_Area(geom) as surface FROM ( +/* + +Cette requête découpe les objets OCS GE d'occupation du sol par commune. 
+ +Dans le cas où un objet OCS GE est découpé par plusieurs communes, il sera dupliqué, mais +la surface totale de l'objet sera conservée. + +*/ + + +with occupation_du_sol_commune_without_surface as ( SELECT + concat(ocsge.uuid::text, '_', commune.code::text) as ocsge_commune_id, -- surrogate key + -- les attributs spécifiques aux communes sont préfixés par commune_ commune.code AS commune_code, - ocsge.uuid, - ocsge.loaded_date, + -- les attributs spécifiques aux objets OCS GE sont préfixés par ocsge_ + ocsge.uuid as ocsge_uuid, + ocsge.loaded_date as ocsge_loaded_date, + -- les attributs communs aux deux tables sont sans préfixe ocsge.year, ocsge.departement, ocsge.code_cs, @@ -27,7 +41,12 @@ SELECT *, ST_Area(geom) as surface FROM ( ST_Intersects(commune.geom, ocsge.geom) {% if is_incremental() %} - WHERE ocsge.uuid not in (SELECT foo.uuid from {{ this }} as foo) + WHERE ocsge.uuid not in (SELECT foo.ocsge_uuid from {{ this }} as foo) {% endif %} +) -) as foo +SELECT + *, + ST_Area(geom) as surface +FROM + occupation_du_sol_commune_without_surface diff --git a/airflow/sql/sparte/models/ocsge/intersected/occupation_du_sol_zonage_urbanisme.sql b/airflow/sql/sparte/models/ocsge/intersected/occupation_du_sol_zonage_urbanisme.sql index d391f9e85..ed90d3aad 100644 --- a/airflow/sql/sparte/models/ocsge/intersected/occupation_du_sol_zonage_urbanisme.sql +++ b/airflow/sql/sparte/models/ocsge/intersected/occupation_du_sol_zonage_urbanisme.sql @@ -8,18 +8,31 @@ {'columns': ['zonage_checksum'], 'type': 'btree'} ], post_hook=[ - 'DELETE FROM {{ this }} WHERE uuid not in (SELECT uuid FROM {{ ref("occupation_du_sol") }} )', - 'DELETE FROM {{ this }} WHERE zonage_checksum not in (SELECT checksum FROM {{ ref("zonage_urbanisme") }} )' + "{{ delete_from_this_where_field_not_in('ocsge_loaded_date', 'occupation_du_sol', 'loaded_date') }}", + "{{ delete_from_this_where_field_not_in('zonage_checksum', 'zonage_urbanisme', 'checksum') }}", ] ) }} -SELECT *, ST_Area(geom) as surface 
FROM ( +/* + +Cette requête découpe les objets OCS GE d'occupation du sol par zonage d'urbanisme. + +Dans le cas où un objet OCS GE est découpé par plusieurs zonages, il sera dupliqué, mais +la surface totale de l'objet sera conservée. + +*/ + +with occupation_du_sol_zonage_urbanisme_without_surface as ( SELECT + concat(ocsge.uuid::text, '_', zonage.checksum::text) as ocsge_zonage_id, -- surrogate key + -- les attributs spécifiques aux zonages sont préfixés par zonage_ zonage.libelle AS zonage_libelle, zonage.checksum AS zonage_checksum, zonage.gpu_timestamp AS zonage_gpu_timestamp, + -- les attributs spécifiques aux objets OCS GE sont préfixés par ocsge_ ocsge.loaded_date AS ocsge_loaded_date, + -- les attributs communs aux deux tables sont sans préfixe ocsge.year, ocsge.departement, ocsge.code_cs, @@ -42,5 +55,10 @@ SELECT *, ST_Area(geom) as surface FROM ( zonage.gpu_timestamp > (SELECT max(bar.zonage_gpu_timestamp) FROM {{ this }} as bar) {% endif %} +) -) as foo +SELECT + *, + ST_Area(geom) as surface +FROM + occupation_du_sol_zonage_urbanisme_without_surface diff --git a/project/models/project_base.py b/project/models/project_base.py index c3b3d323f..f2bce0821 100644 --- a/project/models/project_base.py +++ b/project/models/project_base.py @@ -569,9 +569,7 @@ def has_no_ocsge_coverage(self) -> bool: @property def has_zonage_urbanisme(self) -> bool: - has = ArtifAreaZoneUrba.objects.filter(zone_urba__mpoly__intersects=self.combined_emprise).exists() - print("has_zonage_urbanisme", has) - return has + return ArtifAreaZoneUrba.objects.filter(zone_urba__mpoly__intersects=self.combined_emprise).exists() def get_ocsge_millesimes(self): """Return all OCS GE millésimes available within project cities and between @@ -1252,7 +1250,7 @@ def get_artif_per_zone_urba_type( zone_urba = ( ZoneUrba.objects.annotate(pos=PointOnSurface("mpoly")) .filter(pos__intersects=self.combined_emprise) - .values_list("id", flat=True) + .values_list("checksum", flat=True) ) qs = ( diff 
--git a/project/tasks/project.py b/project/tasks/project.py index f52e6462c..072332116 100644 --- a/project/tasks/project.py +++ b/project/tasks/project.py @@ -608,15 +608,14 @@ def generate_theme_map_understand_artif(self, project_id) -> None: data = {"color": [], "geometry": []} # add artificial area to data city_ids = diagnostic.cities.all().values_list("insee", flat=True) - queryset = ArtificialArea.objects.filter(city__in=city_ids) + artif_areas = ArtificialArea.objects.filter(city__in=city_ids) artif_color = (0.97, 0.56, 0.33) new_artif_color = (1, 0, 0) new_natural_color = (0, 1, 0) - for row in queryset.only("mpoly"): - srid, wkt = row.mpoly.ewkt.split(";") - polygons = shapely.wkt.loads(wkt) + for artif_area in artif_areas: + polygons = shapely.wkt.loads(artif_area.mpoly.wkt) data["geometry"].append(polygons) data["color"].append(artif_color) diff --git a/project/templates/project/partials/artif_zone_urba.html b/project/templates/project/partials/artif_zone_urba.html index d138996ee..a8b1edfba 100644 --- a/project/templates/project/partials/artif_zone_urba.html +++ b/project/templates/project/partials/artif_zone_urba.html @@ -44,7 +44,7 @@

Détail de la zone urbaine

Artificialisation nette entre {{ diagnostic.first_year_ocsge|stringformat:"s" }} et {{ diagnostic.last_year_ocsge|stringformat:"s" }}

-
+
@@ -55,13 +55,13 @@

Détails de l'artificialisation entre {{ diagnostic.first_year_ocsge|stringf

Grandes familles de couverture des sols des surfaces artificialisées

-
+

Grandes familles d'usages du sol des surfaces artificialisées

-
+
diff --git a/project/urls.py b/project/urls.py index 1bb977da9..f8a493566 100644 --- a/project/urls.py +++ b/project/urls.py @@ -170,7 +170,7 @@ name="map-test", ), path( - "/carte/detail-zone-urbaine/", + "/carte/detail-zone-urbaine/", views.ArtifZoneUrbaView.as_view(), name="map-pane-artif-zone-urba", ), diff --git a/project/views/report.py b/project/views/report.py index 0b8625893..4a10a1e18 100644 --- a/project/views/report.py +++ b/project/views/report.py @@ -9,6 +9,7 @@ from django.db import transaction from django.db.models import Case, CharField, DecimalField, F, Q, Sum, Value, When from django.db.models.functions import Cast, Concat +from django.db.models.query import QuerySet from django.http import HttpRequest, HttpResponse, HttpResponseRedirect from django.shortcuts import redirect from django.urls import reverse @@ -584,10 +585,13 @@ def get_context_data(self, **kwargs): class ArtifZoneUrbaView(CacheMixin, StandAloneMixin, DetailView): """Content of the pannel in Urba Area Explorator.""" - context_object_name = "zone_urba" queryset = ZoneUrba.objects.all() + context_object_name = "zone_urba" template_name = "project/partials/artif_zone_urba.html" + def get_object(self) -> QuerySet[Any]: + return ZoneUrba.objects.get(checksum=self.kwargs["checksum"]) + def get_context_data(self, **kwargs): diagnostic = Project.objects.get(pk=self.kwargs["project_id"]) zone_urba = self.get_object() @@ -615,7 +619,7 @@ def get(self, request: HttpRequest, *args: Any, **kwargs: Any) -> HttpResponse: self.diagnostic = Project.objects.get(pk=self.kwargs["pk"]) self.zone_urba = None if "zone_urba_id" in self.request.GET: - self.zone_urba = ZoneUrba.objects.get(pk=self.request.GET.get("zone_urba_id")) + self.zone_urba = ZoneUrba.objects.get(checksum=self.request.GET.get("zone_urba_id")) return super().get(request, *args, **kwargs) def get_data(self): @@ -677,7 +681,7 @@ def get(self, request: HttpRequest, *args: Any, **kwargs: Any) -> HttpResponse: self.diagnostic = 
Project.objects.get(pk=self.kwargs["pk"]) self.zone_urba = None if "zone_urba_id" in self.request.GET: - self.zone_urba = ZoneUrba.objects.get(pk=self.request.GET.get("zone_urba_id")) + self.zone_urba = ZoneUrba.objects.get(checksum=self.request.GET.get("zone_urba_id")) return super().get(request, *args, **kwargs) def get_data(self): @@ -760,7 +764,7 @@ def get(self, request: HttpRequest, *args: Any, **kwargs: Any) -> HttpResponse: self.diagnostic = Project.objects.get(pk=self.kwargs["pk"]) self.zone_urba = None if "zone_urba_id" in self.request.GET: - self.zone_urba = ZoneUrba.objects.get(pk=self.request.GET.get("zone_urba_id")) + self.zone_urba = ZoneUrba.objects.get(checksum=self.request.GET.get("zone_urba_id")) return super().get(request, *args, **kwargs) def get_data(self): diff --git a/public_data/migrations/0188_alter_artifareazoneurba_options_and_more.py b/public_data/migrations/0188_alter_artifareazoneurba_options_and_more.py new file mode 100644 index 000000000..7beceb998 --- /dev/null +++ b/public_data/migrations/0188_alter_artifareazoneurba_options_and_more.py @@ -0,0 +1,68 @@ +# Generated by Django 4.2.13 on 2024-08-22 09:15 + +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("public_data", "0187_auto_20240703_1704"), + ] + + operations = [ + migrations.AlterModelOptions( + name="artifareazoneurba", + options={"managed": False}, + ), + migrations.AlterModelOptions( + name="artificialarea", + options={ + "managed": False, + "verbose_name": "OCSGE - Artificialisation (par commune)", + "verbose_name_plural": "OCSGE - Artificialisation (par commune)", + }, + ), + migrations.AlterModelOptions( + name="communediff", + options={ + "managed": False, + "verbose_name": "OCSGE - Différence (par commune)", + "verbose_name_plural": "OCSGE - Différence (par commune)", + }, + ), + migrations.AlterModelOptions( + name="communesol", + options={ + "managed": False, + "verbose_name": "OCSGE - Couverture x usage des sols 
(par commune)", + "verbose_name_plural": "OCSGE - Couverture x usage des sols (par commune)", + }, + ), + migrations.AlterModelOptions( + name="ocsge", + options={"managed": False, "verbose_name": "OCSGE", "verbose_name_plural": "OCSGE"}, + ), + migrations.AlterModelOptions( + name="ocsgediff", + options={ + "managed": False, + "verbose_name": "OCSGE - Différence", + "verbose_name_plural": "OCSGE - Différence", + }, + ), + migrations.AlterModelOptions( + name="zoneconstruite", + options={ + "managed": False, + "verbose_name": "OCSGE - Zone construite", + "verbose_name_plural": "OCSGE - Zone construite", + }, + ), + migrations.AlterModelOptions( + name="zoneurba", + options={"managed": False}, + ), + migrations.RemoveField( + model_name="commune", + name="map_color", + ), + ] diff --git a/public_data/migrations/0188_remove_zoneurba_id_zoneurba_uuid.py b/public_data/migrations/0188_remove_zoneurba_id_zoneurba_uuid.py deleted file mode 100644 index 80d115b40..000000000 --- a/public_data/migrations/0188_remove_zoneurba_id_zoneurba_uuid.py +++ /dev/null @@ -1,22 +0,0 @@ -# Generated by Django 4.2.13 on 2024-08-17 19:25 - -from django.db import migrations, models -import uuid - - -class Migration(migrations.Migration): - dependencies = [ - ("public_data", "0187_auto_20240703_1704"), - ] - - operations = [ - migrations.RemoveField( - model_name="zoneurba", - name="id", - ), - migrations.AddField( - model_name="zoneurba", - name="uuid", - field=models.UUIDField(default=uuid.uuid4, primary_key=True, serialize=False, verbose_name="UUID"), - ), - ] diff --git a/public_data/migrations/0189_remove_zoneurba_gid.py b/public_data/migrations/0189_remove_zoneurba_gid.py deleted file mode 100644 index 3467055eb..000000000 --- a/public_data/migrations/0189_remove_zoneurba_gid.py +++ /dev/null @@ -1,16 +0,0 @@ -# Generated by Django 4.2.13 on 2024-08-17 19:32 - -from django.db import migrations - - -class Migration(migrations.Migration): - dependencies = [ - ("public_data", 
"0188_remove_zoneurba_id_zoneurba_uuid"), - ] - - operations = [ - migrations.RemoveField( - model_name="zoneurba", - name="gid", - ), - ] diff --git a/public_data/migrations/0190_remove_zoneurba_origin_insee_and_more.py b/public_data/migrations/0190_remove_zoneurba_origin_insee_and_more.py deleted file mode 100644 index 11b7304e8..000000000 --- a/public_data/migrations/0190_remove_zoneurba_origin_insee_and_more.py +++ /dev/null @@ -1,20 +0,0 @@ -# Generated by Django 4.2.13 on 2024-08-17 19:39 - -from django.db import migrations - - -class Migration(migrations.Migration): - dependencies = [ - ("public_data", "0189_remove_zoneurba_gid"), - ] - - operations = [ - migrations.RemoveField( - model_name="zoneurba", - name="origin_insee", - ), - migrations.RemoveField( - model_name="zoneurba", - name="origin_typezone", - ), - ] diff --git a/public_data/migrations/0191_remove_zoneurba_destdomi.py b/public_data/migrations/0191_remove_zoneurba_destdomi.py deleted file mode 100644 index 7462d3e76..000000000 --- a/public_data/migrations/0191_remove_zoneurba_destdomi.py +++ /dev/null @@ -1,16 +0,0 @@ -# Generated by Django 4.2.13 on 2024-08-17 19:42 - -from django.db import migrations - - -class Migration(migrations.Migration): - dependencies = [ - ("public_data", "0190_remove_zoneurba_origin_insee_and_more"), - ] - - operations = [ - migrations.RemoveField( - model_name="zoneurba", - name="destdomi", - ), - ] diff --git a/public_data/migrations/0192_remove_zoneurba_idzone_remove_zoneurba_lib_idzone_and_more.py b/public_data/migrations/0192_remove_zoneurba_idzone_remove_zoneurba_lib_idzone_and_more.py deleted file mode 100644 index b7659b0da..000000000 --- a/public_data/migrations/0192_remove_zoneurba_idzone_remove_zoneurba_lib_idzone_and_more.py +++ /dev/null @@ -1,28 +0,0 @@ -# Generated by Django 4.2.13 on 2024-08-17 19:45 - -from django.db import migrations - - -class Migration(migrations.Migration): - dependencies = [ - ("public_data", "0191_remove_zoneurba_destdomi"), - 
] - - operations = [ - migrations.RemoveField( - model_name="zoneurba", - name="idzone", - ), - migrations.RemoveField( - model_name="zoneurba", - name="lib_idzone", - ), - migrations.RemoveField( - model_name="zoneurba", - name="nomfic", - ), - migrations.RemoveField( - model_name="zoneurba", - name="urlfic", - ), - ] diff --git a/public_data/migrations/0193_remove_zoneurba_public_data_insee_3f872f_idx_and_more.py b/public_data/migrations/0193_remove_zoneurba_public_data_insee_3f872f_idx_and_more.py deleted file mode 100644 index 3057cfa06..000000000 --- a/public_data/migrations/0193_remove_zoneurba_public_data_insee_3f872f_idx_and_more.py +++ /dev/null @@ -1,20 +0,0 @@ -# Generated by Django 4.2.13 on 2024-08-17 19:50 - -from django.db import migrations - - -class Migration(migrations.Migration): - dependencies = [ - ("public_data", "0192_remove_zoneurba_idzone_remove_zoneurba_lib_idzone_and_more"), - ] - - operations = [ - migrations.RemoveIndex( - model_name="zoneurba", - name="public_data_insee_3f872f_idx", - ), - migrations.RemoveField( - model_name="zoneurba", - name="insee", - ), - ] diff --git a/public_data/migrations/0194_artifareazoneurba_departement.py b/public_data/migrations/0194_artifareazoneurba_departement.py deleted file mode 100644 index 650ce4945..000000000 --- a/public_data/migrations/0194_artifareazoneurba_departement.py +++ /dev/null @@ -1,18 +0,0 @@ -# Generated by Django 4.2.13 on 2024-08-17 19:50 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - dependencies = [ - ("public_data", "0193_remove_zoneurba_public_data_insee_3f872f_idx_and_more"), - ] - - operations = [ - migrations.AddField( - model_name="artifareazoneurba", - name="departement", - field=models.CharField(default="", max_length=3, verbose_name="Département"), - preserve_default=False, - ), - ] diff --git a/public_data/migrations/0195_remove_zoneurba_uuid_zoneurba_id.py b/public_data/migrations/0195_remove_zoneurba_uuid_zoneurba_id.py 
deleted file mode 100644 index bae4902e1..000000000 --- a/public_data/migrations/0195_remove_zoneurba_uuid_zoneurba_id.py +++ /dev/null @@ -1,22 +0,0 @@ -# Generated by Django 4.2.13 on 2024-08-17 19:55 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - dependencies = [ - ("public_data", "0194_artifareazoneurba_departement"), - ] - - operations = [ - migrations.RemoveField( - model_name="zoneurba", - name="uuid", - ), - migrations.AddField( - model_name="zoneurba", - name="id", - field=models.TextField(default="", primary_key=True, serialize=False, verbose_name="id"), - preserve_default=False, - ), - ] diff --git a/public_data/migrations/0196_remove_artifareazoneurba_unique_zone_year_and_more.py b/public_data/migrations/0196_remove_artifareazoneurba_unique_zone_year_and_more.py deleted file mode 100644 index eadd11a67..000000000 --- a/public_data/migrations/0196_remove_artifareazoneurba_unique_zone_year_and_more.py +++ /dev/null @@ -1,28 +0,0 @@ -# Generated by Django 4.2.13 on 2024-08-18 12:44 - -from django.db import migrations - - -class Migration(migrations.Migration): - dependencies = [ - ("public_data", "0195_remove_zoneurba_uuid_zoneurba_id"), - ] - - operations = [ - migrations.RemoveConstraint( - model_name="artifareazoneurba", - name="unique_zone_year", - ), - migrations.RemoveIndex( - model_name="artifareazoneurba", - name="public_data_zone_ur_cb8473_idx", - ), - migrations.RemoveIndex( - model_name="artifareazoneurba", - name="public_data_zone_ur_57615b_idx", - ), - migrations.RemoveField( - model_name="artifareazoneurba", - name="zone_urba", - ), - ] diff --git a/public_data/migrations/0197_artifareazoneurba_zone_urba.py b/public_data/migrations/0197_artifareazoneurba_zone_urba.py deleted file mode 100644 index ed5cb061e..000000000 --- a/public_data/migrations/0197_artifareazoneurba_zone_urba.py +++ /dev/null @@ -1,21 +0,0 @@ -# Generated by Django 4.2.13 on 2024-08-18 12:44 - -from django.db import migrations, 
models -import django.db.models.deletion - - -class Migration(migrations.Migration): - dependencies = [ - ("public_data", "0196_remove_artifareazoneurba_unique_zone_year_and_more"), - ] - - operations = [ - migrations.AddField( - model_name="artifareazoneurba", - name="zone_urba", - field=models.ForeignKey( - default="", on_delete=django.db.models.deletion.CASCADE, to="public_data.zoneurba" - ), - preserve_default=False, - ), - ] diff --git a/public_data/migrations/0198_remove_commune_map_color.py b/public_data/migrations/0198_remove_commune_map_color.py deleted file mode 100644 index 4d62d8701..000000000 --- a/public_data/migrations/0198_remove_commune_map_color.py +++ /dev/null @@ -1,16 +0,0 @@ -# Generated by Django 4.2.13 on 2024-08-19 09:06 - -from django.db import migrations - - -class Migration(migrations.Migration): - dependencies = [ - ("public_data", "0197_artifareazoneurba_zone_urba"), - ] - - operations = [ - migrations.RemoveField( - model_name="commune", - name="map_color", - ), - ] diff --git a/public_data/models/administration/CommuneDiff.py b/public_data/models/administration/CommuneDiff.py index 6be87a846..cd10b78f2 100644 --- a/public_data/models/administration/CommuneDiff.py +++ b/public_data/models/administration/CommuneDiff.py @@ -6,10 +6,7 @@ class CommuneDiff(models.Model): class Meta: verbose_name = "OCSGE - Différence (par commune)" verbose_name_plural = verbose_name - indexes = [ - models.Index(fields=["year_old"]), - models.Index(fields=["year_new"]), - ] + managed = False city = models.ForeignKey("Commune", verbose_name="Commune", on_delete=models.CASCADE) year_old = models.IntegerField( diff --git a/public_data/models/administration/CommuneSol.py b/public_data/models/administration/CommuneSol.py index 3ca989196..b99ccf29c 100644 --- a/public_data/models/administration/CommuneSol.py +++ b/public_data/models/administration/CommuneSol.py @@ -6,12 +6,7 @@ class CommuneSol(models.Model): class Meta: verbose_name = "OCSGE - Couverture x 
usage des sols (par commune)" verbose_name_plural = verbose_name - indexes = [ - models.Index(name="communesol-triplet-index", fields=["city", "matrix", "year"]), - models.Index(name="communesol-city-index", fields=["city"]), - models.Index(name="communesol-year-index", fields=["year"]), - models.Index(name="communesol-matrix-index", fields=["matrix"]), - ] + managed = False city = models.ForeignKey("Commune", verbose_name="Commune", on_delete=models.CASCADE) year = models.IntegerField( diff --git a/public_data/models/gpu.py b/public_data/models/gpu.py index 9bb12c0c6..20d372075 100644 --- a/public_data/models/gpu.py +++ b/public_data/models/gpu.py @@ -13,7 +13,7 @@ class ZoneUrbaManager(IntersectMixin, models.Manager): class ZoneUrba(models.Model): - id = models.TextField("id", primary_key=True) + checksum = models.TextField("checksum", unique=True) libelle = models.CharField("libelle", max_length=80, blank=True, null=True) libelong = models.CharField("libelong", max_length=254, blank=True, null=True) idurba = models.CharField("idurba", max_length=80, blank=True, null=True) @@ -45,21 +45,22 @@ def __str__(self): return f"{self.insee} {self.typezone} {self.area}Ha" class Meta: - indexes = [ - models.Index(fields=["typezone"]), - ] + managed = False class ArtifAreaZoneUrba(models.Model): - zone_urba = models.ForeignKey(ZoneUrba, on_delete=models.CASCADE) + zone_urba = models.ForeignKey( + ZoneUrba, + on_delete=models.DO_NOTHING, + to_field="checksum", + db_column="zone_urba", + ) year = models.IntegerField("Millésime", validators=[MinValueValidator(2000), MaxValueValidator(2050)]) departement = models.CharField("Département", max_length=3) area = models.DecimalField("Surface artificialisée", max_digits=15, decimal_places=4) def __str__(self): - return f"{self.zone_urba_id} {self.year} {self.area}Ha" + return f"{self.zone_urba} {self.year} {self.area}Ha" class Meta: - indexes = [ - models.Index(fields=["year"]), - ] + managed = False diff --git 
a/public_data/models/ocsge.py b/public_data/models/ocsge.py index 7e1bdd4c3..64be7abb0 100644 --- a/public_data/models/ocsge.py +++ b/public_data/models/ocsge.py @@ -4,11 +4,12 @@ from django.db.models import Sum from public_data.models.enums import SRID -from public_data.models.mixins import DataColorationMixin, TruncateTableMixin +from public_data.models.mixins import DataColorationMixin from utils.db import DynamicSRIDTransform, IntersectManager -class Ocsge(TruncateTableMixin, DataColorationMixin, models.Model): +class Ocsge(DataColorationMixin, models.Model): + id = models.TextField("ID", primary_key=True) couverture = models.CharField("Couverture du sol", max_length=254) usage = models.CharField("Usage du sol", max_length=254) id_source = models.CharField("ID source", max_length=200) @@ -32,6 +33,7 @@ class Ocsge(TruncateTableMixin, DataColorationMixin, models.Model): default_property = "id" class Meta: + managed = False verbose_name = "OCSGE" verbose_name_plural = verbose_name indexes = [ @@ -66,7 +68,8 @@ def get_groupby(cls, field_group_by, coveredby, year): return data -class OcsgeDiff(TruncateTableMixin, DataColorationMixin, models.Model): +class OcsgeDiff(DataColorationMixin, models.Model): + id = models.TextField("ID", primary_key=True) year_old = models.IntegerField("Ancienne année", validators=[MinValueValidator(2000), MaxValueValidator(2050)]) year_new = models.IntegerField("Nouvelle année", validators=[MinValueValidator(2000), MaxValueValidator(2050)]) cs_new = models.CharField("Code nouvelle couverture", max_length=12) @@ -97,6 +100,7 @@ class OcsgeDiff(TruncateTableMixin, DataColorationMixin, models.Model): default_color = "Red" class Meta: + managed = False verbose_name = "OCSGE - Différence" verbose_name_plural = verbose_name indexes = [ @@ -106,7 +110,8 @@ class Meta: ] -class ArtificialArea(TruncateTableMixin, DataColorationMixin, models.Model): +class ArtificialArea(DataColorationMixin, models.Model): + id = models.TextField("ID", 
primary_key=True) mpoly = models.MultiPolygonField(srid=4326) srid_source = models.IntegerField( "SRID", @@ -123,6 +128,7 @@ class ArtificialArea(TruncateTableMixin, DataColorationMixin, models.Model): objects = IntersectManager() class Meta: + managed = False verbose_name = "OCSGE - Artificialisation (par commune)" verbose_name_plural = verbose_name constraints = [] @@ -134,7 +140,8 @@ class Meta: ] -class ZoneConstruite(TruncateTableMixin, DataColorationMixin, models.Model): +class ZoneConstruite(DataColorationMixin, models.Model): + id = models.TextField("ID", primary_key=True) id_source = models.CharField("ID Source", max_length=200) millesime = models.CharField("Millesime", max_length=200) mpoly = models.MultiPolygonField(srid=4326) @@ -155,6 +162,7 @@ class ZoneConstruite(TruncateTableMixin, DataColorationMixin, models.Model): objects = IntersectManager() class Meta: + managed = False verbose_name = "OCSGE - Zone construite" verbose_name_plural = verbose_name indexes = [ diff --git a/public_data/views.py b/public_data/views.py index f168e562e..48d2c4201 100644 --- a/public_data/views.py +++ b/public_data/views.py @@ -397,7 +397,7 @@ class ZoneUrbaViewSet(OnlyBoundingBoxMixin, ZoomSimplificationMixin, OptimizedMi queryset = models.ZoneUrba.objects.all() serializer_class = serializers.ZoneUrbaSerializer optimized_fields = { - "o.id": "id", + "o.checksum": "id", "o.libelle": "libelle", "o.libelong": "libelong", "o.typezone": "typezone", From ba1da771d80999ecb200ad8966dc1e2a6432738f Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Thu, 22 Aug 2024 12:21:42 +0200 Subject: [PATCH 22/99] chore(commands): remove unused --- .../commands/build_project_ocsge_status.py | 41 --- project/management/commands/export_excel.py | 58 ----- .../management/commands/fix_look_a_like.py | 20 -- project/management/commands/generate_cover.py | 26 -- project/management/commands/get_image.py | 21 -- project/management/commands/mep_53.py | 15 -- .../commands/correct_label_couv_usage.py | 35 --- public_data/management/commands/load_insee.py | 86 ------- .../management/commands/load_usage_couv.py | 238 ------------------ .../management/commands/set_commune_color.py | 22 -- 10 files changed, 562 deletions(-) delete mode 100644 project/management/commands/build_project_ocsge_status.py delete mode 100644 project/management/commands/export_excel.py delete mode 100644 project/management/commands/fix_look_a_like.py delete mode 100644 project/management/commands/generate_cover.py delete mode 100644 project/management/commands/get_image.py delete mode 100644 project/management/commands/mep_53.py delete mode 100644 public_data/management/commands/correct_label_couv_usage.py delete mode 100644 public_data/management/commands/load_insee.py delete mode 100644 public_data/management/commands/load_usage_couv.py delete mode 100644 public_data/management/commands/set_commune_color.py diff --git a/project/management/commands/build_project_ocsge_status.py b/project/management/commands/build_project_ocsge_status.py deleted file mode 100644 index a8bd9ce90..000000000 --- a/project/management/commands/build_project_ocsge_status.py +++ /dev/null @@ -1,41 +0,0 @@ -import logging - -from django.core.management.base import BaseCommand - -from project.models import Project -from project.tasks import calculate_project_ocsge_status - -logger = logging.getLogger("management.commands") - - -class Command(BaseCommand): - help = "Build all ocsge status of projects" - - def add_arguments(self, parser): - parser.add_argument( - "--departements", 
- nargs="+", - type=int, - help="Select departements to build", - ) - - def handle(self, *args, **options): - logger.info("Start building ocsge status") - - projects = Project.objects.all() - - if options.get("departements"): - logger.info("Filtering on departements: %s", options["departements"]) - projects = projects.filter(cities__departement__source_id__in=options["departements"]) - - projects = projects.distinct() - - count = projects.count() - - logger.info(f"{count} projects") - - for i, project in enumerate(projects): - logger.info(f"{i + 1}/{count} - Process project {project.id}") - calculate_project_ocsge_status(project.id) - - logger.info("End building ocsge status") diff --git a/project/management/commands/export_excel.py b/project/management/commands/export_excel.py deleted file mode 100644 index 7c4d25c3d..000000000 --- a/project/management/commands/export_excel.py +++ /dev/null @@ -1,58 +0,0 @@ -import calendar -import logging -from datetime import date, datetime - -from django.core.management.base import BaseCommand -from django.utils import timezone - -from project.models.export import export_dl_diag -from project.storages import ExportStorage - -logger = logging.getLogger("management.commands") - - -class Command(BaseCommand): - help = "" - - def add_arguments(self, parser): - parser.add_argument("--local", action="store_true") - parser.add_argument( - "--start", - type=lambda s: date.fromisoformat(s), - ) - parser.add_argument( - "--end", - type=lambda s: date.fromisoformat(s), - ) - - def handle(self, *args, **options): - logger.info("Start export stats") - if options["start"]: - start = options["start"] - else: - n = date.today() - start = date(day=1, month=n.month, year=n.year) - if options["end"]: - end = options["end"] - else: - _, last_day = calendar.monthrange(year=start.year, month=start.month) - end = date(day=last_day, month=start.month, year=start.year) - - if not isinstance(start, datetime): - start = datetime(start.year, start.month, 
start.day) - if not isinstance(end, datetime): - end = datetime(end.year, end.month, end.day) - start = timezone.make_aware(start) - end = timezone.make_aware(end) - logger.info("Start: %s", start) - logger.info("End: %s", end) - - excel_file = export_dl_diag(start, end) - filename = f"diag_downloaded_{start.strftime('%d%m%Y')}_{end.strftime('%d%m%Y')}.xlsx" - if options["local"]: - with open(filename, "wb") as f: - f.write(excel_file.read()) - else: - storage = ExportStorage() - path = storage.save_xlsx(filename, excel_file) - print(f"Url to file: {storage.url(path)}") diff --git a/project/management/commands/fix_look_a_like.py b/project/management/commands/fix_look_a_like.py deleted file mode 100644 index 49e49b2ba..000000000 --- a/project/management/commands/fix_look_a_like.py +++ /dev/null @@ -1,20 +0,0 @@ -import logging - -from django.core.management.base import BaseCommand -from django.db.models import Value -from django.db.models.functions import Replace - -from project.models import Project - -logger = logging.getLogger("management.commands") - - -class Command(BaseCommand): - help = "Use new referentiel in look-a-like Project field" - - def handle(self, *args, **options): - logger.info("Start - fix look_a_like field") - qs = Project.objects.all() - logger.info(f"Project to be fixed: {qs.count()}") - qs.update(look_a_like=Replace("look_a_like", Value("COMMUNE"), Value("COMM"))) - logger.info("End - fix look_a_like field") diff --git a/project/management/commands/generate_cover.py b/project/management/commands/generate_cover.py deleted file mode 100644 index b1b622b05..000000000 --- a/project/management/commands/generate_cover.py +++ /dev/null @@ -1,26 +0,0 @@ -import logging -from time import sleep - -from django.core.management.base import BaseCommand -from django.db.models import Q - -from project.models import Project -from project.tasks import generate_cover_image - -logger = logging.getLogger("management.commands") - - -class Command(BaseCommand): - 
help = "Generate cover image for all previous diagnostics" - - def handle(self, *args, **options): - logger.info("Start generate covers") - qs = Project.objects.filter(Q(cover_image="") | Q(cover_image=None)) - total = qs.count() - logger.info(f"To be processed: {total}") - for i, diag in enumerate(qs): - if diag.combined_emprise: - generate_cover_image.delay(diag.id) - sleep(5) - logger.info(f"{diag.id} - {100 * i / total:.0f}%") - logger.info("End generate covers") diff --git a/project/management/commands/get_image.py b/project/management/commands/get_image.py deleted file mode 100644 index 014502834..000000000 --- a/project/management/commands/get_image.py +++ /dev/null @@ -1,21 +0,0 @@ -import base64 - -from django.core.management.base import BaseCommand - -from project.charts import DeterminantPieChart -from project.models import Project - - -class Command(BaseCommand): - help = "Test get image from highchart" - - def handle(self, *args, **options): - project = Project.objects.get(pk=8) - chart = DeterminantPieChart(project) - b64_content = chart.request_b64_image_from_server() - with open("test_image.png", "wb") as f: - f.write(base64.decodebytes(b64_content)) - # fd, img_path = tempfile.mkstemp(suffix=".png", text=False) - # os.write(fd, base64.decodebytes(b64_content)) - # os.close(fd) - # return img_path diff --git a/project/management/commands/mep_53.py b/project/management/commands/mep_53.py deleted file mode 100644 index 8c3ee6ccf..000000000 --- a/project/management/commands/mep_53.py +++ /dev/null @@ -1,15 +0,0 @@ -import logging - -from django.core.management import call_command -from django.core.management.base import BaseCommand - -logger = logging.getLogger("management.commands") - - -class Command(BaseCommand): - help = "mep_53" - - def handle(self, *args, **options): - logger.info("Start mep_53") - call_command("loaddata", "diagnostic_word/word_template_fixture.json") - logger.info("End mep_53") diff --git 
a/public_data/management/commands/correct_label_couv_usage.py b/public_data/management/commands/correct_label_couv_usage.py deleted file mode 100644 index 1e8d70204..000000000 --- a/public_data/management/commands/correct_label_couv_usage.py +++ /dev/null @@ -1,35 +0,0 @@ -import logging -import re - -from django.core.management.base import BaseCommand - -from public_data.models import CouvertureSol, UsageSol - -logger = logging.getLogger("management.commands") - - -def build_short_label(label): - label_short = re.sub(r"\(.*\)", "", label).strip() - - if len(label_short) < 30: - return label_short - else: - return f"{label_short[:30]}..." - - -class Command(BaseCommand): - help = "Build short label" - - def handle(self, *args, **options): - logger.info("Start build short label for couverture and usage") - - # add all keys with None - for couv in CouvertureSol.objects.all(): - couv.label_short = build_short_label(couv.label) - couv.save() - - for usage in UsageSol.objects.all(): - usage.label_short = build_short_label(usage.label) - usage.save() - - logger.info("End") diff --git a/public_data/management/commands/load_insee.py b/public_data/management/commands/load_insee.py deleted file mode 100644 index 1a1a4d54c..000000000 --- a/public_data/management/commands/load_insee.py +++ /dev/null @@ -1,86 +0,0 @@ -import logging - -from django.core.management.base import BaseCommand -from openpyxl import load_workbook - -from public_data.models import Commune, CommunePop -from public_data.models.mixins import TruncateTableMixin -from public_data.storages import DataStorage - -logger = logging.getLogger("management.commands") - - -class TruncateComPop(TruncateTableMixin, CommunePop): - class Meta: - proxy = True - - -class Command(BaseCommand): - help = "Charge les données de l'INSEE dans la BDD" - - def handle(self, *args, **options): - logger.info("Start loading INSEE data") - self.upload_pop() - logger.info("End loading INSEE data") - - def get(self, remote_file_path, 
headers): - logger.info(f"file={remote_file_path}") - with DataStorage().open(remote_file_path) as file_stream: - wb = load_workbook(file_stream, data_only=True) - ws = wb.active - return {r[0]: dict(zip(headers, r)) for r in ws.iter_rows(min_row=2, max_col=len(headers), values_only=True)} - - def get_pop_data(self): - remote_file_path = "base-pop-historiques-1876-2019.xlsx" - headers = ["CODGEO", "LIBGEO"] + [f"P{year}" for year in range(2019, 2005, -1)] - return self.get(remote_file_path, headers) - - def get_household_data(self): - remote_file_path = "base-cc-coupl-fam-men-2018-lite.xlsx" - headers = ["CODGEO", "LIBGEO"] + [f"M{y}" for y in range(2018, 2007, -1)] - return self.get(remote_file_path, headers) - - def get_data(self): - pop = self.get_pop_data() - household = self.get_household_data() - for insee, data in pop.items(): - data.update(household[str(insee)]) - return list(pop.values()) - - def upload_pop(self): - logger.info("Load population and household from Excel file on S3") - logger.info("Delete previous data and reset id counter") - TruncateComPop.truncate() - data = self.get_data() - logger.info("Begin looping on Excel rows") - todo = [] - commune_list = {city.insee: city for city in Commune.objects.all()} - for row in data: - if not row["CODGEO"] in commune_list: - continue - - def diff(prefix, year): - try: - current = row.get(f"{prefix}{year}", None) # noqa: B023 - previous = row.get(f"{prefix}{year-1}", None) # noqa: B023 - return current - previous - except (TypeError, ValueError): - return None - - todo += [ - CommunePop( - city=commune_list[row["CODGEO"]], - year=y, - pop=row.get(f"P{y}", None), - pop_change=diff("P", y), - household=row.get(f"M{y}", None), - household_change=diff("M", y), - ) - for y in range(2019, 2005, -1) - ] - if len(todo) >= 10000: - logger.info(f"Save to bdd, INSEE so far {row['CODGEO']}") - CommunePop.objects.bulk_create(todo) - todo = [] - if todo: - CommunePop.objects.bulk_create(todo) diff --git 
a/public_data/management/commands/load_usage_couv.py b/public_data/management/commands/load_usage_couv.py deleted file mode 100644 index f9bcd53a6..000000000 --- a/public_data/management/commands/load_usage_couv.py +++ /dev/null @@ -1,238 +0,0 @@ -import logging -import re - -from django.core.management.base import BaseCommand - -from public_data.models import CouvertureSol, UsageSol - -logger = logging.getLogger("management.commands") - - -DATA_COUV = [ - [24, "1", "Sans végétation", "Sans végétation", "#ff377a", None], - [25, "1.1", "Surfaces anthropisées", "Surfaces anthropisées", "#ff377a", 24], - [26, "1.1.1", "Zones imperméables", "Zones imperméables", "#ff377a", 25], - [27, "1.1.1.1", "Zones bâties", "Zones bâties", "#ff377a", 26], - [ - 28, - "1.1.1.2", - "Zones non bâties (Routes, places, parking…)", - "Zones non bâties", - "#ff9191", - 26, - ], - [29, "1.1.2", "Zones perméables", "Zones perméables", "#ff9", 25], - [ - 30, - "1.1.2.1", - "Zones à matériaux minéraux", - "Zones à matériaux minéraux", - "#ff9", - 29, - ], - [ - 31, - "1.1.2.2", - "Zones à autres matériaux composites", - "Zones à autres matériaux compo...", - "#a64d00", - 29, - ], - [32, "1.2", "Surfaces naturelles", "Surfaces naturelles", "#ccc", 24], - [ - 33, - "1.2.1", - "Sols nus (Sable, pierres meubles, rochers saillants…)", - "Sols nus", - "#ccc", - 32, - ], - [ - 34, - "1.2.2", - "Surfaces d'eau (Eau continentale et maritime)", - "Surfaces d'eau", - "#00ccf2", - 32, - ], - [35, "1.2.3", "Névés et glaciers", "Névés et glaciers", "#a6e6cc", 32], - [36, "2", "Avec végétation", "Avec végétation", "#80ff00", None], - [37, "2.1", "Végétation ligneuse", "Végétation ligneuse", "#80ff00", 36], - [38, "2.1.1", "Formations arborées", "Formations arborées", "#80be00", 37], - [39, "2.1.1.1", "Peuplement de feuillus", "Peuplement de feuillus", "#80ff00", 38], - [ - 40, - "2.1.1.2", - "Peuplement de conifères", - "Peuplement de conifères", - "#00a600", - 38, - ], - [41, "2.1.1.3", "Peuplement mixte", 
"Peuplement mixte", "#80be00", 38], - [ - 42, - "2.1.2", - ( - "Formations arbustives et sous-arbrisseaux (Landes basses, formations " - "arbustives, formations arbustives organisées…)" - ), - "Formations arbustives", - "#a6ff80", - 37, - ], - [ - 43, - "2.1.3", - "Autres formations ligneuses (Vignes et autres lianes)", - "Autres formations ligneuses", - "#e68000", - 37, - ], - [44, "2.2", "Végétation non ligneuse", "Végétation non ligneuse", "#ccf24d", 36], - [ - 45, - "2.2.1", - "Formations herbacées (Pelouses et prairies, terres arables, roselières…)", - "Formations herbacées", - "#ccf24d", - 44, - ], - [ - 46, - "2.2.2", - "Autres formations non ligneuses (Lichen, mousse, bananiers, bambous...)", - "Autres formations non ligneuses", - "#cfc", - 44, - ], - # [47, "2.2.1.1", "Prairies", "Prairies", "#ccf24d", 45], - # [48, "2.2.1.2", "Pelouses herbes rases", "Pelouses", "#ccf24d", 45], - # [49, "2.2.1.4", "Terres arables", "Terres arables", "#ccf24d", 45], - # [50, "2.2.1.5", "Autres formations herbacées", "Autres herbacées", "#ccf24d", 45], - # [ - # 51, - # "2.2.1.3", # Pas certain qu'elle existe cette couverture, demander doc. 
- # "Formations herbacées inconnues", - # "Formations herbacées inconnues", - # "#ccf24d", - # 45, - # ], - # [52, "2.1.3.1", "Vignes", "Vignes", "#e68000", 43], - # [53, "2.1.3.2", "Autres lianes", "Autres lianes", "#e68000", 43], -] -DATA_USAGE = [ - [20, "1", "Production primaire", "Production primaire", "green", None], - [21, "1.1", "Agriculture", "Agriculture", "#ffffa8", 20], - [22, "1.2", "Sylviculture", "Sylviculture", "green", 20], - [23, "1.3", "Activités d'extraction", "Activités d'extraction", "#a600cc", 20], - [24, "1.4", "Pêche et aquaculture", "Pêche et aquaculture", "#009", 20], - [25, "1.5", "Autre", "Autre", "#963", 20], - [26, "2", "Secondaire", "Secondaire", "#e6004d", None], - [ - 27, - "235", - "Production secondaire, tertiaire et usage résidentiel", - "Production secondaire, tertiai...", - "#e6004d", - None, - ], - [28, "3", "Tertiaire", "Tertiaire", "#e6004d", None], - [ - 29, - "4", - "Réseaux de transport logistiques et infrastructures", - "Réseaux de transport logistiqu...", - "#c00", - None, - ], - [30, "4.1", "Réseaux de transport", "Réseaux de transport", "#c00", 29], - [ - 31, - "4.2", - "Services de logistique et de stockage", - "Services de logistique et de s...", - "red", - 29, - ], - [ - 32, - "4.3", - "Réseaux d'utilité publique", - "Réseaux d'utilité publique", - "#ff4b00", - 29, - ], - [33, "5", "Résidentiel", "Résidentiel", "#e6004d", None], - [34, "6", "Autre usage", "Autre usage", "#fc0", None], - [35, "6.1", "Zones en transition", "Zones en transition", "#ff4dff", 34], - [36, "6.2", "Zones abandonnées", "Zones abandonnées", "#404040", 34], - [37, "6.3", "Sans usage", "Sans usage", "#f0f028", 34], - [38, "6.6", "Usage Inconnu", "Usage Inconnu", "#fc0", 34], - [39, "4.1.1", "Routier", "Routier", "#c00", 30], - [40, "4.1.2", "Ferré", "Ferré", "#5a5a5a", 30], - [41, "4.1.3", "Aérien", "Aérien", "#e6cce6", 30], - [42, "4.1.4", "Eau", "Eau", "#06f", 30], - [ - 43, - "4.1.5", - "Autres réseaux de transport", - "Autres réseaux de 
transport", - "#603", - 30, - ], - # [ - # 44, - # "1.1.3", - # "Surface agricole utilisée", - # "Surface agricole utilisée", - # "#ffffa8", - # 21, - # ], - # [45, "1.1.4", "Jachère", "Jachère", "#ffffa8", 21], - # [46, "1.2.1.2", "Peupleraie", "Peupleraie", "green", 22], -] - - -def build_short_label(label): - label_short = re.sub(r"\(.*\)", "", label).strip() - - if len(label_short) < 30: - return label_short - else: - return f"{label_short[:30]}..." - - -class Command(BaseCommand): - help = "Load Usage and Couverture referentials" - - def handle(self, *args, **options): - logger.info("Start uploading CSV usage and couverture") - logger.info("Process couverture") - self.load(DATA_COUV, CouvertureSol) - logger.info("Process usage") - self.load(DATA_USAGE, UsageSol) - logger.info("set is key") - self.set_is_key() - logger.info("End uploading CSV usage and couverture") - - def load(self, DATA, klass): - mapping_parent = dict() - for row in DATA: - try: - item = klass.objects.get(code=row[1]) - except klass.DoesNotExist: - item = klass(pk=row[0]) - item.code = row[1] - item.code_prefix = f"{klass.prefix}{row[1]}" - item.label = row[2] - item.label_short = row[3] - item.map_color = row[4] - if row[5]: - item.parent = mapping_parent[row[5]] - mapping_parent |= {row[0]: item} - item.save() - logger.debug("Done %s", item) - - def set_is_key(self): - CouvertureSol.objects.filter(code__in=["1.1", "1.2", "2.1", "2.2"]).update(is_key=True) - UsageSol.objects.filter(code__in=["1", "235", "2", "3", "4", "5", "6"]).update(is_key=True) diff --git a/public_data/management/commands/set_commune_color.py b/public_data/management/commands/set_commune_color.py deleted file mode 100644 index 3a9a3bbe6..000000000 --- a/public_data/management/commands/set_commune_color.py +++ /dev/null @@ -1,22 +0,0 @@ -import logging - -from django.core.management.base import BaseCommand - -from public_data.models import Commune -from utils.colors import get_blue_gradient - 
-logging.basicConfig(level=logging.INFO) - - -class Command(BaseCommand): - help = "This will reevaluate parent fields of all instances of Couverture and Usage" - - def handle(self, *args, **options): - logging.info("Set map color of all communes") - colours = get_blue_gradient(12)[::-1] - to_update = list() - for i, commune in enumerate(Commune.objects.all().order_by("name")): - commune.map_color = colours[i % 12] - to_update.append(commune) - Commune.objects.bulk_update(to_update, ["map_color"], batch_size=1000) - logging.info("End set map color") From 17e18c9bec118719fcd7c2f31d024ae5dc138462 Mon Sep 17 00:00:00 2001 From: "Alexis A." Date: Thu, 22 Aug 2024 16:14:46 +0200 Subject: [PATCH 23/99] feat(airflow): move code to include folder --- .../diff_ocsge_download_page_to_mattermost.py | 2 +- airflow/dags/ingest_admin_express.py | 4 ++-- airflow/dags/ingest_app_dependencies.py | 2 +- airflow/dags/ingest_gpu.py | 4 ++-- airflow/dags/ocsge.py | 16 ++++++++-------- airflow/dags/update_app.py | 2 +- airflow/{dependencies => include}/container.py | 0 airflow/{dependencies => include}/mattermost.py | 0 .../ocsge/delete_in_dw.py | 0 airflow/{dependencies => include}/ocsge/enums.py | 0 .../ocsge/normalization.py | 0 .../{dependencies => include}/ocsge/sources.json | 0 airflow/{ => include}/sql/sparte/.gitignore | 0 airflow/{ => include}/sql/sparte/README.md | 0 .../{ => include}/sql/sparte/analyses/.gitkeep | 0 airflow/{ => include}/sql/sparte/dbt_project.yml | 0 airflow/{ => include}/sql/sparte/macros/.gitkeep | 0 .../delete_from_this_where_field_not_in.sql | 0 .../sql/sparte/macros/is_artificial.sql | 0 .../sql/sparte/macros/is_impermeable.sql | 0 .../sql/sparte/models/admin_express/commune.sql | 0 .../sparte/models/admin_express/departement.sql | 0 .../sql/sparte/models/admin_express/schema.yml | 0 .../sql/sparte/models/app/app_commune.sql | 0 .../sql/sparte/models/app/app_couverturesol.sql | 0 .../models/app/app_couvertureusagematrix.sql | 0 
.../sql/sparte/models/app/app_departement.sql | 0 .../sql/sparte/models/app/app_epci.sql | 0 .../sql/sparte/models/app/app_region.sql | 0 .../sql/sparte/models/app/app_scot.sql | 0 .../sql/sparte/models/app/app_usagesol.sql | 0 .../sql/sparte/models/app/schema.yml | 0 .../models/gpu/for_app/for_app_zoneurba.sql | 0 .../sql/sparte/models/gpu/schema.yml | 0 .../sql/sparte/models/gpu/zonage_urbanisme.sql | 0 .../sql/sparte/models/ocsge/difference.sql | 0 .../ocsge/for_app/for_app_artifareazoneurba.sql | 0 .../ocsge/for_app/for_app_artificialarea.sql | 0 .../models/ocsge/for_app/for_app_commune.sql | 0 .../models/ocsge/for_app/for_app_communediff.sql | 0 .../models/ocsge/for_app/for_app_communesol.sql | 0 .../models/ocsge/for_app/for_app_departement.sql | 0 .../models/ocsge/for_app/for_app_ocsge.sql | 0 .../models/ocsge/for_app/for_app_ocsgediff.sql | 0 .../ocsge/for_app/for_app_zoneconstruite.sql | 0 .../ocsge/intersected/artificial_commune.sql | 0 .../ocsge/intersected/difference_commune.sql | 0 .../intersected/occupation_du_sol_commune.sql | 0 .../occupation_du_sol_zonage_urbanisme.sql | 0 .../sparte/models/ocsge/occupation_du_sol.sql | 0 .../sql/sparte/models/ocsge/schema.yml | 0 .../sql/sparte/models/ocsge/zone_construite.sql | 0 .../{ => include}/sql/sparte/package-lock.yml | 0 airflow/{ => include}/sql/sparte/packages.yml | 0 airflow/{ => include}/sql/sparte/seeds/.gitkeep | 0 .../{ => include}/sql/sparte/snapshots/.gitkeep | 0 .../sql/sparte/tests/generic/is_valid_geom.sql | 0 .../tests/generic/is_valid_geom_warning.sql | 0 airflow/{dependencies => include}/utils.py | 0 59 files changed, 15 insertions(+), 15 deletions(-) rename airflow/{dependencies => include}/container.py (100%) rename airflow/{dependencies => include}/mattermost.py (100%) rename airflow/{dependencies => include}/ocsge/delete_in_dw.py (100%) rename airflow/{dependencies => include}/ocsge/enums.py (100%) rename airflow/{dependencies => include}/ocsge/normalization.py (100%) rename 
airflow/{dependencies => include}/ocsge/sources.json (100%) rename airflow/{ => include}/sql/sparte/.gitignore (100%) rename airflow/{ => include}/sql/sparte/README.md (100%) rename airflow/{ => include}/sql/sparte/analyses/.gitkeep (100%) rename airflow/{ => include}/sql/sparte/dbt_project.yml (100%) rename airflow/{ => include}/sql/sparte/macros/.gitkeep (100%) rename airflow/{ => include}/sql/sparte/macros/delete_from_this_where_field_not_in.sql (100%) rename airflow/{ => include}/sql/sparte/macros/is_artificial.sql (100%) rename airflow/{ => include}/sql/sparte/macros/is_impermeable.sql (100%) rename airflow/{ => include}/sql/sparte/models/admin_express/commune.sql (100%) rename airflow/{ => include}/sql/sparte/models/admin_express/departement.sql (100%) rename airflow/{ => include}/sql/sparte/models/admin_express/schema.yml (100%) rename airflow/{ => include}/sql/sparte/models/app/app_commune.sql (100%) rename airflow/{ => include}/sql/sparte/models/app/app_couverturesol.sql (100%) rename airflow/{ => include}/sql/sparte/models/app/app_couvertureusagematrix.sql (100%) rename airflow/{ => include}/sql/sparte/models/app/app_departement.sql (100%) rename airflow/{ => include}/sql/sparte/models/app/app_epci.sql (100%) rename airflow/{ => include}/sql/sparte/models/app/app_region.sql (100%) rename airflow/{ => include}/sql/sparte/models/app/app_scot.sql (100%) rename airflow/{ => include}/sql/sparte/models/app/app_usagesol.sql (100%) rename airflow/{ => include}/sql/sparte/models/app/schema.yml (100%) rename airflow/{ => include}/sql/sparte/models/gpu/for_app/for_app_zoneurba.sql (100%) rename airflow/{ => include}/sql/sparte/models/gpu/schema.yml (100%) rename airflow/{ => include}/sql/sparte/models/gpu/zonage_urbanisme.sql (100%) rename airflow/{ => include}/sql/sparte/models/ocsge/difference.sql (100%) rename airflow/{ => include}/sql/sparte/models/ocsge/for_app/for_app_artifareazoneurba.sql (100%) rename airflow/{ => 
include}/sql/sparte/models/ocsge/for_app/for_app_artificialarea.sql (100%) rename airflow/{ => include}/sql/sparte/models/ocsge/for_app/for_app_commune.sql (100%) rename airflow/{ => include}/sql/sparte/models/ocsge/for_app/for_app_communediff.sql (100%) rename airflow/{ => include}/sql/sparte/models/ocsge/for_app/for_app_communesol.sql (100%) rename airflow/{ => include}/sql/sparte/models/ocsge/for_app/for_app_departement.sql (100%) rename airflow/{ => include}/sql/sparte/models/ocsge/for_app/for_app_ocsge.sql (100%) rename airflow/{ => include}/sql/sparte/models/ocsge/for_app/for_app_ocsgediff.sql (100%) rename airflow/{ => include}/sql/sparte/models/ocsge/for_app/for_app_zoneconstruite.sql (100%) rename airflow/{ => include}/sql/sparte/models/ocsge/intersected/artificial_commune.sql (100%) rename airflow/{ => include}/sql/sparte/models/ocsge/intersected/difference_commune.sql (100%) rename airflow/{ => include}/sql/sparte/models/ocsge/intersected/occupation_du_sol_commune.sql (100%) rename airflow/{ => include}/sql/sparte/models/ocsge/intersected/occupation_du_sol_zonage_urbanisme.sql (100%) rename airflow/{ => include}/sql/sparte/models/ocsge/occupation_du_sol.sql (100%) rename airflow/{ => include}/sql/sparte/models/ocsge/schema.yml (100%) rename airflow/{ => include}/sql/sparte/models/ocsge/zone_construite.sql (100%) rename airflow/{ => include}/sql/sparte/package-lock.yml (100%) rename airflow/{ => include}/sql/sparte/packages.yml (100%) rename airflow/{ => include}/sql/sparte/seeds/.gitkeep (100%) rename airflow/{ => include}/sql/sparte/snapshots/.gitkeep (100%) rename airflow/{ => include}/sql/sparte/tests/generic/is_valid_geom.sql (100%) rename airflow/{ => include}/sql/sparte/tests/generic/is_valid_geom_warning.sql (100%) rename airflow/{dependencies => include}/utils.py (100%) diff --git a/airflow/dags/diff_ocsge_download_page_to_mattermost.py b/airflow/dags/diff_ocsge_download_page_to_mattermost.py index a46cc297c..ef44033c9 100644 --- 
a/airflow/dags/diff_ocsge_download_page_to_mattermost.py +++ b/airflow/dags/diff_ocsge_download_page_to_mattermost.py @@ -3,7 +3,7 @@ import requests from airflow.decorators import dag, task from bs4 import BeautifulSoup -from dependencies.container import Container +from include.container import Container from pendulum import datetime diff --git a/airflow/dags/ingest_admin_express.py b/airflow/dags/ingest_admin_express.py index 184e499d6..75fcdb5b8 100644 --- a/airflow/dags/ingest_admin_express.py +++ b/airflow/dags/ingest_admin_express.py @@ -4,7 +4,7 @@ import py7zr from airflow.decorators import dag, task -from dependencies.container import Container +from include.container import Container from pendulum import datetime @@ -46,7 +46,7 @@ def ingest() -> str: @task.bash(retries=0, trigger_rule="all_success") def dbt_run(**context): - return 'cd "${AIRFLOW_HOME}/sql/sparte" && dbt run -s admin_express' + return 'cd "${AIRFLOW_HOME}/include/sql/sparte" && dbt run -s admin_express' download_admin_express() >> ingest() >> dbt_run() diff --git a/airflow/dags/ingest_app_dependencies.py b/airflow/dags/ingest_app_dependencies.py index c002d95f7..710284a98 100644 --- a/airflow/dags/ingest_app_dependencies.py +++ b/airflow/dags/ingest_app_dependencies.py @@ -1,6 +1,6 @@ from airflow.decorators import dag, task -from dependencies.container import Container from gdaltools import ogr2ogr +from include.container import Container from pendulum import datetime diff --git a/airflow/dags/ingest_gpu.py b/airflow/dags/ingest_gpu.py index 0f963663a..2f65e6cfb 100644 --- a/airflow/dags/ingest_gpu.py +++ b/airflow/dags/ingest_gpu.py @@ -1,7 +1,7 @@ from airflow.decorators import dag, task from airflow.operators.bash import BashOperator -from dependencies.container import Container -from dependencies.utils import multiline_string_to_single_line +from include.container import Container +from include.utils import multiline_string_to_single_line from pendulum import datetime diff --git 
a/airflow/dags/ocsge.py b/airflow/dags/ocsge.py index f91c20c57..02859b834 100644 --- a/airflow/dags/ocsge.py +++ b/airflow/dags/ocsge.py @@ -10,19 +10,19 @@ from airflow.decorators import dag, task from airflow.models.param import Param from airflow.operators.bash import BashOperator -from dependencies.container import Container -from dependencies.ocsge.delete_in_dw import ( +from include.container import Container +from include.ocsge.delete_in_dw import ( delete_difference_in_dw_sql, delete_occupation_du_sol_in_dw_sql, delete_zone_construite_in_dw_sql, ) -from dependencies.ocsge.enums import DatasetName, SourceName -from dependencies.ocsge.normalization import ( +from include.ocsge.enums import DatasetName, SourceName +from include.ocsge.normalization import ( ocsge_diff_normalization_sql, ocsge_occupation_du_sol_normalization_sql, ocsge_zone_construite_normalization_sql, ) -from dependencies.utils import multiline_string_to_single_line +from include.utils import multiline_string_to_single_line def get_paths_from_directory(directory: str) -> list[tuple[str, str]]: @@ -41,7 +41,7 @@ def get_paths_from_directory(directory: str) -> list[tuple[str, str]]: return paths -with open("dependencies/ocsge/sources.json", "r") as f: +with open("include/ocsge/sources.json", "r") as f: sources = json.load(f) vars = { @@ -259,7 +259,7 @@ def ingest_staging(path, **context) -> int: def db_test_ocsge_staging(**context): dataset = context["params"]["dataset"] dbt_select = " ".join([vars["dbt_selector_staging"] for vars in vars_dataset[dataset]]) - return 'cd "${AIRFLOW_HOME}/sql/sparte" && dbt test -s ' + dbt_select + return 'cd "${AIRFLOW_HOME}/include/sql/sparte" && dbt test -s ' + dbt_select @task.python def ingest_ocsge(path, **context) -> int: @@ -281,7 +281,7 @@ def ingest_ocsge(path, **context) -> int: def dbt_run_ocsge(**context): dataset = context["params"]["dataset"] dbt_select = " ".join([f'{vars["dbt_selector"]}+' for vars in vars_dataset[dataset]]) - return 'cd 
"${AIRFLOW_HOME}/sql/sparte" && dbt build -s ' + dbt_select + return 'cd "${AIRFLOW_HOME}/include/sql/sparte" && dbt build -s ' + dbt_select @task.python(trigger_rule="all_success") def delete_previously_loaded_data_in_dw(**context) -> dict: diff --git a/airflow/dags/update_app.py b/airflow/dags/update_app.py index 9e9ac68b8..a90a71187 100644 --- a/airflow/dags/update_app.py +++ b/airflow/dags/update_app.py @@ -1,7 +1,7 @@ from airflow.decorators import dag, task from airflow.models.param import Param -from dependencies.container import Container from gdaltools import ogr2ogr +from include.container import Container from pendulum import datetime diff --git a/airflow/dependencies/container.py b/airflow/include/container.py similarity index 100% rename from airflow/dependencies/container.py rename to airflow/include/container.py diff --git a/airflow/dependencies/mattermost.py b/airflow/include/mattermost.py similarity index 100% rename from airflow/dependencies/mattermost.py rename to airflow/include/mattermost.py diff --git a/airflow/dependencies/ocsge/delete_in_dw.py b/airflow/include/ocsge/delete_in_dw.py similarity index 100% rename from airflow/dependencies/ocsge/delete_in_dw.py rename to airflow/include/ocsge/delete_in_dw.py diff --git a/airflow/dependencies/ocsge/enums.py b/airflow/include/ocsge/enums.py similarity index 100% rename from airflow/dependencies/ocsge/enums.py rename to airflow/include/ocsge/enums.py diff --git a/airflow/dependencies/ocsge/normalization.py b/airflow/include/ocsge/normalization.py similarity index 100% rename from airflow/dependencies/ocsge/normalization.py rename to airflow/include/ocsge/normalization.py diff --git a/airflow/dependencies/ocsge/sources.json b/airflow/include/ocsge/sources.json similarity index 100% rename from airflow/dependencies/ocsge/sources.json rename to airflow/include/ocsge/sources.json diff --git a/airflow/sql/sparte/.gitignore b/airflow/include/sql/sparte/.gitignore similarity index 100% rename from 
airflow/sql/sparte/.gitignore rename to airflow/include/sql/sparte/.gitignore diff --git a/airflow/sql/sparte/README.md b/airflow/include/sql/sparte/README.md similarity index 100% rename from airflow/sql/sparte/README.md rename to airflow/include/sql/sparte/README.md diff --git a/airflow/sql/sparte/analyses/.gitkeep b/airflow/include/sql/sparte/analyses/.gitkeep similarity index 100% rename from airflow/sql/sparte/analyses/.gitkeep rename to airflow/include/sql/sparte/analyses/.gitkeep diff --git a/airflow/sql/sparte/dbt_project.yml b/airflow/include/sql/sparte/dbt_project.yml similarity index 100% rename from airflow/sql/sparte/dbt_project.yml rename to airflow/include/sql/sparte/dbt_project.yml diff --git a/airflow/sql/sparte/macros/.gitkeep b/airflow/include/sql/sparte/macros/.gitkeep similarity index 100% rename from airflow/sql/sparte/macros/.gitkeep rename to airflow/include/sql/sparte/macros/.gitkeep diff --git a/airflow/sql/sparte/macros/delete_from_this_where_field_not_in.sql b/airflow/include/sql/sparte/macros/delete_from_this_where_field_not_in.sql similarity index 100% rename from airflow/sql/sparte/macros/delete_from_this_where_field_not_in.sql rename to airflow/include/sql/sparte/macros/delete_from_this_where_field_not_in.sql diff --git a/airflow/sql/sparte/macros/is_artificial.sql b/airflow/include/sql/sparte/macros/is_artificial.sql similarity index 100% rename from airflow/sql/sparte/macros/is_artificial.sql rename to airflow/include/sql/sparte/macros/is_artificial.sql diff --git a/airflow/sql/sparte/macros/is_impermeable.sql b/airflow/include/sql/sparte/macros/is_impermeable.sql similarity index 100% rename from airflow/sql/sparte/macros/is_impermeable.sql rename to airflow/include/sql/sparte/macros/is_impermeable.sql diff --git a/airflow/sql/sparte/models/admin_express/commune.sql b/airflow/include/sql/sparte/models/admin_express/commune.sql similarity index 100% rename from airflow/sql/sparte/models/admin_express/commune.sql rename to 
airflow/include/sql/sparte/models/admin_express/commune.sql diff --git a/airflow/sql/sparte/models/admin_express/departement.sql b/airflow/include/sql/sparte/models/admin_express/departement.sql similarity index 100% rename from airflow/sql/sparte/models/admin_express/departement.sql rename to airflow/include/sql/sparte/models/admin_express/departement.sql diff --git a/airflow/sql/sparte/models/admin_express/schema.yml b/airflow/include/sql/sparte/models/admin_express/schema.yml similarity index 100% rename from airflow/sql/sparte/models/admin_express/schema.yml rename to airflow/include/sql/sparte/models/admin_express/schema.yml diff --git a/airflow/sql/sparte/models/app/app_commune.sql b/airflow/include/sql/sparte/models/app/app_commune.sql similarity index 100% rename from airflow/sql/sparte/models/app/app_commune.sql rename to airflow/include/sql/sparte/models/app/app_commune.sql diff --git a/airflow/sql/sparte/models/app/app_couverturesol.sql b/airflow/include/sql/sparte/models/app/app_couverturesol.sql similarity index 100% rename from airflow/sql/sparte/models/app/app_couverturesol.sql rename to airflow/include/sql/sparte/models/app/app_couverturesol.sql diff --git a/airflow/sql/sparte/models/app/app_couvertureusagematrix.sql b/airflow/include/sql/sparte/models/app/app_couvertureusagematrix.sql similarity index 100% rename from airflow/sql/sparte/models/app/app_couvertureusagematrix.sql rename to airflow/include/sql/sparte/models/app/app_couvertureusagematrix.sql diff --git a/airflow/sql/sparte/models/app/app_departement.sql b/airflow/include/sql/sparte/models/app/app_departement.sql similarity index 100% rename from airflow/sql/sparte/models/app/app_departement.sql rename to airflow/include/sql/sparte/models/app/app_departement.sql diff --git a/airflow/sql/sparte/models/app/app_epci.sql b/airflow/include/sql/sparte/models/app/app_epci.sql similarity index 100% rename from airflow/sql/sparte/models/app/app_epci.sql rename to 
airflow/include/sql/sparte/models/app/app_epci.sql diff --git a/airflow/sql/sparte/models/app/app_region.sql b/airflow/include/sql/sparte/models/app/app_region.sql similarity index 100% rename from airflow/sql/sparte/models/app/app_region.sql rename to airflow/include/sql/sparte/models/app/app_region.sql diff --git a/airflow/sql/sparte/models/app/app_scot.sql b/airflow/include/sql/sparte/models/app/app_scot.sql similarity index 100% rename from airflow/sql/sparte/models/app/app_scot.sql rename to airflow/include/sql/sparte/models/app/app_scot.sql diff --git a/airflow/sql/sparte/models/app/app_usagesol.sql b/airflow/include/sql/sparte/models/app/app_usagesol.sql similarity index 100% rename from airflow/sql/sparte/models/app/app_usagesol.sql rename to airflow/include/sql/sparte/models/app/app_usagesol.sql diff --git a/airflow/sql/sparte/models/app/schema.yml b/airflow/include/sql/sparte/models/app/schema.yml similarity index 100% rename from airflow/sql/sparte/models/app/schema.yml rename to airflow/include/sql/sparte/models/app/schema.yml diff --git a/airflow/sql/sparte/models/gpu/for_app/for_app_zoneurba.sql b/airflow/include/sql/sparte/models/gpu/for_app/for_app_zoneurba.sql similarity index 100% rename from airflow/sql/sparte/models/gpu/for_app/for_app_zoneurba.sql rename to airflow/include/sql/sparte/models/gpu/for_app/for_app_zoneurba.sql diff --git a/airflow/sql/sparte/models/gpu/schema.yml b/airflow/include/sql/sparte/models/gpu/schema.yml similarity index 100% rename from airflow/sql/sparte/models/gpu/schema.yml rename to airflow/include/sql/sparte/models/gpu/schema.yml diff --git a/airflow/sql/sparte/models/gpu/zonage_urbanisme.sql b/airflow/include/sql/sparte/models/gpu/zonage_urbanisme.sql similarity index 100% rename from airflow/sql/sparte/models/gpu/zonage_urbanisme.sql rename to airflow/include/sql/sparte/models/gpu/zonage_urbanisme.sql diff --git a/airflow/sql/sparte/models/ocsge/difference.sql 
b/airflow/include/sql/sparte/models/ocsge/difference.sql similarity index 100% rename from airflow/sql/sparte/models/ocsge/difference.sql rename to airflow/include/sql/sparte/models/ocsge/difference.sql diff --git a/airflow/sql/sparte/models/ocsge/for_app/for_app_artifareazoneurba.sql b/airflow/include/sql/sparte/models/ocsge/for_app/for_app_artifareazoneurba.sql similarity index 100% rename from airflow/sql/sparte/models/ocsge/for_app/for_app_artifareazoneurba.sql rename to airflow/include/sql/sparte/models/ocsge/for_app/for_app_artifareazoneurba.sql diff --git a/airflow/sql/sparte/models/ocsge/for_app/for_app_artificialarea.sql b/airflow/include/sql/sparte/models/ocsge/for_app/for_app_artificialarea.sql similarity index 100% rename from airflow/sql/sparte/models/ocsge/for_app/for_app_artificialarea.sql rename to airflow/include/sql/sparte/models/ocsge/for_app/for_app_artificialarea.sql diff --git a/airflow/sql/sparte/models/ocsge/for_app/for_app_commune.sql b/airflow/include/sql/sparte/models/ocsge/for_app/for_app_commune.sql similarity index 100% rename from airflow/sql/sparte/models/ocsge/for_app/for_app_commune.sql rename to airflow/include/sql/sparte/models/ocsge/for_app/for_app_commune.sql diff --git a/airflow/sql/sparte/models/ocsge/for_app/for_app_communediff.sql b/airflow/include/sql/sparte/models/ocsge/for_app/for_app_communediff.sql similarity index 100% rename from airflow/sql/sparte/models/ocsge/for_app/for_app_communediff.sql rename to airflow/include/sql/sparte/models/ocsge/for_app/for_app_communediff.sql diff --git a/airflow/sql/sparte/models/ocsge/for_app/for_app_communesol.sql b/airflow/include/sql/sparte/models/ocsge/for_app/for_app_communesol.sql similarity index 100% rename from airflow/sql/sparte/models/ocsge/for_app/for_app_communesol.sql rename to airflow/include/sql/sparte/models/ocsge/for_app/for_app_communesol.sql diff --git a/airflow/sql/sparte/models/ocsge/for_app/for_app_departement.sql 
b/airflow/include/sql/sparte/models/ocsge/for_app/for_app_departement.sql similarity index 100% rename from airflow/sql/sparte/models/ocsge/for_app/for_app_departement.sql rename to airflow/include/sql/sparte/models/ocsge/for_app/for_app_departement.sql diff --git a/airflow/sql/sparte/models/ocsge/for_app/for_app_ocsge.sql b/airflow/include/sql/sparte/models/ocsge/for_app/for_app_ocsge.sql similarity index 100% rename from airflow/sql/sparte/models/ocsge/for_app/for_app_ocsge.sql rename to airflow/include/sql/sparte/models/ocsge/for_app/for_app_ocsge.sql diff --git a/airflow/sql/sparte/models/ocsge/for_app/for_app_ocsgediff.sql b/airflow/include/sql/sparte/models/ocsge/for_app/for_app_ocsgediff.sql similarity index 100% rename from airflow/sql/sparte/models/ocsge/for_app/for_app_ocsgediff.sql rename to airflow/include/sql/sparte/models/ocsge/for_app/for_app_ocsgediff.sql diff --git a/airflow/sql/sparte/models/ocsge/for_app/for_app_zoneconstruite.sql b/airflow/include/sql/sparte/models/ocsge/for_app/for_app_zoneconstruite.sql similarity index 100% rename from airflow/sql/sparte/models/ocsge/for_app/for_app_zoneconstruite.sql rename to airflow/include/sql/sparte/models/ocsge/for_app/for_app_zoneconstruite.sql diff --git a/airflow/sql/sparte/models/ocsge/intersected/artificial_commune.sql b/airflow/include/sql/sparte/models/ocsge/intersected/artificial_commune.sql similarity index 100% rename from airflow/sql/sparte/models/ocsge/intersected/artificial_commune.sql rename to airflow/include/sql/sparte/models/ocsge/intersected/artificial_commune.sql diff --git a/airflow/sql/sparte/models/ocsge/intersected/difference_commune.sql b/airflow/include/sql/sparte/models/ocsge/intersected/difference_commune.sql similarity index 100% rename from airflow/sql/sparte/models/ocsge/intersected/difference_commune.sql rename to airflow/include/sql/sparte/models/ocsge/intersected/difference_commune.sql diff --git a/airflow/sql/sparte/models/ocsge/intersected/occupation_du_sol_commune.sql 
b/airflow/include/sql/sparte/models/ocsge/intersected/occupation_du_sol_commune.sql similarity index 100% rename from airflow/sql/sparte/models/ocsge/intersected/occupation_du_sol_commune.sql rename to airflow/include/sql/sparte/models/ocsge/intersected/occupation_du_sol_commune.sql diff --git a/airflow/sql/sparte/models/ocsge/intersected/occupation_du_sol_zonage_urbanisme.sql b/airflow/include/sql/sparte/models/ocsge/intersected/occupation_du_sol_zonage_urbanisme.sql similarity index 100% rename from airflow/sql/sparte/models/ocsge/intersected/occupation_du_sol_zonage_urbanisme.sql rename to airflow/include/sql/sparte/models/ocsge/intersected/occupation_du_sol_zonage_urbanisme.sql diff --git a/airflow/sql/sparte/models/ocsge/occupation_du_sol.sql b/airflow/include/sql/sparte/models/ocsge/occupation_du_sol.sql similarity index 100% rename from airflow/sql/sparte/models/ocsge/occupation_du_sol.sql rename to airflow/include/sql/sparte/models/ocsge/occupation_du_sol.sql diff --git a/airflow/sql/sparte/models/ocsge/schema.yml b/airflow/include/sql/sparte/models/ocsge/schema.yml similarity index 100% rename from airflow/sql/sparte/models/ocsge/schema.yml rename to airflow/include/sql/sparte/models/ocsge/schema.yml diff --git a/airflow/sql/sparte/models/ocsge/zone_construite.sql b/airflow/include/sql/sparte/models/ocsge/zone_construite.sql similarity index 100% rename from airflow/sql/sparte/models/ocsge/zone_construite.sql rename to airflow/include/sql/sparte/models/ocsge/zone_construite.sql diff --git a/airflow/sql/sparte/package-lock.yml b/airflow/include/sql/sparte/package-lock.yml similarity index 100% rename from airflow/sql/sparte/package-lock.yml rename to airflow/include/sql/sparte/package-lock.yml diff --git a/airflow/sql/sparte/packages.yml b/airflow/include/sql/sparte/packages.yml similarity index 100% rename from airflow/sql/sparte/packages.yml rename to airflow/include/sql/sparte/packages.yml diff --git a/airflow/sql/sparte/seeds/.gitkeep 
b/airflow/include/sql/sparte/seeds/.gitkeep similarity index 100% rename from airflow/sql/sparte/seeds/.gitkeep rename to airflow/include/sql/sparte/seeds/.gitkeep diff --git a/airflow/sql/sparte/snapshots/.gitkeep b/airflow/include/sql/sparte/snapshots/.gitkeep similarity index 100% rename from airflow/sql/sparte/snapshots/.gitkeep rename to airflow/include/sql/sparte/snapshots/.gitkeep diff --git a/airflow/sql/sparte/tests/generic/is_valid_geom.sql b/airflow/include/sql/sparte/tests/generic/is_valid_geom.sql similarity index 100% rename from airflow/sql/sparte/tests/generic/is_valid_geom.sql rename to airflow/include/sql/sparte/tests/generic/is_valid_geom.sql diff --git a/airflow/sql/sparte/tests/generic/is_valid_geom_warning.sql b/airflow/include/sql/sparte/tests/generic/is_valid_geom_warning.sql similarity index 100% rename from airflow/sql/sparte/tests/generic/is_valid_geom_warning.sql rename to airflow/include/sql/sparte/tests/generic/is_valid_geom_warning.sql diff --git a/airflow/dependencies/utils.py b/airflow/include/utils.py similarity index 100% rename from airflow/dependencies/utils.py rename to airflow/include/utils.py From be1bc2d139c79cef555416b843fc8469d708f024 Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Mon, 26 Aug 2024 10:33:04 +0200 Subject: [PATCH 24/99] feat(airflow): make land models managed=False --- .../0188_alter_artifareazoneurba_options_and_more.py | 4 ---- public_data/models/administration/Commune.py | 3 +++ public_data/models/administration/Departement.py | 1 + public_data/models/administration/Epci.py | 1 + public_data/models/administration/Region.py | 1 + public_data/models/administration/Scot.py | 3 +++ 6 files changed, 9 insertions(+), 4 deletions(-) diff --git a/public_data/migrations/0188_alter_artifareazoneurba_options_and_more.py b/public_data/migrations/0188_alter_artifareazoneurba_options_and_more.py index 7beceb998..da6200f9c 100644 --- a/public_data/migrations/0188_alter_artifareazoneurba_options_and_more.py +++ b/public_data/migrations/0188_alter_artifareazoneurba_options_and_more.py @@ -61,8 +61,4 @@ class Migration(migrations.Migration): name="zoneurba", options={"managed": False}, ), - migrations.RemoveField( - model_name="commune", - name="map_color", - ), ] diff --git a/public_data/models/administration/Commune.py b/public_data/models/administration/Commune.py index 068881bba..a2607029b 100644 --- a/public_data/models/administration/Commune.py +++ b/public_data/models/administration/Commune.py @@ -12,6 +12,9 @@ class Commune(DataColorationMixin, LandMixin, GetDataFromCeremaMixin, models.Model): + class Meta: + managed = False + insee = models.CharField("Code INSEE", max_length=7) name = models.CharField("Nom", max_length=50) departement = models.ForeignKey("Departement", on_delete=models.PROTECT) diff --git a/public_data/models/administration/Departement.py b/public_data/models/administration/Departement.py index fa6979eca..43bedf7b4 100644 --- a/public_data/models/administration/Departement.py +++ b/public_data/models/administration/Departement.py @@ -13,6 +13,7 @@ class Departement(LandMixin, GetDataFromCeremaMixin, models.Model): class Meta: verbose_name = "Département" + managed = False source_id = 
models.CharField("Identifiant source", max_length=50) region = models.ForeignKey("Region", on_delete=models.CASCADE) diff --git a/public_data/models/administration/Epci.py b/public_data/models/administration/Epci.py index e13413bc4..dc526af39 100644 --- a/public_data/models/administration/Epci.py +++ b/public_data/models/administration/Epci.py @@ -12,6 +12,7 @@ class Epci(LandMixin, GetDataFromCeremaMixin, models.Model): class Meta: verbose_name = "EPCI" + managed = False source_id = models.CharField("Identifiant source", max_length=50) name = models.CharField("Nom", max_length=70) diff --git a/public_data/models/administration/Region.py b/public_data/models/administration/Region.py index 179c345b7..8874d19e0 100644 --- a/public_data/models/administration/Region.py +++ b/public_data/models/administration/Region.py @@ -13,6 +13,7 @@ class Region(LandMixin, GetDataFromCeremaMixin, models.Model): class Meta: verbose_name = "Région" + managed = False source_id = models.CharField("Identifiant source", max_length=50) name = models.CharField("Nom", max_length=50) diff --git a/public_data/models/administration/Scot.py b/public_data/models/administration/Scot.py index 9ae4e5134..1a5b47490 100644 --- a/public_data/models/administration/Scot.py +++ b/public_data/models/administration/Scot.py @@ -10,6 +10,9 @@ class Scot(LandMixin, GetDataFromCeremaMixin, models.Model): + class Meta: + managed = False + name = models.CharField("Nom", max_length=250) mpoly = models.MultiPolygonField(srid=4326, null=True, blank=True) srid_source = models.IntegerField( From de2c61fcc7dc3637cf30584300f9a223f8b75946 Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Tue, 27 Aug 2024 11:23:04 +0200 Subject: [PATCH 25/99] feat(dbt): add many_to_many models for lands --- airflow/dags/ingest_app_dependencies.py | 60 ++++++--------- airflow/dags/update_app.py | 74 +++++-------------- .../models/app/app_epci_departements.sql | 13 ++++ .../models/app/app_scot_departements.sql | 13 ++++ .../sparte/models/app/app_scot_regions.sql | 13 ++++ .../include/sql/sparte/models/app/schema.yml | 6 ++ 6 files changed, 85 insertions(+), 94 deletions(-) create mode 100644 airflow/include/sql/sparte/models/app/app_epci_departements.sql create mode 100644 airflow/include/sql/sparte/models/app/app_scot_departements.sql create mode 100644 airflow/include/sql/sparte/models/app/app_scot_regions.sql diff --git a/airflow/dags/ingest_app_dependencies.py b/airflow/dags/ingest_app_dependencies.py index 710284a98..66cbe6b32 100644 --- a/airflow/dags/ingest_app_dependencies.py +++ b/airflow/dags/ingest_app_dependencies.py @@ -14,6 +14,21 @@ def ingest_table(source_table_name: str, destination_table_name: str): ogr.execute() +mapping = { + "public_data_region": "app_region", + "public_data_departement": "app_departement", + "public_data_commune": "app_commune", + "public_data_epci": "app_epci", + "public_data_scot": "app_scot", + "public_data_couverturesol": "app_couverturesol", + "public_data_usagesol": "app_usagesol", + "public_data_couvertureusagematrix": "app_couvertureusagematrix", + "public_data_epci_departements": "app_epci_departements", + "public_data_scot_departements": "app_scot_departements", + "public_data_scot_regions": "app_scot_regions", +} + + @dag( start_date=datetime(2024, 1, 1), schedule="@once", @@ -22,48 +37,15 @@ def ingest_table(source_table_name: str, destination_table_name: str): tags=["App"], ) def ingest_app_dependencies(): - @task.python - def ingest_region(): - ingest_table(source_table_name="public_data_region", destination_table_name="app_region") - - @task.python - def ingest_departement(): - 
ingest_table(source_table_name="public_data_departement", destination_table_name="app_departement") - - @task.python - def ingest_commune(): - ingest_table(source_table_name="public_data_commune", destination_table_name="app_commune") - - @task.python - def ingest_epci(): - ingest_table(source_table_name="public_data_epci", destination_table_name="app_epci") - - @task.python - def ingest_scot(): - ingest_table(source_table_name="public_data_scot", destination_table_name="app_scot") - - @task.python - def ingest_couverturesol(): - ingest_table(source_table_name="public_data_couverturesol", destination_table_name="app_couverturesol") - - @task.python - def ingest_usagesol(): - ingest_table(source_table_name="public_data_usagesol", destination_table_name="app_usagesol") + for source_table_name, destination_table_name in mapping.items(): - @task.python - def ingest_couvertureusagematrix(): - ingest_table( - source_table_name="public_data_couvertureusagematrix", destination_table_name="app_couvertureusagematrix" + @task.python( + task_id=f"ingest_{destination_table_name}", ) + def ingest(): + ingest_table(source_table_name, destination_table_name) - ingest_region() - ingest_departement() - ingest_commune() - ingest_epci() - ingest_scot() - ingest_couverturesol() - ingest_usagesol() - ingest_couvertureusagematrix() + ingest() ingest_app_dependencies() diff --git a/airflow/dags/update_app.py b/airflow/dags/update_app.py index a90a71187..72c714b4b 100644 --- a/airflow/dags/update_app.py +++ b/airflow/dags/update_app.py @@ -19,54 +19,23 @@ def copy_table_from_dw_to_app( ogr.execute() -mapping = [ - { - "from_table": "public_ocsge.for_app_ocsge", - "to_table": "public.public_data_ocsge", - }, - { - "from_table": "public_ocsge.for_app_artificialarea", - "to_table": "public.public_data_artificialarea", - }, - { - "from_table": "public_ocsge.for_app_artifareazoneurba", - "to_table": "public.public_data_artifareazoneurba", - }, - { - "from_table": "public_ocsge.for_app_commune", 
- "to_table": "public.public_data_commune", - }, - { - "from_table": "public_ocsge.for_app_departement", - "to_table": "public.public_data_departement", - }, - { - "from_table": "public_ocsge.for_app_communesol", - "to_table": "public.public_data_communesol", - }, - { - "from_table": "public_ocsge.for_app_ocsgediff", - "to_table": "public.public_data_ocsgediff", - }, - { - "from_table": "public_ocsge.for_app_communediff", - "to_table": "public.public_data_communediff", - }, - { - "from_table": "public_gpu.for_app_zoneurba", - "to_table": "public.public_data_zoneurba", - }, - { - "from_table": "public_ocsge.for_app_zoneconstruite", - "to_table": "public.public_data_zoneconstruite", - }, -] +mapping = { + "public_ocsge.for_app_ocsge": "public.public_data_ocsge", + "public_ocsge.for_app_artificialarea": "public.public_data_artificialarea", + "public_ocsge.for_app_artifareazoneurba": "public.public_data_artifareazoneurba", + "public_ocsge.for_app_commune": "public.public_data_commune", + "public_ocsge.for_app_departement": "public.public_data_departement", + "public_ocsge.for_app_communesol": "public.public_data_communesol", + "public_ocsge.for_app_ocsgediff": "public.public_data_ocsgediff", + "public_ocsge.for_app_communediff": "public.public_data_communediff", + "public_gpu.for_app_zoneurba": "public.public_data_zoneurba", + "public_ocsge.for_app_zoneconstruite": "public.public_data_zoneconstruite", +} -params = {map["to_table"]: Param(True) for map in mapping} +params = {table: Param(True) for table in mapping.values()} -# Define the basic parameters of the DAG, like schedule and start_date @dag( start_date=datetime(2024, 1, 1), schedule="@once", @@ -77,19 +46,14 @@ def copy_table_from_dw_to_app( params=params, ) def update_app(): - tasks = [] - for map in mapping: - to_table_str = map["to_table"].split(".")[1] + for from_table, to_table in mapping.items(): + to_table_short_name = to_table.split(".")[1] - @task.python(task_id=f"copy_{to_table_str}", retries=0) - def 
copy_table(from_table=map["from_table"], to_table=map["to_table"], **context): - if context["params"][to_table]: - copy_table_from_dw_to_app(from_table, to_table) - else: - print(f"Skipping {to_table_str}") + @task.python(task_id=f"copy_{to_table_short_name}", retries=0) + def copy_table(from_table=from_table, to_table=to_table, **context): + copy_table_from_dw_to_app(from_table, to_table) - tasks.append(copy_table()) + copy_table() -# Instantiate the DAG update_app() diff --git a/airflow/include/sql/sparte/models/app/app_epci_departements.sql b/airflow/include/sql/sparte/models/app/app_epci_departements.sql new file mode 100644 index 000000000..4f0b80b48 --- /dev/null +++ b/airflow/include/sql/sparte/models/app/app_epci_departements.sql @@ -0,0 +1,13 @@ +{{ + config( + materialized='table', + docs={'node_color': '#D70040'} + ) +}} + +SELECT + id, + epci_id, + departement_id +FROM + {{ source('public', 'app_epci_departements') }} diff --git a/airflow/include/sql/sparte/models/app/app_scot_departements.sql b/airflow/include/sql/sparte/models/app/app_scot_departements.sql new file mode 100644 index 000000000..2bdc48744 --- /dev/null +++ b/airflow/include/sql/sparte/models/app/app_scot_departements.sql @@ -0,0 +1,13 @@ +{{ + config( + materialized='table', + docs={'node_color': '#D70040'} + ) +}} + +SELECT + id, + scot_id, + departement_id +FROM + {{ source('public', 'app_scot_departements') }} diff --git a/airflow/include/sql/sparte/models/app/app_scot_regions.sql b/airflow/include/sql/sparte/models/app/app_scot_regions.sql new file mode 100644 index 000000000..3d1f041b5 --- /dev/null +++ b/airflow/include/sql/sparte/models/app/app_scot_regions.sql @@ -0,0 +1,13 @@ +{{ + config( + materialized='table', + docs={'node_color': '#D70040'} + ) +}} + +SELECT + id, + scot_id, + region_id +FROM + {{ source('public', 'app_scot_regions') }} diff --git a/airflow/include/sql/sparte/models/app/schema.yml b/airflow/include/sql/sparte/models/app/schema.yml index 
db186762b..deab8410a 100644 --- a/airflow/include/sql/sparte/models/app/schema.yml +++ b/airflow/include/sql/sparte/models/app/schema.yml @@ -10,6 +10,9 @@ models: - name: app_usagesol - name: app_couverturesol - name: app_couvertureusagematrix + - name: app_scot_regions + - name: app_scot_departements + - name: app_epci_departements sources: - name: public @@ -22,3 +25,6 @@ sources: - name: app_usagesol - name: app_couverturesol - name: app_couvertureusagematrix + - name: app_scot_regions + - name: app_scot_departements + - name: app_epci_departements From e6e93d1cf30cb19732e99f76c871fe3ef8ae1e28 Mon Sep 17 00:00:00 2001 From: "Alexis A." Date: Tue, 27 Aug 2024 13:46:27 +0200 Subject: [PATCH 26/99] feat(dbt): add many_to_many models for lands --- airflow/dags/ingest_app_dependencies.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/airflow/dags/ingest_app_dependencies.py b/airflow/dags/ingest_app_dependencies.py index 66cbe6b32..b48361769 100644 --- a/airflow/dags/ingest_app_dependencies.py +++ b/airflow/dags/ingest_app_dependencies.py @@ -23,9 +23,9 @@ def ingest_table(source_table_name: str, destination_table_name: str): "public_data_couverturesol": "app_couverturesol", "public_data_usagesol": "app_usagesol", "public_data_couvertureusagematrix": "app_couvertureusagematrix", - "public_data_epci_departements": "app_epci_departements", - "public_data_scot_departements": "app_scot_departements", - "public_data_scot_regions": "app_scot_regions", + "public_data_epci_departements": "app_epcidepartements", + "public_data_scot_departements": "app_scotdepartements", + "public_data_scot_regions": "app_scotregions", } From e42c5a4cb77ae605fe2cfde1cbb848d665a793ca Mon Sep 17 00:00:00 2001 From: "Alexis A." Date: Tue, 27 Aug 2024 13:47:52 +0200 Subject: [PATCH 27/99] Revert "feat(dbt): add many_to_many models for lands" This reverts commit e6e93d1cf30cb19732e99f76c871fe3ef8ae1e28. 
--- airflow/dags/ingest_app_dependencies.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/airflow/dags/ingest_app_dependencies.py b/airflow/dags/ingest_app_dependencies.py index b48361769..66cbe6b32 100644 --- a/airflow/dags/ingest_app_dependencies.py +++ b/airflow/dags/ingest_app_dependencies.py @@ -23,9 +23,9 @@ def ingest_table(source_table_name: str, destination_table_name: str): "public_data_couverturesol": "app_couverturesol", "public_data_usagesol": "app_usagesol", "public_data_couvertureusagematrix": "app_couvertureusagematrix", - "public_data_epci_departements": "app_epcidepartements", - "public_data_scot_departements": "app_scotdepartements", - "public_data_scot_regions": "app_scotregions", + "public_data_epci_departements": "app_epci_departements", + "public_data_scot_departements": "app_scot_departements", + "public_data_scot_regions": "app_scot_regions", } From 686fe35f726e5506bd0114da8e7885f6d1633643 Mon Sep 17 00:00:00 2001 From: "Alexis A." Date: Tue, 27 Aug 2024 13:49:17 +0200 Subject: [PATCH 28/99] feat(airflow): add expose ports to config --- airflow/.astro/config.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/airflow/.astro/config.yaml b/airflow/.astro/config.yaml index 79aded917..9061daefd 100644 --- a/airflow/.astro/config.yaml +++ b/airflow/.astro/config.yaml @@ -1,3 +1,5 @@ +airflow: + expose_port: true project: name: airflow postgres: From 379eef8d0f016e19f26f4f262b0f2f76105b361d Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Tue, 27 Aug 2024 17:01:21 +0200 Subject: [PATCH 29/99] temp --- airflow/dags/ingest_app_dependencies.py | 85 ++++++++++++++++++------- airflow/dags/update_app.py | 80 +++++++++++++++-------- metabase/models.py | 2 +- 3 files changed, 117 insertions(+), 50 deletions(-) diff --git a/airflow/dags/ingest_app_dependencies.py b/airflow/dags/ingest_app_dependencies.py index 66cbe6b32..b257419b7 100644 --- a/airflow/dags/ingest_app_dependencies.py +++ b/airflow/dags/ingest_app_dependencies.py @@ -12,21 +12,7 @@ def ingest_table(source_table_name: str, destination_table_name: str): ogr.set_output(Container().gdal_dw_conn(), table_name=destination_table_name, srs="EPSG:4326") ogr.set_output_mode(layer_mode=ogr.MODE_LAYER_OVERWRITE) ogr.execute() - - -mapping = { - "public_data_region": "app_region", - "public_data_departement": "app_departement", - "public_data_commune": "app_commune", - "public_data_epci": "app_epci", - "public_data_scot": "app_scot", - "public_data_couverturesol": "app_couverturesol", - "public_data_usagesol": "app_usagesol", - "public_data_couvertureusagematrix": "app_couvertureusagematrix", - "public_data_epci_departements": "app_epci_departements", - "public_data_scot_departements": "app_scot_departements", - "public_data_scot_regions": "app_scot_regions", -} + return ogr.safe_args @dag( @@ -36,16 +22,69 @@ def ingest_table(source_table_name: str, destination_table_name: str): default_args={"owner": "Alexis Athlani", "retries": 3}, tags=["App"], ) -def ingest_app_dependencies(): - for source_table_name, destination_table_name in mapping.items(): +def ingest_app_dependencies(): # noqa: C901 + @task.python + def ingest_app_region(): + return ingest_table("public_data_region", "app_region") + + @task.python + def ingest_app_departement(): + return ingest_table("public_data_departement", "app_departement") + + @task.python + def ingest_app_commune(): + return ingest_table("public_data_commune", "app_commune") + + @task.python + def ingest_app_epci(): 
+ return ingest_table("public_data_epci", "app_epci") + + @task.python + def ingest_app_scot(): + return ingest_table("public_data_scot", "app_scot") + + @task.python + def ingest_app_couverturesol(): + return ingest_table("public_data_couverturesol", "app_couverturesol") + + @task.python + def ingest_app_usagesol(): + return ingest_table("public_data_usagesol", "app_usagesol") + + @task.python + def ingest_app_couvertureusagematrix(): + return ingest_table("public_data_couvertureusagematrix", "app_couvertureusagematrix") + + @task.python + def ingest_app_epci_departements(): + return ingest_table("public_data_epci_departements", "app_epci_departements") + + @task.python + def ingest_app_scot_departements(): + return ingest_table("public_data_scot_departements", "app_scot_departements") + + @task.python + def ingest_app_scot_regions(): + return ingest_table("public_data_scot_regions", "app_scot_regions") - @task.python( - task_id=f"ingest_{destination_table_name}", - ) - def ingest(): - ingest_table(source_table_name, destination_table_name) + @task.bash(retries=0, trigger_rule="all_success") + def dbt_run(**context): + return 'cd "${AIRFLOW_HOME}/include/sql/sparte" && dbt run -s app' - ingest() + ( + ingest_app_region() + >> ingest_app_departement() + >> ingest_app_commune() + >> ingest_app_epci() + >> ingest_app_scot() + >> ingest_app_couverturesol() + >> ingest_app_usagesol() + >> ingest_app_couvertureusagematrix() + >> ingest_app_epci_departements() + >> ingest_app_scot_departements() + >> ingest_app_scot_regions() + >> dbt_run() + ) ingest_app_dependencies() diff --git a/airflow/dags/update_app.py b/airflow/dags/update_app.py index 72c714b4b..6ac849504 100644 --- a/airflow/dags/update_app.py +++ b/airflow/dags/update_app.py @@ -1,5 +1,4 @@ from airflow.decorators import dag, task -from airflow.models.param import Param from gdaltools import ogr2ogr from include.container import Container from pendulum import datetime @@ -17,23 +16,7 @@ def 
copy_table_from_dw_to_app( ogr.set_output(Container().gdal_app_conn(), table_name=to_table) ogr.set_output_mode(layer_mode=ogr.MODE_LAYER_OVERWRITE) ogr.execute() - - -mapping = { - "public_ocsge.for_app_ocsge": "public.public_data_ocsge", - "public_ocsge.for_app_artificialarea": "public.public_data_artificialarea", - "public_ocsge.for_app_artifareazoneurba": "public.public_data_artifareazoneurba", - "public_ocsge.for_app_commune": "public.public_data_commune", - "public_ocsge.for_app_departement": "public.public_data_departement", - "public_ocsge.for_app_communesol": "public.public_data_communesol", - "public_ocsge.for_app_ocsgediff": "public.public_data_ocsgediff", - "public_ocsge.for_app_communediff": "public.public_data_communediff", - "public_gpu.for_app_zoneurba": "public.public_data_zoneurba", - "public_ocsge.for_app_zoneconstruite": "public.public_data_zoneconstruite", -} - - -params = {table: Param(True) for table in mapping.values()} + return ogr.safe_args @dag( @@ -43,17 +26,62 @@ def copy_table_from_dw_to_app( doc_md=__doc__, default_args={"owner": "Alexis Athlani", "retries": 3}, tags=["App"], - params=params, ) -def update_app(): - for from_table, to_table in mapping.items(): - to_table_short_name = to_table.split(".")[1] +def update_app(): # noqa: C901 + @task.python + def copy_public_data_ocsge(): + return copy_table_from_dw_to_app("public_ocsge.for_app_ocsge", "public.public_data_ocsge") + + @task.python + def copy_public_data_artificialarea(): + return copy_table_from_dw_to_app("public_ocsge.for_app_artificialarea", "public.public_data_artificialarea") + + @task.python + def copy_public_data_artifareazoneurba(): + return copy_table_from_dw_to_app( + "public_ocsge.for_app_artifareazoneurba", "public.public_data_artifareazoneurba" + ) + + @task.python + def copy_public_data_commune(): + return copy_table_from_dw_to_app("public_ocsge.for_app_commune", "public.public_data_commune") + + @task.python + def copy_public_data_departement(): + return 
copy_table_from_dw_to_app("public_ocsge.for_app_departement", "public.public_data_departement") + + @task.python + def copy_public_data_communesol(): + return copy_table_from_dw_to_app("public_ocsge.for_app_communesol", "public.public_data_communesol") + + @task.python + def copy_public_data_ocsgediff(): + return copy_table_from_dw_to_app("public_ocsge.for_app_ocsgediff", "public.public_data_ocsgediff") + + @task.python + def copy_public_data_communediff(): + return copy_table_from_dw_to_app("public_ocsge.for_app_communediff", "public.public_data_communediff") + + @task.python + def copy_public_data_zoneconstruite(): + return copy_table_from_dw_to_app("public_ocsge.for_app_zoneconstruite", "public.public_data_zoneconstruite") - @task.python(task_id=f"copy_{to_table_short_name}", retries=0) - def copy_table(from_table=from_table, to_table=to_table, **context): - copy_table_from_dw_to_app(from_table, to_table) + @task.python + def copy_public_data_zoneurba(): + return copy_table_from_dw_to_app("public_gpu.for_app_zoneurba", "public.public_data_zoneurba") - copy_table() + ( + copy_public_data_ocsge() + >> copy_public_data_artificialarea() + >> copy_public_data_artifareazoneurba() + >> copy_public_data_commune() + >> copy_public_data_departement() + >> copy_public_data_communesol() + >> copy_public_data_ocsgediff() + >> copy_public_data_communediff() + >> copy_public_data_zoneconstruite() + >> copy_public_data_zoneurba() + ) update_app() diff --git a/metabase/models.py b/metabase/models.py index 6d87eeeb0..e9834757c 100644 --- a/metabase/models.py +++ b/metabase/models.py @@ -72,7 +72,7 @@ def update_with_request(self, request: Request) -> None: self.save() def update_locations(self, project: Project) -> None: - qs = project.cities.all().select_related("epci", "departement", "scot", "departement__region") + qs = project.cities.all() if qs.count() == 1: city = qs.first() From 7d2ebd9870ef917b68fdb530b4d948aaf272ae41 Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Tue, 27 Aug 2024 17:49:33 +0200 Subject: [PATCH 30/99] temp --- airflow/dags/update_app.py | 42 ++++++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/airflow/dags/update_app.py b/airflow/dags/update_app.py index 6ac849504..fe98696c1 100644 --- a/airflow/dags/update_app.py +++ b/airflow/dags/update_app.py @@ -1,9 +1,14 @@ from airflow.decorators import dag, task -from gdaltools import ogr2ogr +from gdaltools import ogr2ogr, ogrinfo from include.container import Container from pendulum import datetime +def create_spatial_index(table_name: str, column_name="mpoly"): + sql = f"CREATE INDEX IF NO EXISTS ON {table_name} USING GIST ({column_name});" + return ogrinfo(Container().gdal_app_conn(), sql=sql) + + def copy_table_from_dw_to_app( from_table: str, to_table: str, @@ -30,11 +35,17 @@ def copy_table_from_dw_to_app( def update_app(): # noqa: C901 @task.python def copy_public_data_ocsge(): - return copy_table_from_dw_to_app("public_ocsge.for_app_ocsge", "public.public_data_ocsge") + to_table = "public.public_data_ocsge" + copy_result = copy_table_from_dw_to_app("public_ocsge.for_app_ocsge", to_table) + create_spatial_index(to_table) + return copy_result @task.python def copy_public_data_artificialarea(): - return copy_table_from_dw_to_app("public_ocsge.for_app_artificialarea", "public.public_data_artificialarea") + to_table = "public.public_data_artificialarea" + copy_result = copy_table_from_dw_to_app("public_ocsge.for_app_artificialarea", to_table) + create_spatial_index(to_table) + return copy_result @task.python def copy_public_data_artifareazoneurba(): @@ -44,11 +55,17 @@ def copy_public_data_artifareazoneurba(): @task.python def copy_public_data_commune(): - return copy_table_from_dw_to_app("public_ocsge.for_app_commune", "public.public_data_commune") + to_table = "public.public_data_commune" + copy_result = copy_table_from_dw_to_app("public_ocsge.for_app_commune", to_table) + create_spatial_index(to_table) + 
return copy_result @task.python def copy_public_data_departement(): - return copy_table_from_dw_to_app("public_ocsge.for_app_departement", "public.public_data_departement") + to_table = "public.public_data_departement" + copy_result = copy_table_from_dw_to_app("public_ocsge.for_app_departement", to_table) + create_spatial_index(to_table) + return copy_result @task.python def copy_public_data_communesol(): @@ -56,7 +73,10 @@ def copy_public_data_communesol(): @task.python def copy_public_data_ocsgediff(): - return copy_table_from_dw_to_app("public_ocsge.for_app_ocsgediff", "public.public_data_ocsgediff") + to_table = "public.public_data_ocsgediff" + copy_result = copy_table_from_dw_to_app("public_ocsge.for_app_ocsgediff", to_table) + create_spatial_index(to_table) + return copy_result @task.python def copy_public_data_communediff(): @@ -64,11 +84,17 @@ def copy_public_data_communediff(): @task.python def copy_public_data_zoneconstruite(): - return copy_table_from_dw_to_app("public_ocsge.for_app_zoneconstruite", "public.public_data_zoneconstruite") + to_table = "public.public_data_zoneconstruite" + copy_result = copy_table_from_dw_to_app("public_ocsge.for_app_zoneconstruite", to_table) + create_spatial_index(to_table) + return copy_result @task.python def copy_public_data_zoneurba(): - return copy_table_from_dw_to_app("public_gpu.for_app_zoneurba", "public.public_data_zoneurba") + to_table = "public.public_data_zoneurba" + copy_result = copy_table_from_dw_to_app("public_gpu.for_app_zoneurba", to_table) + create_spatial_index(to_table) + return copy_result ( copy_public_data_ocsge() From ea09c8c76d160ffa6b74666abc013360fbfcff81 Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Tue, 27 Aug 2024 17:50:18 +0200 Subject: [PATCH 31/99] temp --- airflow/dags/update_app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airflow/dags/update_app.py b/airflow/dags/update_app.py index fe98696c1..defa907a0 100644 --- a/airflow/dags/update_app.py +++ b/airflow/dags/update_app.py @@ -5,7 +5,7 @@ def create_spatial_index(table_name: str, column_name="mpoly"): - sql = f"CREATE INDEX IF NO EXISTS ON {table_name} USING GIST ({column_name});" + sql = f"CREATE INDEX IF NOT EXISTS ON {table_name} USING GIST ({column_name});" return ogrinfo(Container().gdal_app_conn(), sql=sql) From cf99e810edab64abe3c4716af0aa485912374429 Mon Sep 17 00:00:00 2001 From: "Alexis A." Date: Wed, 28 Aug 2024 09:43:54 +0200 Subject: [PATCH 32/99] feat(airflow): add all ocsge url in source.json --- airflow/include/ocsge/sources.json | 299 +++++++++++++++++++++++++---- 1 file changed, 258 insertions(+), 41 deletions(-) diff --git a/airflow/include/ocsge/sources.json b/airflow/include/ocsge/sources.json index ccc1123ec..50804c793 100644 --- a/airflow/include/ocsge/sources.json +++ b/airflow/include/ocsge/sources.json @@ -1,101 +1,318 @@ { "01": { + "difference": { + "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D001_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D001_2018-2021.7z" + }, "occupation_du_sol_et_zone_construite": { "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D001_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D001_2018-01-01.7z", "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D001_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D001_2021-01-01.7z" + } + }, + "02": { + "difference": { + "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D002_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D002_2018-2021.7z" }, + "occupation_du_sol_et_zone_construite": { + "2018": 
"https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D002_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D002_2018-01-01.7z", + "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D002_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D002_2021-01-01.7z" + } + }, + "06": { "difference": { - "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D001_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D001_2018-2021.7z" + "2017_2020": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D006_2017-2020/OCS-GE_2-0_DIFF_SHP_LAMB93_D006_2017-2020.7z" + }, + "occupation_du_sol_et_zone_construite": { + "2017": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D006_2017-01-01/OCS-GE_2-0__SHP_LAMB93_D006_2017-01-01.7z", + "2020": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D006_2020-01-01/OCS-GE_2-0__SHP_LAMB93_D006_2020-01-01.7z" } }, - "38": { + "11": { + "difference": { + "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D011_DIFF_2018-2021/OCS-GE_2-0__SHP_LAMB93_D011_DIFF_2018-2021.7z" + }, "occupation_du_sol_et_zone_construite": { - "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D038_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D038_2018-01-01.7z", - "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D038_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D038_2021-01-01.7z" + "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D011_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D011_2018-01-01.7z", + "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D011_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D011_2021-01-01.7z" + } + }, + "17": { + "difference": { + "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D017_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D017_2018-2021.7z" }, + 
"occupation_du_sol_et_zone_construite": { + "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D017_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D017_2018-01-01.7z", + "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D017_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D017_2021-01-01.7z" + } + }, + "24": { "difference": { - "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D038_DIFF_2018-2021/OCS-GE_2-0__SHP_LAMB93_D038_DIFF_2018-2021.7z" + "2017_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D024_2017-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D024_2017-2021.7z" + }, + "occupation_du_sol_et_zone_construite": { + "2017": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D024_2017-01-01/OCS-GE_2-0__SHP_LAMB93_D024_2017-01-01.7z", + "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D024_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D024_2021-01-01.7z" } }, - "69": { + "29": { + "difference": { + "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D029_DIFF_2018-2021/OCS-GE_2-0__SHP_LAMB93_D029_DIFF_2018-2021.7z" + }, "occupation_du_sol_et_zone_construite": { - "2017": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D069_2017-01-01/OCS-GE_2-0__SHP_LAMB93_D069_2017-01-01.7z", - "2020": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D069_2020-01-01/OCS-GE_2-0__SHP_LAMB93_D069_2020-01-01.7z" + "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D029_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D029_2018-01-01.7z", + "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D029_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D029_2021-01-01.7z" + } + }, + "30": { + "difference": { + "2018_2021": 
"https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D030_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D030_2018-2021.7z" }, + "occupation_du_sol_et_zone_construite": { + "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D030_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D030_2018-01-01.7z", + "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D030_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D030_2021-01-01.7z" + } + }, + "32": { "difference": { - "2017_2020": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D069_DIFF_2017-2020/OCS-GE_2-0__SHP_LAMB93_D069_DIFF_2017-2020.7z" + "2016_2019": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D032_DIFF_2016-2019/OCS-GE_2-0__SHP_LAMB93_D032_DIFF_2016-2019.7z" + }, + "occupation_du_sol_et_zone_construite": { + "2016": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D032_2016-01-01/OCS-GE_2-0__SHP_LAMB93_D032_2016-01-01.7z", + "2019": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D032_2019-01-01/OCS-GE_2-0__SHP_LAMB93_D032_2019-01-01.7z" } }, - "91": { + "33": { + "difference": { + "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D033_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D033_2018-2021.7z" + }, "occupation_du_sol_et_zone_construite": { - "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D091_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D091_2018-01-01.7z", - "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D091_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D091_2021-01-01.7z" + "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D033_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D033_2018-01-01.7z", + "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D033_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D033_2021-01-01.7z" + 
} + }, + "34": { + "difference": { + "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D034_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D034_2018-2021.7z" }, + "occupation_du_sol_et_zone_construite": { + "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D034_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D034_2021-01-01.7z", + "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D034_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D034_2018-01-01.7z" + } + }, + "35": { "difference": { - "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D091_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D091_2018-2021.7z" + "2017_2020": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE-NG_1-1__SHP_LAMB93_D035_DIFF_2017-2020/OCS-GE-NG_1-1__SHP_LAMB93_D035_DIFF_2017-2020.7z" + }, + "occupation_du_sol_et_zone_construite": { + "2017": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D035_2017-01-01/OCS-GE_2-0__SHP_LAMB93_D035_2017-01-01.7z", + "2020": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D035_2020-01-01/OCS-GE_2-0__SHP_LAMB93_D035_2020-01-01.7z" } }, - "92": { + "37": { + "difference": { + "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D037_DIFF_2018-2021/OCS-GE_2-0__SHP_LAMB93_D037_DIFF_2018-2021.7z" + }, "occupation_du_sol_et_zone_construite": { - "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D092_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D092_2018-01-01.7z", - "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D092_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D092_2021-01-01.7z" + "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D037_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D037_2018-01-01.7z", + "2021": 
"https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D037_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D037_2021-01-01.7z" + } + }, + "38": { + "difference": { + "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D038_DIFF_2018-2021/OCS-GE_2-0__SHP_LAMB93_D038_DIFF_2018-2021.7z" }, + "occupation_du_sol_et_zone_construite": { + "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D038_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D038_2018-01-01.7z", + "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D038_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D038_2021-01-01.7z" + } + }, + "40": { "difference": { - "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D092_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D092_2018-2021.7z" + "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D040_DIFF_2018-2021/OCS-GE_2-0__SHP_LAMB93_D040_DIFF_2018-2021.7z" + }, + "occupation_du_sol_et_zone_construite": { + "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D040_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D040_2018-01-01.7z", + "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D040_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D040_2021-01-01.7z" } }, - "78": { + "47": { + "difference": { + "2017_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D047_2017-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D047_2017-2021.7z" + }, "occupation_du_sol_et_zone_construite": { - "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D078_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D078_2018-01-01.7z", - "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D078_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D078_2021-01-01.7z" + "2017": 
"https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D047_2017-01-01/OCS-GE_2-0__SHP_LAMB93_D047_2017-01-01.7z", + "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D047_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D047_2021-01-01.7z" + } + }, + "48": { + "difference": { + "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D048_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D048_2018-2021.7z" }, + "occupation_du_sol_et_zone_construite": { + "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D048_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D048_2018-01-01.7z", + "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D048_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D048_2021-01-01.7z" + } + }, + "60": { "difference": { - "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D078_DIFF_2018-2021/OCS-GE_2-0__SHP_LAMB93_D078_DIFF_2018-2021.7z" + "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D060_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D060_2018-2021.7z" + }, + "occupation_du_sol_et_zone_construite": { + "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D060_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D060_2018-01-01.7z", + "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D060_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D060_2021-01-01.7z" } }, - "94": { + "62": { + "difference": { + "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE-NG_1-1__SHP_LAMB93_D062_DIFF_2018-2021/OCS-GE-NG_1-1__SHP_LAMB93_D062_DIFF_2018-2021.7z" + }, "occupation_du_sol_et_zone_construite": { - "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D094_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D094_2018-01-01.7z", - "2021": 
"https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D094_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D094_2021-01-01.7z" + "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D062_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D062_2018-01-01.7z", + "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D062_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D062_2021-01-01.7z" + } + }, + "66": { + "difference": { + "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D066_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D066_2018-2021.7z" }, + "occupation_du_sol_et_zone_construite": { + "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D066_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D066_2018-01-01.7z", + "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D066_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D066_2021-01-01.7z" + + } + }, + "64": { "difference": { - "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D094_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D094_2018-2021.7z" + "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D064_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D064_2018-2021.7z" + }, + "occupation_du_sol_et_zone_construite": { + "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D064_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D064_2018-01-01.7z", + "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D064_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D064_2021-01-01.7z" + } + }, + "67": { + "difference": { + "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D067_DIFF_2018-2021/OCS-GE_2-0__SHP_LAMB93_D067_DIFF_2018-2021.7z" + }, + "occupation_du_sol_et_zone_construite": { + "2018": 
"https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D067_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D067_2018-01-01.7z", + "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D067_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D067_2021-01-01.7z" + } + }, + "68": { + "difference": { + "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D068_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D068_2018-2021.7z" + }, + "occupation_du_sol_et_zone_construite": { + "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D068_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D068_2018-01-01.7z", + "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D068_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D068_2021-01-01.7z" + } + }, + "69": { + "difference": { + "2017_2020": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D069_DIFF_2017-2020/OCS-GE_2-0__SHP_LAMB93_D069_DIFF_2017-2020.7z" + }, + "occupation_du_sol_et_zone_construite": { + "2017": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D069_2017-01-01/OCS-GE_2-0__SHP_LAMB93_D069_2017-01-01.7z", + "2020": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D069_2020-01-01/OCS-GE_2-0__SHP_LAMB93_D069_2020-01-01.7z" } }, "75": { + "difference": { + "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D075_DIFF_2018-2021/OCS-GE_2-0__SHP_LAMB93_D075_DIFF_2018-2021.7z" + }, "occupation_du_sol_et_zone_construite": { "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D075_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D075_2018-01-01.7z", "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D075_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D075_2021-01-01.7z" - }, + } + }, + "77": { "difference": { - "2018_2021": 
"https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D075_DIFF_2018-2021/OCS-GE_2-0__SHP_LAMB93_D075_DIFF_2018-2021.7z" + "2017_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D077_2017-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D077_2017-2021.7z" + }, + "occupation_du_sol_et_zone_construite": { + "2017": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D077_2017-01-01/OCS-GE_2-0__SHP_LAMB93_D077_2017-01-01.7z", + "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D077_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D077_2021-01-01.7z" } }, - "32": { + "78": { + "difference": { + "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D078_DIFF_2018-2021/OCS-GE_2-0__SHP_LAMB93_D078_DIFF_2018-2021.7z" + }, "occupation_du_sol_et_zone_construite": { - "2016": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D032_2016-01-01/OCS-GE_2-0__SHP_LAMB93_D032_2016-01-01.7z", - "2019": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D032_2019-01-01/OCS-GE_2-0__SHP_LAMB93_D032_2019-01-01.7z" + "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D078_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D078_2018-01-01.7z", + "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D078_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D078_2021-01-01.7z" + } + }, + "80": { + "difference": { + "2017_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D080_2017-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D080_2017-2021.7z" }, + "occupation_du_sol_et_zone_construite": { + "2017": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D080_2017-01-01/OCS-GE_2-0__SHP_LAMB93_D080_2017-01-01.7z", + "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D080_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D080_2021-01-01.7z" 
+ } + }, + "83": { "difference": { - "2016_2019": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D032_DIFF_2016-2019/OCS-GE_2-0__SHP_LAMB93_D032_DIFF_2016-2019.7z" + "2017_2020": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE-NG_1-1__SHP_LAMB93_D083_DIFF_2017-2020/OCS-GE-NG_1-1__SHP_LAMB93_D083_DIFF_2017-2020.7z" + }, + "occupation_du_sol_et_zone_construite": { + "2017": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D083_2017-01-01/OCS-GE_2-0__SHP_LAMB93_D083_2017-01-01.7z", + "2020": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D083_2020-01-01/OCS-GE_2-0__SHP_LAMB93_D083_2020-01-01.7z" } }, - "37": { + "84": { + "difference": { + "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D084_DIFF_2018-2021/OCS-GE_2-0__SHP_LAMB93_D084_DIFF_2018-2021.7z" + }, "occupation_du_sol_et_zone_construite": { - "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D037_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D037_2018-01-01.7z", - "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D037_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D037_2021-01-01.7z" + "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D084_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D084_2018-01-01.7z", + "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D084_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D084_2021-01-01.7z" + } + }, + "91": { + "difference": { + "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D091_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D091_2018-2021.7z" }, + "occupation_du_sol_et_zone_construite": { + "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D091_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D091_2018-01-01.7z", + "2021": 
"https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D091_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D091_2021-01-01.7z" + } + }, + "92": { "difference": { - "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D037_DIFF_2018-2021/OCS-GE_2-0__SHP_LAMB93_D037_DIFF_2018-2021.7z" + "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D092_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D092_2018-2021.7z" + }, + "occupation_du_sol_et_zone_construite": { + "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D092_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D092_2018-01-01.7z", + "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D092_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D092_2021-01-01.7z" } - }, - "29": { + }, + "93": { + "difference": { + "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D093_DIFF_2018-2021/OCS-GE_2-0__SHP_LAMB93_D093_DIFF_2018-2021.7z" + }, "occupation_du_sol_et_zone_construite": { - "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D029_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D029_2018-01-01.7z", - "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D029_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D029_2021-01-01.7z" + "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D093_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D093_2018-01-01.7z", + "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D093_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D093_2021-01-01.7z" + } + }, + "94": { + "difference": { + "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D094_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D094_2018-2021.7z" }, + "occupation_du_sol_et_zone_construite": { + "2018": 
"https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D094_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D094_2018-01-01.7z", + "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D094_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D094_2021-01-01.7z" + } + }, + "95": { "difference": { - "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D029_DIFF_2018-2021/OCS-GE_2-0__SHP_LAMB93_D029_DIFF_2018-2021.7z" + "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D095_DIFF_2018-2021/OCS-GE_2-0__SHP_LAMB93_D095_DIFF_2018-2021.7z" + }, + "occupation_du_sol_et_zone_construite": { + "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D095_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D095_2018-01-01.7z", + "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D095_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D095_2021-01-01.7z" } } -} +} \ No newline at end of file From 77cddf675b81b59220eb39a882b63076ccd06119 Mon Sep 17 00:00:00 2001 From: "Alexis A." Date: Wed, 28 Aug 2024 13:24:06 +0200 Subject: [PATCH 33/99] feat(airflow): increase concurrency to 4 threads --- airflow/dbt_profile.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airflow/dbt_profile.yml b/airflow/dbt_profile.yml index f39c999a3..90ba9c1d1 100644 --- a/airflow/dbt_profile.yml +++ b/airflow/dbt_profile.yml @@ -6,7 +6,7 @@ sparte: pass: "{{ env_var('DBT_DB_PASSWORD') }}" port: "{{ env_var('DBT_DB_PORT') | as_number }}" schema: "{{ env_var('DBT_DB_SCHEMA') }}" - threads: 1 + threads: 4 type: "postgres" user: "{{ env_var('DBT_DB_USER') }}" target: dev From be8520ef69710eba1fd12a2a976f70c317b14b8c Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Thu, 29 Aug 2024 09:48:49 +0200 Subject: [PATCH 34/99] temp --- airflow/dbt_profile.yml | 2 +- airflow/include/ocsge/sources.json | 21 ++++++++++----------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/airflow/dbt_profile.yml b/airflow/dbt_profile.yml index 90ba9c1d1..f39c999a3 100644 --- a/airflow/dbt_profile.yml +++ b/airflow/dbt_profile.yml @@ -6,7 +6,7 @@ sparte: pass: "{{ env_var('DBT_DB_PASSWORD') }}" port: "{{ env_var('DBT_DB_PORT') | as_number }}" schema: "{{ env_var('DBT_DB_SCHEMA') }}" - threads: 4 + threads: 1 type: "postgres" user: "{{ env_var('DBT_DB_USER') }}" target: dev diff --git a/airflow/include/ocsge/sources.json b/airflow/include/ocsge/sources.json index 50804c793..14cb21e90 100644 --- a/airflow/include/ocsge/sources.json +++ b/airflow/include/ocsge/sources.json @@ -94,8 +94,8 @@ "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D034_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D034_2018-2021.7z" }, "occupation_du_sol_et_zone_construite": { - "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D034_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D034_2021-01-01.7z", - "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D034_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D034_2018-01-01.7z" + "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D034_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D034_2018-01-01.7z", + "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D034_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D034_2021-01-01.7z" } }, "35": { @@ -170,23 +170,22 @@ "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D062_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D062_2021-01-01.7z" } }, - "66": { + "64": { "difference": { - "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D066_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D066_2018-2021.7z" + 
"2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D064_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D064_2018-2021.7z" }, "occupation_du_sol_et_zone_construite": { - "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D066_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D066_2018-01-01.7z", - "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D066_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D066_2021-01-01.7z" - + "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D064_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D064_2018-01-01.7z", + "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D064_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D064_2021-01-01.7z" } }, - "64": { + "66": { "difference": { - "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D064_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D064_2018-2021.7z" + "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D066_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D066_2018-2021.7z" }, "occupation_du_sol_et_zone_construite": { - "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D064_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D064_2018-01-01.7z", - "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D064_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D064_2021-01-01.7z" + "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D066_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D066_2018-01-01.7z", + "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D066_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D066_2021-01-01.7z" } }, "67": { From 8116684ffddf52588758233baa7b0e3e50da18c8 Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Thu, 29 Aug 2024 10:25:59 +0200 Subject: [PATCH 35/99] feat(dbt): add many indexes --- airflow/include/sql/sparte/dbt_project.yml | 17 +---------------- .../sql/sparte/models/ocsge/difference.sql | 14 ++++++++++++-- .../intersected/occupation_du_sol_commune.sql | 10 +++++++++- .../sparte/models/ocsge/occupation_du_sol.sql | 10 +++++++--- .../sql/sparte/models/ocsge/zone_construite.sql | 8 +++++++- 5 files changed, 36 insertions(+), 23 deletions(-) diff --git a/airflow/include/sql/sparte/dbt_project.yml b/airflow/include/sql/sparte/dbt_project.yml index 21b39585e..7e5c37694 100644 --- a/airflow/include/sql/sparte/dbt_project.yml +++ b/airflow/include/sql/sparte/dbt_project.yml @@ -1,16 +1,7 @@ - -# Name your project! Project names should contain only lowercase characters -# and underscores. A good package name should reflect your organization's -# name or the intended use of these models name: 'sparte' version: '1.0.0' - -# This setting configures which "profile" dbt uses for this project. profile: 'sparte' -# These configurations specify where dbt should look for different types of files. -# The `model-paths` config, for example, states that models in this project can be -# found in the "models/" directory. You probably won't need to change these! model-paths: ["models"] analysis-paths: ["analyses"] test-paths: ["tests"] @@ -18,17 +9,11 @@ seed-paths: ["seeds"] macro-paths: ["macros"] snapshot-paths: ["snapshots"] -clean-targets: # directories to be removed by `dbt clean` +clean-targets: - "target" - "dbt_packages" -# Configuring models -# Full documentation: https://docs.getdbt.com/docs/configuring-models - -# In this example config, we tell dbt to build all models in the example/ -# directory as views. These settings can be overridden in the individual model -# files using the `{{ config(...) }}` macro. 
models: sparte: app: diff --git a/airflow/include/sql/sparte/models/ocsge/difference.sql b/airflow/include/sql/sparte/models/ocsge/difference.sql index 741c2f0d8..a3e6b4a8d 100644 --- a/airflow/include/sql/sparte/models/ocsge/difference.sql +++ b/airflow/include/sql/sparte/models/ocsge/difference.sql @@ -1,8 +1,18 @@ - {{ config( materialized='table', - post_hook="CREATE INDEX ON {{ this }} USING GIST (geom)" + indexes=[ + {'columns': ['loaded_date'], 'type': 'btree'}, + {'columns': ['year_old'], 'type': 'btree'}, + {'columns': ['year_new'], 'type': 'btree'}, + {'columns': ['departement'], 'type': 'btree'}, + {'columns': ['uuid'], 'type': 'btree'}, + {'columns': ['cs_old'], 'type': 'btree'}, + {'columns': ['cs_new'], 'type': 'btree'}, + {'columns': ['us_old'], 'type': 'btree'}, + {'columns': ['us_new'], 'type': 'btree'}, + {'columns': ['geom'], 'type': 'gist'} + ] ) }} diff --git a/airflow/include/sql/sparte/models/ocsge/intersected/occupation_du_sol_commune.sql b/airflow/include/sql/sparte/models/ocsge/intersected/occupation_du_sol_commune.sql index 5a3b82aaf..6833b8836 100644 --- a/airflow/include/sql/sparte/models/ocsge/intersected/occupation_du_sol_commune.sql +++ b/airflow/include/sql/sparte/models/ocsge/intersected/occupation_du_sol_commune.sql @@ -1,7 +1,15 @@ {{ config( materialized='incremental', - post_hook="{{ delete_from_this_where_field_not_in('ocsge_loaded_date', 'occupation_du_sol', 'loaded_date') }}" + post_hook="{{ delete_from_this_where_field_not_in('ocsge_loaded_date', 'occupation_du_sol', 'loaded_date') }}", + indexes=[ + {'columns': ['departement'], 'type': 'btree'}, + {'columns': ['year'], 'type': 'btree'}, + {'columns': ['uuid'], 'type': 'btree'}, + {'columns': ['commune_code'], 'type': 'btree'}, + {'columns': ['ocsge_uuid'], 'type': 'btree'}, + {'columns': ['geom'], 'type': 'gist'} + ] ) }} diff --git a/airflow/include/sql/sparte/models/ocsge/occupation_du_sol.sql b/airflow/include/sql/sparte/models/ocsge/occupation_du_sol.sql index 
e689701d7..aef74a383 100644 --- a/airflow/include/sql/sparte/models/ocsge/occupation_du_sol.sql +++ b/airflow/include/sql/sparte/models/ocsge/occupation_du_sol.sql @@ -3,12 +3,16 @@ {{ config( materialized='table', + unlogged=True, indexes=[ + {'columns': ['loaded_date'], 'type': 'btree'}, {'columns': ['departement','year'], 'type': 'btree'}, {'columns': ['departement'], 'type': 'btree'}, - {'columns': ['uuid'], 'type': 'btree'} - ], - post_hook="CREATE INDEX ON {{ this }} USING GIST (geom)" + {'columns': ['uuid'], 'type': 'btree'}, + {'columns': ['code_cs'], 'type': 'btree'}, + {'columns': ['code_us'], 'type': 'btree'}, + {'columns': ['geom'], 'type': 'gist'} + ] ) }} diff --git a/airflow/include/sql/sparte/models/ocsge/zone_construite.sql b/airflow/include/sql/sparte/models/ocsge/zone_construite.sql index d162c057d..f3048163a 100644 --- a/airflow/include/sql/sparte/models/ocsge/zone_construite.sql +++ b/airflow/include/sql/sparte/models/ocsge/zone_construite.sql @@ -1,7 +1,13 @@ {{ config( materialized='table', - post_hook="CREATE INDEX ON {{ this }} USING GIST (geom)" + indexes=[ + {'columns': ['loaded_date'], 'type': 'btree'}, + {'columns': ['departement','year'], 'type': 'btree'}, + {'columns': ['departement'], 'type': 'btree'}, + {'columns': ['uuid'], 'type': 'btree'}, + {'columns': ['geom'], 'type': 'gist'} + ] ) }} From 85ae386c3096e6ee8b5bfbcb421fbc622cbf49e2 Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Thu, 29 Aug 2024 11:27:22 +0200 Subject: [PATCH 36/99] add more indexes --- .../models/ocsge/intersected/occupation_du_sol_commune.sql | 6 ++++-- .../intersected/occupation_du_sol_zonage_urbanisme.sql | 5 ++++- .../include/sql/sparte/models/ocsge/occupation_du_sol.sql | 1 - 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/airflow/include/sql/sparte/models/ocsge/intersected/occupation_du_sol_commune.sql b/airflow/include/sql/sparte/models/ocsge/intersected/occupation_du_sol_commune.sql index 6833b8836..5a8aac038 100644 --- a/airflow/include/sql/sparte/models/ocsge/intersected/occupation_du_sol_commune.sql +++ b/airflow/include/sql/sparte/models/ocsge/intersected/occupation_du_sol_commune.sql @@ -8,7 +8,8 @@ {'columns': ['uuid'], 'type': 'btree'}, {'columns': ['commune_code'], 'type': 'btree'}, {'columns': ['ocsge_uuid'], 'type': 'btree'}, - {'columns': ['geom'], 'type': 'gist'} + {'columns': ['geom'], 'type': 'gist'}, + {'columns': ['ocsge_loaded_date'], 'type': 'btree'} ] ) }} @@ -49,7 +50,8 @@ with occupation_du_sol_commune_without_surface as ( ST_Intersects(commune.geom, ocsge.geom) {% if is_incremental() %} - WHERE ocsge.uuid not in (SELECT foo.ocsge_uuid from {{ this }} as foo) + WHERE ocsge.loaded_date > + (SELECT max(foo.ocsge_loaded_date) FROM {{ this }} as foo) {% endif %} ) diff --git a/airflow/include/sql/sparte/models/ocsge/intersected/occupation_du_sol_zonage_urbanisme.sql b/airflow/include/sql/sparte/models/ocsge/intersected/occupation_du_sol_zonage_urbanisme.sql index ed90d3aad..428f6d1d2 100644 --- a/airflow/include/sql/sparte/models/ocsge/intersected/occupation_du_sol_zonage_urbanisme.sql +++ b/airflow/include/sql/sparte/models/ocsge/intersected/occupation_du_sol_zonage_urbanisme.sql @@ -5,7 +5,10 @@ {'columns': ['departement'], 'type': 'btree'}, {'columns': ['year'], 'type': 'btree'}, {'columns': ['uuid'], 'type': 'btree'}, - {'columns': ['zonage_checksum'], 'type': 'btree'} + {'columns': ['zonage_checksum'], 'type': 'btree'}, + 
{'columns': ['ocsge_loaded_date'], 'type': 'btree'}, + {'columns': ['zonage_gpu_timestamp'], 'type': 'btree'}, + {'columns': ['geom'], 'type': 'gist'} ], post_hook=[ "{{ delete_from_this_where_field_not_in('ocsge_loaded_date', 'occupation_du_sol', 'loaded_date') }}", diff --git a/airflow/include/sql/sparte/models/ocsge/occupation_du_sol.sql b/airflow/include/sql/sparte/models/ocsge/occupation_du_sol.sql index aef74a383..6cfed0407 100644 --- a/airflow/include/sql/sparte/models/ocsge/occupation_du_sol.sql +++ b/airflow/include/sql/sparte/models/ocsge/occupation_du_sol.sql @@ -3,7 +3,6 @@ {{ config( materialized='table', - unlogged=True, indexes=[ {'columns': ['loaded_date'], 'type': 'btree'}, {'columns': ['departement','year'], 'type': 'btree'}, From dd3ec1406471f7d9cfc962899ff853db60836fd3 Mon Sep 17 00:00:00 2001 From: "Alexis A." Date: Thu, 29 Aug 2024 12:02:10 +0200 Subject: [PATCH 37/99] increase threads to 4 --- airflow/dbt_profile.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airflow/dbt_profile.yml b/airflow/dbt_profile.yml index f39c999a3..90ba9c1d1 100644 --- a/airflow/dbt_profile.yml +++ b/airflow/dbt_profile.yml @@ -6,7 +6,7 @@ sparte: pass: "{{ env_var('DBT_DB_PASSWORD') }}" port: "{{ env_var('DBT_DB_PORT') | as_number }}" schema: "{{ env_var('DBT_DB_SCHEMA') }}" - threads: 1 + threads: 4 type: "postgres" user: "{{ env_var('DBT_DB_USER') }}" target: dev From 1c9ad6061533fa5730c2b6cba57b00d89d5491ee Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Thu, 29 Aug 2024 12:35:15 +0200 Subject: [PATCH 38/99] feat(dbt): improve macro performance --- .../sql/sparte/macros/delete_from_this_where_field_not_in.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airflow/include/sql/sparte/macros/delete_from_this_where_field_not_in.sql b/airflow/include/sql/sparte/macros/delete_from_this_where_field_not_in.sql index 6a3891f8a..400c62090 100644 --- a/airflow/include/sql/sparte/macros/delete_from_this_where_field_not_in.sql +++ b/airflow/include/sql/sparte/macros/delete_from_this_where_field_not_in.sql @@ -6,5 +6,5 @@ {% if not that_field %} {% set that_field = this_field %} {% endif %} - DELETE FROM {{ this }} WHERE {{ this_field }} not in (SELECT {{ that_field }} FROM {{ ref(table) }} ) + DELETE FROM {{ this }} WHERE {{ this_field }} not in (SELECT DISTINCT {{ that_field }} FROM {{ ref(table) }} ) {% endmacro %} From a3d73fc788a9f85426de0bf324e7f50e324ee60f Mon Sep 17 00:00:00 2001 From: "Alexis A." Date: Thu, 29 Aug 2024 15:19:47 +0200 Subject: [PATCH 39/99] feat(dbt): optimize incremental condition --- .../intersected/occupation_du_sol_commune.sql | 8 ++++---- .../occupation_du_sol_zonage_urbanisme.sql | 14 ++++++++------ 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/airflow/include/sql/sparte/models/ocsge/intersected/occupation_du_sol_commune.sql b/airflow/include/sql/sparte/models/ocsge/intersected/occupation_du_sol_commune.sql index 5a8aac038..e75c9bc29 100644 --- a/airflow/include/sql/sparte/models/ocsge/intersected/occupation_du_sol_commune.sql +++ b/airflow/include/sql/sparte/models/ocsge/intersected/occupation_du_sol_commune.sql @@ -23,8 +23,9 @@ la surface totale de l'objet sera conservée. 
*/ - -with occupation_du_sol_commune_without_surface as ( +with max_ocsge_loaded_date as ( + SELECT max(loaded_date) as ocsge_loaded_date FROM {{ this }} +), occupation_du_sol_commune_without_surface as ( SELECT concat(ocsge.uuid::text, '_', commune.code::text) as ocsge_commune_id, -- surrogate key -- les attributs spécifiques aux communes sont préfixés par commune_ @@ -50,8 +51,7 @@ with occupation_du_sol_commune_without_surface as ( ST_Intersects(commune.geom, ocsge.geom) {% if is_incremental() %} - WHERE ocsge.loaded_date > - (SELECT max(foo.ocsge_loaded_date) FROM {{ this }} as foo) + WHERE ocsge.loaded_date > (select ocsge_loaded_date from max_ocsge_loaded_date) {% endif %} ) diff --git a/airflow/include/sql/sparte/models/ocsge/intersected/occupation_du_sol_zonage_urbanisme.sql b/airflow/include/sql/sparte/models/ocsge/intersected/occupation_du_sol_zonage_urbanisme.sql index 428f6d1d2..f8d9ad651 100644 --- a/airflow/include/sql/sparte/models/ocsge/intersected/occupation_du_sol_zonage_urbanisme.sql +++ b/airflow/include/sql/sparte/models/ocsge/intersected/occupation_du_sol_zonage_urbanisme.sql @@ -26,7 +26,12 @@ la surface totale de l'objet sera conservée. 
*/ -with occupation_du_sol_zonage_urbanisme_without_surface as ( + +with max_ocsge_loaded_date as ( + SELECT max(loaded_date) as ocsge_loaded_date FROM {{ this }} +), max_zonage_gpu_timestamp as ( + SELECT max(gpu_timestamp) as zonage_gpu_timestamp FROM {{ this }} +), occupation_du_sol_zonage_urbanisme_without_surface as ( SELECT concat(ocsge.uuid::text, '_', zonage.checksum::text) as ocsge_zonage_id, -- surrogate key -- les attributs spécifiques aux zonages sont préfixés par zonage_ @@ -52,11 +57,8 @@ with occupation_du_sol_zonage_urbanisme_without_surface as ( ST_Intersects(zonage.geom, ocsge.geom) {% if is_incremental() %} - WHERE ocsge.loaded_date > - (SELECT max(foo.ocsge_loaded_date) FROM {{ this }} as foo) - OR - zonage.gpu_timestamp > - (SELECT max(bar.zonage_gpu_timestamp) FROM {{ this }} as bar) + where ocsge.loaded_date > (select ocsge_loaded_date from max_ocsge_loaded_date) + or zonage.gpu_timestamp > (select zonage_gpu_timestamp from max_zonage_gpu_timestamp) {% endif %} ) From 95d05b56185d7b888b7c27325166f2f1f20393a4 Mon Sep 17 00:00:00 2001 From: "Alexis A." Date: Thu, 29 Aug 2024 15:39:09 +0200 Subject: [PATCH 40/99] feat(airflow): do not throw error if ocsge url does not exist --- airflow/dags/ocsge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airflow/dags/ocsge.py b/airflow/dags/ocsge.py index 02859b834..da36b782f 100644 --- a/airflow/dags/ocsge.py +++ b/airflow/dags/ocsge.py @@ -208,7 +208,7 @@ def get_url(**context) -> str: def check_url_exists(url) -> dict: response = requests.head(url) if not response.ok: - raise ValueError(f"Failed to download {url}. Response : {response.content}") + print(f"Failed to download {url}. Response : {response.content}") return { "url": url, From 98604c5991321d575cd1ee281d6c2a4b50548ea5 Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Thu, 29 Aug 2024 16:16:40 +0200 Subject: [PATCH 41/99] temp --- .../models/ocsge/intersected/occupation_du_sol_commune.sql | 2 +- .../ocsge/intersected/occupation_du_sol_zonage_urbanisme.sql | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/airflow/include/sql/sparte/models/ocsge/intersected/occupation_du_sol_commune.sql b/airflow/include/sql/sparte/models/ocsge/intersected/occupation_du_sol_commune.sql index e75c9bc29..16146f5a1 100644 --- a/airflow/include/sql/sparte/models/ocsge/intersected/occupation_du_sol_commune.sql +++ b/airflow/include/sql/sparte/models/ocsge/intersected/occupation_du_sol_commune.sql @@ -24,7 +24,7 @@ la surface totale de l'objet sera conservée. */ with max_ocsge_loaded_date as ( - SELECT max(loaded_date) as ocsge_loaded_date FROM {{ this }} + SELECT max(ocsge_loaded_date) as ocsge_loaded_date FROM {{ this }} ), occupation_du_sol_commune_without_surface as ( SELECT concat(ocsge.uuid::text, '_', commune.code::text) as ocsge_commune_id, -- surrogate key diff --git a/airflow/include/sql/sparte/models/ocsge/intersected/occupation_du_sol_zonage_urbanisme.sql b/airflow/include/sql/sparte/models/ocsge/intersected/occupation_du_sol_zonage_urbanisme.sql index f8d9ad651..94bdd41dd 100644 --- a/airflow/include/sql/sparte/models/ocsge/intersected/occupation_du_sol_zonage_urbanisme.sql +++ b/airflow/include/sql/sparte/models/ocsge/intersected/occupation_du_sol_zonage_urbanisme.sql @@ -28,9 +28,9 @@ la surface totale de l'objet sera conservée. 
with max_ocsge_loaded_date as ( - SELECT max(loaded_date) as ocsge_loaded_date FROM {{ this }} + SELECT max(ocsge_loaded_date) as ocsge_loaded_date FROM {{ this }} ), max_zonage_gpu_timestamp as ( - SELECT max(gpu_timestamp) as zonage_gpu_timestamp FROM {{ this }} + SELECT max(zonage_gpu_timestamp) as zonage_gpu_timestamp FROM {{ this }} ), occupation_du_sol_zonage_urbanisme_without_surface as ( SELECT concat(ocsge.uuid::text, '_', zonage.checksum::text) as ocsge_zonage_id, -- surrogate key From ae6b96c1e9452917360b332c6ec0ef7b0624a77e Mon Sep 17 00:00:00 2001 From: "Alexis A." Date: Fri, 30 Aug 2024 12:01:21 +0200 Subject: [PATCH 42/99] feat(airflow): allow multiple type of normalizations --- airflow/dags/download_all_ocsge.py | 75 ++++++++++++++++++++++++ airflow/dags/ocsge.py | 10 +++- airflow/include/ocsge/normalization.py | 79 ++++++++++++++++++++------ airflow/include/shapefile.py | 6 ++ 4 files changed, 151 insertions(+), 19 deletions(-) create mode 100644 airflow/dags/download_all_ocsge.py create mode 100644 airflow/include/shapefile.py diff --git a/airflow/dags/download_all_ocsge.py b/airflow/dags/download_all_ocsge.py new file mode 100644 index 000000000..931e25751 --- /dev/null +++ b/airflow/dags/download_all_ocsge.py @@ -0,0 +1,75 @@ +import cgi +import json +import os + +import pendulum +import requests +from airflow.decorators import dag +from airflow.operators.empty import EmptyOperator +from airflow.operators.python import PythonOperator +from include.container import Container + +with open("include/ocsge/sources.json", "r") as f: + sources = json.load(f) + + +def get_urls_from_sources(sources: dict) -> list[str]: + urls = [] + + for departement in sources: + difference = sources[departement]["difference"] + occupation_du_sol_et_zone_construite = sources[departement]["occupation_du_sol_et_zone_construite"] + for years in difference: + urls.append(difference[years]) + for year in occupation_du_sol_et_zone_construite: + 
urls.append(occupation_du_sol_et_zone_construite[year]) + + return urls + + +def download_file_to_s3(url: str): + bucket_name = "airflow-staging" + response = requests.get(url) + + if not response.ok: + raise ValueError(f"Failed to download {url}. Response : {response.content}") + + header = response.headers["content-disposition"] + _, params = cgi.parse_header(header) + filename = params.get("filename") + + path_on_bucket = f"{bucket_name}/{os.path.basename(filename)}" + with Container().s3().open(path_on_bucket, "wb") as distant_file: + distant_file.write(response.content) + + return path_on_bucket + + +@dag( + dag_id="download_all_ocsge", + start_date=pendulum.datetime(2024, 1, 1), + schedule="@once", + catchup=False, + doc_md=__doc__, + max_active_runs=1, + default_args={"owner": "Alexis Athlani", "retries": 3}, + tags=["OCS GE"], + max_active_tasks=10, +) +def download_all_ocsge(): + start = EmptyOperator(task_id="start") + end = EmptyOperator(task_id="end") + + tasks = [] + for url in get_urls_from_sources(sources): + task = PythonOperator( + task_id=url.split("/")[-1], + python_callable=download_file_to_s3, + op_args=[url], + ) + tasks.append(task) + + start >> tasks >> end + + +download_all_ocsge() diff --git a/airflow/dags/ocsge.py b/airflow/dags/ocsge.py index da36b782f..eef36bb30 100644 --- a/airflow/dags/ocsge.py +++ b/airflow/dags/ocsge.py @@ -22,6 +22,7 @@ ocsge_occupation_du_sol_normalization_sql, ocsge_zone_construite_normalization_sql, ) +from include.shapefile import get_shapefile_fields from include.utils import multiline_string_to_single_line @@ -106,7 +107,7 @@ def get_vars_by_shapefile_name(shapefile_name: str) -> dict | None: return vars[source_name] -def load_shapefile_to_dw( +def load_shapefiles_to_dw( path: str, years: list[int], departement: str, @@ -126,12 +127,15 @@ def load_shapefile_to_dw( if not variables: continue + fields = get_shapefile_fields(file_path) + sql = multiline_string_to_single_line( variables["normalization_sql"]( 
shapefile_name=filename.split(".")[0], years=years, departement=departement, loaded_date=loaded_date, + fields=fields, ) ) table_name = variables[table_key] @@ -244,7 +248,7 @@ def ingest_staging(path, **context) -> int: departement = context["params"]["departement"] years = context["params"]["years"] - load_shapefile_to_dw( + load_shapefiles_to_dw( path=path, years=years, departement=departement, @@ -267,7 +271,7 @@ def ingest_ocsge(path, **context) -> int: departement = context["params"]["departement"] years = context["params"]["years"] - load_shapefile_to_dw( + load_shapefiles_to_dw( path=path, years=years, departement=departement, diff --git a/airflow/include/ocsge/normalization.py b/airflow/include/ocsge/normalization.py index 26b5f5360..e1e7db95c 100644 --- a/airflow/include/ocsge/normalization.py +++ b/airflow/include/ocsge/normalization.py @@ -1,27 +1,56 @@ +def get_normalized_fields( + shapefile_fields: list[str], + possible_normalized_fields: list[dict[str, str]], +) -> dict[str, str]: + """ + Check if all the expected fields are present in one of the possible_normalized_fields + Otherwise raise a ValueError + """ + for normalized_fields in possible_normalized_fields: + if set(normalized_fields.values()).issubset(set(shapefile_fields)): + print(f"Normalized fields found : {normalized_fields}") + return normalized_fields + + raise ValueError( + f"Could not find the normalized fields in the shapefile. 
Shapefile fields are : {shapefile_fields}" + ) + + def ocsge_diff_normalization_sql( years: list[int], departement: str, shapefile_name: str, loaded_date: float, + fields: list[str], ) -> str: - fields = { - "cs_new": f"CS_{years[1]}", - "cs_old": f"CS_{years[0]}", - "us_new": f"US_{years[1]}", - "us_old": f"US_{years[0]}", - "year_old": years[0], - "year_new": years[1], - } + possible_normalized_fields = [ + { + "cs_new": f"CS_{years[1]}", + "cs_old": f"CS_{years[0]}", + "us_new": f"US_{years[1]}", + "us_old": f"US_{years[0]}", + }, + { + "cs_new": "cs_apres", + "cs_old": "cs_avant", + "us_new": "us_apres", + "us_old": "us_avant", + }, + ] + normalized_fields = get_normalized_fields( + shapefile_fields=fields, + possible_normalized_fields=possible_normalized_fields, + ) return f""" SELECT {loaded_date} AS loaded_date, - {fields['year_old']} AS year_old, - {fields['year_new']} AS year_new, - {fields['cs_new']} AS cs_new, - {fields['cs_old']} AS cs_old, - {fields['us_new']} AS us_new, - {fields['us_old']} AS us_old, + {years[0]} AS year_old, + {years[1]} AS year_new, + {normalized_fields['cs_new']} AS cs_new, + {normalized_fields['cs_old']} AS cs_old, + {normalized_fields['us_new']} AS us_new, + {normalized_fields['us_old']} AS us_old, '{departement}' AS departement, CreateUUID() as uuid, GEOMETRY as geom @@ -35,12 +64,28 @@ def ocsge_occupation_du_sol_normalization_sql( departement: str, shapefile_name: str, loaded_date: float, + fields: list[str], ) -> str: + possible_normalized_fields = [ + { + "code_cs": "code_cs", + "code_us": "code_us", + }, + { + "code_cs": "couverture", + "code_us": "usage", + }, + ] + normalized_fields = get_normalized_fields( + shapefile_fields=fields, + possible_normalized_fields=possible_normalized_fields, + ) + return f""" SELECT {loaded_date} AS loaded_date, ID AS id, - code_cs AS code_cs, - code_us AS code_us, + {normalized_fields['code_cs']} AS code_cs, + {normalized_fields['code_us']} AS code_us, GEOMETRY AS geom, '{departement}' AS 
departement, {years[0]} AS year, @@ -55,7 +100,9 @@ def ocsge_zone_construite_normalization_sql( departement: str, shapefile_name: str, loaded_date: float, + fields: list[str], ) -> str: + print(fields) return f""" SELECT {loaded_date} AS loaded_date, ID AS id, diff --git a/airflow/include/shapefile.py b/airflow/include/shapefile.py new file mode 100644 index 000000000..081a1e095 --- /dev/null +++ b/airflow/include/shapefile.py @@ -0,0 +1,6 @@ +import geopandas as gpd + + +def get_shapefile_fields(shapefile_path: str) -> list[str]: + df = gpd.read_file(shapefile_path) + return df.columns.to_list() From eeaf540f6aae683fc4b1435ea28b35dc3dab0a5b Mon Sep 17 00:00:00 2001 From: "Alexis A." Date: Fri, 30 Aug 2024 12:20:42 +0200 Subject: [PATCH 43/99] feat(airflow): add noramlization ruel for ocsge --- airflow/include/ocsge/normalization.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/airflow/include/ocsge/normalization.py b/airflow/include/ocsge/normalization.py index e1e7db95c..b3bae5534 100644 --- a/airflow/include/ocsge/normalization.py +++ b/airflow/include/ocsge/normalization.py @@ -71,6 +71,10 @@ def ocsge_occupation_du_sol_normalization_sql( "code_cs": "code_cs", "code_us": "code_us", }, + { + "code_cs": "CODE_CS", + "code_us": "CODE_US", + }, { "code_cs": "couverture", "code_us": "usage", From db055704c04f6ca558a17c1305e68b1f57414833 Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Fri, 30 Aug 2024 14:37:15 +0200 Subject: [PATCH 44/99] feat(zonage_urbanisme): make_valid on geom --- airflow/include/sql/sparte/models/gpu/zonage_urbanisme.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airflow/include/sql/sparte/models/gpu/zonage_urbanisme.sql b/airflow/include/sql/sparte/models/gpu/zonage_urbanisme.sql index 688de9734..f19488de1 100644 --- a/airflow/include/sql/sparte/models/gpu/zonage_urbanisme.sql +++ b/airflow/include/sql/sparte/models/gpu/zonage_urbanisme.sql @@ -30,13 +30,13 @@ SELECT *, ST_Area(geom) as surface FROM ( row_number() OVER (PARTITION BY geom ORDER BY gpu_timestamp), CASE WHEN ST_IsValid(geom) THEN ST_transform(geom, 2154) - ELSE st_multi( + ELSE ST_MakeValid(st_multi( st_collectionextract( st_makevalid( ST_transform(geom, 2154) ), 3) - ) + )) END as geom FROM {{ source('public', 'gpu_zone_urba') }} From b9a449e67a8b5510da9e7276135152e113f501be Mon Sep 17 00:00:00 2001 From: "Alexis A." Date: Fri, 30 Aug 2024 15:02:23 +0200 Subject: [PATCH 45/99] temp --- airflow/include/sql/sparte/models/gpu/zonage_urbanisme.sql | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/airflow/include/sql/sparte/models/gpu/zonage_urbanisme.sql b/airflow/include/sql/sparte/models/gpu/zonage_urbanisme.sql index f19488de1..926a12c81 100644 --- a/airflow/include/sql/sparte/models/gpu/zonage_urbanisme.sql +++ b/airflow/include/sql/sparte/models/gpu/zonage_urbanisme.sql @@ -28,16 +28,13 @@ SELECT *, ST_Area(geom) as surface FROM ( NULLIF(idurba, '') as id_document_urbanisme, checksum, row_number() OVER (PARTITION BY geom ORDER BY gpu_timestamp), - CASE - WHEN ST_IsValid(geom) THEN ST_transform(geom, 2154) - ELSE ST_MakeValid(st_multi( + ST_MakeValid(st_multi( st_collectionextract( st_makevalid( ST_transform(geom, 2154) ), 3) - )) - END as geom + )) as geom FROM {{ source('public', 'gpu_zone_urba') }} ) as foo From 8a4e298f6447e50e06c298f9ca1e0a83fc954d74 Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Sun, 1 Sep 2024 15:08:33 +0200 Subject: [PATCH 46/99] fix(dbt): remove unique condition on zone construite --- airflow/include/sql/sparte/models/ocsge/schema.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/airflow/include/sql/sparte/models/ocsge/schema.yml b/airflow/include/sql/sparte/models/ocsge/schema.yml index a25827a5c..55acde6a5 100644 --- a/airflow/include/sql/sparte/models/ocsge/schema.yml +++ b/airflow/include/sql/sparte/models/ocsge/schema.yml @@ -58,7 +58,7 @@ models: - name: id data_tests: - not_null - - unique + # - unique -> the same object can be present in different departement. TODO: replace the departement field by an array - name: year data_tests: - not_null @@ -87,6 +87,7 @@ models: - name: id data_tests: - not_null + # - unique -> the same object can be present in different departement. TODO: replace the departement field by an array - name: code_cs data_tests: - not_null From 3ac893b1cd51164bb54b69adf7d702a6d7037c9e Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Tue, 3 Sep 2024 16:17:58 +0200 Subject: [PATCH 47/99] feat(airflow): add ocsge 22 source --- airflow/include/ocsge/sources.json | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/airflow/include/ocsge/sources.json b/airflow/include/ocsge/sources.json index 14cb21e90..6c0c31f3d 100644 --- a/airflow/include/ocsge/sources.json +++ b/airflow/include/ocsge/sources.json @@ -44,6 +44,15 @@ "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D017_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D017_2021-01-01.7z" } }, + "22": { + "difference": { + "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D022_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D022_2018-2021.7z" + }, + "occupation_du_sol_et_zone_construite": { + "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D022_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D022_2018-01-01.7z", + "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D022_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D022_2021-01-01.7z" + } + }, "24": { "difference": { "2017_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D024_2017-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D024_2017-2021.7z" From 8aa9dbba3e41e6288aea0a7696b64cb505e61ba4 Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Tue, 3 Sep 2024 22:11:18 +0200 Subject: [PATCH 48/99] feat(dbt): readd map_color to commune --- airflow/include/sql/sparte/models/app/app_commune.sql | 1 + .../include/sql/sparte/models/ocsge/for_app/for_app_commune.sql | 1 + 2 files changed, 2 insertions(+) diff --git a/airflow/include/sql/sparte/models/app/app_commune.sql b/airflow/include/sql/sparte/models/app/app_commune.sql index c7e5d934d..3b550d412 100644 --- a/airflow/include/sql/sparte/models/app/app_commune.sql +++ b/airflow/include/sql/sparte/models/app/app_commune.sql @@ -15,6 +15,7 @@ SELECT ocsge_available, first_millesime, last_millesime, + map_color, surface_artif FROM {{ source('public', 'app_commune') }} diff --git a/airflow/include/sql/sparte/models/ocsge/for_app/for_app_commune.sql b/airflow/include/sql/sparte/models/ocsge/for_app/for_app_commune.sql index abef47fd8..4cfcdfafa 100644 --- a/airflow/include/sql/sparte/models/ocsge/for_app/for_app_commune.sql +++ b/airflow/include/sql/sparte/models/ocsge/for_app/for_app_commune.sql @@ -36,6 +36,7 @@ SELECT commune.departement_id, commune.epci_id, commune.scot_id, + commune.map_color, CASE WHEN artif_commune.surface IS NOT NULL From 558f4906731e20b49c81ae26dd0fbcd16d2a02db Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Wed, 4 Sep 2024 16:03:14 +0200 Subject: [PATCH 49/99] feat(airflow): allow update of staging, production or dev --- airflow/dags/ingest_admin_express.py | 2 +- airflow/dags/ingest_app_dependencies.py | 4 +- airflow/dags/ingest_gpu.py | 2 +- airflow/dags/ocsge.py | 4 +- airflow/dags/update_app.py | 161 +++++++++++++++++------- airflow/include/container.py | 52 ++++---- 6 files changed, 144 insertions(+), 81 deletions(-) diff --git a/airflow/dags/ingest_admin_express.py b/airflow/dags/ingest_admin_express.py index 75fcdb5b8..77ffa66f6 100644 --- a/airflow/dags/ingest_admin_express.py +++ b/airflow/dags/ingest_admin_express.py @@ -41,7 +41,7 @@ def ingest() -> str: for filename in filenames: if filename.endswith(".shp"): path = os.path.abspath(os.path.join(dirpath, filename)) - cmd = f'ogr2ogr -f "PostgreSQL" "{Container().gdal_dw_conn_str()}" -overwrite -lco GEOMETRY_NAME=geom -a_srs EPSG:2154 -nlt MULTIPOLYGON -nlt PROMOTE_TO_MULTI {path} --config PG_USE_COPY YES' # noqa: E501 + cmd = f'ogr2ogr -f "PostgreSQL" "{Container().gdal_dbt_conn().encode()}" -overwrite -lco GEOMETRY_NAME=geom -a_srs EPSG:2154 -nlt MULTIPOLYGON -nlt PROMOTE_TO_MULTI {path} --config PG_USE_COPY YES' # noqa: E501 subprocess.run(cmd, shell=True, check=True) @task.bash(retries=0, trigger_rule="all_success") diff --git a/airflow/dags/ingest_app_dependencies.py b/airflow/dags/ingest_app_dependencies.py index b257419b7..adc7a086c 100644 --- a/airflow/dags/ingest_app_dependencies.py +++ b/airflow/dags/ingest_app_dependencies.py @@ -8,8 +8,8 @@ def ingest_table(source_table_name: str, destination_table_name: str): ogr = ogr2ogr() ogr.config_options = {"PG_USE_COPY": "YES", "OGR_TRUNCATE": "NO"} ogr.set_preserve_fid(True) - ogr.set_input(Container().gdal_app_conn(), table_name=source_table_name, srs="EPSG:4326") - ogr.set_output(Container().gdal_dw_conn(), table_name=destination_table_name, srs="EPSG:4326") + ogr.set_input(Container().gdal_prod_conn(), table_name=source_table_name, 
srs="EPSG:4326") + ogr.set_output(Container().gdal_dbt_conn(), table_name=destination_table_name, srs="EPSG:4326") ogr.set_output_mode(layer_mode=ogr.MODE_LAYER_OVERWRITE) ogr.execute() return ogr.safe_args diff --git a/airflow/dags/ingest_gpu.py b/airflow/dags/ingest_gpu.py index 2f65e6cfb..9537fa735 100644 --- a/airflow/dags/ingest_gpu.py +++ b/airflow/dags/ingest_gpu.py @@ -65,7 +65,7 @@ def ingest(path_on_bucket: str) -> str: "SQLITE", "-f", '"PostgreSQL"', - f'"{Container().gdal_dw_conn_str()}"', + f'"{Container().gdal_dbt_conn().encode()}"', "-overwrite", "-lco", "GEOMETRY_NAME=geom", diff --git a/airflow/dags/ocsge.py b/airflow/dags/ocsge.py index eef36bb30..428c23f1e 100644 --- a/airflow/dags/ocsge.py +++ b/airflow/dags/ocsge.py @@ -146,7 +146,7 @@ def load_shapefiles_to_dw( "SQLITE", "-f", '"PostgreSQL"', - f'"{Container().gdal_dw_conn_str()}"', + f'"{Container().gdal_dbt_conn().encode()}"', f"-{mode}", "-lco", "GEOMETRY_NAME=geom", @@ -292,7 +292,7 @@ def delete_previously_loaded_data_in_dw(**context) -> dict: dataset = context["params"]["dataset"] departement = context["params"]["departement"] years = context["params"]["years"] - conn = Container().psycopg2_dw_conn() + conn = Container().psycopg2_dbt_conn() cur = conn.cursor() results = {} diff --git a/airflow/dags/update_app.py b/airflow/dags/update_app.py index defa907a0..e6c38ac62 100644 --- a/airflow/dags/update_app.py +++ b/airflow/dags/update_app.py @@ -1,26 +1,60 @@ from airflow.decorators import dag, task -from gdaltools import ogr2ogr, ogrinfo +from airflow.models.param import Param +from gdaltools import PgConnectionString, ogr2ogr, ogrinfo from include.container import Container from pendulum import datetime +STAGING = "staging" +PRODUCTION = "production" +DEV = "dev" -def create_spatial_index(table_name: str, column_name="mpoly"): + +def get_database_connection_string(environment: str) -> PgConnectionString: + return { + STAGING: Container().gdal_staging_conn(), + PRODUCTION: 
Container().gdal_prod_conn(), + DEV: Container().gdal_dev_conn(), + }[environment] + + +def create_spatial_index(table_name: str, column_name: str, conn: PgConnectionString): sql = f"CREATE INDEX IF NOT EXISTS ON {table_name} USING GIST ({column_name});" - return ogrinfo(Container().gdal_app_conn(), sql=sql) + return ogrinfo(conn, sql=sql) + + +def create_btree_index(table_name: str, columns_name: list[str], conn: PgConnectionString): + sql = f"CREATE INDEX IF NOT EXISTS ON {table_name} ({', '.join(columns_name)});" + return ogrinfo(conn, sql=sql) def copy_table_from_dw_to_app( from_table: str, to_table: str, + environment: str, + spatial_index_column: str = None, + btree_index_columns: list[list[str]] = None, ): ogr = ogr2ogr() ogr.config_options = {"PG_USE_COPY": "YES", "OGR_TRUNCATE": "NO"} - ogr.set_input(Container().gdal_dw_conn(), table_name=from_table) + conn = get_database_connection_string(environment) + ogr.set_input(conn, table_name=from_table) # the option below will an id column to the table only if it does not exist ogr.layer_creation_options = {"FID": "id"} - ogr.set_output(Container().gdal_app_conn(), table_name=to_table) + ogr.set_output(conn, table_name=to_table) ogr.set_output_mode(layer_mode=ogr.MODE_LAYER_OVERWRITE) ogr.execute() + + if spatial_index_column: + create_spatial_index( + table_name=to_table, + column_name=spatial_index_column, + conn=conn, + ) + + if btree_index_columns: + for columns in btree_index_columns: + create_btree_index(table_name=to_table, columns_name=columns, conn=conn) + return ogr.safe_args @@ -31,70 +65,105 @@ def copy_table_from_dw_to_app( doc_md=__doc__, default_args={"owner": "Alexis Athlani", "retries": 3}, tags=["App"], + params={ + "environment": Param( + default=DEV, + type="string", + enum=[ + STAGING, + PRODUCTION, + DEV, + ], + ) + }, ) def update_app(): # noqa: C901 @task.python - def copy_public_data_ocsge(): - to_table = "public.public_data_ocsge" - copy_result = 
copy_table_from_dw_to_app("public_ocsge.for_app_ocsge", to_table) - create_spatial_index(to_table) - return copy_result + def copy_public_data_ocsge(**context): + return copy_table_from_dw_to_app( + from_table="public_ocsge.for_app_ocsge", + to_table="public.public_data_ocsge", + environment=context["params"]["environment"], + spatial_index_column="mpoly", + ) @task.python - def copy_public_data_artificialarea(): - to_table = "public.public_data_artificialarea" - copy_result = copy_table_from_dw_to_app("public_ocsge.for_app_artificialarea", to_table) - create_spatial_index(to_table) - return copy_result + def copy_public_data_artificialarea(**context): + return copy_table_from_dw_to_app( + from_table="public_ocsge.for_app_artificialarea", + to_table="public.public_data_artificialarea", + environment=context["params"]["environment"], + spatial_index_column="mpoly", + ) @task.python - def copy_public_data_artifareazoneurba(): + def copy_public_data_artifareazoneurba(**context): return copy_table_from_dw_to_app( - "public_ocsge.for_app_artifareazoneurba", "public.public_data_artifareazoneurba" + from_table="public_ocsge.for_app_artifareazoneurba", + to_table="public.public_data_artifareazoneurba", + environment=context["params"]["environment"], ) @task.python - def copy_public_data_commune(): - to_table = "public.public_data_commune" - copy_result = copy_table_from_dw_to_app("public_ocsge.for_app_commune", to_table) - create_spatial_index(to_table) - return copy_result + def copy_public_data_commune(**context): + return copy_table_from_dw_to_app( + from_table="public_ocsge.for_app_commune", + to_table="public.public_data_commune", + environment=context["params"]["environment"], + spatial_index_column="mpoly", + ) @task.python - def copy_public_data_departement(): - to_table = "public.public_data_departement" - copy_result = copy_table_from_dw_to_app("public_ocsge.for_app_departement", to_table) - create_spatial_index(to_table) - return copy_result + def 
copy_public_data_departement(**context): + return copy_table_from_dw_to_app( + from_table="public_ocsge.for_app_departement", + to_table="public.public_data_departement", + environment=context["params"]["environment"], + spatial_index_column="mpoly", + ) @task.python - def copy_public_data_communesol(): - return copy_table_from_dw_to_app("public_ocsge.for_app_communesol", "public.public_data_communesol") + def copy_public_data_communesol(**context): + return copy_table_from_dw_to_app( + from_table="public_ocsge.for_app_communesol", + to_table="public.public_data_communesol", + environment=context["params"]["environment"], + ) @task.python - def copy_public_data_ocsgediff(): - to_table = "public.public_data_ocsgediff" - copy_result = copy_table_from_dw_to_app("public_ocsge.for_app_ocsgediff", to_table) - create_spatial_index(to_table) - return copy_result + def copy_public_data_ocsgediff(**context): + return copy_table_from_dw_to_app( + from_table="public_ocsge.for_app_ocsgediff", + to_table="public.public_data_ocsgediff", + environment=context["params"]["environment"], + spatial_index_column="mpoly", + ) @task.python - def copy_public_data_communediff(): - return copy_table_from_dw_to_app("public_ocsge.for_app_communediff", "public.public_data_communediff") + def copy_public_data_communediff(**context): + return copy_table_from_dw_to_app( + from_table="public_ocsge.for_app_communediff", + to_table="public.public_data_communediff", + environment=context["params"]["environment"], + ) @task.python - def copy_public_data_zoneconstruite(): - to_table = "public.public_data_zoneconstruite" - copy_result = copy_table_from_dw_to_app("public_ocsge.for_app_zoneconstruite", to_table) - create_spatial_index(to_table) - return copy_result + def copy_public_data_zoneconstruite(**context): + return copy_table_from_dw_to_app( + from_table="public_ocsge.for_app_zoneconstruite", + to_table="public.public_data_zoneconstruite", + environment=context["params"]["environment"], + 
spatial_index_column="mpoly", + ) @task.python - def copy_public_data_zoneurba(): - to_table = "public.public_data_zoneurba" - copy_result = copy_table_from_dw_to_app("public_gpu.for_app_zoneurba", to_table) - create_spatial_index(to_table) - return copy_result + def copy_public_data_zoneurba(**context): + return copy_table_from_dw_to_app( + from_table="public_gpu.for_app_zoneurba", + to_table="public.public_data_zoneurba", + environment=context["params"]["environment"], + spatial_index_column="mpoly", + ) ( copy_public_data_ocsge() diff --git a/airflow/include/container.py b/airflow/include/container.py index 05d47440c..17801a926 100644 --- a/airflow/include/container.py +++ b/airflow/include/container.py @@ -31,12 +31,7 @@ class Container(containers.DeclarativeContainer): }, ) - postgres_conn_sqlalchemy = providers.Factory( - create_sql_alchemy_conn, - url=getenv("AIRFLOW_CONN_DATA_WAREHOUSE"), - ) - - gdal_dw_conn = providers.Factory( + gdal_dbt_conn = providers.Factory( PgConnectionString, dbname=getenv("DBT_DB_NAME"), user=getenv("DBT_DB_USER"), @@ -44,9 +39,8 @@ class Container(containers.DeclarativeContainer): host=getenv("DBT_DB_HOST"), port=getenv("DBT_DB_PORT"), ) - - gdal_dw_conn_str = providers.Factory( - db_str_for_ogr2ogr, + psycopg2_dbt_conn: connection = providers.Factory( + provides=connect, dbname=getenv("DBT_DB_NAME"), user=getenv("DBT_DB_USER"), password=getenv("DBT_DB_PASSWORD"), @@ -54,31 +48,31 @@ class Container(containers.DeclarativeContainer): port=getenv("DBT_DB_PORT"), ) - psycopg2_dw_conn: connection = providers.Factory( - provides=connect, - dbname=getenv("DBT_DB_NAME"), - user=getenv("DBT_DB_USER"), - password=getenv("DBT_DB_PASSWORD"), - host=getenv("DBT_DB_HOST"), - port=getenv("DBT_DB_PORT"), + gdal_dev_conn = providers.Factory( + PgConnectionString, + dbname=getenv("DEV_DB_NAME"), + user=getenv("DEV_DB_USER"), + password=getenv("DEV_DB_PASSWORD"), + host=getenv("DEV_DB_HOST"), + port=getenv("DEV_DB_PORT"), ) - psycopg2_app_conn: 
connection = providers.Factory( - provides=connect, - dbname=getenv("APP_DB_NAME"), - user=getenv("APP_DB_USER"), - password=getenv("APP_DB_PASSWORD"), - host=getenv("APP_DB_HOST"), - port=getenv("APP_DB_PORT"), + gdal_prod_conn = providers.Factory( + PgConnectionString, + dbname=getenv("PROD_DB_NAME"), + user=getenv("PROD_DB_USER"), + password=getenv("PROD_DB_PASSWORD"), + host=getenv("PROD_DB_HOST"), + port=getenv("PROD_DB_PORT"), ) - gdal_app_conn = providers.Factory( + gdal_staging_conn = providers.Factory( PgConnectionString, - dbname=getenv("APP_DB_NAME"), - user=getenv("APP_DB_USER"), - password=getenv("APP_DB_PASSWORD"), - host=getenv("APP_DB_HOST"), - port=getenv("APP_DB_PORT"), + dbname=getenv("STAGING_DB_NAME"), + user=getenv("STAGING_DB_USER"), + password=getenv("STAGING_DB_PASSWORD"), + host=getenv("STAGING_DB_HOST"), + port=getenv("STAGING_DB_PORT"), ) cnopts = pysftp.CnOpts() From c1e18f17900eef76a2bf9e6e2c33239c6839ff55 Mon Sep 17 00:00:00 2001 From: "Alexis A." Date: Wed, 4 Sep 2024 16:11:33 +0200 Subject: [PATCH 50/99] fea(update_app): set source to dbt --- airflow/dags/update_app.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/airflow/dags/update_app.py b/airflow/dags/update_app.py index e6c38ac62..40d0ee48e 100644 --- a/airflow/dags/update_app.py +++ b/airflow/dags/update_app.py @@ -36,10 +36,11 @@ def copy_table_from_dw_to_app( ): ogr = ogr2ogr() ogr.config_options = {"PG_USE_COPY": "YES", "OGR_TRUNCATE": "NO"} - conn = get_database_connection_string(environment) - ogr.set_input(conn, table_name=from_table) + ogr.set_input(Container().gdal_dbt_conn(), table_name=from_table) # the option below will an id column to the table only if it does not exist ogr.layer_creation_options = {"FID": "id"} + + conn = get_database_connection_string(environment) ogr.set_output(conn, table_name=to_table) ogr.set_output_mode(layer_mode=ogr.MODE_LAYER_OVERWRITE) ogr.execute() From 2da179b9570559a31787cc837f19310269f8db64 Mon Sep 17 
00:00:00 2001 From: "Alexis A." Date: Wed, 4 Sep 2024 17:55:57 +0200 Subject: [PATCH 51/99] feat(airflow): add indexes --- airflow/README.md | 48 ------------------- .../diff_ocsge_download_page_to_mattermost.py | 11 +++-- airflow/dags/download_all_ocsge.py | 7 ++- airflow/dags/ingest_admin_express.py | 5 ++ airflow/dags/ingest_app_dependencies.py | 5 ++ airflow/dags/ingest_gpu.py | 4 ++ airflow/dags/ocsge.py | 5 ++ airflow/dags/update_app.py | 43 +++++++++++++++++ 8 files changed, 75 insertions(+), 53 deletions(-) diff --git a/airflow/README.md b/airflow/README.md index 699fda7b9..e69de29bb 100644 --- a/airflow/README.md +++ b/airflow/README.md @@ -1,48 +0,0 @@ -Overview -======== - -Welcome to Astronomer! This project was generated after you ran 'astro dev init' using the Astronomer CLI. This readme describes the contents of the project, as well as how to run Apache Airflow on your local machine. - -Project Contents -================ - -Your Astro project contains the following files and folders: - -- dags: This folder contains the Python files for your Airflow DAGs. By default, this directory includes one example DAG: - - `example_astronauts`: This DAG shows a simple ETL pipeline example that queries the list of astronauts currently in space from the Open Notify API and prints a statement for each astronaut. The DAG uses the TaskFlow API to define tasks in Python, and dynamic task mapping to dynamically print a statement for each astronaut. For more on how this DAG works, see our [Getting started tutorial](https://docs.astronomer.io/learn/get-started-with-airflow). -- Dockerfile: This file contains a versioned Astro Runtime Docker image that provides a differentiated Airflow experience. If you want to execute other commands or overrides at runtime, specify them here. -- include: This folder contains any additional files that you want to include as part of your project. It is empty by default. 
-- packages.txt: Install OS-level packages needed for your project by adding them to this file. It is empty by default. -- requirements.txt: Install Python packages needed for your project by adding them to this file. It is empty by default. -- plugins: Add custom or community plugins for your project to this file. It is empty by default. -- airflow_settings.yaml: Use this local-only file to specify Airflow Connections, Variables, and Pools instead of entering them in the Airflow UI as you develop DAGs in this project. - -Deploy Your Project Locally -=========================== - -1. Start Airflow on your local machine by running 'astro dev start'. - -This command will spin up 4 Docker containers on your machine, each for a different Airflow component: - -- Postgres: Airflow's Metadata Database -- Webserver: The Airflow component responsible for rendering the Airflow UI -- Scheduler: The Airflow component responsible for monitoring and triggering tasks -- Triggerer: The Airflow component responsible for triggering deferred tasks - -2. Verify that all 4 Docker containers were created by running 'docker ps'. - -Note: Running 'astro dev start' will start your project with the Airflow Webserver exposed at port 8080 and Postgres exposed at port 5432. If you already have either of those ports allocated, you can either [stop your existing Docker containers or change the port](https://docs.astronomer.io/astro/test-and-troubleshoot-locally#ports-are-not-available). - -3. Access the Airflow UI for your local Airflow project. To do so, go to http://localhost:8080/ and log in with 'admin' for both your Username and Password. - -You should also be able to access your Postgres Database at 'localhost:5432/postgres'. - -Deploy Your Project to Astronomer -================================= - -If you have an Astronomer account, pushing code to a Deployment on Astronomer is simple. 
For deploying instructions, refer to Astronomer documentation: https://docs.astronomer.io/cloud/deploy-code/ - -Contact -======= - -The Astronomer CLI is maintained with love by the Astronomer team. To report a bug or suggest a change, reach out to our support. diff --git a/airflow/dags/diff_ocsge_download_page_to_mattermost.py b/airflow/dags/diff_ocsge_download_page_to_mattermost.py index ef44033c9..2fc4f805c 100644 --- a/airflow/dags/diff_ocsge_download_page_to_mattermost.py +++ b/airflow/dags/diff_ocsge_download_page_to_mattermost.py @@ -1,3 +1,8 @@ +""" +Ce dag compare le contenu de la page de téléchargement de l'IGN OCS GE +et envoie un message sur Mattermost en cas de différence. +""" + import difflib import requests @@ -7,14 +12,13 @@ from pendulum import datetime -# Define the basic parameters of the DAG, like schedule and start_date @dag( start_date=datetime(2024, 1, 1), - schedule="0 10 * * *", + schedule="0 10 * * *", # every day at 10:00 catchup=False, doc_md=__doc__, default_args={"owner": "Alexis Athlani", "retries": 3}, - tags=["App"], + tags=["OCS GE"], ) def diff_ocsge_download_page_to_mattermost(): @task.python @@ -57,5 +61,4 @@ def diff(): diff() -# Instantiate the DAG diff_ocsge_download_page_to_mattermost() diff --git a/airflow/dags/download_all_ocsge.py b/airflow/dags/download_all_ocsge.py index 931e25751..ec3ba0bc7 100644 --- a/airflow/dags/download_all_ocsge.py +++ b/airflow/dags/download_all_ocsge.py @@ -1,3 +1,9 @@ +""" +Ce dag télécharge tous les fichiers OCS GE depuis les sources définies +dans `sources.json` et les stocke dans un bucket S3. 
+""" + + import cgi import json import os @@ -46,7 +52,6 @@ def download_file_to_s3(url: str): @dag( - dag_id="download_all_ocsge", start_date=pendulum.datetime(2024, 1, 1), schedule="@once", catchup=False, diff --git a/airflow/dags/ingest_admin_express.py b/airflow/dags/ingest_admin_express.py index 77ffa66f6..81331fc60 100644 --- a/airflow/dags/ingest_admin_express.py +++ b/airflow/dags/ingest_admin_express.py @@ -1,3 +1,8 @@ +""" +Ce dag télécharge et importe les données de l'IGN Admin Express dans une base de données PostgreSQL, +puis lance un job dbt pour les transformer. +""" + import os import subprocess from urllib.request import URLopener diff --git a/airflow/dags/ingest_app_dependencies.py b/airflow/dags/ingest_app_dependencies.py index adc7a086c..d0bced4d9 100644 --- a/airflow/dags/ingest_app_dependencies.py +++ b/airflow/dags/ingest_app_dependencies.py @@ -1,3 +1,8 @@ +""" +Ce dag ingère les dépendances de l'application dans une base de +données PostgreSQL, puis lance un job dbt pour les transformer. +""" + from airflow.decorators import dag, task from gdaltools import ogr2ogr from include.container import Container diff --git a/airflow/dags/ingest_gpu.py b/airflow/dags/ingest_gpu.py index 9537fa735..9005c833a 100644 --- a/airflow/dags/ingest_gpu.py +++ b/airflow/dags/ingest_gpu.py @@ -1,3 +1,7 @@ +""" +Ce dag ingère les données de l'IGN GPU dans une base de données PostgreSQL. +""" + from airflow.decorators import dag, task from airflow.operators.bash import BashOperator from include.container import Container diff --git a/airflow/dags/ocsge.py b/airflow/dags/ocsge.py index 428c23f1e..87c16440d 100644 --- a/airflow/dags/ocsge.py +++ b/airflow/dags/ocsge.py @@ -1,3 +1,8 @@ +""" +Ce dag ingère les données de l'IGN OCS GE dans une base de données +PostgreSQL, puis lance un job dbt pour les transformer. 
+""" + import cgi import json import os diff --git a/airflow/dags/update_app.py b/airflow/dags/update_app.py index 40d0ee48e..c6050e073 100644 --- a/airflow/dags/update_app.py +++ b/airflow/dags/update_app.py @@ -1,3 +1,7 @@ +""" +Ce dag met à jour les données de l'application à partir des données de l'entrepôt de données. +""" + from airflow.decorators import dag, task from airflow.models.param import Param from gdaltools import PgConnectionString, ogr2ogr, ogrinfo @@ -103,6 +107,10 @@ def copy_public_data_artifareazoneurba(**context): from_table="public_ocsge.for_app_artifareazoneurba", to_table="public.public_data_artifareazoneurba", environment=context["params"]["environment"], + btree_index_columns=[ + ["zone_urba"], + ["year"], + ], ) @task.python @@ -112,6 +120,9 @@ def copy_public_data_commune(**context): to_table="public.public_data_commune", environment=context["params"]["environment"], spatial_index_column="mpoly", + btree_index_columns=[ + ["insee"], + ], ) @task.python @@ -121,6 +132,9 @@ def copy_public_data_departement(**context): to_table="public.public_data_departement", environment=context["params"]["environment"], spatial_index_column="mpoly", + btree_index_columns=[ + ["source_id"], + ], ) @task.python @@ -129,6 +143,11 @@ def copy_public_data_communesol(**context): from_table="public_ocsge.for_app_communesol", to_table="public.public_data_communesol", environment=context["params"]["environment"], + btree_index_columns=[ + ["city_id"], + ["matrix_id"], + ["year"], + ], ) @task.python @@ -138,6 +157,15 @@ def copy_public_data_ocsgediff(**context): to_table="public.public_data_ocsgediff", environment=context["params"]["environment"], spatial_index_column="mpoly", + btree_index_columns=[ + ["year_old"], + ["year_new"], + ["departement"], + ["cs_new"], + ["cs_old"], + ["us_new"], + ["us_old"], + ], ) @task.python @@ -146,6 +174,11 @@ def copy_public_data_communediff(**context): from_table="public_ocsge.for_app_communediff", 
to_table="public.public_data_communediff", environment=context["params"]["environment"], + btree_index_columns=[ + ["year_old"], + ["year_new"], + ["city_id"], + ], ) @task.python @@ -155,6 +188,11 @@ def copy_public_data_zoneconstruite(**context): to_table="public.public_data_zoneconstruite", environment=context["params"]["environment"], spatial_index_column="mpoly", + btree_index_columns=[ + ["millesime"], + ["year"], + ["departement"], + ], ) @task.python @@ -164,6 +202,11 @@ def copy_public_data_zoneurba(**context): to_table="public.public_data_zoneurba", environment=context["params"]["environment"], spatial_index_column="mpoly", + btree_index_columns=[ + ["checksum"], + ["libelle"], + ["typezone"], + ], ) ( From ec554b736e8d96e7f4431a438e734e0d776b09fb Mon Sep 17 00:00:00 2001 From: "Alexis A." Date: Wed, 4 Sep 2024 18:45:16 +0200 Subject: [PATCH 52/99] temp --- airflow/dags/update_app.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/airflow/dags/update_app.py b/airflow/dags/update_app.py index c6050e073..85eadabfe 100644 --- a/airflow/dags/update_app.py +++ b/airflow/dags/update_app.py @@ -23,11 +23,13 @@ def get_database_connection_string(environment: str) -> PgConnectionString: def create_spatial_index(table_name: str, column_name: str, conn: PgConnectionString): sql = f"CREATE INDEX IF NOT EXISTS ON {table_name} USING GIST ({column_name});" + print(sql) return ogrinfo(conn, sql=sql) def create_btree_index(table_name: str, columns_name: list[str], conn: PgConnectionString): sql = f"CREATE INDEX IF NOT EXISTS ON {table_name} ({', '.join(columns_name)});" + print(sql) return ogrinfo(conn, sql=sql) @@ -58,7 +60,11 @@ def copy_table_from_dw_to_app( if btree_index_columns: for columns in btree_index_columns: - create_btree_index(table_name=to_table, columns_name=columns, conn=conn) + create_btree_index( + table_name=to_table, + columns_name=columns, + conn=conn, + ) return ogr.safe_args From 0bb76737b8a87c72b9eec14406ac21cb6ba4e07f Mon 
Sep 17 00:00:00 2001 From: "Alexis A." Date: Wed, 4 Sep 2024 18:58:49 +0200 Subject: [PATCH 53/99] feat(update_add): name created btree index --- airflow/dags/update_app.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/airflow/dags/update_app.py b/airflow/dags/update_app.py index 85eadabfe..5186734d6 100644 --- a/airflow/dags/update_app.py +++ b/airflow/dags/update_app.py @@ -28,7 +28,8 @@ def create_spatial_index(table_name: str, column_name: str, conn: PgConnectionSt def create_btree_index(table_name: str, columns_name: list[str], conn: PgConnectionString): - sql = f"CREATE INDEX IF NOT EXISTS ON {table_name} ({', '.join(columns_name)});" + idx_name = f"idx_{'_'.join(columns_name)}_{table_name}" + sql = f"CREATE INDEX IF NOT EXISTS {idx_name} ON {table_name} ({', '.join(columns_name)});" print(sql) return ogrinfo(conn, sql=sql) From a1e5413b79ff5567f7accb07aa6902947536d85f Mon Sep 17 00:00:00 2001 From: "Alexis A." Date: Wed, 4 Sep 2024 19:03:15 +0200 Subject: [PATCH 54/99] feat(update_app): add index type --- airflow/dags/update_app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airflow/dags/update_app.py b/airflow/dags/update_app.py index 5186734d6..fed3d3b19 100644 --- a/airflow/dags/update_app.py +++ b/airflow/dags/update_app.py @@ -29,7 +29,7 @@ def create_spatial_index(table_name: str, column_name: str, conn: PgConnectionSt def create_btree_index(table_name: str, columns_name: list[str], conn: PgConnectionString): idx_name = f"idx_{'_'.join(columns_name)}_{table_name}" - sql = f"CREATE INDEX IF NOT EXISTS {idx_name} ON {table_name} ({', '.join(columns_name)});" + sql = f"CREATE INDEX IF NOT EXISTS {idx_name} ON {table_name} USING btree ({', '.join(columns_name)});" print(sql) return ogrinfo(conn, sql=sql) From 0d1b0f29d6dffbc04e7c231c809b44fcc7d944af Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Wed, 4 Sep 2024 19:21:52 +0200 Subject: [PATCH 55/99] temp --- airflow/dags/update_app.py | 47 ++++++++++++++++++------------------ airflow/include/container.py | 8 ++++++ 2 files changed, 31 insertions(+), 24 deletions(-) diff --git a/airflow/dags/update_app.py b/airflow/dags/update_app.py index fed3d3b19..2962e3684 100644 --- a/airflow/dags/update_app.py +++ b/airflow/dags/update_app.py @@ -4,7 +4,7 @@ from airflow.decorators import dag, task from airflow.models.param import Param -from gdaltools import PgConnectionString, ogr2ogr, ogrinfo +from gdaltools import PgConnectionString, ogr2ogr from include.container import Container from pendulum import datetime @@ -12,26 +12,24 @@ PRODUCTION = "production" DEV = "dev" +GDAL = "gdal" +PSYCOPG = "psycopg" + def get_database_connection_string(environment: str) -> PgConnectionString: return { STAGING: Container().gdal_staging_conn(), PRODUCTION: Container().gdal_prod_conn(), - DEV: Container().gdal_dev_conn(), + DEV: {GDAL: Container().gdal_dev_conn(), PSYCOPG: Container().psycopg2_dev_conn()}, }[environment] -def create_spatial_index(table_name: str, column_name: str, conn: PgConnectionString): - sql = f"CREATE INDEX IF NOT EXISTS ON {table_name} USING GIST ({column_name});" - print(sql) - return ogrinfo(conn, sql=sql) +def get_spatial_index_request(table_name: str, column_name: str): + return f"CREATE INDEX IF NOT EXISTS ON {table_name} USING GIST ({column_name});" -def create_btree_index(table_name: str, columns_name: list[str], conn: PgConnectionString): - idx_name = f"idx_{'_'.join(columns_name)}_{table_name}" - sql = f"CREATE INDEX IF NOT EXISTS {idx_name} ON {table_name} USING btree ({', '.join(columns_name)});" - print(sql) - return ogrinfo(conn, sql=sql) +def get_btree_index_request(table_name: str, columns_name: list[str]): + return f"CREATE INDEX IF NOT EXISTS ON {table_name} USING btree ({', '.join(columns_name)});" def copy_table_from_dw_to_app( @@ -47,27 +45,28 @@ def copy_table_from_dw_to_app( # the 
option below will an id column to the table only if it does not exist ogr.layer_creation_options = {"FID": "id"} - conn = get_database_connection_string(environment) - ogr.set_output(conn, table_name=to_table) + connections = get_database_connection_string(environment) + + ogr.set_output(connections[GDAL], table_name=to_table) ogr.set_output_mode(layer_mode=ogr.MODE_LAYER_OVERWRITE) ogr.execute() + index_requests = [] + index_results = [] + if spatial_index_column: - create_spatial_index( - table_name=to_table, - column_name=spatial_index_column, - conn=conn, - ) + index_requests.append(get_spatial_index_request(to_table, spatial_index_column)) if btree_index_columns: for columns in btree_index_columns: - create_btree_index( - table_name=to_table, - columns_name=columns, - conn=conn, - ) + index_requests.append(get_btree_index_request(to_table, columns)) + + with connections[PSYCOPG].cursor() as cursor: + for request in index_requests: + result = cursor.execute(request) + index_results.append(result.fetchall()) - return ogr.safe_args + return {"index_requests": index_requests, "index_results": index_results, "ogr2ogr_request": ogr.safe_args} @dag( diff --git a/airflow/include/container.py b/airflow/include/container.py index 17801a926..4ba431225 100644 --- a/airflow/include/container.py +++ b/airflow/include/container.py @@ -56,6 +56,14 @@ class Container(containers.DeclarativeContainer): host=getenv("DEV_DB_HOST"), port=getenv("DEV_DB_PORT"), ) + psycopg2_dev_conn: connection = providers.Factory( + provides=connect, + dbname=getenv("DEV_DB_NAME"), + user=getenv("DEV_DB_USER"), + password=getenv("DEV_DB_PASSWORD"), + host=getenv("DEV_DB_HOST"), + port=getenv("DEV_DB_PORT"), + ) gdal_prod_conn = providers.Factory( PgConnectionString, From 022cf384f5892e9d746f55385b82fe2ad4d3633e Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Wed, 4 Sep 2024 19:26:50 +0200 Subject: [PATCH 56/99] temp --- airflow/dags/update_app.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/airflow/dags/update_app.py b/airflow/dags/update_app.py index 2962e3684..7343818ab 100644 --- a/airflow/dags/update_app.py +++ b/airflow/dags/update_app.py @@ -25,11 +25,13 @@ def get_database_connection_string(environment: str) -> PgConnectionString: def get_spatial_index_request(table_name: str, column_name: str): - return f"CREATE INDEX IF NOT EXISTS ON {table_name} USING GIST ({column_name});" + idx_name = f"{table_name}_{column_name}_idx" + return f"CREATE INDEX IF NOT EXISTS {idx_name} ON {table_name} USING GIST ({column_name});" def get_btree_index_request(table_name: str, columns_name: list[str]): - return f"CREATE INDEX IF NOT EXISTS ON {table_name} USING btree ({', '.join(columns_name)});" + idx_name = f"{table_name}_{'_'.join(columns_name)}_idx" + return f"CREATE INDEX IF NOT EXISTS {idx_name} ON {table_name} USING btree ({', '.join(columns_name)});" def copy_table_from_dw_to_app( From 54440312b67472c9ba3c73dbdecf0b8a332494d5 Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Wed, 4 Sep 2024 19:29:16 +0200 Subject: [PATCH 57/99] temp --- airflow/dags/update_app.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airflow/dags/update_app.py b/airflow/dags/update_app.py index 7343818ab..c2ffe16f5 100644 --- a/airflow/dags/update_app.py +++ b/airflow/dags/update_app.py @@ -25,12 +25,12 @@ def get_database_connection_string(environment: str) -> PgConnectionString: def get_spatial_index_request(table_name: str, column_name: str): - idx_name = f"{table_name}_{column_name}_idx" + idx_name = f"{table_name.replace('.', '')}_{column_name}_idx" return f"CREATE INDEX IF NOT EXISTS {idx_name} ON {table_name} USING GIST ({column_name});" def get_btree_index_request(table_name: str, columns_name: list[str]): - idx_name = f"{table_name}_{'_'.join(columns_name)}_idx" + idx_name = f"{table_name.replace('.', '')}_{'_'.join(columns_name)}_idx" return f"CREATE INDEX IF NOT EXISTS {idx_name} ON {table_name} USING btree ({', '.join(columns_name)});" From 85600bb05352a0cc141706f313d5f7451b658bc2 Mon Sep 17 00:00:00 2001 From: "Alexis A." Date: Wed, 4 Sep 2024 19:31:47 +0200 Subject: [PATCH 58/99] temp --- airflow/dags/update_app.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airflow/dags/update_app.py b/airflow/dags/update_app.py index c2ffe16f5..a01fb5b36 100644 --- a/airflow/dags/update_app.py +++ b/airflow/dags/update_app.py @@ -65,8 +65,8 @@ def copy_table_from_dw_to_app( with connections[PSYCOPG].cursor() as cursor: for request in index_requests: - result = cursor.execute(request) - index_results.append(result.fetchall()) + cursor.execute(request) + index_results.append(cursor.fetchall()) return {"index_requests": index_requests, "index_results": index_results, "ogr2ogr_request": ogr.safe_args} From e634b067eee9e70bc1c0606d7e48bd0f66395abf Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Wed, 4 Sep 2024 19:34:03 +0200 Subject: [PATCH 59/99] temp --- airflow/dags/update_app.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/airflow/dags/update_app.py b/airflow/dags/update_app.py index a01fb5b36..d7516097f 100644 --- a/airflow/dags/update_app.py +++ b/airflow/dags/update_app.py @@ -54,7 +54,6 @@ def copy_table_from_dw_to_app( ogr.execute() index_requests = [] - index_results = [] if spatial_index_column: index_requests.append(get_spatial_index_request(to_table, spatial_index_column)) @@ -66,9 +65,8 @@ def copy_table_from_dw_to_app( with connections[PSYCOPG].cursor() as cursor: for request in index_requests: cursor.execute(request) - index_results.append(cursor.fetchall()) - return {"index_requests": index_requests, "index_results": index_results, "ogr2ogr_request": ogr.safe_args} + return {"index_requests": index_requests, "ogr2ogr_request": ogr.safe_args} @dag( From 24a25a4675c7315d1c12f6a166bc2b2523324431 Mon Sep 17 00:00:00 2001 From: "Alexis A." Date: Wed, 4 Sep 2024 19:42:40 +0200 Subject: [PATCH 60/99] temp --- airflow/dags/update_app.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/airflow/dags/update_app.py b/airflow/dags/update_app.py index d7516097f..adb384e5b 100644 --- a/airflow/dags/update_app.py +++ b/airflow/dags/update_app.py @@ -62,9 +62,12 @@ def copy_table_from_dw_to_app( for columns in btree_index_columns: index_requests.append(get_btree_index_request(to_table, columns)) - with connections[PSYCOPG].cursor() as cursor: - for request in index_requests: - cursor.execute(request) + conn = connections[PSYCOPG] + cur = conn.cursor() + for request in index_requests: + cur.execute(request) + conn.commit() + conn.close() return {"index_requests": index_requests, "ogr2ogr_request": ogr.safe_args} From 680a9ee62d2797990b91aecb4ea204dc50b9b607 Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Wed, 4 Sep 2024 21:49:41 +0200 Subject: [PATCH 61/99] feat(dbt): make index creation concurent --- airflow/dags/update_app.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airflow/dags/update_app.py b/airflow/dags/update_app.py index adb384e5b..07f8b38b7 100644 --- a/airflow/dags/update_app.py +++ b/airflow/dags/update_app.py @@ -26,12 +26,12 @@ def get_database_connection_string(environment: str) -> PgConnectionString: def get_spatial_index_request(table_name: str, column_name: str): idx_name = f"{table_name.replace('.', '')}_{column_name}_idx" - return f"CREATE INDEX IF NOT EXISTS {idx_name} ON {table_name} USING GIST ({column_name});" + return f"CREATE INDEX CONCURRENTLY IF NOT EXISTS {idx_name} ON {table_name} USING GIST ({column_name});" def get_btree_index_request(table_name: str, columns_name: list[str]): idx_name = f"{table_name.replace('.', '')}_{'_'.join(columns_name)}_idx" - return f"CREATE INDEX IF NOT EXISTS {idx_name} ON {table_name} USING btree ({', '.join(columns_name)});" + return f"CREATE INDEX CONCURRENTLY IF NOT EXISTS {idx_name} ON {table_name} USING btree ({', '.join(columns_name)});" # noqa: E501 def copy_table_from_dw_to_app( From c8cecb78268f9d228a76c49bd6a845ddbe1c39e7 Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Wed, 4 Sep 2024 23:22:15 +0200 Subject: [PATCH 62/99] feat(dbt): revert make index creation concurent --- airflow/dags/update_app.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airflow/dags/update_app.py b/airflow/dags/update_app.py index 07f8b38b7..adb384e5b 100644 --- a/airflow/dags/update_app.py +++ b/airflow/dags/update_app.py @@ -26,12 +26,12 @@ def get_database_connection_string(environment: str) -> PgConnectionString: def get_spatial_index_request(table_name: str, column_name: str): idx_name = f"{table_name.replace('.', '')}_{column_name}_idx" - return f"CREATE INDEX CONCURRENTLY IF NOT EXISTS {idx_name} ON {table_name} USING GIST ({column_name});" + return f"CREATE INDEX IF NOT EXISTS {idx_name} ON {table_name} USING GIST ({column_name});" def get_btree_index_request(table_name: str, columns_name: list[str]): idx_name = f"{table_name.replace('.', '')}_{'_'.join(columns_name)}_idx" - return f"CREATE INDEX CONCURRENTLY IF NOT EXISTS {idx_name} ON {table_name} USING btree ({', '.join(columns_name)});" # noqa: E501 + return f"CREATE INDEX IF NOT EXISTS {idx_name} ON {table_name} USING btree ({', '.join(columns_name)});" def copy_table_from_dw_to_app( From 394a27b96cd61ee2c8bf39f516f3127f61bbc32b Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Wed, 4 Sep 2024 23:49:30 +0200 Subject: [PATCH 63/99] feat(update_app): change order of tasks --- airflow/dags/update_app.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airflow/dags/update_app.py b/airflow/dags/update_app.py index adb384e5b..5e115bba2 100644 --- a/airflow/dags/update_app.py +++ b/airflow/dags/update_app.py @@ -219,8 +219,7 @@ def copy_public_data_zoneurba(**context): ) ( - copy_public_data_ocsge() - >> copy_public_data_artificialarea() + copy_public_data_artificialarea() >> copy_public_data_artifareazoneurba() >> copy_public_data_commune() >> copy_public_data_departement() @@ -228,6 +227,7 @@ def copy_public_data_zoneurba(**context): >> copy_public_data_ocsgediff() >> copy_public_data_communediff() >> copy_public_data_zoneconstruite() + >> copy_public_data_ocsge() >> copy_public_data_zoneurba() ) From 32db19103a89b5dc78b919c3e990a9df64634485 Mon Sep 17 00:00:00 2001 From: "Alexis A." Date: Wed, 4 Sep 2024 23:49:59 +0200 Subject: [PATCH 64/99] feat(container): add staging and prod psycopg2 dep --- airflow/include/container.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/airflow/include/container.py b/airflow/include/container.py index 4ba431225..6306cc4d1 100644 --- a/airflow/include/container.py +++ b/airflow/include/container.py @@ -30,7 +30,7 @@ class Container(containers.DeclarativeContainer): "region_name": BaseHook.get_connection("scaleway_airflow_bucket").extra_dejson.get("region_name") }, ) - + # DBT connections gdal_dbt_conn = providers.Factory( PgConnectionString, dbname=getenv("DBT_DB_NAME"), @@ -48,6 +48,7 @@ class Container(containers.DeclarativeContainer): port=getenv("DBT_DB_PORT"), ) + # DEV connections gdal_dev_conn = providers.Factory( PgConnectionString, dbname=getenv("DEV_DB_NAME"), @@ -56,7 +57,7 @@ class Container(containers.DeclarativeContainer): host=getenv("DEV_DB_HOST"), port=getenv("DEV_DB_PORT"), ) - psycopg2_dev_conn: connection = 
providers.Factory( + psycopg2_dev_conn = providers.Factory( provides=connect, dbname=getenv("DEV_DB_NAME"), user=getenv("DEV_DB_USER"), @@ -65,6 +66,7 @@ class Container(containers.DeclarativeContainer): port=getenv("DEV_DB_PORT"), ) + # PROD connections gdal_prod_conn = providers.Factory( PgConnectionString, dbname=getenv("PROD_DB_NAME"), @@ -73,7 +75,16 @@ class Container(containers.DeclarativeContainer): host=getenv("PROD_DB_HOST"), port=getenv("PROD_DB_PORT"), ) + psycopg2_prod_conn = providers.Factory( + provides=connect, + dbname=getenv("PROD_DB_NAME"), + user=getenv("PROD_DB_USER"), + password=getenv("PROD_DB_PASSWORD"), + host=getenv("PROD_DB_HOST"), + port=getenv("PROD_DB_PORT"), + ) + # STAGING connections gdal_staging_conn = providers.Factory( PgConnectionString, dbname=getenv("STAGING_DB_NAME"), @@ -82,6 +93,14 @@ class Container(containers.DeclarativeContainer): host=getenv("STAGING_DB_HOST"), port=getenv("STAGING_DB_PORT"), ) + psycopg2_staging_conn = providers.Factory( + provides=connect, + dbname=getenv("STAGING_DB_NAME"), + user=getenv("STAGING_DB_USER"), + password=getenv("STAGING_DB_PASSWORD"), + host=getenv("STAGING_DB_HOST"), + port=getenv("STAGING_DB_PORT"), + ) cnopts = pysftp.CnOpts() cnopts.hostkeys = None From 9c56cd02a5c59f2e55ef52b6993327310bc242e4 Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Wed, 4 Sep 2024 23:52:09 +0200 Subject: [PATCH 65/99] feat(update_app): add gdal and psycopg conn --- airflow/dags/update_app.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airflow/dags/update_app.py b/airflow/dags/update_app.py index 5e115bba2..c18a3e811 100644 --- a/airflow/dags/update_app.py +++ b/airflow/dags/update_app.py @@ -18,8 +18,8 @@ def get_database_connection_string(environment: str) -> PgConnectionString: return { - STAGING: Container().gdal_staging_conn(), - PRODUCTION: Container().gdal_prod_conn(), + STAGING: {GDAL: Container().gdal_staging_conn(), PSYCOPG: Container().psycopg2_staging_conn()}, + PRODUCTION: {GDAL: Container().gdal_prod_conn(), PSYCOPG: Container().psycopg2_prod_conn()}, DEV: {GDAL: Container().gdal_dev_conn(), PSYCOPG: Container().psycopg2_dev_conn()}, }[environment] From e8367ac194c14b6ad99530f25fe33daf66431119 Mon Sep 17 00:00:00 2001 From: "Alexis A." Date: Thu, 5 Sep 2024 00:04:03 +0200 Subject: [PATCH 66/99] feat(airflow): add github action to automate deploy --- .github/workflows/deploy_airflow.yml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 .github/workflows/deploy_airflow.yml diff --git a/.github/workflows/deploy_airflow.yml b/.github/workflows/deploy_airflow.yml new file mode 100644 index 000000000..c43459f3b --- /dev/null +++ b/.github/workflows/deploy_airflow.yml @@ -0,0 +1,19 @@ +name: Deploy to production Airflow +on: + push: + branches: + - "feat-airflow" + +jobs: + deploy: + name: Deploy + runs-on: ubuntu-latest + steps: + - name: executing remote ssh git pull + uses: appleboy/ssh-action@master + with: + host: ${{ secrets.AIRFLOW_SSH_HOST }} + username: ${{ secrets.AIRFLOW_SSH_USER }} + key: ${{ secrets.AIRFLOW_SSH_KEY }} + port: ${{ secrets.AIRFLOW_SSH_PORT }} + script: cd ~/sparte && git pull From 31c3b57550b130947b6a327c333d4668a2dc8e9c Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Thu, 5 Sep 2024 00:17:46 +0200 Subject: [PATCH 67/99] feat(update_app): remove gist index creation as it is automatic with ogr2ogr --- airflow/dags/update_app.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/airflow/dags/update_app.py b/airflow/dags/update_app.py index c18a3e811..7f9dd6bff 100644 --- a/airflow/dags/update_app.py +++ b/airflow/dags/update_app.py @@ -24,11 +24,6 @@ def get_database_connection_string(environment: str) -> PgConnectionString: }[environment] -def get_spatial_index_request(table_name: str, column_name: str): - idx_name = f"{table_name.replace('.', '')}_{column_name}_idx" - return f"CREATE INDEX IF NOT EXISTS {idx_name} ON {table_name} USING GIST ({column_name});" - - def get_btree_index_request(table_name: str, columns_name: list[str]): idx_name = f"{table_name.replace('.', '')}_{'_'.join(columns_name)}_idx" return f"CREATE INDEX IF NOT EXISTS {idx_name} ON {table_name} USING btree ({', '.join(columns_name)});" @@ -38,7 +33,6 @@ def copy_table_from_dw_to_app( from_table: str, to_table: str, environment: str, - spatial_index_column: str = None, btree_index_columns: list[list[str]] = None, ): ogr = ogr2ogr() @@ -55,9 +49,6 @@ def copy_table_from_dw_to_app( index_requests = [] - if spatial_index_column: - index_requests.append(get_spatial_index_request(to_table, spatial_index_column)) - if btree_index_columns: for columns in btree_index_columns: index_requests.append(get_btree_index_request(to_table, columns)) @@ -98,7 +89,6 @@ def copy_public_data_ocsge(**context): from_table="public_ocsge.for_app_ocsge", to_table="public.public_data_ocsge", environment=context["params"]["environment"], - spatial_index_column="mpoly", ) @task.python @@ -107,7 +97,6 @@ def copy_public_data_artificialarea(**context): from_table="public_ocsge.for_app_artificialarea", to_table="public.public_data_artificialarea", environment=context["params"]["environment"], - spatial_index_column="mpoly", ) @task.python @@ -128,7 +117,6 @@ def 
copy_public_data_commune(**context): from_table="public_ocsge.for_app_commune", to_table="public.public_data_commune", environment=context["params"]["environment"], - spatial_index_column="mpoly", btree_index_columns=[ ["insee"], ], @@ -140,7 +128,6 @@ def copy_public_data_departement(**context): from_table="public_ocsge.for_app_departement", to_table="public.public_data_departement", environment=context["params"]["environment"], - spatial_index_column="mpoly", btree_index_columns=[ ["source_id"], ], @@ -165,7 +152,6 @@ def copy_public_data_ocsgediff(**context): from_table="public_ocsge.for_app_ocsgediff", to_table="public.public_data_ocsgediff", environment=context["params"]["environment"], - spatial_index_column="mpoly", btree_index_columns=[ ["year_old"], ["year_new"], @@ -196,7 +182,6 @@ def copy_public_data_zoneconstruite(**context): from_table="public_ocsge.for_app_zoneconstruite", to_table="public.public_data_zoneconstruite", environment=context["params"]["environment"], - spatial_index_column="mpoly", btree_index_columns=[ ["millesime"], ["year"], @@ -210,7 +195,6 @@ def copy_public_data_zoneurba(**context): from_table="public_gpu.for_app_zoneurba", to_table="public.public_data_zoneurba", environment=context["params"]["environment"], - spatial_index_column="mpoly", btree_index_columns=[ ["checksum"], ["libelle"], From a64fe20f5b43c09ed961939a8ac23779e21ac719 Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Thu, 5 Sep 2024 00:50:32 +0200 Subject: [PATCH 68/99] feat(update_app): remove prod connection --- airflow/dags/update_app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airflow/dags/update_app.py b/airflow/dags/update_app.py index 7f9dd6bff..1c6118c76 100644 --- a/airflow/dags/update_app.py +++ b/airflow/dags/update_app.py @@ -19,7 +19,7 @@ def get_database_connection_string(environment: str) -> PgConnectionString: return { STAGING: {GDAL: Container().gdal_staging_conn(), PSYCOPG: Container().psycopg2_staging_conn()}, - PRODUCTION: {GDAL: Container().gdal_prod_conn(), PSYCOPG: Container().psycopg2_prod_conn()}, + # PRODUCTION: {GDAL: Container().gdal_prod_conn(), PSYCOPG: Container().psycopg2_prod_conn()}, DEV: {GDAL: Container().gdal_dev_conn(), PSYCOPG: Container().psycopg2_dev_conn()}, }[environment] From 5e6b8101f7814d4a3649cf4e575ad1dbb2c81221 Mon Sep 17 00:00:00 2001 From: "Alexis A." Date: Thu, 5 Sep 2024 15:49:35 +0200 Subject: [PATCH 69/99] feat(admin_express): add support for drom com --- airflow/dags/ingest_admin_express.py | 86 ++++++++++++++++--- airflow/include/admin_express/sources.json | 49 +++++++++++ .../sparte/macros/admin_express/commune.sql | 21 +++++ .../macros/admin_express/departement.sql | 14 +++ .../sparte/models/admin_express/commune.sql | 38 ++++---- .../admin_express/commune_guadeloupe.sql | 1 + .../models/admin_express/commune_guyane.sql | 1 + .../admin_express/commune_martinique.sql | 1 + .../admin_express/commune_metropole.sql | 1 + .../models/admin_express/commune_reunion.sql | 1 + .../models/admin_express/departement.sql | 24 +++--- .../admin_express/departement_guadeloupe.sql | 1 + .../admin_express/departement_guyane.sql | 1 + .../admin_express/departement_martinique.sql | 1 + .../admin_express/departement_metropole.sql | 1 + .../admin_express/departement_reunion.sql | 1 + .../sparte/models/admin_express/schema.yml | 28 +++--- .../models/ocsge/for_app/for_app_commune.sql | 2 +- 
.../sql/sparte/models/ocsge/schema.yml | 5 ++ 19 files changed, 220 insertions(+), 57 deletions(-) create mode 100644 airflow/include/admin_express/sources.json create mode 100644 airflow/include/sql/sparte/macros/admin_express/commune.sql create mode 100644 airflow/include/sql/sparte/macros/admin_express/departement.sql create mode 100644 airflow/include/sql/sparte/models/admin_express/commune_guadeloupe.sql create mode 100644 airflow/include/sql/sparte/models/admin_express/commune_guyane.sql create mode 100644 airflow/include/sql/sparte/models/admin_express/commune_martinique.sql create mode 100644 airflow/include/sql/sparte/models/admin_express/commune_metropole.sql create mode 100644 airflow/include/sql/sparte/models/admin_express/commune_reunion.sql create mode 100644 airflow/include/sql/sparte/models/admin_express/departement_guadeloupe.sql create mode 100644 airflow/include/sql/sparte/models/admin_express/departement_guyane.sql create mode 100644 airflow/include/sql/sparte/models/admin_express/departement_martinique.sql create mode 100644 airflow/include/sql/sparte/models/admin_express/departement_metropole.sql create mode 100644 airflow/include/sql/sparte/models/admin_express/departement_reunion.sql diff --git a/airflow/dags/ingest_admin_express.py b/airflow/dags/ingest_admin_express.py index 81331fc60..db5412996 100644 --- a/airflow/dags/ingest_admin_express.py +++ b/airflow/dags/ingest_admin_express.py @@ -3,15 +3,25 @@ puis lance un job dbt pour les transformer. 
""" +import json import os import subprocess from urllib.request import URLopener import py7zr from airflow.decorators import dag, task +from airflow.models.param import Param from include.container import Container from pendulum import datetime +with open("include/admin_express/sources.json", "r") as f: + sources = json.load(f) + zones = [source["name"] for source in sources] + + +def get_source_by_name(name: str) -> dict: + return [source for source in sources if source["name"] == name][0] + @dag( start_date=datetime(2024, 1, 1), @@ -20,40 +30,90 @@ doc_md=__doc__, default_args={"owner": "Alexis Athlani", "retries": 3}, tags=["Admin Express"], + params={ + "zone": Param( + default=zones[0], + description="Zone à ingérer", + type="string", + enum=zones, + ), + "refresh_source": Param( + default=False, + description="Rafraîchir la source", + type="boolean", + ), + }, ) def ingest_admin_express(): - admin_express_archive_file = "admin_express.7z" bucket_name = "airflow-staging" - path_on_bucket = f"{bucket_name}/{admin_express_archive_file}" @task.python - def download_admin_express() -> str: - url = "https://data.geopf.fr/telechargement/download/ADMIN-EXPRESS-COG/ADMIN-EXPRESS-COG_3-2__SHP_LAMB93_FXX_2024-02-22/ADMIN-EXPRESS-COG_3-2__SHP_LAMB93_FXX_2024-02-22.7z" # noqa: E501 + def download_admin_express(**context) -> str: + url = get_source_by_name(context["params"]["zone"])["url"] + + filename = url.split("/")[-1] + path_on_bucket = f"{bucket_name}/{filename}" + print(path_on_bucket) + + file_exists = Container().s3().exists(path_on_bucket) + + if file_exists and not context["params"]["refresh_source"]: + return path_on_bucket opener = URLopener() opener.addheader("User-Agent", "Mozilla/5.0") - opener.retrieve(url=url, filename=admin_express_archive_file) + opener.retrieve(url=url, filename=filename) - with open(admin_express_archive_file, "rb") as local_file: - with Container().s3().open(path_on_bucket, "wb") as distant_file: - 
distant_file.write(local_file.read()) + Container().s3().put_file(filename, path_on_bucket) + + return path_on_bucket @task.python - def ingest() -> str: + def ingest(path_on_bucket, **context) -> str: + srid = get_source_by_name(context["params"]["zone"])["srid"] + shp_to_table_map = get_source_by_name(context["params"]["zone"])["shapefile_to_table"] + with Container().s3().open(path_on_bucket, "rb") as f: py7zr.SevenZipFile(f, mode="r").extractall() for dirpath, _, filenames in os.walk("."): for filename in filenames: if filename.endswith(".shp"): + table_name = shp_to_table_map.get(filename) + if not table_name: + continue path = os.path.abspath(os.path.join(dirpath, filename)) - cmd = f'ogr2ogr -f "PostgreSQL" "{Container().gdal_dbt_conn().encode()}" -overwrite -lco GEOMETRY_NAME=geom -a_srs EPSG:2154 -nlt MULTIPOLYGON -nlt PROMOTE_TO_MULTI {path} --config PG_USE_COPY YES' # noqa: E501 - subprocess.run(cmd, shell=True, check=True) + cmd = [ + "ogr2ogr", + "-f", + '"PostgreSQL"', + f'"{Container().gdal_dbt_conn().encode()}"', + "-overwrite", + "-lco", + "GEOMETRY_NAME=geom", + "-a_srs", + f"EPSG:{srid}", + "-nlt", + "MULTIPOLYGON", + "-nlt", + "PROMOTE_TO_MULTI", + "-nln", + table_name, + path, + "--config", + "PG_USE_COPY", + "YES", + ] + subprocess.run(" ".join(cmd), shell=True, check=True) @task.bash(retries=0, trigger_rule="all_success") def dbt_run(**context): - return 'cd "${AIRFLOW_HOME}/include/sql/sparte" && dbt run -s admin_express' + dbt_model = get_source_by_name(context["params"]["zone"])["dbt_model"] + dbt_run_cmd = f"dbt run -s {dbt_model}" + return 'cd "${AIRFLOW_HOME}/include/sql/sparte" && ' + dbt_run_cmd - download_admin_express() >> ingest() >> dbt_run() + path_on_bucket = download_admin_express() + ingest_result = ingest(path_on_bucket) + ingest_result >> dbt_run() # Instantiate the DAG diff --git a/airflow/include/admin_express/sources.json b/airflow/include/admin_express/sources.json new file mode 100644 index 000000000..5a8bc2cc9 --- 
/dev/null +++ b/airflow/include/admin_express/sources.json @@ -0,0 +1,49 @@ +[ + { + "name": "France Métropolitaine", + "url": "https://data.geopf.fr/telechargement/download/ADMIN-EXPRESS-COG/ADMIN-EXPRESS-COG_3-2__SHP_LAMB93_FXX_2024-02-22/ADMIN-EXPRESS-COG_3-2__SHP_LAMB93_FXX_2024-02-22.7z", + "srid": 2154, + "shapefile_to_table": { + "COMMUNE.shp": "commune_metropole", + "DEPARTEMENT.shp": "departement_metropole" + }, + "dbt_model": "commune_metropole.sql" + }, { + "name": "Guadeloupe", + "url": "https://data.geopf.fr/telechargement/download/ADMIN-EXPRESS-COG/ADMIN-EXPRESS-COG_3-2__SHP_RGAF09UTM20_GLP_2024-02-22/ADMIN-EXPRESS-COG_3-2__SHP_RGAF09UTM20_GLP_2024-02-22.7z", + "srid": 32620, + "shapefile_to_table": { + "COMMUNE.shp": "commune_guadeloupe", + "DEPARTEMENT.shp": "departement_guadeloupe" + }, + "dbt_model": "commune_guadeloupe.sql" + + }, { + "name": "Martinique", + "url": "https://data.geopf.fr/telechargement/download/ADMIN-EXPRESS-COG/ADMIN-EXPRESS-COG_3-2__SHP_RGAF09UTM20_MTQ_2024-02-22/ADMIN-EXPRESS-COG_3-2__SHP_RGAF09UTM20_MTQ_2024-02-22.7z", + "srid": 32620, + "shapefile_to_table": { + "COMMUNE.shp": "commune_martinique", + "DEPARTEMENT.shp": "departement_martinique" + }, + "dbt_model": "commune_martinique.sql" + }, { + "name": "Guyanne", + "url": "https://data.geopf.fr/telechargement/download/ADMIN-EXPRESS-COG/ADMIN-EXPRESS-COG_3-2__SHP_UTM22RGFG95_GUF_2024-02-22/ADMIN-EXPRESS-COG_3-2__SHP_UTM22RGFG95_GUF_2024-02-22.7z", + "srid": 2972, + "shapefile_to_table": { + "COMMUNE.shp": "commune_guyane", + "DEPARTEMENT.shp": "departement_guyane" + }, + "dbt_model": "commune_guyane.sql" + }, { + "name": "La Réunion", + "url": "https://data.geopf.fr/telechargement/download/ADMIN-EXPRESS-COG/ADMIN-EXPRESS-COG_3-2__SHP_RGR92UTM40S_REU_2024-02-22/ADMIN-EXPRESS-COG_3-2__SHP_RGR92UTM40S_REU_2024-02-22.7z", + "srid": 2975, + "shapefile_to_table": { + "COMMUNE.shp": "commune_reunion", + "DEPARTEMENT.shp": "departement_reunion" + }, + "dbt_model": 
"commune_reunion.sql" + } +] \ No newline at end of file diff --git a/airflow/include/sql/sparte/macros/admin_express/commune.sql b/airflow/include/sql/sparte/macros/admin_express/commune.sql new file mode 100644 index 000000000..96b08bb63 --- /dev/null +++ b/airflow/include/sql/sparte/macros/admin_express/commune.sql @@ -0,0 +1,21 @@ + +{% macro commune(source_table_name) %} + {{ config(materialized='table') }} + + SELECT + id, + nom as name, + nom_m as name_uppercase, + insee_com as code, + statut as type, + population as population, + insee_can as canton, + insee_arr as arrondissement, + insee_dep as departement, + insee_reg as region, + siren_epci as epci, + ST_Area(geom) as surface, + geom + FROM + {{ source('public', source_table_name) }} as commune +{% endmacro %} diff --git a/airflow/include/sql/sparte/macros/admin_express/departement.sql b/airflow/include/sql/sparte/macros/admin_express/departement.sql new file mode 100644 index 000000000..e300bcce1 --- /dev/null +++ b/airflow/include/sql/sparte/macros/admin_express/departement.sql @@ -0,0 +1,14 @@ +{% macro departement(source_table_name) %} + {{ config(materialized='table') }} + + SELECT + id, + nom as name, + nom_m as name_uppercase, + insee_dep as code, + insee_reg as region, + ST_Area(geom) as surface, + geom + FROM + {{ source('public', source_table_name) }} as departement +{% endmacro %} diff --git a/airflow/include/sql/sparte/models/admin_express/commune.sql b/airflow/include/sql/sparte/models/admin_express/commune.sql index c3d18e0ed..4b8d48ffc 100644 --- a/airflow/include/sql/sparte/models/admin_express/commune.sql +++ b/airflow/include/sql/sparte/models/admin_express/commune.sql @@ -1,28 +1,24 @@ - {{ config( materialized='table', indexes=[ - {'columns': ['departement'], 'type': 'btree'}, + {'columns': ['id'], 'type': 'btree'}, {'columns': ['code'], 'type': 'btree'}, - {'columns': ['geom'], 'type': 'gist'} - ]) + {'columns': ['name'], 'type': 'btree'}, + {'columns': ['departement'], 'type': 
'btree'}, + {'columns': ['region'], 'type': 'btree'}, + {'columns': ['epci'], 'type': 'btree'}, + {'columns': ['geom'], 'type': 'gist'}, + ] + ) }} -SELECT - id, - nom as name, - nom_m as name_uppercase, - insee_com as code, - statut as type, - population as population, - insee_can as canton, - insee_arr as arrondissement, - insee_dep as departement, - insee_reg as region, - siren_epci as epci, - ST_Area(geom) as surface, - gen_random_uuid() as uuid, - geom -FROM - {{ source('public', 'commune') }} as commune +SELECT *, 32620 as srid_source FROM {{ ref('commune_guadeloupe') }} +UNION ALL +SELECT *, 32620 as srid_source FROM {{ ref('commune_martinique') }} +UNION ALL +SELECT *, 2972 as srid_source FROM {{ ref('commune_guyane') }} +UNION ALL +SELECT *, 2975 as srid_source FROM {{ ref('commune_reunion') }} +UNION ALL +SELECT *, 2154 as srid_source FROM {{ ref('commune_metropole') }} diff --git a/airflow/include/sql/sparte/models/admin_express/commune_guadeloupe.sql b/airflow/include/sql/sparte/models/admin_express/commune_guadeloupe.sql new file mode 100644 index 000000000..e3c5e85b2 --- /dev/null +++ b/airflow/include/sql/sparte/models/admin_express/commune_guadeloupe.sql @@ -0,0 +1 @@ +{{ commune('commune_guadeloupe') }} diff --git a/airflow/include/sql/sparte/models/admin_express/commune_guyane.sql b/airflow/include/sql/sparte/models/admin_express/commune_guyane.sql new file mode 100644 index 000000000..40201175b --- /dev/null +++ b/airflow/include/sql/sparte/models/admin_express/commune_guyane.sql @@ -0,0 +1 @@ +{{ commune('commune_guyane') }} diff --git a/airflow/include/sql/sparte/models/admin_express/commune_martinique.sql b/airflow/include/sql/sparte/models/admin_express/commune_martinique.sql new file mode 100644 index 000000000..235314e8a --- /dev/null +++ b/airflow/include/sql/sparte/models/admin_express/commune_martinique.sql @@ -0,0 +1 @@ +{{ commune('commune_martinique') }} diff --git 
a/airflow/include/sql/sparte/models/admin_express/commune_metropole.sql b/airflow/include/sql/sparte/models/admin_express/commune_metropole.sql new file mode 100644 index 000000000..3ab67ee42 --- /dev/null +++ b/airflow/include/sql/sparte/models/admin_express/commune_metropole.sql @@ -0,0 +1 @@ +{{ commune('commune_metropole') }} diff --git a/airflow/include/sql/sparte/models/admin_express/commune_reunion.sql b/airflow/include/sql/sparte/models/admin_express/commune_reunion.sql new file mode 100644 index 000000000..99ec51ae5 --- /dev/null +++ b/airflow/include/sql/sparte/models/admin_express/commune_reunion.sql @@ -0,0 +1 @@ +{{ commune('commune_reunion') }} diff --git a/airflow/include/sql/sparte/models/admin_express/departement.sql b/airflow/include/sql/sparte/models/admin_express/departement.sql index 559ca66bb..05034677b 100644 --- a/airflow/include/sql/sparte/models/admin_express/departement.sql +++ b/airflow/include/sql/sparte/models/admin_express/departement.sql @@ -1,21 +1,21 @@ - {{ config( materialized='table', indexes=[ + {'columns': ['id'], 'type': 'btree'}, {'columns': ['code'], 'type': 'btree'}, + {'columns': ['name'], 'type': 'btree'}, + {'columns': ['region'], 'type': 'btree'}, {'columns': ['geom'], 'type': 'gist'} ]) }} -SELECT - id, - nom as name, - nom_m as name_uppercase, - insee_dep as code, - insee_reg as region, - ST_Area(geom) as surface, - gen_random_uuid() as uuid, - geom -FROM - {{ source('public', 'departement') }} as departement +SELECT * FROM {{ ref('departement_guadeloupe') }} +UNION ALL +SELECT * FROM {{ ref('departement_martinique') }} +UNION ALL +SELECT * FROM {{ ref('departement_guyane') }} +UNION ALL +SELECT * FROM {{ ref('departement_reunion') }} +UNION ALL +SELECT * FROM {{ ref('departement_metropole') }} diff --git a/airflow/include/sql/sparte/models/admin_express/departement_guadeloupe.sql b/airflow/include/sql/sparte/models/admin_express/departement_guadeloupe.sql new file mode 100644 index 000000000..8ba7108ad --- /dev/null 
+++ b/airflow/include/sql/sparte/models/admin_express/departement_guadeloupe.sql @@ -0,0 +1 @@ +{{ departement('departement_guadeloupe') }} diff --git a/airflow/include/sql/sparte/models/admin_express/departement_guyane.sql b/airflow/include/sql/sparte/models/admin_express/departement_guyane.sql new file mode 100644 index 000000000..199e20f10 --- /dev/null +++ b/airflow/include/sql/sparte/models/admin_express/departement_guyane.sql @@ -0,0 +1 @@ +{{ departement('departement_guyane') }} diff --git a/airflow/include/sql/sparte/models/admin_express/departement_martinique.sql b/airflow/include/sql/sparte/models/admin_express/departement_martinique.sql new file mode 100644 index 000000000..1607428f9 --- /dev/null +++ b/airflow/include/sql/sparte/models/admin_express/departement_martinique.sql @@ -0,0 +1 @@ +{{ departement('departement_martinique') }} diff --git a/airflow/include/sql/sparte/models/admin_express/departement_metropole.sql b/airflow/include/sql/sparte/models/admin_express/departement_metropole.sql new file mode 100644 index 000000000..eb7b73548 --- /dev/null +++ b/airflow/include/sql/sparte/models/admin_express/departement_metropole.sql @@ -0,0 +1 @@ +{{ departement('departement_metropole') }} diff --git a/airflow/include/sql/sparte/models/admin_express/departement_reunion.sql b/airflow/include/sql/sparte/models/admin_express/departement_reunion.sql new file mode 100644 index 000000000..4376d1c3d --- /dev/null +++ b/airflow/include/sql/sparte/models/admin_express/departement_reunion.sql @@ -0,0 +1 @@ +{{ departement('departement_reunion') }} diff --git a/airflow/include/sql/sparte/models/admin_express/schema.yml b/airflow/include/sql/sparte/models/admin_express/schema.yml index 19f6eadf1..e9b165b75 100644 --- a/airflow/include/sql/sparte/models/admin_express/schema.yml +++ b/airflow/include/sql/sparte/models/admin_express/schema.yml @@ -3,20 +3,28 @@ version: 2 models: - name: commune + - name: commune_guadeloupe + - name: commune_guyane + - name: 
commune_martinique + - name: commune_reunion - name: departement + - name: departement_guadeloupe + - name: departement_guyane + - name: departement_martinique + - name: departement_reunion sources: - name: public tables: - - name: arrondissement - - name: arrondissement_municipal - - name: canton - - name: chflieu_arrondissement_municipal - - name: chflieu_commune - - name: chflieu_commune_associee_ou_deleguee - - name: collectivite_territoriale - - name: commune - - name: commune_associee_ou_deleguee - - name: departement + - name: commune_metropole + - name: commune_guadeloupe + - name: commune_guyane + - name: commune_martinique + - name: commune_reunion + - name: departement_metropole + - name: departement_guadeloupe + - name: departement_guyane + - name: departement_martinique + - name: departement_reunion - name: epci - name: region diff --git a/airflow/include/sql/sparte/models/ocsge/for_app/for_app_commune.sql b/airflow/include/sql/sparte/models/ocsge/for_app/for_app_commune.sql index 4cfcdfafa..e5ada4dbd 100644 --- a/airflow/include/sql/sparte/models/ocsge/for_app/for_app_commune.sql +++ b/airflow/include/sql/sparte/models/ocsge/for_app/for_app_commune.sql @@ -57,7 +57,7 @@ SELECT ) as surface_artif, admin_express_commune.surface / 10000 as area, ST_Transform(admin_express_commune.geom, 4326) as mpoly, - 2154 as srid_source + commune.srid_source as srid_source FROM {{ ref('app_commune') }} as commune LEFT JOIN diff --git a/airflow/include/sql/sparte/models/ocsge/schema.yml b/airflow/include/sql/sparte/models/ocsge/schema.yml index 55acde6a5..01cfa22e7 100644 --- a/airflow/include/sql/sparte/models/ocsge/schema.yml +++ b/airflow/include/sql/sparte/models/ocsge/schema.yml @@ -48,6 +48,11 @@ not_null_cs_us_config_staging: ¬_null_cs_us_config_staging warn_if: ">0" models: + - name: for_app_commune + columns: + - name: area + data_tests: + - not_null - name: occupation_du_sol_commune - name: artificial_commune - name: zone_construite From 
10fbe3d59055c253dab83f22d22d61ddabda93ae Mon Sep 17 00:00:00 2001 From: "Alexis A." Date: Thu, 5 Sep 2024 16:04:27 +0200 Subject: [PATCH 70/99] feat(admin_express): add + to model creation in airflow --- airflow/dags/ingest_admin_express.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/airflow/dags/ingest_admin_express.py b/airflow/dags/ingest_admin_express.py index db5412996..50fc6d90b 100644 --- a/airflow/dags/ingest_admin_express.py +++ b/airflow/dags/ingest_admin_express.py @@ -28,6 +28,7 @@ def get_source_by_name(name: str) -> dict: schedule="@once", catchup=False, doc_md=__doc__, + max_active_runs=1, default_args={"owner": "Alexis Athlani", "retries": 3}, tags=["Admin Express"], params={ @@ -108,7 +109,7 @@ def ingest(path_on_bucket, **context) -> str: @task.bash(retries=0, trigger_rule="all_success") def dbt_run(**context): dbt_model = get_source_by_name(context["params"]["zone"])["dbt_model"] - dbt_run_cmd = f"dbt run -s {dbt_model}" + dbt_run_cmd = f"dbt run -s {dbt_model}+" return 'cd "${AIRFLOW_HOME}/include/sql/sparte" && ' + dbt_run_cmd path_on_bucket = download_admin_express() From 01754fe2794dc0f0ceb1ad6e8ca9a120c314a235 Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Thu, 5 Sep 2024 16:29:57 +0200 Subject: [PATCH 71/99] feat(dbt): add srid_source --- airflow/include/sql/sparte/models/gpu/zonage_urbanisme.sql | 3 ++- airflow/include/sql/sparte/models/ocsge/difference.sql | 3 ++- .../sql/sparte/models/ocsge/for_app/for_app_artificialarea.sql | 2 +- .../sql/sparte/models/ocsge/for_app/for_app_commune.sql | 2 +- .../sql/sparte/models/ocsge/for_app/for_app_departement.sql | 2 +- .../include/sql/sparte/models/ocsge/for_app/for_app_ocsge.sql | 2 +- .../sql/sparte/models/ocsge/for_app/for_app_ocsgediff.sql | 2 +- .../sql/sparte/models/ocsge/for_app/for_app_zoneconstruite.sql | 2 +- .../sql/sparte/models/ocsge/intersected/artificial_commune.sql | 1 + .../sql/sparte/models/ocsge/intersected/difference_commune.sql | 3 +++ .../models/ocsge/intersected/occupation_du_sol_commune.sql | 3 +++ .../ocsge/intersected/occupation_du_sol_zonage_urbanisme.sql | 3 +++ airflow/include/sql/sparte/models/ocsge/occupation_du_sol.sql | 3 ++- airflow/include/sql/sparte/models/ocsge/zone_construite.sql | 3 ++- 14 files changed, 24 insertions(+), 10 deletions(-) diff --git a/airflow/include/sql/sparte/models/gpu/zonage_urbanisme.sql b/airflow/include/sql/sparte/models/gpu/zonage_urbanisme.sql index 926a12c81..0a76da85a 100644 --- a/airflow/include/sql/sparte/models/gpu/zonage_urbanisme.sql +++ b/airflow/include/sql/sparte/models/gpu/zonage_urbanisme.sql @@ -34,7 +34,8 @@ SELECT *, ST_Area(geom) as surface FROM ( ST_transform(geom, 2154) ), 3) - )) as geom + )) as geom, + 2154 as srid_source FROM {{ source('public', 'gpu_zone_urba') }} ) as foo diff --git a/airflow/include/sql/sparte/models/ocsge/difference.sql b/airflow/include/sql/sparte/models/ocsge/difference.sql index a3e6b4a8d..7031906e4 100644 --- a/airflow/include/sql/sparte/models/ocsge/difference.sql +++ b/airflow/include/sql/sparte/models/ocsge/difference.sql @@ -54,7 +54,8 @@ SELECT new_is_artif = false THEN true ELSE false END AS new_not_artificial, - geom + geom, + 2154 as srid_source FROM 
( SELECT ocsge.loaded_date, diff --git a/airflow/include/sql/sparte/models/ocsge/for_app/for_app_artificialarea.sql b/airflow/include/sql/sparte/models/ocsge/for_app/for_app_artificialarea.sql index 9f2c81aaf..dc1fd5340 100644 --- a/airflow/include/sql/sparte/models/ocsge/for_app/for_app_artificialarea.sql +++ b/airflow/include/sql/sparte/models/ocsge/for_app/for_app_artificialarea.sql @@ -9,7 +9,7 @@ SELECT commune_year_id, year, surface / 10000 as surface, - 2154 as srid_source, + srid_source, departement, commune_code as city, ST_Transform(geom, 4326) as mpoly diff --git a/airflow/include/sql/sparte/models/ocsge/for_app/for_app_commune.sql b/airflow/include/sql/sparte/models/ocsge/for_app/for_app_commune.sql index e5ada4dbd..37f785e4b 100644 --- a/airflow/include/sql/sparte/models/ocsge/for_app/for_app_commune.sql +++ b/airflow/include/sql/sparte/models/ocsge/for_app/for_app_commune.sql @@ -57,7 +57,7 @@ SELECT ) as surface_artif, admin_express_commune.surface / 10000 as area, ST_Transform(admin_express_commune.geom, 4326) as mpoly, - commune.srid_source as srid_source + admin_express_commune.srid_source as srid_source FROM {{ ref('app_commune') }} as commune LEFT JOIN diff --git a/airflow/include/sql/sparte/models/ocsge/for_app/for_app_departement.sql b/airflow/include/sql/sparte/models/ocsge/for_app/for_app_departement.sql index fc3d7573e..90b243e57 100644 --- a/airflow/include/sql/sparte/models/ocsge/for_app/for_app_departement.sql +++ b/airflow/include/sql/sparte/models/ocsge/for_app/for_app_departement.sql @@ -22,7 +22,7 @@ SELECT array_length(millesimes.ocsge_millesimes, 1) > 1 AS is_artif_ready, millesimes.ocsge_millesimes, ST_Transform(admin_express_departement.geom, 4326) as mpoly, - 2154 as srid_source + admin_express_departement.srid_source FROM {{ ref('app_departement') }} as app_departement LEFT JOIN diff --git a/airflow/include/sql/sparte/models/ocsge/for_app/for_app_ocsge.sql b/airflow/include/sql/sparte/models/ocsge/for_app/for_app_ocsge.sql 
index c284cf363..e6c435dd9 100644 --- a/airflow/include/sql/sparte/models/ocsge/for_app/for_app_ocsge.sql +++ b/airflow/include/sql/sparte/models/ocsge/for_app/for_app_ocsge.sql @@ -14,7 +14,7 @@ SELECT id as id_source, is_artificial, surface, - 2154 as srid_source, + srid_source, departement, is_impermeable FROM diff --git a/airflow/include/sql/sparte/models/ocsge/for_app/for_app_ocsgediff.sql b/airflow/include/sql/sparte/models/ocsge/for_app/for_app_ocsgediff.sql index 4005d56dc..e7602d6dd 100644 --- a/airflow/include/sql/sparte/models/ocsge/for_app/for_app_ocsgediff.sql +++ b/airflow/include/sql/sparte/models/ocsge/for_app/for_app_ocsgediff.sql @@ -15,7 +15,7 @@ SELECT us_old, ST_Transform(geom, 4326) as mpoly, surface, - 2154 as srid_source, + srid_source, departement, new_is_artificial as is_new_artif, new_not_artificial as is_new_natural, diff --git a/airflow/include/sql/sparte/models/ocsge/for_app/for_app_zoneconstruite.sql b/airflow/include/sql/sparte/models/ocsge/for_app/for_app_zoneconstruite.sql index b9353eb96..f634cd409 100644 --- a/airflow/include/sql/sparte/models/ocsge/for_app/for_app_zoneconstruite.sql +++ b/airflow/include/sql/sparte/models/ocsge/for_app/for_app_zoneconstruite.sql @@ -12,7 +12,7 @@ SELECT ST_Transform(geom, 4326) as mpoly, year, surface, - 2154 as srid_source, + srid_source, departement FROM {{ ref("zone_construite") }} diff --git a/airflow/include/sql/sparte/models/ocsge/intersected/artificial_commune.sql b/airflow/include/sql/sparte/models/ocsge/intersected/artificial_commune.sql index 3b128581e..099cde4b8 100644 --- a/airflow/include/sql/sparte/models/ocsge/intersected/artificial_commune.sql +++ b/airflow/include/sql/sparte/models/ocsge/intersected/artificial_commune.sql @@ -18,6 +18,7 @@ with artificial_commune_without_surface as ( ocsge.commune_code, ocsge.ocsge_loaded_date, + ocsge.srid_source, ocsge.departement, ocsge.year, diff --git a/airflow/include/sql/sparte/models/ocsge/intersected/difference_commune.sql 
b/airflow/include/sql/sparte/models/ocsge/intersected/difference_commune.sql index 27bc2da7a..145f6fe2e 100644 --- a/airflow/include/sql/sparte/models/ocsge/intersected/difference_commune.sql +++ b/airflow/include/sql/sparte/models/ocsge/intersected/difference_commune.sql @@ -35,6 +35,7 @@ with difference_commune_without_surface as ( ocsge.us_old, ocsge.cs_new, ocsge.us_new, + ocsge.srid_source, ST_Intersection(commune.geom, ocsge.geom) AS geom FROM {{ ref("commune") }} AS commune @@ -42,6 +43,8 @@ with difference_commune_without_surface as ( {{ ref("difference") }} AS ocsge ON ocsge.departement = commune.departement + AND + ocsge.srid_source = commune.srid_source AND ST_Intersects(commune.geom, ocsge.geom) diff --git a/airflow/include/sql/sparte/models/ocsge/intersected/occupation_du_sol_commune.sql b/airflow/include/sql/sparte/models/ocsge/intersected/occupation_du_sol_commune.sql index 16146f5a1..ccbb30d63 100644 --- a/airflow/include/sql/sparte/models/ocsge/intersected/occupation_du_sol_commune.sql +++ b/airflow/include/sql/sparte/models/ocsge/intersected/occupation_du_sol_commune.sql @@ -40,6 +40,7 @@ with max_ocsge_loaded_date as ( ocsge.code_us, ocsge.is_artificial, ocsge.is_impermeable, + ocsge.srid_source, ST_Intersection(commune.geom, ocsge.geom) AS geom FROM {{ ref("commune") }} AS commune @@ -47,6 +48,8 @@ with max_ocsge_loaded_date as ( {{ ref("occupation_du_sol") }} AS ocsge ON ocsge.departement = commune.departement + AND + ocsge.srid_source = commune.srid_source AND ST_Intersects(commune.geom, ocsge.geom) diff --git a/airflow/include/sql/sparte/models/ocsge/intersected/occupation_du_sol_zonage_urbanisme.sql b/airflow/include/sql/sparte/models/ocsge/intersected/occupation_du_sol_zonage_urbanisme.sql index 94bdd41dd..034a21db4 100644 --- a/airflow/include/sql/sparte/models/ocsge/intersected/occupation_du_sol_zonage_urbanisme.sql +++ b/airflow/include/sql/sparte/models/ocsge/intersected/occupation_du_sol_zonage_urbanisme.sql @@ -48,6 +48,7 @@ with 
max_ocsge_loaded_date as ( ocsge.uuid, ocsge.is_artificial, ocsge.is_impermeable, + ocsge.srid_source, ST_Intersection(zonage.geom, ocsge.geom) AS geom FROM {{ ref("zonage_urbanisme") }} AS zonage @@ -55,6 +56,8 @@ with max_ocsge_loaded_date as ( {{ ref("occupation_du_sol") }} AS ocsge ON ST_Intersects(zonage.geom, ocsge.geom) + AND + zonage.srid_source = ocsge.srid_source {% if is_incremental() %} where ocsge.loaded_date > (select ocsge_loaded_date from max_ocsge_loaded_date) diff --git a/airflow/include/sql/sparte/models/ocsge/occupation_du_sol.sql b/airflow/include/sql/sparte/models/ocsge/occupation_du_sol.sql index 6cfed0407..2cb822ee5 100644 --- a/airflow/include/sql/sparte/models/ocsge/occupation_du_sol.sql +++ b/airflow/include/sql/sparte/models/ocsge/occupation_du_sol.sql @@ -26,6 +26,7 @@ SELECT {{ is_impermeable('code_cs') }} as is_impermeable, {{ is_artificial('code_cs', 'code_us') }} as is_artificial, uuid::uuid, - ST_MakeValid(geom) AS geom + ST_MakeValid(geom) AS geom, + 2154 as srid_source FROM {{ source('public', 'ocsge_occupation_du_sol') }} AS ocsge diff --git a/airflow/include/sql/sparte/models/ocsge/zone_construite.sql b/airflow/include/sql/sparte/models/ocsge/zone_construite.sql index f3048163a..48cd56151 100644 --- a/airflow/include/sql/sparte/models/ocsge/zone_construite.sql +++ b/airflow/include/sql/sparte/models/ocsge/zone_construite.sql @@ -18,6 +18,7 @@ SELECT departement, ST_MakeValid(geom) AS geom, ST_Area(geom) as surface, - uuid::uuid + uuid::uuid, + 2154 as srid_source FROM {{ source('public', 'ocsge_zone_construite') }} as ocsge From 3a48ce8cc45184b693dc8e8ea85577fea1debaaf Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Thu, 5 Sep 2024 16:59:06 +0200 Subject: [PATCH 72/99] feat(dbt): update selector for admin express --- airflow/dags/ingest_admin_express.py | 4 ++-- airflow/include/admin_express/sources.json | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/airflow/dags/ingest_admin_express.py b/airflow/dags/ingest_admin_express.py index 50fc6d90b..567bdf9ea 100644 --- a/airflow/dags/ingest_admin_express.py +++ b/airflow/dags/ingest_admin_express.py @@ -108,8 +108,8 @@ def ingest(path_on_bucket, **context) -> str: @task.bash(retries=0, trigger_rule="all_success") def dbt_run(**context): - dbt_model = get_source_by_name(context["params"]["zone"])["dbt_model"] - dbt_run_cmd = f"dbt run -s {dbt_model}+" + dbt_selector = get_source_by_name(context["params"]["zone"])["dbt_selector"] + dbt_run_cmd = f"dbt build -s {dbt_selector}" return 'cd "${AIRFLOW_HOME}/include/sql/sparte" && ' + dbt_run_cmd path_on_bucket = download_admin_express() diff --git a/airflow/include/admin_express/sources.json b/airflow/include/admin_express/sources.json index 5a8bc2cc9..7589f918f 100644 --- a/airflow/include/admin_express/sources.json +++ b/airflow/include/admin_express/sources.json @@ -7,7 +7,7 @@ "COMMUNE.shp": "commune_metropole", "DEPARTEMENT.shp": "departement_metropole" }, - "dbt_model": "commune_metropole.sql" + "dbt_selector": "commune_metropole.sql+ departement_metropole.sql+" }, { "name": "Guadeloupe", "url": "https://data.geopf.fr/telechargement/download/ADMIN-EXPRESS-COG/ADMIN-EXPRESS-COG_3-2__SHP_RGAF09UTM20_GLP_2024-02-22/ADMIN-EXPRESS-COG_3-2__SHP_RGAF09UTM20_GLP_2024-02-22.7z", @@ -16,7 +16,7 @@ "COMMUNE.shp": "commune_guadeloupe", "DEPARTEMENT.shp": "departement_guadeloupe" }, - "dbt_model": "commune_guadeloupe.sql" + "dbt_selector": "commune_guadeloupe.sql+ departement_guadeloupe.sql+" }, { "name": "Martinique", @@ -26,7 +26,7 @@ "COMMUNE.shp": "commune_martinique", "DEPARTEMENT.shp": "departement_martinique" }, - "dbt_model": "commune_martinique.sql" + 
"dbt_selector": "commune_martinique.sql+ departement_martinique.sql+" }, { "name": "Guyanne", "url": "https://data.geopf.fr/telechargement/download/ADMIN-EXPRESS-COG/ADMIN-EXPRESS-COG_3-2__SHP_UTM22RGFG95_GUF_2024-02-22/ADMIN-EXPRESS-COG_3-2__SHP_UTM22RGFG95_GUF_2024-02-22.7z", @@ -35,7 +35,7 @@ "COMMUNE.shp": "commune_guyane", "DEPARTEMENT.shp": "departement_guyane" }, - "dbt_model": "commune_guyane.sql" + "dbt_selector": "commune_guyane.sql+ departement_guyane.sql+" }, { "name": "La Réunion", "url": "https://data.geopf.fr/telechargement/download/ADMIN-EXPRESS-COG/ADMIN-EXPRESS-COG_3-2__SHP_RGR92UTM40S_REU_2024-02-22/ADMIN-EXPRESS-COG_3-2__SHP_RGR92UTM40S_REU_2024-02-22.7z", @@ -44,6 +44,6 @@ "COMMUNE.shp": "commune_reunion", "DEPARTEMENT.shp": "departement_reunion" }, - "dbt_model": "commune_reunion.sql" + "dbt_selector": "commune_reunion.sql+ departement_reunion.sql+" } ] \ No newline at end of file From b6c89db167e52b5b59e38d9a308d29925e1536c8 Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Thu, 5 Sep 2024 17:15:22 +0200 Subject: [PATCH 73/99] feat(departement): add srid_source --- .../sql/sparte/models/admin_express/departement.sql | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/airflow/include/sql/sparte/models/admin_express/departement.sql b/airflow/include/sql/sparte/models/admin_express/departement.sql index 05034677b..ecd576a3f 100644 --- a/airflow/include/sql/sparte/models/admin_express/departement.sql +++ b/airflow/include/sql/sparte/models/admin_express/departement.sql @@ -10,12 +10,12 @@ ]) }} -SELECT * FROM {{ ref('departement_guadeloupe') }} +SELECT *, 32620 as srid_source FROM {{ ref('departement_guadeloupe') }} UNION ALL -SELECT * FROM {{ ref('departement_martinique') }} +SELECT *, 32620 as srid_source FROM {{ ref('departement_martinique') }} UNION ALL -SELECT * FROM {{ ref('departement_guyane') }} +SELECT *, 2972 as srid_source FROM {{ ref('departement_guyane') }} UNION ALL -SELECT * FROM {{ ref('departement_reunion') }} +SELECT *, 2975 as srid_source FROM {{ ref('departement_reunion') }} UNION ALL -SELECT * FROM {{ ref('departement_metropole') }} +SELECT *, 2154 as srid_source FROM {{ ref('departement_metropole') }} From e4e8e1d41892904827887b5fdf659c584a187694 Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Thu, 5 Sep 2024 17:45:25 +0200 Subject: [PATCH 74/99] feat(artificial_commune): add srid_source to table --- .../sql/sparte/models/ocsge/intersected/artificial_commune.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/airflow/include/sql/sparte/models/ocsge/intersected/artificial_commune.sql b/airflow/include/sql/sparte/models/ocsge/intersected/artificial_commune.sql index 099cde4b8..15f07f8bc 100644 --- a/airflow/include/sql/sparte/models/ocsge/intersected/artificial_commune.sql +++ b/airflow/include/sql/sparte/models/ocsge/intersected/artificial_commune.sql @@ -37,7 +37,8 @@ with artificial_commune_without_surface as ( ocsge.commune_code, ocsge.departement, ocsge.year, - ocsge.ocsge_loaded_date + ocsge.ocsge_loaded_date, + ocsge.srid_source ) SELECT *, From f34f965979855dfb45965fe3fab65d804405c585 Mon Sep 17 00:00:00 2001 From: "Alexis A." Date: Thu, 5 Sep 2024 19:42:05 +0200 Subject: [PATCH 75/99] feat(updateapp): add index on ocsge table --- airflow/dags/update_app.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/airflow/dags/update_app.py b/airflow/dags/update_app.py index 1c6118c76..7cb30951c 100644 --- a/airflow/dags/update_app.py +++ b/airflow/dags/update_app.py @@ -89,6 +89,14 @@ def copy_public_data_ocsge(**context): from_table="public_ocsge.for_app_ocsge", to_table="public.public_data_ocsge", environment=context["params"]["environment"], + btree_index_columns=[ + ["departement"], + ["year"], + ["departement", "year"], + ["code_cs"], + ["code_us"], + ["code_cs", "code_us"], + ], ) @task.python From 7340ac123cd5d066cb1601f320bfa0d8335ce592 Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Thu, 5 Sep 2024 22:00:48 +0200 Subject: [PATCH 76/99] feat(ocsge): add url for 04 --- airflow/include/ocsge/sources.json | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/airflow/include/ocsge/sources.json b/airflow/include/ocsge/sources.json index 6c0c31f3d..bd14a2cb6 100644 --- a/airflow/include/ocsge/sources.json +++ b/airflow/include/ocsge/sources.json @@ -17,6 +17,15 @@ "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D002_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D002_2021-01-01.7z" } }, + "04": { + "occupation_du_sol_et_zone_construite": { + "2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D004_2021-01-01/OCS-GE_2-0__SHP_LAMB93_D004_2021-01-01.7z", + "2018": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0__SHP_LAMB93_D004_2018-01-01/OCS-GE_2-0__SHP_LAMB93_D004_2018-01-01.7z" + }, + "difference": { + "2018_2021": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D004_2018-2021/OCS-GE_2-0_DIFF_SHP_LAMB93_D004_2018-2021.7z" + } + }, "06": { "difference": { "2017_2020": "https://data.geopf.fr/telechargement/download/OCSGE/OCS-GE_2-0_DIFF_SHP_LAMB93_D006_2017-2020/OCS-GE_2-0_DIFF_SHP_LAMB93_D006_2017-2020.7z" From 3e86f50bc4575d44eaefa81db919759c957ff279 Mon Sep 17 00:00:00 2001 From: "Alexis A." Date: Thu, 5 Sep 2024 22:03:30 +0200 Subject: [PATCH 77/99] feat(update_app): set max_active_runs to 1 --- airflow/dags/update_app.py | 1 + 1 file changed, 1 insertion(+) diff --git a/airflow/dags/update_app.py b/airflow/dags/update_app.py index 7cb30951c..655a32fce 100644 --- a/airflow/dags/update_app.py +++ b/airflow/dags/update_app.py @@ -68,6 +68,7 @@ def copy_table_from_dw_to_app( schedule="@once", catchup=False, doc_md=__doc__, + max_active_runs=1, default_args={"owner": "Alexis Athlani", "retries": 3}, tags=["App"], params={ From 2c74efbf33d0a7d23f895f4856fdcf3a4bfbce20 Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Thu, 5 Sep 2024 22:25:50 +0200 Subject: [PATCH 78/99] feat(dbt): remove test for for_app_commune --- airflow/include/sql/sparte/models/ocsge/schema.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/airflow/include/sql/sparte/models/ocsge/schema.yml b/airflow/include/sql/sparte/models/ocsge/schema.yml index 01cfa22e7..2e7e7e193 100644 --- a/airflow/include/sql/sparte/models/ocsge/schema.yml +++ b/airflow/include/sql/sparte/models/ocsge/schema.yml @@ -49,10 +49,6 @@ not_null_cs_us_config_staging: &not_null_cs_us_config_staging models: - name: for_app_commune - columns: - - name: area - data_tests: - - not_null - name: occupation_du_sol_commune - name: artificial_commune - name: zone_construite From bf368669dfa21357e75918bdfa78e1941e518e04 Mon Sep 17 00:00:00 2001 From: "Alexis A." Date: Fri, 6 Sep 2024 09:07:59 +0200 Subject: [PATCH 79/99] docs(version): bump to 8 --- config/settings.py | 2 +- package.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/config/settings.py b/config/settings.py index 20a077355..33545fdee 100644 --- a/config/settings.py +++ b/config/settings.py @@ -22,7 +22,7 @@ from sentry_sdk.integrations.django import DjangoIntegration from sentry_sdk.integrations.redis import RedisIntegration -OFFICIAL_VERSION = "7.4" +OFFICIAL_VERSION = "8.0" root = environ.Path(__file__) - 2 # get root of the project diff --git a/package.json b/package.json index 5c91bb1ff..6771ef4cf 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "sparte", - "version": "7.4", + "version": "8.0", "private": true, "scripts": { "dev": "webpack --watch --mode development", From 22b0d9be4393dfea260ba5c0da1fdc0887e69eca Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Fri, 6 Sep 2024 12:52:39 +0200 Subject: [PATCH 80/99] feat(update_app): fix index name --- airflow/dags/update_app.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/airflow/dags/update_app.py b/airflow/dags/update_app.py index 655a32fce..4daa9d269 100644 --- a/airflow/dags/update_app.py +++ b/airflow/dags/update_app.py @@ -94,9 +94,9 @@ def copy_public_data_ocsge(**context): ["departement"], ["year"], ["departement", "year"], - ["code_cs"], - ["code_us"], - ["code_cs", "code_us"], + ["couverture"], + ["usage"], + ["couverture", "usage"], ], ) From 9104bfa20169bd942217613d6b9332cd99c2b2ae Mon Sep 17 00:00:00 2001 From: "Alexis A." Date: Fri, 6 Sep 2024 13:02:07 +0200 Subject: [PATCH 81/99] feat(dbt): add pre-commit lint & fix --- .pre-commit-config.yaml | 12 +++++++----- .sqlfluff | 20 ++++++++++++++++++++ 2 files changed, 27 insertions(+), 5 deletions(-) create mode 100644 .sqlfluff diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1c4b660d7..cabf18386 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -27,11 +27,6 @@ repos: hooks: - id: black language_version: python3 -# - repo: https://github.com/pycqa/bandit -# rev: 1.7.0 -# hooks: -# - id: bandit -# args: ['-iii', '-ll'] - repo: https://github.com/PyCQA/autoflake rev: v2.2.1 hooks: @@ -41,6 +36,13 @@ repos: hooks: - id: flake8 args: ['--config', 'flake8'] +- repo: https://github.com/sqlfluff/sqlfluff + rev: 3.1.1 + hooks: + - id: sqlfluff-lint + additional_dependencies: ['postgres', 'sqlfluff-templater-dbt'] + - id: sqlfluff-fix + additional_dependencies: ['postgres', 'sqlfluff-templater-dbt'] - repo: https://github.com/gitguardian/ggshield rev: v1.24.0 hooks: diff --git a/.sqlfluff b/.sqlfluff new file mode 100644 index 000000000..b9ea37ef2 --- /dev/null +++ b/.sqlfluff @@ -0,0 +1,20 @@ +[sqlfluff] +templater = dbt +dialect = postgres + +[sqlfluff:templater:jinja] +apply_dbt_builtins = True + + +[sqlfluff:layout:type:alias_expression] +# We 
want non-default spacing _before_ the alias expressions. +spacing_before = align +# We want to align them within the next outer select clause. +# This means for example that alias expressions within the FROM +# or JOIN clause would _not_ be aligned with them. +align_within = select_clause +# The point at which to stop searching outward for siblings, which +# in this example would likely be the boundary of a CTE. Stopping +# when we hit brackets is usually a good rule of thumb for this +# configuration. +align_scope = bracketed From 6929fa36d186d41a19591103fc19fe3f1f31b134 Mon Sep 17 00:00:00 2001 From: "Alexis A." Date: Fri, 6 Sep 2024 13:21:28 +0200 Subject: [PATCH 82/99] feat(dbt): add sqlfluff linter --- .pre-commit-config.yaml | 10 ++- .sqlfluff | 5 ++ .../sql/sparte/models/ocsge/difference.sql | 71 ++++++++----------- .../sparte/models/ocsge/occupation_du_sol.sql | 17 +++-- .../sparte/models/ocsge/zone_construite.sql | 10 +-- 5 files changed, 56 insertions(+), 57 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cabf18386..ecb5138ca 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -40,9 +40,15 @@ repos: rev: 3.1.1 hooks: - id: sqlfluff-lint - additional_dependencies: ['postgres', 'sqlfluff-templater-dbt'] + additional_dependencies: [ + 'dbt-postgres==1.8.2', + 'sqlfluff-templater-dbt' + ] - id: sqlfluff-fix - additional_dependencies: ['postgres', 'sqlfluff-templater-dbt'] + additional_dependencies: [ + 'dbt-postgres==1.8.2', + 'sqlfluff-templater-dbt' + ] - repo: https://github.com/gitguardian/ggshield rev: v1.24.0 hooks: diff --git a/.sqlfluff b/.sqlfluff index b9ea37ef2..39ccaaf0c 100644 --- a/.sqlfluff +++ b/.sqlfluff @@ -6,6 +6,11 @@ dialect = postgres apply_dbt_builtins = True +[sqlfluff:templater:dbt] +project_dir = airflow/include/sql/sparte +profiles_dir = ~/.dbt + + [sqlfluff:layout:type:alias_expression] # We want non-default spacing _before_ the alias expressions. 
spacing_before = align diff --git a/airflow/include/sql/sparte/models/ocsge/difference.sql b/airflow/include/sql/sparte/models/ocsge/difference.sql index 7031906e4..e27c3f1f1 100644 --- a/airflow/include/sql/sparte/models/ocsge/difference.sql +++ b/airflow/include/sql/sparte/models/ocsge/difference.sql @@ -17,45 +17,34 @@ }} SELECT - to_timestamp(foo.loaded_date) as loaded_date, foo.year_old, foo.year_new, - cs_new, - cs_old, - us_new, - us_old, + foo.cs_new, + foo.cs_old, + foo.us_new, + foo.us_old, foo.departement, - ST_Area(geom) AS surface, - uuid, - CASE - WHEN - old_is_imper = false AND - new_is_imper = true - THEN true - ELSE false - END AS new_is_impermeable, - CASE - WHEN - old_is_imper = true AND - new_is_imper = false - THEN true - ELSE false - END AS new_not_impermeable, - CASE - WHEN - old_is_artif = false AND - new_is_artif = true - THEN true - ELSE false - END AS new_is_artificial, - CASE - WHEN - old_is_artif = true AND - new_is_artif = false THEN true - ELSE false - END AS new_not_artificial, - geom, - 2154 as srid_source + foo.uuid, + foo.geom, + 2154 AS srid_source, + to_timestamp(foo.loaded_date) AS loaded_date, + st_area(foo.geom) AS surface, + coalesce( + foo.old_is_imper = false + AND foo.new_is_imper = true, false + ) AS new_is_impermeable, + coalesce( + foo.old_is_imper = true + AND foo.new_is_imper = false, false + ) AS new_not_impermeable, + coalesce( + foo.old_is_artif = false + AND foo.new_is_artif = true, false + ) AS new_is_artificial, + coalesce( + foo.old_is_artif = true + AND foo.new_is_artif = false, false + ) AS new_not_artificial FROM ( SELECT ocsge.loaded_date, @@ -66,7 +55,7 @@ FROM ( ocsge.us_new, ocsge.us_old, ocsge.departement, - ST_MakeValid(ocsge.geom) AS geom, + st_makevalid(ocsge.geom) AS geom, {{ is_artificial('cs_old', 'us_old') }} AS old_is_artif, {{ is_impermeable('cs_old') }} AS old_is_imper, {{ is_artificial('cs_new', 'us_new') }} AS new_is_artif, @@ -75,8 +64,8 @@ FROM ( FROM {{ source('public', 
'ocsge_difference') }} AS ocsge WHERE - cs_new IS NOT NULL AND - cs_old IS NOT NULL AND - us_new IS NOT NULL AND - us_old IS NOT NULL + ocsge.cs_new IS NOT null + AND ocsge.cs_old IS NOT null + AND ocsge.us_new IS NOT null + AND ocsge.us_old IS NOT null ) AS foo diff --git a/airflow/include/sql/sparte/models/ocsge/occupation_du_sol.sql b/airflow/include/sql/sparte/models/ocsge/occupation_du_sol.sql index 2cb822ee5..3b201334f 100644 --- a/airflow/include/sql/sparte/models/ocsge/occupation_du_sol.sql +++ b/airflow/include/sql/sparte/models/ocsge/occupation_du_sol.sql @@ -1,5 +1,3 @@ - - {{ config( materialized='table', @@ -15,18 +13,19 @@ ) }} + SELECT - to_timestamp(loaded_date) as loaded_date, + to_timestamp(loaded_date) AS loaded_date, id, code_cs, code_us, departement, year, - ST_area(geom) AS surface, - {{ is_impermeable('code_cs') }} as is_impermeable, - {{ is_artificial('code_cs', 'code_us') }} as is_artificial, + st_area(geom) AS surface, + {{ is_impermeable('code_cs') }} AS is_impermeable, + {{ is_artificial('code_cs', 'code_us') }} AS is_artificial, uuid::uuid, - ST_MakeValid(geom) AS geom, - 2154 as srid_source + st_makevalid(geom) AS geom, + 2154 AS srid_source FROM - {{ source('public', 'ocsge_occupation_du_sol') }} AS ocsge + {{ source('public', 'ocsge_occupation_du_sol') }} diff --git a/airflow/include/sql/sparte/models/ocsge/zone_construite.sql b/airflow/include/sql/sparte/models/ocsge/zone_construite.sql index 48cd56151..1d72aeeec 100644 --- a/airflow/include/sql/sparte/models/ocsge/zone_construite.sql +++ b/airflow/include/sql/sparte/models/ocsge/zone_construite.sql @@ -12,13 +12,13 @@ }} SELECT - to_timestamp(loaded_date) as loaded_date, id, year, departement, - ST_MakeValid(geom) AS geom, - ST_Area(geom) as surface, uuid::uuid, - 2154 as srid_source + 2154 AS srid_source, + to_timestamp(loaded_date) AS loaded_date, + st_makevalid(geom) AS geom, + st_area(geom) AS surface FROM - {{ source('public', 'ocsge_zone_construite') }} as ocsge + {{ 
source('public', 'ocsge_zone_construite') }} From af4d9354fe381585af4c0bbc8368a982a1f99235 Mon Sep 17 00:00:00 2001 From: "Alexis A." Date: Fri, 6 Sep 2024 14:51:41 +0200 Subject: [PATCH 83/99] feat(precommit): skip sqlfluff in ci --- .github/workflows/pr.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index c9f440ac8..f1fdf2633 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -36,7 +36,7 @@ jobs: - uses: actions/checkout@v4 - uses: pre-commit/action@v3.0.1 env: - SKIP: ggshield + SKIP: ggshield,sqlfluff-lint,sqlfluff-fix test: runs-on: ubuntu-latest steps: From 6c839f74342ef1018be45f04caa9bf5f37efac27 Mon Sep 17 00:00:00 2001 From: "Alexis A." Date: Fri, 6 Sep 2024 15:41:33 +0200 Subject: [PATCH 84/99] feat(dbt): add pools to prevent airflow from triggering dbt twice at the same time --- airflow/dags/ingest_admin_express.py | 3 ++- airflow/dags/ocsge.py | 7 ++++--- airflow/include/pools.py | 2 ++ 3 files changed, 8 insertions(+), 4 deletions(-) create mode 100644 airflow/include/pools.py diff --git a/airflow/dags/ingest_admin_express.py b/airflow/dags/ingest_admin_express.py index 567bdf9ea..72bb9ac15 100644 --- a/airflow/dags/ingest_admin_express.py +++ b/airflow/dags/ingest_admin_express.py @@ -12,6 +12,7 @@ from airflow.decorators import dag, task from airflow.models.param import Param from include.container import Container +from include.pools import DBT_POOL from pendulum import datetime with open("include/admin_express/sources.json", "r") as f: @@ -106,7 +107,7 @@ def ingest(path_on_bucket, **context) -> str: ] subprocess.run(" ".join(cmd), shell=True, check=True) - @task.bash(retries=0, trigger_rule="all_success") + @task.bash(retries=0, trigger_rule="all_success", pool=DBT_POOL) def dbt_run(**context): dbt_selector = get_source_by_name(context["params"]["zone"])["dbt_selector"] dbt_run_cmd = f"dbt build -s {dbt_selector}" diff --git a/airflow/dags/ocsge.py 
b/airflow/dags/ocsge.py index 87c16440d..c81f2d10f 100644 --- a/airflow/dags/ocsge.py +++ b/airflow/dags/ocsge.py @@ -27,6 +27,7 @@ ocsge_occupation_du_sol_normalization_sql, ocsge_zone_construite_normalization_sql, ) +from include.pools import DBT_POOL, OCSGE_STAGING_POOL from include.shapefile import get_shapefile_fields from include.utils import multiline_string_to_single_line @@ -247,7 +248,7 @@ def download_ocsge(url, **context) -> str: return path_on_bucket - @task.python + @task.python(pool=OCSGE_STAGING_POOL) def ingest_staging(path, **context) -> int: loaded_date = int(pendulum.now().timestamp()) departement = context["params"]["departement"] @@ -264,7 +265,7 @@ def ingest_staging(path, **context) -> int: return loaded_date - @task.bash + @task.bash(pool=OCSGE_STAGING_POOL) def db_test_ocsge_staging(**context): dataset = context["params"]["dataset"] dbt_select = " ".join([vars["dbt_selector_staging"] for vars in vars_dataset[dataset]]) @@ -286,7 +287,7 @@ def ingest_ocsge(path, **context) -> int: return loaded_date - @task.bash(retries=0, trigger_rule="all_success") + @task.bash(retries=0, trigger_rule="all_success", pool=DBT_POOL) def dbt_run_ocsge(**context): dataset = context["params"]["dataset"] dbt_select = " ".join([f'{vars["dbt_selector"]}+' for vars in vars_dataset[dataset]]) diff --git a/airflow/include/pools.py b/airflow/include/pools.py new file mode 100644 index 000000000..de86868be --- /dev/null +++ b/airflow/include/pools.py @@ -0,0 +1,2 @@ +DBT_POOL = "dbt_pool" +OCSGE_STAGING_POOL = "ocsge_staging_pool" From f9f585695f4069c4a28d4cada2a0f369c7648ef0 Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Fri, 6 Sep 2024 23:57:44 +0200 Subject: [PATCH 85/99] feat(dbt): add dbt pool --- airflow/dags/ingest_app_dependencies.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/airflow/dags/ingest_app_dependencies.py b/airflow/dags/ingest_app_dependencies.py index d0bced4d9..720a9853f 100644 --- a/airflow/dags/ingest_app_dependencies.py +++ b/airflow/dags/ingest_app_dependencies.py @@ -6,6 +6,7 @@ from airflow.decorators import dag, task from gdaltools import ogr2ogr from include.container import Container +from include.pools import DBT_POOL from pendulum import datetime @@ -72,7 +73,7 @@ def ingest_app_scot_departements(): def ingest_app_scot_regions(): return ingest_table("public_data_scot_regions", "app_scot_regions") - @task.bash(retries=0, trigger_rule="all_success") + @task.bash(retries=0, trigger_rule="all_success", pool=DBT_POOL) def dbt_run(**context): return 'cd "${AIRFLOW_HOME}/include/sql/sparte" && dbt run -s app' From a705112298bfcdd03126c7f54a12991c9926d868 Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Tue, 10 Sep 2024 12:52:42 +0200 Subject: [PATCH 86/99] fix(for_app_commune): set surface to 0 if missing --- .../models/ocsge/for_app/for_app_commune.sql | 102 +++++++++--------- 1 file changed, 54 insertions(+), 48 deletions(-) diff --git a/airflow/include/sql/sparte/models/ocsge/for_app/for_app_commune.sql b/airflow/include/sql/sparte/models/ocsge/for_app/for_app_commune.sql index 37f785e4b..db9cd6ef0 100644 --- a/airflow/include/sql/sparte/models/ocsge/for_app/for_app_commune.sql +++ b/airflow/include/sql/sparte/models/ocsge/for_app/for_app_commune.sql @@ -6,69 +6,75 @@ }} with artif_commune_partitionned as ( - SELECT - row_number() OVER (PARTITION BY commune_code ORDER BY year DESC) as rn, - * - FROM + select + *, + row_number() over (partition by commune_code order by year desc) as rn + from {{ ref('artificial_commune') }} -), latest_year_artif_commune as ( - SELECT - * - FROM +), + +latest_year_artif_commune as ( + select * + from artif_commune_partitionned - WHERE + where rn = 1 -), first_and_last_millesimes as ( - SELECT +), + +first_and_last_millesimes as ( + select commune_code, - MIN(year) as first_millesime, - MAX(year) as last_millesime - FROM + min(year) as first_millesime, + max(year) as last_millesime + from {{ ref('occupation_du_sol_commune') }} - GROUP BY + group by commune_code ) -SELECT + +select commune.id, commune.insee, - commune.name, + admin_express_commune.name, commune.departement_id, commune.epci_id, commune.scot_id, commune.map_color, - CASE - WHEN - artif_commune.surface IS NOT NULL - THEN true - ELSE commune.ocsge_available - END AS ocsge_available, - millesimes.first_millesime as first_millesime, - millesimes.last_millesime as last_millesime, - COALESCE( - CASE - WHEN - artif_commune.surface IS NOT NULL - THEN artif_commune.surface / 10000 - ELSE - NULL - END, - commune.surface_artif - ) as surface_artif, - admin_express_commune.surface / 10000 as area, - ST_Transform(admin_express_commune.geom, 4326) as mpoly, - 
admin_express_commune.srid_source as srid_source -FROM + millesimes.first_millesime, + millesimes.last_millesime, + admin_express_commune.srid_source, + coalesce(artif_commune.surface is not NULL, FALSE) as ocsge_available, + case + when + artif_commune.surface is not NULL + then artif_commune.surface / 10000 + end as surface_artif, + case + when + admin_express_commune.surface is not NULL + then admin_express_commune.surface / 10000 + else + 0 + end as area, + case + when + admin_express_commune.geom is not NULL + then st_transform(admin_express_commune.geom, 4326) + else + st_setsrid('MULTIPOLYGON EMPTY'::geometry, 4326) + end as mpoly +from {{ ref('app_commune') }} as commune -LEFT JOIN +left join latest_year_artif_commune as artif_commune -ON - commune.insee = artif_commune.commune_code -LEFT JOIN + on + commune.insee = artif_commune.commune_code +left join first_and_last_millesimes as millesimes -ON - commune.insee = millesimes.commune_code -LEFT JOIN + on + commune.insee = millesimes.commune_code +left join {{ ref('commune') }} as admin_express_commune -ON - commune.insee = admin_express_commune.code + on + commune.insee = admin_express_commune.code From 60577d7721490a741129eaa1e21c100dbd28268a Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Tue, 10 Sep 2024 13:00:23 +0200 Subject: [PATCH 87/99] feat(update_app): allow running only certain tasks --- airflow/dags/update_app.py | 45 +++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/airflow/dags/update_app.py b/airflow/dags/update_app.py index 4daa9d269..b2a308443 100644 --- a/airflow/dags/update_app.py +++ b/airflow/dags/update_app.py @@ -80,7 +80,22 @@ def copy_table_from_dw_to_app( PRODUCTION, DEV, ], - ) + ), + "tasks": Param( + default=[ + "copy_public_data_ocsge", + "copy_public_data_artificialarea", + "copy_public_data_artifareazoneurba", + "copy_public_data_commune", + "copy_public_data_departement", + "copy_public_data_communesol", + "copy_public_data_ocsgediff", + "copy_public_data_communediff", + "copy_public_data_zoneconstruite", + "copy_public_data_zoneurba", + ], + type="array", + ), }, ) def update_app(): # noqa: C901 @@ -211,18 +226,22 @@ def copy_public_data_zoneurba(**context): ], ) - ( - copy_public_data_artificialarea() - >> copy_public_data_artifareazoneurba() - >> copy_public_data_commune() - >> copy_public_data_departement() - >> copy_public_data_communesol() - >> copy_public_data_ocsgediff() - >> copy_public_data_communediff() - >> copy_public_data_zoneconstruite() - >> copy_public_data_ocsge() - >> copy_public_data_zoneurba() - ) + @task.branch + def copy_public_data_branch(**context): + return context["params"]["tasks"] + + copy_public_data_branch() >> [ + copy_public_data_ocsge(), + copy_public_data_artificialarea(), + copy_public_data_artifareazoneurba(), + copy_public_data_commune(), + copy_public_data_departement(), + copy_public_data_communesol(), + copy_public_data_ocsgediff(), + copy_public_data_communediff(), + copy_public_data_zoneconstruite(), + copy_public_data_zoneurba(), + ] update_app() From aaba21b5c91fd005ac50d2a31c84a6a01167222c Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Tue, 10 Sep 2024 15:15:56 +0200 Subject: [PATCH 88/99] temp --- .../sql/sparte/models/ocsge/for_app/for_app_commune.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airflow/include/sql/sparte/models/ocsge/for_app/for_app_commune.sql b/airflow/include/sql/sparte/models/ocsge/for_app/for_app_commune.sql index db9cd6ef0..481d2298d 100644 --- a/airflow/include/sql/sparte/models/ocsge/for_app/for_app_commune.sql +++ b/airflow/include/sql/sparte/models/ocsge/for_app/for_app_commune.sql @@ -36,14 +36,14 @@ first_and_last_millesimes as ( select commune.id, commune.insee, - admin_express_commune.name, commune.departement_id, commune.epci_id, commune.scot_id, commune.map_color, millesimes.first_millesime, millesimes.last_millesime, - admin_express_commune.srid_source, + coalesce(admin_express_commune.name, commune.name) as name, -- noqa: L029 + coalesce(admin_express_commune.srid_source, 2154) as srid_source, coalesce(artif_commune.surface is not NULL, FALSE) as ocsge_available, case when From e80fa4522b9f3ce8437b2cb708230a4a18c82ef0 Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Tue, 10 Sep 2024 15:27:43 +0200 Subject: [PATCH 89/99] temp --- .../sql/sparte/models/ocsge/for_app/for_app_commune.sql | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/airflow/include/sql/sparte/models/ocsge/for_app/for_app_commune.sql b/airflow/include/sql/sparte/models/ocsge/for_app/for_app_commune.sql index 481d2298d..bb8d36307 100644 --- a/airflow/include/sql/sparte/models/ocsge/for_app/for_app_commune.sql +++ b/airflow/include/sql/sparte/models/ocsge/for_app/for_app_commune.sql @@ -44,7 +44,11 @@ select millesimes.last_millesime, coalesce(admin_express_commune.name, commune.name) as name, -- noqa: L029 coalesce(admin_express_commune.srid_source, 2154) as srid_source, - coalesce(artif_commune.surface is not NULL, FALSE) as ocsge_available, + case + when artif_commune.surface is not NULL then TRUE + when commune.name is NULL then TRUE + else FALSE + end as ocsge_available, case when artif_commune.surface is not NULL From 7f1ee70bbb96c04cbce5ba07185bc3a8e316018a Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Tue, 10 Sep 2024 15:41:03 +0200 Subject: [PATCH 90/99] temp --- .../models/ocsge/for_app/for_app_commune.sql | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/airflow/include/sql/sparte/models/ocsge/for_app/for_app_commune.sql b/airflow/include/sql/sparte/models/ocsge/for_app/for_app_commune.sql index bb8d36307..1ff66fb6f 100644 --- a/airflow/include/sql/sparte/models/ocsge/for_app/for_app_commune.sql +++ b/airflow/include/sql/sparte/models/ocsge/for_app/for_app_commune.sql @@ -40,8 +40,18 @@ select commune.epci_id, commune.scot_id, commune.map_color, - millesimes.first_millesime, - millesimes.last_millesime, + case + when + millesimes.first_millesime is not NULL + then millesimes.first_millesime + when commune.name is NULL then 2016 + end as first_millesime, + case + when + millesimes.last_millesime is not NULL + then millesimes.last_millesime + when commune.name is NULL then 2019 + end as last_millesime, coalesce(admin_express_commune.name, commune.name) as name, -- noqa: L029 coalesce(admin_express_commune.srid_source, 2154) as srid_source, case From 885b28ceb20b72a7732004f59af56397c8adf20d Mon Sep 17 00:00:00 2001 From: "Alexis A." 
Date: Tue, 10 Sep 2024 15:45:21 +0200 Subject: [PATCH 91/99] temp --- .../include/sql/sparte/models/ocsge/for_app/for_app_commune.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airflow/include/sql/sparte/models/ocsge/for_app/for_app_commune.sql b/airflow/include/sql/sparte/models/ocsge/for_app/for_app_commune.sql index 1ff66fb6f..86b7477f8 100644 --- a/airflow/include/sql/sparte/models/ocsge/for_app/for_app_commune.sql +++ b/airflow/include/sql/sparte/models/ocsge/for_app/for_app_commune.sql @@ -50,7 +50,7 @@ select when millesimes.last_millesime is not NULL then millesimes.last_millesime - when commune.name is NULL then 2019 + when commune.name is NULL then 2021 end as last_millesime, coalesce(admin_express_commune.name, commune.name) as name, -- noqa: L029 coalesce(admin_express_commune.srid_source, 2154) as srid_source, From 63e33bff1d1e89b3ad7ddffce297056033d48352 Mon Sep 17 00:00:00 2001 From: "Alexis A." Date: Tue, 10 Sep 2024 15:51:19 +0200 Subject: [PATCH 92/99] temp --- .../sql/sparte/models/ocsge/for_app/for_app_commune.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/airflow/include/sql/sparte/models/ocsge/for_app/for_app_commune.sql b/airflow/include/sql/sparte/models/ocsge/for_app/for_app_commune.sql index 86b7477f8..09c069083 100644 --- a/airflow/include/sql/sparte/models/ocsge/for_app/for_app_commune.sql +++ b/airflow/include/sql/sparte/models/ocsge/for_app/for_app_commune.sql @@ -44,19 +44,19 @@ select when millesimes.first_millesime is not NULL then millesimes.first_millesime - when commune.name is NULL then 2016 + when admin_express_commune.name is NULL then 2016 end as first_millesime, case when millesimes.last_millesime is not NULL then millesimes.last_millesime - when commune.name is NULL then 2021 + when admin_express_commune.name is NULL then 2021 end as last_millesime, coalesce(admin_express_commune.name, commune.name) as name, -- noqa: L029 coalesce(admin_express_commune.srid_source, 2154) 
as srid_source, case when artif_commune.surface is not NULL then TRUE - when commune.name is NULL then TRUE + when admin_express_commune.name is NULL then TRUE else FALSE end as ocsge_available, case From 4da7114f4b900fa22ece6653d49c02ee52297149 Mon Sep 17 00:00:00 2001 From: "Alexis A." Date: Tue, 10 Sep 2024 18:38:46 +0200 Subject: [PATCH 93/99] feat(cog2023): hide from search results --- .../templates/project/partials/search.html | 30 ++++++++++++++++--- public_data/serializers.py | 1 + 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/project/templates/project/partials/search.html b/project/templates/project/partials/search.html index 8555698f6..4a87dc116 100644 --- a/project/templates/project/partials/search.html +++ b/project/templates/project/partials/search.html @@ -86,6 +86,19 @@ .advanced-search:hover { opacity: 0.7; } + +.disabled { + color: #B0B0B0 !important; + pointer-events: none; +} + +.disabled .text-muted { + color: #B0B0B0 !important; +} + +.disabled-land-message { + margin-left: auto; +}