diff --git a/CHANGELOG.md b/CHANGELOG.md index b583c4e3..a99a434e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ **Under development** +- update: latest input data - feat: add population analysis output - fix: avoid regenerating OSM when population changes - feat: add municipality information to households and activities diff --git a/data/bpe/raw.py b/data/bpe/raw.py index 98135631..8836f3e2 100644 --- a/data/bpe/raw.py +++ b/data/bpe/raw.py @@ -8,8 +8,8 @@ def configure(context): context.config("data_path") - context.config("bpe_path", "bpe_2021/bpe21_ensemble_xy_csv.zip") - context.config("bpe_csv", "bpe21_ensemble_xy.csv") + context.config("bpe_path", "bpe_2023/BPE23.zip") + context.config("bpe_csv", "BPE23.csv") context.stage("data.spatial.codes") def execute(context): diff --git a/data/census/cleaned.py b/data/census/cleaned.py index 789d0adb..a4539894 100644 --- a/data/census/cleaned.py +++ b/data/census/cleaned.py @@ -62,7 +62,8 @@ def execute(context): df.loc[df["TRANS"] == "2", "commute_mode"] = "walk" df.loc[df["TRANS"] == "3", "commute_mode"] = "bike" df.loc[df["TRANS"] == "4", "commute_mode"] = "car" - df.loc[df["TRANS"] == "5", "commute_mode"] = "pt" + df.loc[df["TRANS"] == "5", "commute_mode"] = "car" + df.loc[df["TRANS"] == "6", "commute_mode"] = "pt" df.loc[df["TRANS"] == "Z", "commute_mode"] = np.nan df["commute_mode"] = df["commute_mode"].astype("category") diff --git a/data/census/raw.py b/data/census/raw.py index 73eebd4a..f286dc2e 100644 --- a/data/census/raw.py +++ b/data/census/raw.py @@ -10,8 +10,8 @@ def configure(context): context.stage("data.spatial.codes") context.config("data_path") - context.config("census_path", "rp_2019/RP2019_INDCVI_csv.zip") - context.config("census_csv", "FD_INDCVI_2019.csv") + context.config("census_path", "rp_2021/RP2021_indcvi.zip") + context.config("census_csv", "FD_INDCVI_2021.csv") context.config("projection_year", None) @@ -65,6 +65,6 @@ def execute(context): def validate(context): if not 
os.path.exists("{}/{}".format(context.config("data_path"), context.config("census_path"))): - raise RuntimeError("RP 2019 data is not available") + raise RuntimeError("RP 2021 data is not available") return os.path.getsize("{}/{}".format(context.config("data_path"), context.config("census_path"))) diff --git a/data/od/raw.py b/data/od/raw.py index 41bc515b..a1cc0f97 100644 --- a/data/od/raw.py +++ b/data/od/raw.py @@ -9,10 +9,10 @@ def configure(context): context.stage("data.spatial.codes") context.config("data_path") - context.config("od_pro_path", "rp_2019/RP2019_MOBPRO_csv.zip") - context.config("od_sco_path", "rp_2019/RP2019_MOBSCO_csv.zip") - context.config("od_pro_csv", "FD_MOBPRO_2019.csv") - context.config("od_sco_csv", "FD_MOBSCO_2019.csv") + context.config("od_pro_path", "rp_2021/RP2021_mobpro.zip") + context.config("od_sco_path", "rp_2021/RP2021_mobsco.zip") + context.config("od_pro_csv", "FD_MOBPRO_2021.csv") + context.config("od_sco_csv", "FD_MOBSCO_2021.csv") def execute(context): df_codes = context.stage("data.spatial.codes") diff --git a/data/spatial/codes.py b/data/spatial/codes.py index 38200a14..a02a1590 100644 --- a/data/spatial/codes.py +++ b/data/spatial/codes.py @@ -13,8 +13,8 @@ def configure(context): context.config("regions", [11]) context.config("departments", []) - context.config("codes_path", "codes_2021/reference_IRIS_geo2021.zip") - context.config("codes_xlsx", "reference_IRIS_geo2021.xlsx") + context.config("codes_path", "codes_2023/reference_IRIS_geo2023.zip") + context.config("codes_xlsx", "reference_IRIS_geo2023.xlsx") def execute(context): # Load IRIS registry diff --git a/data/spatial/iris.py b/data/spatial/iris.py index 8f10457a..ddc738e6 100644 --- a/data/spatial/iris.py +++ b/data/spatial/iris.py @@ -10,7 +10,7 @@ def configure(context): context.config("data_path") - context.config("iris_path", "iris_2021") + context.config("iris_path", "iris_2023") context.stage("data.spatial.codes") def execute(context): diff --git 
a/data/spatial/population.py b/data/spatial/population.py index 04ab94bb..d2a91906 100644 --- a/data/spatial/population.py +++ b/data/spatial/population.py @@ -9,9 +9,9 @@ def configure(context): context.config("data_path") context.stage("data.spatial.codes") - context.config("population_path", "rp_2019/base-ic-evol-struct-pop-2019.zip") - context.config("population_xlsx", "base-ic-evol-struct-pop-2019.xlsx") - context.config("population_year", 19) + context.config("population_path", "rp_2021/base-ic-evol-struct-pop-2021_xlsx.zip") + context.config("population_xlsx", "base-ic-evol-struct-pop-2021.xlsx") + context.config("population_year", 21) def execute(context): year = str(context.config("population_year")) diff --git a/docs/population.md b/docs/population.md index 4bf64326..6742958b 100644 --- a/docs/population.md +++ b/docs/population.md @@ -21,31 +21,31 @@ to start with an empty folder, e.g. `/data`. All data sets need to be named in a specific way and put into specific sub-directories. The following paragraphs describe this process. -### 1) Census data (RP 2019) +### 1) Census data (RP 2021) Census data containing the socio-demographic information of people living in France is available from INSEE: -- [Census data](https://www.insee.fr/fr/statistiques/6544333) +- [Census data](https://www.insee.fr/fr/statistiques/8268848) - Download the data set in **csv** format by clicking the link under *Individus localisés au canton-ou-ville*. -- Copy the *zip* file into the folder `data/rp_2019` +- Copy the *zip* file into the folder `data/rp_2021` -### 2) Population totals (RP 2019) +### 2) Population totals (RP 2021) We also make use of more aggregated population totals available from INSEE: -- [Population data](https://www.insee.fr/fr/statistiques/6543200) +- [Population data](https://www.insee.fr/fr/statistiques/8268806) - Download the data for *France hors Mayotte* in **xlsx** format. -- Copy the *zip* file into the folder `data/rp_2019`. 
+- Copy the *zip* file into the folder `data/rp_2021`. -### 3) Origin-destination data (RP-MOBPRO / RP-MOBSCO 2019) +### 3) Origin-destination data (RP-MOBPRO / RP-MOBSCO 2021) Origin-destination data is available from INSEE (at two locations): -- [Work origin-destination data](https://www.insee.fr/fr/statistiques/6456056) -- [Education origin-destination data](https://www.insee.fr/fr/statistiques/6456052) +- [Work origin-destination data](https://www.insee.fr/fr/statistiques/8205896) +- [Education origin-destination data](https://www.insee.fr/fr/statistiques/8205892) - Download the data from the links, both in **csv** format. -- Copy both *zip* files into the folder `data/rp_2019`. +- Copy both *zip* files into the folder `data/rp_2021`. ### 4) Income tax data (Filosofi 2019) @@ -57,14 +57,14 @@ The tax data set is available from INSEE: - Download the administrative level data (second link): *Base niveau administratif en 2019* in **xlsx** format - Copy the second *zip* file into `data/filosofi_2019` -### 5) Service and facility census (BPE 2021) +### 5) Service and facility census (BPE 2023) The census of services and facilities in France is available from INSEE: -- [Service and facility census](https://www.insee.fr/fr/statistiques/3568638) +- [Service and facility census](https://www.insee.fr/fr/statistiques/8217525) - Download the uppermost data set in **csv** format. It contains all available services while the lower data sets only contain observations for specific sectors. -- Copy the *zip* file into the folder `data/bpe_2021`. +- Copy the *zip* file into the folder `data/bpe_2023`. ### 6a) National household travel survey (ENTD 2008) @@ -91,23 +91,23 @@ guarantee that you have exactly the correct format), you should make sure that the following files are accessible in the folder `data/egt_2010`: `Menages_semaine.csv`, `Personnes_semaine.csv`, `Deplacements_semaine.csv`. 
-### 7) IRIS zoning system (2021) +### 7) IRIS zoning system (2023) The IRIS zoning system is available from IGN: - [IRIS data](https://geoservices.ign.fr/contoursiris) -- Download the **2021** edition. -- Copy the *7z* file into the folder `data/iris_2021` +- Download the **2023** edition. +- Copy the *7z* file into the folder `data/iris_2023` -### 8) Zoning registry (2021) +### 8) Zoning registry (2023) We make use of a zoning registry by INSEE that establishes a connection between the identifiers of IRIS, municipalities, departments and regions: - [Zoning data](https://www.insee.fr/fr/information/7708995) -- Download the **2021** edition as a *zip* file. -- Copy the *zip* file into `data/codes_2021`. +- Download the **2023** edition as a *zip* file. +- Copy the *zip* file into `data/codes_2023`. ### 9) Enterprise census (SIRENE) @@ -134,7 +134,7 @@ The geolocated enterprise census is available on data.gouv.fr: The French Buildings database is available from IGN: - [Buildings database](https://geoservices.ign.fr/bdtopo) -- In the sidebar on the right, under *Téléchargement anciennes éditions*, click on *BD TOPO® 2022 GeoPackage Départements* to go to the saved data publications from 2022. +- In the sidebar on the right, under *Téléchargement anciennes éditions*, click on *BD TOPO® 2024 GeoPackage Départements* to go to the saved data publications from 2024. - The data is split by department and they are identified with a number. 
For the Île-de-France region, download: - Paris (75) - Seine-et-Marne (77) @@ -159,32 +159,32 @@ The French adresses database is available on data.gouv.fr : Your folder structure should now have at least the following files: -- `data/rp_2019/RP2019_INDCVI_csv.zip` -- `data/rp_2019/RP2019_MOBPRO_csv.zip` -- `data/rp_2019/RP2019_MOBSCO_csv.zip` -- `data/rp_2019/base-ic-evol-struct-pop-2019.zip` +- `data/rp_2021/RP2021_indcvi.zip` +- `data/rp_2021/RP2021_mobpro.zip` +- `data/rp_2021/RP2021_mobsco.zip` +- `data/rp_2021/base-ic-evol-struct-pop-2021_xlsx.zip` - `data/filosofi_2019/indic-struct-distrib-revenu-2019-COMMUNES.zip` - `data/filosofi_2019/indic-struct-distrib-revenu-2019-SUPRA.zip` -- `data/bpe_2021/bpe21_ensemble_xy_csv.zip` +- `data/bpe_2023/BPE23.zip` - `data/entd_2008/Q_individu.csv` - `data/entd_2008/Q_tcm_individu.csv` - `data/entd_2008/Q_menage.csv` - `data/entd_2008/Q_tcm_menage_0.csv` - `data/entd_2008/K_deploc.csv` - `data/entd_2008/Q_ind_lieu_teg.csv` -- `data/iris_2021/CONTOURS-IRIS_2-1__SHP__FRA_2021-01-01.7z` -- `data/codes_2021/reference_IRIS_geo2021.zip` +- `data/iris_2023/CONTOURS-IRIS_3-0__SHP__FRA_2023-01-01.7z` +- `data/codes_2023/reference_IRIS_geo2023.zip` - `data/sirene/StockEtablissement_utf8.csv` - `data/sirene/StockUniteLegale_utf8.zip` - `data/sirene/GeolocalisationEtablissement_Sirene_pour_etudes_statistiques_utf8.zip` -- `data/bdtopo_idf/BDTOPO_3-0_TOUSTHEMES_GPKG_LAMB93_D075_2022-03-15.7z` -- `data/bdtopo_idf/BDTOPO_3-0_TOUSTHEMES_GPKG_LAMB93_D077_2022-03-15.7z` -- `data/bdtopo_idf/BDTOPO_3-0_TOUSTHEMES_GPKG_LAMB93_D078_2022-03-15.7z` -- `data/bdtopo_idf/BDTOPO_3-0_TOUSTHEMES_GPKG_LAMB93_D091_2022-03-15.7z` -- `data/bdtopo_idf/BDTOPO_3-0_TOUSTHEMES_GPKG_LAMB93_D092_2022-03-15.7z` -- `data/bdtopo_idf/BDTOPO_3-0_TOUSTHEMES_GPKG_LAMB93_D093_2022-03-15.7z` -- `data/bdtopo_idf/BDTOPO_3-0_TOUSTHEMES_GPKG_LAMB93_D094_2022-03-15.7z` -- `data/bdtopo_idf/BDTOPO_3-0_TOUSTHEMES_GPKG_LAMB93_D095_2022-03-15.7z` +- 
`data/bdtopo_idf/BDTOPO_3-0_TOUSTHEMES_GPKG_LAMB93_D075_2024-03-15.7z` +- `data/bdtopo_idf/BDTOPO_3-0_TOUSTHEMES_GPKG_LAMB93_D077_2024-03-15.7z` +- `data/bdtopo_idf/BDTOPO_3-0_TOUSTHEMES_GPKG_LAMB93_D078_2024-03-15.7z` +- `data/bdtopo_idf/BDTOPO_3-0_TOUSTHEMES_GPKG_LAMB93_D091_2024-03-15.7z` +- `data/bdtopo_idf/BDTOPO_3-0_TOUSTHEMES_GPKG_LAMB93_D092_2024-03-15.7z` +- `data/bdtopo_idf/BDTOPO_3-0_TOUSTHEMES_GPKG_LAMB93_D093_2024-03-15.7z` +- `data/bdtopo_idf/BDTOPO_3-0_TOUSTHEMES_GPKG_LAMB93_D094_2024-03-15.7z` +- `data/bdtopo_idf/BDTOPO_3-0_TOUSTHEMES_GPKG_LAMB93_D095_2024-03-15.7z` - `data/ban_idf/adresses-75.csv.gz` - `data/ban_idf/adresses-77.csv.gz` - `data/ban_idf/adresses-78.csv.gz` diff --git a/docs/verify_data.py b/docs/verify_data.py index f657dbff..604e6291 100644 --- a/docs/verify_data.py +++ b/docs/verify_data.py @@ -7,28 +7,28 @@ tests = [ { - "name": "Census 2019", + "name": "Census 2021", "urls": [ - "https://www.insee.fr/fr/statistiques/6544333", - "https://www.insee.fr/fr/statistiques/fichier/6544333/RP2019_INDCVIZA_csv.zip", - "https://www.insee.fr/fr/statistiques/fichier/6544333/RP2019_INDCVIZD_csv.zip", - "https://www.insee.fr/fr/statistiques/fichier/6544333/RP2019_INDCVIZE_csv.zip" + "https://www.insee.fr/fr/statistiques/8268848", + "https://www.insee.fr/fr/statistiques/fichier/8268848/RP2021_indcviza.zip", + "https://www.insee.fr/fr/statistiques/fichier/8268848/RP2021_indcvizd.zip", + "https://www.insee.fr/fr/statistiques/fichier/8268848/RP2021_indcvize.zip" ] }, { "name": "OD Matrices 2019", "urls": [ - "https://www.insee.fr/fr/statistiques/6456056", - "https://www.insee.fr/fr/statistiques/6456052", - "https://www.insee.fr/fr/statistiques/fichier/6456056/RP2019_mobpro_csv.zip", - "https://www.insee.fr/fr/statistiques/fichier/6456052/RP2019_mobsco_csv.zip" + "https://www.insee.fr/fr/statistiques/8205896", + "https://www.insee.fr/fr/statistiques/8205892", + "https://www.insee.fr/fr/statistiques/fichier/8205896/RP2021_mobpro.zip", + 
"https://www.insee.fr/fr/statistiques/fichier/8205892/RP2021_mobsco.zip" ] }, { "name": "Population totals 2019", "urls": [ - "https://www.insee.fr/fr/statistiques/6543200", - "https://www.insee.fr/fr/statistiques/fichier/6543200/base-ic-evol-struct-pop-2019_csv.zip" + "https://www.insee.fr/fr/statistiques/8268806", + "https://www.insee.fr/fr/statistiques/fichier/8268806/base-ic-evol-struct-pop-2021_csv.zip" ] }, { @@ -40,10 +40,10 @@ ] }, { - "name": "BPE 2021", + "name": "BPE 2023", "urls": [ - "https://www.insee.fr/fr/statistiques/3568638", - "https://www.insee.fr/fr/statistiques/fichier/3568638/bpe21_ensemble_xy_csv.zip" + "https://www.insee.fr/fr/statistiques/8217525", + "https://www.insee.fr/fr/statistiques/fichier/8217525/BPE23.zip" ] }, { @@ -59,17 +59,17 @@ ] }, { - "name": "IRIS 2021", + "name": "IRIS 2023", "urls": [ "https://geoservices.ign.fr/contoursiris", - "https://wxs.ign.fr/1yhlj2ehpqf3q6dt6a2y7b64/telechargement/inspire/CONTOURS-IRIS-PACK_2021-01$CONTOURS-IRIS_2-1__SHP__FRA_2021-01-01/file/CONTOURS-IRIS_2-1__SHP__FRA_2021-01-01.7z" + "https://data.geopf.fr/telechargement/download/CONTOURS-IRIS/CONTOURS-IRIS_3-0__SHP__FRA_2023-01-01/CONTOURS-IRIS_3-0__SHP__FRA_2023-01-01.7z" ] }, { - "name": "Zoning 2021", + "name": "Zoning 2023", "urls": [ "https://www.insee.fr/fr/information/2017499", - "https://www.insee.fr/fr/statistiques/fichier/2017499/reference_IRIS_geo2021.zip" + "https://www.insee.fr/fr/statistiques/fichier/7708995/reference_IRIS_geo2023.zip" ] }, { diff --git a/synthesis/locations/education.py b/synthesis/locations/education.py index 45a32a70..03d433c1 100644 --- a/synthesis/locations/education.py +++ b/synthesis/locations/education.py @@ -12,10 +12,9 @@ def configure(context): context.stage("data.bpe.cleaned", alias = "location_source") EDUCATION_WEIGHT_MAP = [ - ("C101", 100), # Preschools - ("C102", 50), # Intercommunal preschools - ("C104", 145), # Elemantary schools - ("C105", 80), # Intercommunal elemantary schools + ("C107", 100), 
# Preschools + ("C108", 115), # Primary schools + ("C109", 145), # Elementary schools ("C301", 700), # General and technological high schools, multi-purpose high schools ("C302", 285), # Professional high schools ("C303", 100), # Agricultural high schools diff --git a/tests/testdata.py b/tests/testdata.py index 6e75f71d..ba6a3760 100644 --- a/tests/testdata.py +++ b/tests/testdata.py @@ -161,11 +161,11 @@ def create(output_path): iris = "CODE_IRIS", municipality = "INSEE_COM" )) - os.mkdir("%s/iris_2021" % output_path) - df_iris.to_file("%s/iris_2021/CONTOURS-IRIS.shp" % output_path) + os.mkdir("%s/iris_2023" % output_path) + df_iris.to_file("%s/iris_2023/CONTOURS-IRIS.shp" % output_path) - with py7zr.SevenZipFile("%s/iris_2021/iris.7z" % output_path, "w") as archive: - for source in glob.glob("%s/iris_2021/CONTOURS-IRIS.*" % output_path): + with py7zr.SevenZipFile("%s/iris_2023/iris.7z" % output_path, "w") as archive: + for source in glob.glob("%s/iris_2023/CONTOURS-IRIS.*" % output_path): archive.write(source, "LAMB93/{}".format(source.split("/")[-1])) os.remove(source) @@ -178,17 +178,17 @@ def create(output_path): iris = "CODE_IRIS", municipality = "DEPCOM", department = "DEP", region = "REG" )) - os.mkdir("%s/codes_2021" % output_path) + os.mkdir("%s/codes_2023" % output_path) - with zipfile.ZipFile("%s/codes_2021/reference_IRIS_geo2021.zip" % output_path, "w") as archive: - with archive.open("reference_IRIS_geo2021.xlsx", "w") as f: + with zipfile.ZipFile("%s/codes_2023/reference_IRIS_geo2023.zip" % output_path, "w") as archive: + with archive.open("reference_IRIS_geo2023.xlsx", "w") as f: df_codes.to_excel( f, sheet_name = "Emboitements_IRIS", startrow = 5, index = False ) # Dataset: Aggregate census - # Required attributes: IRIS, COM, DEP, REG, P15_POP + # Required attributes: IRIS, COM, DEP, REG, P21_POP print("Creating aggregate census ...") df_population = df.copy() @@ -197,12 +197,12 @@ def create(output_path): )) # Set all population to fixed number - 
df_population["P19_POP"] = 120.0 + df_population["P21_POP"] = 120.0 - os.mkdir("%s/rp_2019" % output_path) + os.mkdir("%s/rp_2021" % output_path) - with zipfile.ZipFile("%s/rp_2019/base-ic-evol-struct-pop-2019.zip" % output_path, "w") as archive: - with archive.open("base-ic-evol-struct-pop-2019.xlsx", "w") as f: + with zipfile.ZipFile("%s/rp_2021/base-ic-evol-struct-pop-2021_xlsx.zip" % output_path, "w") as archive: + with archive.open("base-ic-evol-struct-pop-2021.xlsx", "w") as f: df_population.to_excel( f, sheet_name = "IRIS", startrow = 5, index = False ) @@ -229,10 +229,10 @@ def create(output_path): columns = ["DCIRIS", "LAMBERT_X", "LAMBERT_Y", "TYPEQU", "DEPCOM", "DEP"] - os.mkdir("%s/bpe_2021" % output_path) + os.mkdir("%s/bpe_2023" % output_path) - with zipfile.ZipFile("%s/bpe_2021/bpe21_ensemble_xy_csv.zip" % output_path, "w") as archive: - with archive.open("bpe21_ensemble_xy.csv", "w") as f: + with zipfile.ZipFile("%s/bpe_2023/BPE23.zip" % output_path, "w") as archive: + with archive.open("BPE23.csv", "w") as f: df_selection[columns].to_csv(f, sep = ";", index = False) @@ -563,8 +563,8 @@ def create(output_path): df_persons = pd.DataFrame.from_records(persons)[columns] df_persons.columns = columns - with zipfile.ZipFile("%s/rp_2019/RP2019_INDCVI_csv.zip" % output_path, "w") as archive: - with archive.open("FD_INDCVI_2019.csv", "w") as f: + with zipfile.ZipFile("%s/rp_2021/RP2021_indcvi.zip" % output_path, "w") as archive: + with archive.open("FD_INDCVI_2021.csv", "w") as f: df_persons.to_csv(f, sep = ";") # Data set: commute flows @@ -586,8 +586,8 @@ def create(output_path): columns = ["COMMUNE", "DCLT", "TRANS", "ARM", "IPONDI"] df_work.columns = columns - with zipfile.ZipFile("%s/rp_2019/RP2019_MOBPRO_csv.zip" % output_path, "w") as archive: - with archive.open("FD_MOBPRO_2019.csv", "w") as f: + with zipfile.ZipFile("%s/rp_2021/RP2021_mobpro.zip" % output_path, "w") as archive: + with archive.open("FD_MOBPRO_2021.csv", "w") as f: df_work.to_csv(f, 
sep = ";") # ... education @@ -602,8 +602,8 @@ def create(output_path): columns = ["COMMUNE", "DCETUF", "ARM", "IPONDI","AGEREV10"] df_education.columns = columns - with zipfile.ZipFile("%s/rp_2019/RP2019_MOBSCO_csv.zip" % output_path, "w") as archive: - with archive.open("FD_MOBSCO_2019.csv", "w") as f: + with zipfile.ZipFile("%s/rp_2021/RP2021_mobsco.zip" % output_path, "w") as archive: + with archive.open("FD_MOBSCO_2021.csv", "w") as f: df_education.to_csv(f, sep = ";") # Data set: BD-TOPO