From 180d1b39ca10cd8d0055634e3439a8799ca2beb4 Mon Sep 17 00:00:00 2001 From: hadrilec Date: Thu, 2 Jan 2025 01:31:07 +0000 Subject: [PATCH] bugfix in get_file_list from download module + readme cleaning --- README.md | 3 +-- docs/readme.rst | 2 +- pynsee/download/get_file_list.py | 42 ++++++++++++-------------------- 3 files changed, 17 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index 9074bc9c..747a6c99 100644 --- a/README.md +++ b/README.md @@ -20,8 +20,7 @@ It benefits from the developments made by teams working on APIs at INSEE and IGN ## Installation & API subscription -Credentials are necessary to access SIRENE API available through `pynsee` by the module `sirene`. API credentials can be created here : [portail-api.insee.fr](https://portail-api.insee.fr/). All other modules are freely accessible. `macrodata` module gives access to macroeconomic data from BDM database. `localdata` module helps users download data at regional and departmental level. Some metadata as the activity classification (NACE) is available thanks to the `metadata` module. The files available on [insee.fr](https://www.insee.fr) and IGN data, are available from the modules `download` and `geodata` respectively. - +Credentials are necessary to access SIRENE API available through `pynsee` by the module `sirene`. API credentials can be created here : [portail-api.insee.fr](https://portail-api.insee.fr/). All other modules are freely accessible. ```python diff --git a/docs/readme.rst b/docs/readme.rst index 820033b5..4f2d459e 100644 --- a/docs/readme.rst +++ b/docs/readme.rst @@ -65,7 +65,7 @@ It benefits from the developments made by teams working on APIs at INSEE and IGN Installation & API subscription ------------------------------- -Credentials are necessary to access SIRENE API available through `pynsee` by the module `sirene`. API credentials can be created here : `portail-api.insee.fr <https://portail-api.insee.fr/>`_. All other modules are freely accessible. 
`macrodata` module gives access to macroeconomic data from BDM database. `localdata` module helps users download data at regional and departmental level. Some metadata as the activity classification (NACE) is available thanks to the `metadata` module. The files available on `insee.fr <https://www.insee.fr>`_ and IGN data, are available from the modules `download` and `geodata` respectively. +Credentials are necessary to access SIRENE API available through `pynsee` by the module `sirene`. API credentials can be created here : `portail-api.insee.fr <https://portail-api.insee.fr/>`_. All other modules are freely accessible. .. code-block:: python diff --git a/pynsee/download/get_file_list.py b/pynsee/download/get_file_list.py index f88ff541..37483ee4 100644 --- a/pynsee/download/get_file_list.py +++ b/pynsee/download/get_file_list.py @@ -31,33 +31,21 @@ def get_file_list(): df = df.reset_index(drop=True) df = _move_col_before(df, "id", "nom") - df.columns = [ - "id", - "name", - "label", - "collection", - "link", - "type", - "zip", - "big_zip", - "data_file", - "tab", - "first_row", - "api_rest", - "md5", - "size", - "label_col", - "date_ref", - "meta_file", - "separator", - "type_col", - "long_col", - "val_col", - "encoding", - "last_row", - "missing_value", - ] - + rename_col_dict = { + "nom": "name", + "libelle": "label", + "lien": "link", + "fichier_donnees": "data_file", + "onglet": "tab", + "premiere_ligne": "first_row", + "fichier_meta": "meta_file", + "separateur": "separator", + "derniere_ligne": "last_row", + "valeurs_manquantes": "missing_value", + "disponible": "available" + } + df = df.rename(columns = rename_col_dict) + df = df[~df.link.str.contains("https://api.insee.fr")] warning_metadata_download()