diff --git a/.github/workflows/examples.yml b/.github/workflows/examples.yml
index 9883f6b3..b04fc855 100644
--- a/.github/workflows/examples.yml
+++ b/.github/workflows/examples.yml
@@ -30,8 +30,7 @@ jobs:
       - name: Test all examples
         env:
-          insee_key: ${{ secrets.INSEE_KEY }}
-          insee_secret: ${{ secrets.INSEE_SECRET }}
+          sirene_key: ${{ secrets.SIRENE_KEY }}
         run: |
           pip install jupytext
           pip install .[full]
@@ -48,8 +47,7 @@ jobs:
       - name: Test idbank list download
         env:
-          insee_key: ${{ secrets.INSEE_KEY }}
-          insee_secret: ${{ secrets.INSEE_SECRET }}
+          sirene_key: ${{ secrets.SIRENE_KEY }}
         run: |
           pip install .
           python -c "from pynsee.macrodata._dwn_idbank_files import _dwn_idbank_files;_dwn_idbank_files()"
diff --git a/.github/workflows/pkgTests.yml b/.github/workflows/pkgTests.yml
index 196a88d0..ed5b598f 100644
--- a/.github/workflows/pkgTests.yml
+++ b/.github/workflows/pkgTests.yml
@@ -10,7 +10,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11"]
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
     steps:
       - uses: actions/checkout@v2
@@ -22,7 +22,7 @@ jobs:
         run: |
           #sudo apt-get install libgeos-dev
           python -m pip install --upgrade pip
-          pip install flake8 pytest pytest-cov geopandas nbconvert matplotlib descartes
+          pip install flake8 pytest pytest-cov geopandas nbconvert matplotlib descartes parameterized
           if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
           if [ -f requirements-extra.txt ]; then pip install -r requirements-extra.txt; fi
       - name: Lint with flake8
@@ -33,8 +33,7 @@ jobs:
           flake8 . --count --ignore=E722,C901 --exit-zero --max-complexity=10 --max-line-length=127 --statistics
       - name: Test examples
        env:
-          insee_key: ${{ secrets.INSEE_KEY }}
-          insee_secret: ${{ secrets.INSEE_SECRET }}
+          sirene_key: ${{ secrets.SIRENE_KEY }}
        run: |
          pip install jupytext
          pip install -r requirements.txt
@@ -48,8 +47,7 @@ jobs:
          cd ../..
       - name: Test with pytest
        env:
-          insee_key: ${{ secrets.INSEE_KEY }}
-          insee_secret: ${{ secrets.INSEE_SECRET }}
+          sirene_key: ${{ secrets.SIRENE_KEY }}
        run: |
          pytest -v --cov
       - name: "Upload coverage to Codecov"
diff --git a/.github/workflows/pkgTests_pull_requests.yml b/.github/workflows/pkgTests_pull_requests.yml
index c4e86fd8..247b2ffe 100644
--- a/.github/workflows/pkgTests_pull_requests.yml
+++ b/.github/workflows/pkgTests_pull_requests.yml
@@ -10,7 +10,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11"]
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
     steps:
       - uses: actions/checkout@v4
@@ -24,7 +24,7 @@ jobs:
         run: |
           #sudo apt-get install libgeos-dev
           python -m pip install --upgrade pip
-          pip install flake8 pytest pytest-cov geopandas nbconvert matplotlib descartes
+          pip install flake8 pytest pytest-cov geopandas nbconvert matplotlib descartes parameterized
           if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
           if [ -f requirements-extra.txt ]; then pip install -r requirements-extra.txt; fi
       - name: Lint with flake8
@@ -35,8 +35,7 @@ jobs:
           flake8 . --count --ignore=E722,C901 --exit-zero --max-complexity=10 --max-line-length=127 --statistics
       - name: Test examples
        env:
-          insee_key: ${{ secrets.INSEE_KEY }}
-          insee_secret: ${{ secrets.INSEE_SECRET }}
+          sirene_key: ${{ secrets.SIRENE_KEY }}
        run: |
          pip install jupytext
          pip install -r requirements.txt
@@ -50,8 +49,7 @@ jobs:
          cd ../..
       - name: Test with pytest
        env:
-          insee_key: ${{ secrets.INSEE_KEY }}
-          insee_secret: ${{ secrets.INSEE_SECRET }}
+          sirene_key: ${{ secrets.SIRENE_KEY }}
        run: |
          pytest -v --cov
       - name: "Upload coverage to Codecov"
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index ae0737f6..c4cb3266 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -20,7 +20,7 @@ jobs:
       matrix:
         os: [ubuntu-20.04] #[ubuntu-20.04, windows-2019, macOS-10.15]
-        python-version: ["3.8"]
+        python-version: ["3.12"]
         #["3.7", "3.8", "3.9", "3.10"]
     steps:
diff --git a/README.md b/README.md
index 8666553d..747a6c99 100644
--- a/README.md
+++ b/README.md
@@ -20,9 +20,7 @@ It benefits from the developments made by teams working on APIs at INSEE and IGN
 
 ## Installation & API subscription
 
-The files available on [insee.fr](https://www.insee.fr) and IGN data, i.e. the use of `download` and `geodata` modules, do not require authentication.
-Credentials are necessary to access some of the INSEE APIs available through `pynsee` by the modules `macrodata`, `localdata`, `metadata` and `sirene`.
-API credentials can be created here : [portail-api.insee.fr](https://portail-api.insee.fr/)
+Credentials are necessary to access the SIRENE API, available through the `sirene` module of `pynsee`. API credentials can be created at [portail-api.insee.fr](https://portail-api.insee.fr/). All other modules are freely accessible.
 
 ```python
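This diff replaces the old key/secret pair with a single SIRENE key. A minimal sketch of the two ways a user can now supply it, based on the `_get_credentials` and `init_conn` code changed below (the key value is a placeholder):

```python
import os

# Option 1: environment variable; _get_credentials checks both
# "sirene_key" and "SIRENE_KEY"
os.environ["SIRENE_KEY"] = "my-sirene-api-key"

# Option 2: explicit initialisation
from pynsee.utils.init_conn import init_conn

init_conn(sirene_key="my-sirene-api-key")
```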
diff --git a/docs/readme.rst b/docs/readme.rst
index 83e48f97..4f2d459e 100644
--- a/docs/readme.rst
+++ b/docs/readme.rst
@@ -65,9 +65,7 @@ It benefits from the developments made by teams working on APIs at INSEE and IGN
 Installation & API subscription
 -------------------------------
 
-The files available on `insee.fr <https://www.insee.fr>`_ and IGN data, i.e. the use of `download` and `geodata` modules, do not require authentication.
-Credentials are necessary to access some of the INSEE APIs available through `pynsee` by the modules `macrodata`, `localdata`, `metadata` and `sirene`.
-API credentials can be created here : `portail-api.insee.fr <https://portail-api.insee.fr/>`_
+Credentials are necessary to access the SIRENE API, available through the `sirene` module of `pynsee`. API credentials can be created at `portail-api.insee.fr <https://portail-api.insee.fr/>`_. All other modules are freely accessible.
 
 .. code-block:: python
diff --git a/pynsee/download/_download_pb.py b/pynsee/download/_download_pb.py
index 72cea8af..ea2f39d5 100644
--- a/pynsee/download/_download_pb.py
+++ b/pynsee/download/_download_pb.py
@@ -4,7 +4,7 @@
 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 
-from pynsee.utils.requests_params import _get_requests_proxies
+from pynsee.utils.requests_params import _get_requests_proxies, _get_requests_session
 
 def _download_pb(url: str, fname: str, total: int = None):
@@ -19,8 +19,10 @@ def _download_pb(url: str, fname: str, total: int = None):
     """
     proxies = _get_requests_proxies()
+    session = _get_requests_session()
 
-    resp = requests.get(url, proxies=proxies, stream=True, verify=False)
+    with session as s:
+        resp = s.get(url, proxies=proxies, stream=True, verify=False)
 
     if total is None:
         total = int(resp.headers.get("content-length", 0))
diff --git a/pynsee/download/_get_file_list_internal.py b/pynsee/download/_get_file_list_internal.py
index 74aab433..91134d7c 100644
--- a/pynsee/download/_get_file_list_internal.py
+++ b/pynsee/download/_get_file_list_internal.py
@@ -1,14 +1,16 @@
 import io
 import zipfile
-import pkg_resources
+import importlib.resources
 import json
 
 def _get_file_list_internal():
 
-    zip_file = pkg_resources.resource_stream(
-        __name__, "data/liste_donnees.zip"
-    )
+    try:
+        zip_file = str(importlib.resources.files(__name__)) + "/data/liste_donnees.zip"
+    except:
+        import pkg_resources
+        zip_file = pkg_resources.resource_stream(__name__, "data/liste_donnees.zip")
 
     with zipfile.ZipFile(zip_file, "r") as zip_ref:
         zip_file = io.BytesIO(zip_ref.read("liste_donnees.json"))
diff --git a/pynsee/download/download_file.py b/pynsee/download/download_file.py
index 51164f0d..f2f9eddf 100644
--- a/pynsee/download/download_file.py
+++ b/pynsee/download/download_file.py
@@ -31,13 +31,13 @@ def download_file(id, variables=None, update=False, silent=False):
     """
     with tempfile.TemporaryDirectory() as tmpdir:
-        try:
+        # try:
 
-            dwn = _download_store_file(tmpdir, id, update=update)
-            df = _load_data_from_schema(dwn, variables=variables)
+        dwn = _download_store_file(tmpdir, id, update=update)
+        df = _load_data_from_schema(dwn, variables=variables)
 
-        except:
-            warnings.warn("Download failed")
-            df = pd.DataFrame()
+        # except:
+        #     warnings.warn("Download failed")
+        #     df = pd.DataFrame()
 
     return df
diff --git a/pynsee/download/get_file_list.py b/pynsee/download/get_file_list.py
index f88ff541..37483ee4 100644
--- a/pynsee/download/get_file_list.py
+++ b/pynsee/download/get_file_list.py
@@ -31,33 +31,21 @@ def get_file_list():
     df = df.reset_index(drop=True)
     df = _move_col_before(df, "id", "nom")
 
-    df.columns = [
-        "id",
-        "name",
-        "label",
-        "collection",
-        "link",
-        "type",
-        "zip",
-        "big_zip",
-        "data_file",
-        "tab",
-        "first_row",
-        "api_rest",
-        "md5",
-        "size",
-        "label_col",
-        "date_ref",
-        "meta_file",
-        "separator",
-        "type_col",
-        "long_col",
-        "val_col",
-        "encoding",
-        "last_row",
-        "missing_value",
-    ]
-
+    rename_col_dict = {
+        "nom": "name",
+        "libelle": "label",
+        "lien": "link",
+        "fichier_donnees": "data_file",
+        "onglet": "tab",
+        "premiere_ligne": "first_row",
+        "fichier_meta": "meta_file",
+        "separateur": "separator",
+        "derniere_ligne": "last_row",
+        "valeurs_manquantes": "missing_value",
+        "disponible": "available"
+    }
+    df = df.rename(columns = rename_col_dict)
+    df = df[~df.link.str.contains("https://api.insee.fr")]
 
     warning_metadata_download()
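The `pkg_resources`-to-`importlib.resources` migration in `_get_file_list_internal` above recurs in several modules below. A minimal self-contained sketch of the shared pattern (helper name hypothetical; `zipfile.ZipFile` accepts either the path string or the stream the fallback returns):

```python
import importlib.resources


def _packaged_zip(package: str, relpath: str):
    """Locate a zip shipped inside a package's data/ directory."""
    try:
        # Python >= 3.9
        return str(importlib.resources.files(package)) + "/" + relpath
    except Exception:
        # fallback for environments still relying on setuptools
        import pkg_resources
        return pkg_resources.resource_stream(package, relpath)
```

Note that `import importlib` alone does not guarantee the `importlib.resources` submodule is bound, which is why the sketch (and the rewritten modules in this diff) import `importlib.resources` explicitly.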
diff --git a/pynsee/geodata/_find_wfs_closest_match.py b/pynsee/geodata/_find_wfs_closest_match.py
index ad86f6c1..7aad4026 100644
--- a/pynsee/geodata/_find_wfs_closest_match.py
+++ b/pynsee/geodata/_find_wfs_closest_match.py
@@ -1,17 +1,16 @@
-import os
-import sys
+
 import difflib
 
 from pynsee.geodata._get_geodata import _get_geodata
 from pynsee.geodata.get_geodata_list import get_geodata_list
+from pynsee.utils.HiddenPrints import HiddenPrints
 
 string = "ADMINEXPRESS-COG.LATEST:departement"
 
+
 def _find_wfs_closest_match(string=string):
-    sys.stdout = open(os.devnull, 'w')
-    wfs = get_geodata_list()
-    sys.stdout = sys.__stdout__
-
+    with HiddenPrints():
+        wfs = get_geodata_list()
     list_sugg = list(wfs.Identifier.unique())
 
     suggestions = difflib.get_close_matches(
diff --git a/pynsee/localdata/__init__.py b/pynsee/localdata/__init__.py
index 11476612..08dea425 100644
--- a/pynsee/localdata/__init__.py
+++ b/pynsee/localdata/__init__.py
@@ -1,7 +1,6 @@
 from .get_area_list import get_area_list
 from .get_geo_list import get_geo_list
 from .get_local_data import get_local_data
-from .get_included_area import get_included_area
 from .get_nivgeo_list import get_nivgeo_list
 from .get_local_metadata import get_local_metadata
 from .get_population import get_population
@@ -15,7 +14,6 @@
     "get_area_list",
     "get_geo_list",
     "get_local_data",
-    "get_included_area",
     "get_nivgeo_list",
     "get_local_metadata",
    "get_population",
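The `HiddenPrints` context manager used here is added as a new file, `pynsee/utils/HiddenPrints.py`, later in this diff; a usage sketch:

```python
from pynsee.utils.HiddenPrints import HiddenPrints

with HiddenPrints():
    print("suppressed: stdout points to os.devnull inside the block")
print("visible again: stdout is restored on exit")
```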
diff --git a/pynsee/localdata/_find_latest_local_dataset.py b/pynsee/localdata/_find_latest_local_dataset.py
index e3d2bb0f..7e8004cb 100644
--- a/pynsee/localdata/_find_latest_local_dataset.py
+++ b/pynsee/localdata/_find_latest_local_dataset.py
@@ -1,5 +1,4 @@
-import sys
 import os
 import re
 from tqdm import trange
@@ -11,11 +10,12 @@
 from pynsee.localdata._get_insee_local_onegeo import _get_insee_local_onegeo
 from pynsee.utils._create_insee_folder import _create_insee_folder
 from pynsee.utils._hash import _hash
+from pynsee.utils.HiddenPrints import HiddenPrints
 
 import logging
 logger = logging.getLogger(__name__)
 
-def _find_latest_local_dataset(dataset_version, variables, nivgeo, codegeo, update):
+def _find_latest_local_dataset(dataset_version, variables, nivgeo, codegeo, update, backwardperiod=6):
 
     filename = _hash("".join([dataset_version] + ['_find_latest_local_dataset']))
     insee_folder = _create_insee_folder()
@@ -26,7 +26,7 @@
     datasetname = dataset_version.replace('latest', '').replace('GEO', '')
 
     current_year = int(datetime.datetime.today().strftime('%Y'))
-    backwardperiod = 10
+
     list_geo_dates = range(current_year, current_year-backwardperiod, -1)
     list_data_dates = range(current_year, current_year-backwardperiod, -1)
@@ -44,11 +44,10 @@
         dv = list_dataset_version[dvindex]
         try:
-            sys.stdout = open(os.devnull, 'w')
-            df = _get_insee_local_onegeo(
-                variables, dv, nivgeo=nivgeo, codegeo=codegeo
-            )
-            sys.stdout = sys.__stdout__
+            with HiddenPrints():
+                df = _get_insee_local_onegeo(
+                    variables, dv, nivgeo=nivgeo, codegeo=codegeo
+                )
 
             if type(df) == pd.core.frame.DataFrame:
                 if len(df.index) == 1:
@@ -63,11 +62,15 @@
             else:
                 dataset_version = dv
                 break
-
-        pickle.dump(dataset_version, open(file_localdata, "wb"))
+
+        f = open(file_localdata, "wb")
+        pickle.dump(str(dataset_version), f)
+        f.close()
     else:
         try:
-            dataset_version = pickle.load(open(file_localdata, "rb"))
+            f = open(file_localdata, "rb")
+            dataset_version = pickle.load(f)
+            f.close()
         except:
             os.remove(file_localdata)
             dataset_version = _find_latest_local_dataset(
diff --git a/pynsee/localdata/_get_insee_local_onegeo.py b/pynsee/localdata/_get_insee_local_onegeo.py
index 5fba93d6..0b18ed4e 100644
--- a/pynsee/localdata/_get_insee_local_onegeo.py
+++ b/pynsee/localdata/_get_insee_local_onegeo.py
@@ -14,109 +14,87 @@ def _get_insee_local_onegeo(variables, dataset_version, nivgeo, codegeo):
     count_sep = variables.count("-")
     modalite = ".all" * (count_sep + 1)
 
-    link = "https://api.insee.fr/donnees-locales/V0.1/donnees/"
+    link = "https://api.insee.fr/donnees-locales/donnees/"
     link = link + "geo-{}@{}/{}-{}{}".format(
         variables, dataset_version, nivgeo, codegeo, modalite
     )
 
     request = _request_insee(api_url=link, file_format="application/json;charset=utf-8")
 
-    try:
+    data_request = request.json()
 
-        data_request = request.json()
+    Cellule = data_request["Cellule"]
+    Variable = data_request["Variable"]
+    Croisement = data_request["Croisement"]
+    Zone = data_request["Zone"]
 
-        # if 'Cellule' in list(data_request.keys()):
-        Cellule = data_request["Cellule"]
-        Variable = data_request["Variable"]
-        Croisement = data_request["Croisement"]
-        Zone = data_request["Zone"]
+    dataset_version = Croisement["JeuDonnees"]["code"]
+    dataset_name = Croisement["JeuDonnees"]["Source"]
+    data_date = Croisement["JeuDonnees"]["Annee"]
 
-        dataset_version = Croisement["JeuDonnees"]["@code"]
-        dataset_name = Croisement["JeuDonnees"]["Source"]
-        data_date = Croisement["JeuDonnees"]["Annee"]
-        geo_date = Zone["Millesime"]["@annee"]
-        codegeo_label = Zone["Millesime"]["Nccenr"]
+    list_data = []
 
-        list_data = []
+    for i in range(len(Cellule)):
+        dico = {**Cellule[i]["Zone"], **Cellule[i]["Mesure"]}
+        modalite = Cellule[i]["Modalite"]
 
-        for i in range(len(Cellule)):
-            dico = {**Cellule[i]["Zone"], **Cellule[i]["Mesure"]}
-            modalite = Cellule[i]["Modalite"]
-
-            for m in range(len(modalite)):
-                try:
-                    dico_added = {modalite[m]["@variable"]: modalite[m]["@code"]}
-                except:
-                    dico_added = {modalite["@variable"]: modalite["@code"]}
-                dico = {**dico, **dico_added}
+        for m in range(len(modalite)):
+            try:
+                dico_added = {modalite[m]["variable"]: modalite[m]["code"]}
+            except:
+                dico_added = {modalite["variable"]: modalite["code"]}
+            dico = {**dico, **dico_added}
 
-            dico["OBS_VALUE"] = Cellule[i]["Valeur"]
+        dico["OBS_VALUE"] = Cellule[i]["Valeur"]
+        dico = {k: v for k, v in dico.items() if len(v) != 0}
 
+        try:
             df = pd.DataFrame(dico, index=[0])
-            list_data.append(df)
+        except:
+            df = pd.DataFrame(dico)
+
+        list_data.append(df)
+
+    data = pd.concat(list_data)
+
+    for i in range(len(Variable)):
+
+        list_dict_var = []
+        values = Variable[i]["Modalite"]
+        for d in range(len(values)):
+            df_dict = pd.DataFrame(values[d], index=[0])
+            list_dict_var.append(df_dict)
+
+        var_name = Variable[i]["code"]
+
+        df = (pd.concat(list_dict_var)
+              .reset_index(drop=True)
+              .drop(columns=['variable'])
+              .rename(columns = {'code': var_name})
+              )
+
+        data[f"{var_name}_label"] = Variable[i]["Libelle"]
+
+        data = data.merge(df, on = var_name, how="left")
+
+    data = data.assign(
+        DATASET_VERSION=dataset_version,
+        DATASET_NAME=dataset_name,
+        DATA_DATE=data_date,
+    )
 
-        data = pd.concat(list_data)
+    data.rename(
+        columns={
+            "codgeo": "CODEGEO",
+            "nivgeo": "NIVGEO",
+            "code": "UNIT",
+            "value": "UNIT_label_fr",
+        },
+        inplace=True,
+    )
 
-        try:
-            for i in range(len(Variable)):
-
-                try:
-                    df = pd.DataFrame(Variable[i]["Modalite"], index=[0])
-                except:
-                    list_dict_var = []
-                    values = Variable[i]["Modalite"]
-                    for d in range(len(values)):
-                        df_dict = pd.DataFrame(values[d], index=[0])
-                        list_dict_var.append(df_dict)
-                    df = pd.concat(list_dict_var).reset_index(drop=True)
-
-                var_name = Variable[i]["@code"]
-                df = df[["@code", "Libelle"]]
-                df.columns = [var_name, var_name + "_label"]
-                data = data.merge(df, on=var_name, how="left")
-        except:
-            try:
-                var_name = Variable["@code"]
-                var_name_label = var_name + "_label"
-                value = Variable["Modalite"]["@code"]
-                label = Variable["Modalite"]["Libelle"]
-                df = pd.DataFrame({var_name: value, var_name_label: label}, index=[0])
-                data = data.merge(df, on=var_name, how="left")
-            except:
-                var_name = Variable["@code"]
-                var_name_label = var_name + "_label"
-
-                list_dict_var = []
-                values = Variable["Modalite"]
-                for d in range(len(values)):
-                    df_dict = pd.DataFrame(values[d], index=[0])
-                    list_dict_var.append(df_dict)
-                df = pd.concat(list_dict_var).reset_index(drop=True)
-                df = df[["@code", "Libelle"]]
-                df.columns = [var_name, var_name_label]
-                data = data.merge(df, on=var_name, how="left")
-
-        data = data.assign(
-            DATASET_VERSION=dataset_version,
-            DATASET_NAME=dataset_name,
-            DATA_DATE=data_date,
-            GEO_DATE=geo_date,
-            CODEGEO_label=codegeo_label,
-        )
-
-        data.rename(
-            columns={
-                "@codgeo": "CODEGEO",
-                "@nivgeo": "NIVGEO",
-                "@code": "UNIT",
-                "$": "UNIT_label_fr",
-            },
-            inplace=True,
-        )
-
-        data["OBS_VALUE"] = pd.to_numeric(data["OBS_VALUE"])
-
-    except Exception as e:
-        #print(e)
-        data = pd.DataFrame({"CODEGEO": codegeo, "OBS_VALUE": None}, index=[0])
+    data["OBS_VALUE"] = pd.to_numeric(data["OBS_VALUE"])
+
+    # except Exception as e:
+    #     #print(e)
+    #     data = pd.DataFrame({"CODEGEO": codegeo, "OBS_VALUE": None}, index=[0])
 
     return data
diff --git a/pynsee/localdata/get_included_area.py b/pynsee/localdata/get_included_area.py
deleted file mode 100644
index 2d424ff4..00000000
--- a/pynsee/localdata/get_included_area.py
+++ /dev/null
@@ -1,42 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright : INSEE, 2021
-
-from pynsee.localdata._get_insee_one_area import _get_insee_one_area
-
-import pandas as pd
-from tqdm import trange
-from warnings import warn
-
-def get_included_area(area_type, codeareas):
-    """Get all areas included in the list of areas provided
-
-    Args:
-        area_type (str): type of area
-
-        codeareas (str): list of areas
-
-    Raises:
-        ValueError: Error if codeareas is not a list
-
-    Examples:
-        >>> from pynsee.localdata import get_area_list, get_included_area
-        >>> area_list = get_area_list()
-        >>> paris_empl_area = get_included_area(area_type = 'zonesDEmploi2020', codeareas = '1109')
-    """
-    warn('This function is deprecated.\nPlease, use get_descending_area instead.', DeprecationWarning, stacklevel=2)
-
-    if type(codeareas) == str:
-        codeareas = [codeareas]
-
-    if type(codeareas) != list:
-        raise ValueError("!!! codeareas must be a list or a str !!!")
-
-    list_data = []
-
-    for c in trange(len(codeareas)):
-        list_data.append(_get_insee_one_area(area_type, codeareas[c]))
-
-    data_final = pd.concat(list_data)
-    data_final = data_final.assign(area_type=area_type)
-
-    return data_final
diff --git a/pynsee/localdata/get_local_data.py b/pynsee/localdata/get_local_data.py
index c61f301a..9262cc4a 100644
--- a/pynsee/localdata/get_local_data.py
+++ b/pynsee/localdata/get_local_data.py
@@ -31,15 +31,17 @@ def _warning_nivgeo(nivgeo):
 
 @save_df(day_lapse_max=90)
 def get_local_data(
-    variables, dataset_version, nivgeo="FE", geocodes=["1"], update=False, silent=False
+    variables, dataset_version, nivgeo="FE",
+    geocodes=["1"], update=False, silent=False,
+    backwardperiod=6
 ):
     """Get INSEE local numeric data
 
     Args:
         variables (str): one or several variables separated by an hyphen (see get_local_metadata)
 
-        dataset_version (str): code of a dataset version (see get_local_metadata), if dates are replaced by 'latest' the function triggers a loop to find the latest data available (examples: 'GEOlatestRPlatest', 'GEOlatestFLORESlatest')
-
+        dataset_version (str): code of a dataset version (see get_local_metadata); if dates are replaced by 'latest', the function triggers a loop to find the latest data available (examples: 'GEOlatestRPlatest', 'GEOlatestFLORESlatest').
+
         nivgeo (str): code of kind of French administrative area (see get_nivgeo_list), by default it is 'FE' ie all France
 
         geocodes (list): code one specific area (see get_geo_list), by default it is ['1'] ie all France
@@ -47,6 +49,8 @@
         update (bool): data is saved locally, set update=True to trigger an update
 
         silent (bool, optional): Set to True, to disable messages printed in log info
+
+        backwardperiod (int, optional): used only when the latest data is searched; it sets how many past years the search loop runs through.
 
     Raises:
         ValueError: Error if geocodes is not a list
@@ -103,7 +107,8 @@
 
     if pattern.match(dataset_version):
-        dataset_version = _find_latest_local_dataset(dataset_version, variables, nivgeo, geocodes[0], update)
+        dataset_version = _find_latest_local_dataset(dataset_version, variables,
+                                                     nivgeo, geocodes[0], update, backwardperiod)
 
     list_data_all = []
diff --git a/pynsee/localdata/get_local_metadata.py b/pynsee/localdata/get_local_metadata.py
index 96f8fd54..562f437d 100644
--- a/pynsee/localdata/get_local_metadata.py
+++ b/pynsee/localdata/get_local_metadata.py
@@ -5,8 +5,8 @@
 import os
 import re
 import zipfile
-import pkg_resources
 import pandas as pd
+import importlib.resources
 
 from pynsee.utils._create_insee_folder import _create_insee_folder
@@ -64,12 +64,16 @@ def get_local_metadata():
     list_files = [f for f in list_files if re.search("^doc_.*csv$", f)]
 
     test_file_available = [f not in list_files for f in all_files]
-    # any(test_file_available)
-    if True:
+
+    try:
+        pkg_path = importlib.resources.files(__name__)
+        zip_file = str(pkg_path) + "/data/local_metadata.zip"
+    except:
+        import pkg_resources
         zip_file = pkg_resources.resource_stream(__name__, "data/local_metadata.zip")
 
-        with zipfile.ZipFile(zip_file, "r") as zip_ref:
-            zip_ref.extractall(insee_folder)
+    with zipfile.ZipFile(zip_file, "r") as zip_ref:
+        zip_ref.extractall(insee_folder)
 
     def extract_data_from_excel_sheet(
         var,
diff --git a/pynsee/macrodata/_get_dataset_list_internal.py b/pynsee/macrodata/_get_dataset_list_internal.py
index 997a919f..ae6b46e7 100644
--- a/pynsee/macrodata/_get_dataset_list_internal.py
+++ b/pynsee/macrodata/_get_dataset_list_internal.py
@@ -1,6 +1,6 @@
 import io
 import zipfile
-import pkg_resources
+import importlib.resources
 import pandas as pd
 
 from pynsee.utils.save_df import save_df
@@ -9,9 +9,12 @@
 @save_df(day_lapse_max=90)
 def _get_dataset_list_internal():
 
-    zip_file = pkg_resources.resource_stream(
-        __name__, "data/dataset_list_internal.zip"
-    )
+    try:
+        pkg_macrodata = importlib.resources.files(__name__)
+        zip_file = str(pkg_macrodata) + "/data/dataset_list_internal.zip"
+    except:
+        import pkg_resources
+        zip_file = pkg_resources.resource_stream(__name__, "data/dataset_list_internal.zip")
 
     with zipfile.ZipFile(zip_file, "r") as zip_ref:
         dataset_file = io.BytesIO(zip_ref.read("dataset_list_internal.csv"))
diff --git a/pynsee/macrodata/_get_idbank_internal_data.py b/pynsee/macrodata/_get_idbank_internal_data.py
index ce159993..9600fc2f 100644
--- a/pynsee/macrodata/_get_idbank_internal_data.py
+++ b/pynsee/macrodata/_get_idbank_internal_data.py
@@ -3,7 +3,7 @@
 
 import io
 import zipfile
-import pkg_resources
+import importlib.resources
 import pandas as pd
 
 from pynsee.utils.save_df import save_df
@@ -16,9 +16,13 @@
 @save_df(day_lapse_max=90)
 def _get_idbank_internal_data(update=False, silent=True):
 
-    zip_file = pkg_resources.resource_stream(
-        __name__, "data/idbank_list_internal.zip"
-    )
+    try:
+        pkg_macrodata = importlib.resources.files(__name__)
+        zip_file = str(pkg_macrodata) + "/data/idbank_list_internal.zip"
+    except:
+        import pkg_resources
+        zip_file = pkg_resources.resource_stream(__name__, "data/idbank_list_internal.zip")
+
     with zipfile.ZipFile(zip_file, "r") as zip_ref:
         data_file = io.BytesIO(zip_ref.read("idbank_list_internal.csv"))
@@ -27,4 +31,8 @@
     idbank_list = pd.read_csv(
         data_file, encoding="utf-8", quotechar='"', sep=",", dtype=str
     )
 
+    col = "Unnamed: 0"
+    if col in idbank_list.columns:
+        idbank_list = idbank_list.drop(columns={col})
+
     return idbank_list
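A hypothetical call exercising the new `backwardperiod` argument of `get_local_data` above (dataset and variable codes borrowed from the tests in this diff; network access and a reachable INSEE API are assumed):

```python
from pynsee.localdata import get_local_data

# Look at most 6 years back (the new default) for the latest RP dataset
df = get_local_data(
    variables="CS1_6",
    dataset_version="GEOlatestRPlatest",
    backwardperiod=6,
)
```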
diff --git a/pynsee/metadata/get_activity_list.py b/pynsee/metadata/get_activity_list.py
index a6cbc2b0..d4b480dc 100644
--- a/pynsee/metadata/get_activity_list.py
+++ b/pynsee/metadata/get_activity_list.py
@@ -5,7 +5,7 @@
 import os
 import re
 import zipfile
-import pkg_resources
+import importlib.resources
 import pandas as pd
 
 from pynsee.utils._create_insee_folder import _create_insee_folder
@@ -98,13 +98,15 @@ def get_activity_list(level, version="NAFRev2"):
     list_available_file = [not os.path.exists(f) for f in list_expected_files]
 
     # unzipping raw files
-    # any(list_available_file)
-    if True:
-
+    try:
+        pkg_path = importlib.resources.files(__name__)
+        zip_file = str(pkg_path) + "/data/naf.zip"
+    except:
+        import pkg_resources
         zip_file = pkg_resources.resource_stream(__name__, "data/naf.zip")
 
-        with zipfile.ZipFile(zip_file, "r") as zip_ref:
-            zip_ref.extractall(insee_folder)
+    with zipfile.ZipFile(zip_file, "r") as zip_ref:
+        zip_ref.extractall(insee_folder)
 
     def drop_space(string):
         if pd.isna(string):
diff --git a/pynsee/metadata/get_definition_list.py b/pynsee/metadata/get_definition_list.py
index 93c7b904..c0e634b2 100644
--- a/pynsee/metadata/get_definition_list.py
+++ b/pynsee/metadata/get_definition_list.py
@@ -8,7 +8,7 @@
 import zipfile
 import os
 
-import pkg_resources
+import importlib.resources
 import pandas as pd
 
 import logging
@@ -48,8 +48,12 @@ def get_definition_list():
 
     # unzipping raw files
     if any(list_available_file):
-
-        zip_file = pkg_resources.resource_stream(__name__, "data/definition.zip")
+        try:
+            pkg_path = importlib.resources.files(__name__)
+            zip_file = str(pkg_path) + "/data/definition.zip"
+        except:
+            import pkg_resources
+            zip_file = pkg_resources.resource_stream(__name__, "data/definition.zip")
 
         with zipfile.ZipFile(zip_file, "r") as zip_ref:
             zip_ref.extractall(insee_folder)
diff --git a/pynsee/sirene/get_sirene_data.py b/pynsee/sirene/get_sirene_data.py
index 192f6bcc..257327ef 100644
--- a/pynsee/sirene/get_sirene_data.py
+++ b/pynsee/sirene/get_sirene_data.py
@@ -3,12 +3,12 @@
 
 import pandas as pd
 from functools import lru_cache
-import sys
-import os
 import re
 
 from pynsee.utils._request_insee import _request_insee
 from pynsee.utils._make_dataframe_from_dict import _make_dataframe_from_dict
+from pynsee.utils.HiddenPrints import HiddenPrints
+
 from pynsee.sirene.SireneDataFrame import SireneDataFrame
 
 import logging
@@ -65,14 +65,12 @@ def get_sirene_data(*id):
         )
 
         try:
-            sys.stdout = open(os.devnull, "w")
-            request = _request_insee(
-                api_url=link, file_format="application/json;charset=utf-8"
-            )
-
-            data_request = request.json()
-            sys.stdout = sys.__stdout__
-
+            with HiddenPrints():
+                request = _request_insee(
+                    api_url=link, file_format="application/json;charset=utf-8"
+                )
+
+                data_request = request.json()
             try:
                 data = data_request[main_key]
             except:
diff --git a/pynsee/sirene/get_sirene_relatives.py b/pynsee/sirene/get_sirene_relatives.py
index e67df28f..846c1ffb 100644
--- a/pynsee/sirene/get_sirene_relatives.py
+++ b/pynsee/sirene/get_sirene_relatives.py
@@ -1,11 +1,10 @@
-import os
-import sys
+
 import pandas as pd
 import re
 
 from pynsee.utils._request_insee import _request_insee
 from pynsee.utils._make_dataframe_from_dict import _make_dataframe_from_dict
-
+from pynsee.utils.HiddenPrints import HiddenPrints
 
 def get_sirene_relatives(*siret):
     """Find parent or child entities for one siret entity (etablissement)
@@ -46,12 +45,11 @@
             criteria = types[i] + ":" + re.sub(r"\s+", "", list_siret[s])
             query = f"https://api.insee.fr/api-sirene/3.11/siret/liensSuccession?q={criteria}"
             try:
-                sys.stdout = open(os.devnull, "w")
-                result = _request_insee(
-                    api_url=query, file_format="application/json;charset=utf-8"
-                )
-                json = result.json()
-                sys.stdout = sys.__stdout__
+                with HiddenPrints():
+                    result = _request_insee(
+                        api_url=query, file_format="application/json;charset=utf-8"
+                    )
+                    json = result.json()
             except:
                 pass
             else:
diff --git a/pynsee/utils/HiddenPrints.py b/pynsee/utils/HiddenPrints.py
new file mode 100644
index 00000000..f336a696
--- /dev/null
+++ b/pynsee/utils/HiddenPrints.py
@@ -0,0 +1,13 @@
+import os
+import sys
+
+# source and credits:
+# https://stackoverflow.com/a/45669280
+
+class HiddenPrints:
+    def __enter__(self):
+        self._original_stdout = sys.stdout
+        sys.stdout = open(os.devnull, 'w')
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        sys.stdout.close()
+        sys.stdout = self._original_stdout
diff --git a/pynsee/utils/_get_credentials.py b/pynsee/utils/_get_credentials.py
index 0f1e98fe..65e83cd7 100644
--- a/pynsee/utils/_get_credentials.py
+++ b/pynsee/utils/_get_credentials.py
@@ -27,32 +27,35 @@ def _get_credentials() -> Dict[str, str]:
     '''
     key_dict: Dict[str, str] = {}
 
-    try:
-        key_dict["sirene_key"] = os.environ["sirene_key"]
+    sirene_key = os.environ.get("sirene_key")
+
+    if sirene_key is None:
+        sirene_key = os.environ.get("SIRENE_KEY")
 
-        envir_var_used = True
-    except KeyError:
+    if sirene_key is None:
         envir_var_used = False
-
-    config_file = os.path.join(
-        user_config_dir("pynsee", ensure_exists=True), "config.json")
 
         try:
+            config_file = os.path.join(
+                user_config_dir("pynsee", ensure_exists=True), "config.json")
+
             with open(config_file, "r") as f:
                 key_dict = json.load(f)
-
+
             http_proxy = key_dict["http_proxy"]
             https_proxy = key_dict["https_proxy"]
-
+
             if (http_proxy is None) or (not isinstance(http_proxy, str)):
                 http_proxy = ""
             if (https_proxy is None) or (not isinstance(https_proxy, str)):
                 https_proxy = ""
-
+
             os.environ["http_proxy"] = http_proxy
             os.environ["https_proxy"] = https_proxy
         except Exception:
             _missing_credentials()
+    else:
+        envir_var_used = True
+        key_dict["sirene_key"] = sirene_key
 
     if envir_var_used:
         _warn_env_credentials()
diff --git a/pynsee/utils/_get_installed_packages.py b/pynsee/utils/_get_installed_packages.py
deleted file mode 100644
index 06ef47c4..00000000
--- a/pynsee/utils/_get_installed_packages.py
+++ /dev/null
@@ -1,20 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright : INSEE, 2021
-
-import pkg_resources
-import pandas as pd
-
-
-def _get_installed_packages():
-
-    installed_packages = pkg_resources.working_set
-
-    df = pd.DataFrame({"package": None, "version": None}, index=[0])
-    j = 0
-    for i in installed_packages:
-        df.loc[j, "package"], df.loc[j, "version"] = i.key, i.version
-        j += 1
-
-    df = df.sort_values("package").reset_index(drop=True)
-
-    return df
(re.match(".*api-sirene.*", api_url) and (sirene_key is not None)) + + if sirene_request: + headers["X-INSEE-Api-Key-Integration"] = sirene_key + + if sirene_request or (not re.match(".*api-sirene.*", api_url)): # avoid reaching the limit of 30 queries per minute from insee api _wait_api_query_limit(api_url) diff --git a/pynsee/utils/save_df.py b/pynsee/utils/save_df.py index b89143d7..01e39706 100644 --- a/pynsee/utils/save_df.py +++ b/pynsee/utils/save_df.py @@ -129,7 +129,7 @@ def _save_dataframe( if parquet: df.to_parquet(file_name) else: - df.to_csv(file_name, index=False) + df.to_pickle(file_name, index=False) except Exception as e: warnings.warn(str(e)) warnings.warn(f"Error, file not saved:\n{file_name}\n{df}\n") diff --git a/requirements.txt b/requirements.txt index 20a61887..7e1fb3dc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ pandas>=0.24.2 pyarrow tqdm>=4.56.0 -requests>=2.23 +requests[security]>=2.23 platformdirs unidecode>=1.1.0 urllib3 diff --git a/setup.py b/setup.py index 192fd571..ced6f5e1 100644 --- a/setup.py +++ b/setup.py @@ -34,7 +34,7 @@ "pandas>=0.24.2", "pyarrow", "tqdm>=4.56.0", - "requests>=2.23", + "requests[security]>=2.23", "platformdirs", "unidecode>=1.1.0", "shapely>=1.8.0", diff --git a/tests/download/test_pynsee_download.py b/tests/download/test_pynsee_download.py index 39d3b03b..1e58b086 100644 --- a/tests/download/test_pynsee_download.py +++ b/tests/download/test_pynsee_download.py @@ -2,6 +2,8 @@ import os import pandas as pd import sys +import re +from parameterized import parameterized from pynsee.download import * from pynsee.download._check_url import _check_url @@ -10,10 +12,17 @@ from pynsee.download import get_column_metadata from pynsee.utils.clear_all_cache import clear_all_cache +# manual commands for testing only on geodata module +# coverage run -m unittest tests/geodata/test_pynsee_geodata.py +# coverage report --omit=*/utils/*,*/macrodata/*,*/localdata/*,*/download/*,*/sirene/*,*/metadata/* -m + class MyTests(unittest.TestCase): version = (sys.version_info[0] == 3) & (sys.version_info[1] == 10) + + test_onyxia = re.match(".*onyxia.*", os.getcwd()) + version = version or test_onyxia if version: @@ -37,38 +46,30 @@ def test_download_big_file(self): df = download_file("RP_LOGEMENT_2017", variables = ["COMMUNE", "IRIS", "ACHL", "IPONDL"]) self.assertTrue(isinstance(df, pd.DataFrame)) - def test_download_file_all(self): + def test_get_file_list(self): meta = get_file_list() self.assertTrue(isinstance(meta, pd.DataFrame)) + + list_file_check = ["COG_COMMUNE_2018", "AIRE_URBAINE", "FILOSOFI_COM_2015", "DECES_2020", + "PRENOM_NAT", "ESTEL_T201_ENS_T", "FILOSOFI_DISP_IRIS_2017", + "BPE_ENS", "RP_MOBSCO_2016"] + + @parameterized.expand([[f] for f in list_file_check]) + def test_download(self, f): - meta['size'] = pd.to_numeric(meta['size']) - meta = meta[meta['size'] < 300000000].reset_index(drop=True) - - list_file = list(meta.id) - list_file_check = list_file[:20] + list_file[-20:] - list_file_check = ["COG_COMMUNE_2018", "AIRE_URBAINE", "FILOSOFI_COM_2015", "DECES_2020", - "PRENOM_NAT", "ESTEL_T201_ENS_T", "FILOSOFI_DISP_IRIS_2017", - "BPE_ENS", "RP_MOBSCO_2016"] + df = download_file(f, update=True) + label = get_column_metadata(id=f) - for i, f in enumerate(list_file_check): - print(f"{i} : {f}") - - df = download_file(f) - label = get_column_metadata(id=f) - - if label is None: - checkLabel = True - elif isinstance(label, pd.DataFrame): - checkLabel = True - else: - checkLabel = False - - self.assertTrue(checkLabel) - 
self.assertTrue(isinstance(df, pd.DataFrame)) - self.assertTrue((len(df.columns) > 2)) + if label is None: + checkLabel = True + elif isinstance(label, pd.DataFrame): + checkLabel = True + else: + checkLabel = False - df = download_file(list_file_check[0]) - self.assertTrue(isinstance(df, pd.DataFrame)) - + self.assertTrue(checkLabel) + self.assertTrue(isinstance(df, pd.DataFrame)) + self.assertTrue((len(df.columns) > 2)) + if __name__ == '__main__': unittest.main() diff --git a/tests/geodata/test_pynsee_geodata.py b/tests/geodata/test_pynsee_geodata.py index 1dc5a180..6c109aeb 100644 --- a/tests/geodata/test_pynsee_geodata.py +++ b/tests/geodata/test_pynsee_geodata.py @@ -6,6 +6,8 @@ import sys import requests import unittest +import re +import os from shapely.geometry import Polygon, MultiPolygon, MultiPoint, Point @@ -25,9 +27,12 @@ class TestFunction(TestCase): - version_3_8 = (sys.version_info[0] == 3) & (sys.version_info[1] == 8) + version = (sys.version_info[0] == 3) & (sys.version_info[1] == 11) - if not version_3_8: + test_onyxia = re.match(".*onyxia.*", os.getcwd()) + version = version or test_onyxia + + if version: def test_find_wfs_closest_match(self): self.assertTrue(isinstance(_find_wfs_closest_match(), str)) @@ -60,6 +65,8 @@ def test_get_geodata_short(self): df = get_geodata_list(update=True) self.assertTrue(isinstance(df, pd.DataFrame)) + def test_get_geodata_short2(self): + chflieu = get_geodata(id='ADMINEXPRESS-COG-CARTO.LATEST:chflieu_commune', update=True) self.assertTrue(isinstance(chflieu, GeoFrDataFrame)) geo = chflieu.get_geom() @@ -67,11 +74,15 @@ def test_get_geodata_short(self): geo_chflieut = chflieu.translate().zoom().get_geom() self.assertTrue(isinstance(geo_chflieut, MultiPoint)) + def test_get_geodata_short3(self): + com = get_geodata(id='ADMINEXPRESS-COG-CARTO.LATEST:commune', update=True) self.assertTrue(isinstance(com, GeoFrDataFrame)) geo = com.get_geom() self.assertTrue(isinstance(geo, MultiPolygon)) + def test_get_geodata_short4(self): + # query with polygon and crs 4326 dep29 = get_geodata(id='ADMINEXPRESS-COG-CARTO.LATEST:departement', update=True, crs="EPSG:4326") dep29 = dep29[dep29["insee_dep"] == "29"] @@ -79,9 +90,12 @@ def test_get_geodata_short(self): geo29 = dep29.get_geom() self.assertTrue(isinstance(geo29, MultiPolygon)) - com29 = _get_geodata(id='ADMINEXPRESS-COG-CARTO.LATEST:commune', update=True, polygon=geo29, crsPolygon="EPSG:4326") + com29 = _get_geodata(id='ADMINEXPRESS-COG-CARTO.LATEST:commune', + update=True, polygon=geo29, crsPolygon="EPSG:4326") self.assertTrue(isinstance(com29, pd.DataFrame)) + def test_get_geodata_short5(self): + # query with polygon and crs 3857 dep29 = get_geodata(id='ADMINEXPRESS-COG-CARTO.LATEST:departement', update=True, crs="EPSG:3857") dep29 = dep29[dep29["insee_dep"] == "29"] @@ -89,14 +103,19 @@ def test_get_geodata_short(self): geo29 = dep29.get_geom() self.assertTrue(isinstance(geo29, MultiPolygon)) - com29 = _get_geodata(id='ADMINEXPRESS-COG-CARTO.LATEST:commune', update=True, polygon=geo29, crsPolygon="EPSG:3857") + com29 = _get_geodata(id='ADMINEXPRESS-COG-CARTO.LATEST:commune', + update=True, polygon=geo29, crsPolygon="EPSG:3857") self.assertTrue(isinstance(com29, pd.DataFrame)) + def test_get_geodata_short5b(self): + + com = get_geodata(id='ADMINEXPRESS-COG-CARTO.LATEST:commune') ovdep = com.translate().zoom() self.assertTrue(isinstance(ovdep, GeoFrDataFrame)) geo_ovdep = ovdep.get_geom() self.assertTrue(isinstance(geo_ovdep, MultiPolygon)) + def test_get_geodata_short6(self): #test 
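The rewritten download test relies on `parameterized.expand` (the `parameterized` package added to the CI installs above), which generates one independent test method per argument list instead of a single for-loop test; a minimal self-contained sketch:

```python
import unittest
from parameterized import parameterized

class ParamExample(unittest.TestCase):
    # Each entry becomes its own test case, reported and failed independently
    @parameterized.expand([["COG_COMMUNE_2018"], ["AIRE_URBAINE"]])
    def test_id_is_str(self, dataset_id):
        self.assertIsInstance(dataset_id, str)

if __name__ == "__main__":
    unittest.main()
```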
diff --git a/tests/geodata/test_pynsee_geodata.py b/tests/geodata/test_pynsee_geodata.py
index 1dc5a180..6c109aeb 100644
--- a/tests/geodata/test_pynsee_geodata.py
+++ b/tests/geodata/test_pynsee_geodata.py
@@ -6,6 +6,8 @@
 import sys
 import requests
 import unittest
+import re
+import os
 
 from shapely.geometry import Polygon, MultiPolygon, MultiPoint, Point
@@ -25,9 +27,12 @@
 
 class TestFunction(TestCase):
 
-    version_3_8 = (sys.version_info[0] == 3) & (sys.version_info[1] == 8)
+    version = (sys.version_info[0] == 3) & (sys.version_info[1] == 11)
 
-    if not version_3_8:
+    test_onyxia = re.match(".*onyxia.*", os.getcwd())
+    version = version or test_onyxia
+
+    if version:
 
         def test_find_wfs_closest_match(self):
             self.assertTrue(isinstance(_find_wfs_closest_match(), str))
@@ -60,6 +65,8 @@ def test_get_geodata_short(self):
         df = get_geodata_list(update=True)
         self.assertTrue(isinstance(df, pd.DataFrame))
 
+    def test_get_geodata_short2(self):
+
         chflieu = get_geodata(id='ADMINEXPRESS-COG-CARTO.LATEST:chflieu_commune', update=True)
         self.assertTrue(isinstance(chflieu, GeoFrDataFrame))
         geo = chflieu.get_geom()
@@ -67,11 +74,15 @@
         geo_chflieut = chflieu.translate().zoom().get_geom()
         self.assertTrue(isinstance(geo_chflieut, MultiPoint))
 
+    def test_get_geodata_short3(self):
+
         com = get_geodata(id='ADMINEXPRESS-COG-CARTO.LATEST:commune', update=True)
         self.assertTrue(isinstance(com, GeoFrDataFrame))
         geo = com.get_geom()
         self.assertTrue(isinstance(geo, MultiPolygon))
 
+    def test_get_geodata_short4(self):
+
         # query with polygon and crs 4326
         dep29 = get_geodata(id='ADMINEXPRESS-COG-CARTO.LATEST:departement', update=True, crs="EPSG:4326")
         dep29 = dep29[dep29["insee_dep"] == "29"]
@@ -79,9 +90,12 @@
         geo29 = dep29.get_geom()
         self.assertTrue(isinstance(geo29, MultiPolygon))
 
-        com29 = _get_geodata(id='ADMINEXPRESS-COG-CARTO.LATEST:commune', update=True, polygon=geo29, crsPolygon="EPSG:4326")
+        com29 = _get_geodata(id='ADMINEXPRESS-COG-CARTO.LATEST:commune',
+                             update=True, polygon=geo29, crsPolygon="EPSG:4326")
         self.assertTrue(isinstance(com29, pd.DataFrame))
 
+    def test_get_geodata_short5(self):
+
         # query with polygon and crs 3857
         dep29 = get_geodata(id='ADMINEXPRESS-COG-CARTO.LATEST:departement', update=True, crs="EPSG:3857")
         dep29 = dep29[dep29["insee_dep"] == "29"]
@@ -89,14 +103,19 @@
         geo29 = dep29.get_geom()
         self.assertTrue(isinstance(geo29, MultiPolygon))
 
-        com29 = _get_geodata(id='ADMINEXPRESS-COG-CARTO.LATEST:commune', update=True, polygon=geo29, crsPolygon="EPSG:3857")
+        com29 = _get_geodata(id='ADMINEXPRESS-COG-CARTO.LATEST:commune',
+                             update=True, polygon=geo29, crsPolygon="EPSG:3857")
         self.assertTrue(isinstance(com29, pd.DataFrame))
 
+    def test_get_geodata_short5b(self):
+
+        com = get_geodata(id='ADMINEXPRESS-COG-CARTO.LATEST:commune')
         ovdep = com.translate().zoom()
         self.assertTrue(isinstance(ovdep, GeoFrDataFrame))
         geo_ovdep = ovdep.get_geom()
         self.assertTrue(isinstance(geo_ovdep, MultiPolygon))
 
+    def test_get_geodata_short6(self):
         #test _add_insee_dep_from_geodata
         epci = get_geodata(id='ADMINEXPRESS-COG-CARTO.LATEST:epci', update=True)
         self.assertTrue(isinstance(epci, GeoFrDataFrame))
@@ -105,6 +124,7 @@
         geo_epcit = epcit.get_geom()
         self.assertTrue(isinstance(geo_epcit, MultiPolygon))
 
+    def test_get_geodata_short7(self):
         # test _add_insee_dep_region
         reg = get_geodata(id='ADMINEXPRESS-COG-CARTO.LATEST:region', update=True)
         self.assertTrue(isinstance(reg, GeoFrDataFrame))
@@ -113,6 +133,7 @@
         geo_regt = regt.get_geom()
         self.assertTrue(isinstance(geo_regt, MultiPolygon))
 
+    def test_get_geodata_short8(self):
         dep = get_geodata(id='ADMINEXPRESS-COG-CARTO.LATEST:departement', crs="EPSG:4326")
         dep13 = dep[dep["insee_dep"] == "13"]
         geo13 = dep13.get_geom()
@@ -122,6 +143,8 @@
         bbox = _get_bbox_list(polygon=geo13)
         self.assertTrue(isinstance(bbox, list))
 
+    def test_get_geodata_short9(self):
+
         dep = get_geodata(id='ADMINEXPRESS-COG-CARTO.LATEST:departement', crs="EPSG:3857")
         dep13 = dep[dep["insee_dep"] == "13"]
         geo13 = dep13.get_geom()
diff --git a/tests/localdata/test_pynsee_localdata.py b/tests/localdata/test_pynsee_localdata.py
index 2e641ab1..c25536aa 100644
--- a/tests/localdata/test_pynsee_localdata.py
+++ b/tests/localdata/test_pynsee_localdata.py
@@ -7,6 +7,8 @@
 import sys
 import unittest
 from requests.exceptions import RequestException
+import re
+import os
 
 from pynsee.localdata._get_geo_relation import _get_geo_relation
 from pynsee.localdata._get_insee_one_area import _get_insee_one_area
@@ -15,7 +17,6 @@
 
 from pynsee.localdata.get_geo_list import get_geo_list
 from pynsee.localdata.get_local_data import get_local_data
-from pynsee.localdata.get_included_area import get_included_area
 from pynsee.localdata.get_nivgeo_list import get_nivgeo_list
 from pynsee.localdata.get_local_metadata import get_local_metadata
 from pynsee.localdata.get_population import get_population
@@ -25,10 +26,16 @@
 from pynsee.localdata.get_ascending_area import get_ascending_area
 from pynsee.localdata.get_descending_area import get_descending_area
 
+# manual commands for testing only on the localdata module
+# coverage run -m unittest tests/localdata/test_pynsee_localdata.py
+# coverage report --omit=*/utils/*,*/macrodata/*,*/geodata/*,*/download/*,*/sirene/*,*/metadata/* -m
+
 class TestFunction(TestCase):
 
-    version = (sys.version_info[0] == 3) & (sys.version_info[1] == 8)
+    version = (sys.version_info[0] == 3) & (sys.version_info[1] == 9)
+
+    test_onyxia = re.match(".*onyxia.*", os.getcwd())
+    version = version or test_onyxia
 
     if version:
 
@@ -191,7 +198,7 @@ def test_get_geo_list_1(self):
         list_geo_data = []
         for geo in list_available_geo:
-            time.sleep(10)
+            time.sleep(1)
             list_geo_data.append(get_geo_list(geo))
 
         df = pd.concat(list_geo_data)
@@ -205,7 +212,7 @@ def test_get_geo_list_2(self):
 
     def test_get_geo_relation_1(self):
         df1 = _get_geo_relation("region", "11", "descendants")
-        time.sleep(10)
+        time.sleep(1)
         df2 = _get_geo_relation("departement", "91", "ascendants")
         test = isinstance(df1, pd.DataFrame) & isinstance(
             df2, pd.DataFrame
@@ -221,13 +228,13 @@ def test_get_local_metadata(self):
         self.assertTrue(isinstance(data, pd.DataFrame))
 
     def test_get_local_data_1(self):
-        dep = get_geo_list("departements")
+        # dep = get_geo_list("departements")
         variables = "AGESCOL-SEXE-ETUD"
         dataset = "GEO2019RP2011"
-        # codegeo = ['91', '976']
-        codegeos = list(dep.CODE)
-        codegeos = dep.CODE.to_list()
+        codegeos = ['91', '976']
+        # codegeos = list(dep.CODE)
+        # codegeos = dep.CODE.to_list()
         geo = "DEP"
         data = get_local_data(
             variables=variables,
@@ -245,7 +252,8 @@ def test_get_local_data_all(self):
             dataset_version="GEO2020RP2017",
             variables="SEXE-DIPL_19",
             nivgeo="DEP",
-            geocodes=["91", "92", "976"],
+            geocodes=["91", "976"],
+            update=True
         )
         test = test & isinstance(data, pd.DataFrame)
 
@@ -253,7 +261,8 @@
             dataset_version="GEO2020FILO2018",
             variables="INDICS_FILO_DISP_DET-TRAGERF",
             nivgeo="REG",
-            geocodes=["11", "01"],
+            geocodes=["01", "11"],
+            update=True
         )
         test = test & isinstance(data, pd.DataFrame)
 
@@ -262,6 +271,7 @@
             variables="INDICS_BDCOM",
             nivgeo="REG",
             geocodes=["11"],
+            update=True
         )
         test = test & isinstance(data, pd.DataFrame)
 
@@ -270,6 +280,7 @@
             variables="INDICS_ETATCIVIL",
             nivgeo="REG",
             geocodes=["11"],
+            update=True
         )
         test = test & isinstance(data, pd.DataFrame)
 
@@ -278,23 +289,7 @@
             variables="ETOILE",
             nivgeo="REG",
             geocodes=["11"],
-        )
-        test = test & isinstance(data, pd.DataFrame)
-
-        # repeat same query to test locally saved data use
-        data = get_local_data(
-            dataset_version="TOUR2019",
-            variables="ETOILE",
-            nivgeo="REG",
-            geocodes=["11"],
-        )
-        test = test & isinstance(data, pd.DataFrame)
-
-        data = get_local_data(
-            dataset_version="GEO2020FLORES2017",
-            variables="EFFECSAL5T_1_100P",
-            nivgeo="REG",
-            geocodes="11",
+            update=True
         )
         test = test & isinstance(data, pd.DataFrame)
 
@@ -303,6 +298,7 @@
             variables="NA5_B",
             nivgeo="REG",
             geocodes=["11"],
+            update=True
         )
         test = test & isinstance(data, pd.DataFrame)
 
@@ -311,17 +307,25 @@
             variables="IND_POPLEGALES",
             nivgeo="COM",
             geocodes=["91477"],
+            update=True
         )
         test = test & isinstance(data, pd.DataFrame)
 
-        data = get_local_data(
-            dataset_version="GEOlatestRPlatest", variables="CS1_6"
-        )
-        test = test & isinstance(data, pd.DataFrame)
+        for geo in ["DEP", "REG", "FE", "METRODOM"]:
+            data = get_local_data(
+                dataset_version="GEO2020FLORES2017",
+                variables="NA17",
+                nivgeo=geo,
+                update=True
+            )
+            test = test & isinstance(data, pd.DataFrame)
+
+        self.assertTrue(test)
 
-        # test data cached
+    def test_get_local_data_latest(self):
+        test = True
         data = get_local_data(
-            dataset_version="GEOlatestRPlatest", variables="TYPMR"
+            dataset_version="GEOlatestRPlatest", variables="CS1_6"
         )
         test = test & isinstance(data, pd.DataFrame)
 
@@ -368,20 +372,13 @@
             geocodes = '75056')
         test = test & isinstance(data, pd.DataFrame)
 
-        for geo in ["DEP", "REG", "FE", "METRODOM"]:
-            data = get_local_data(
-                dataset_version="GEO2020FLORES2017",
-                variables="NA17",
-                nivgeo=geo,
-            )
-            test = test & isinstance(data, pd.DataFrame)
-
-        test = test & isinstance(data, pd.DataFrame)
+        self.assertTrue(test)
 
+    def test_get_ascending_descending_area(self):
         #
         # test get_descending_area and get_ascending_are
         #
-
+        test = True
         df = get_descending_area(
             "commune", code="59350", date="2018-01-01"
         )
@@ -420,6 +417,8 @@
         df = get_ascending_area("commune", code="59350", date="2018-01-01")
         test = test & isinstance(df, pd.DataFrame)
 
+        self.assertTrue(test)
+
     def test_get_local_data_latest_error(self):
 
         def getlocaldataTestError():
@@ -463,26 +462,5 @@ def get_area_list_test():
             get_area_list(area="regions", date="1900-01-01", update=True)
 
         self.assertRaises(RequestException, get_area_list_test)
 
-    def test_get_included_area(self):
-        list_available_area = [
-            "zonesDEmploi2020",
-            "airesDAttractionDesVilles2020",
-            "unitesUrbaines2020",
-        ]
-        list_data = []
-
-        for a in list_available_area:
-            time.sleep(10)
-            df_list = get_area_list(a)
-            code = df_list.CODE[:3].to_list()
-            data = get_included_area(area_type=a, codeareas=code)
-            list_data.append(data)
-
-        data_final = pd.concat(list_data)
-
-        test = isinstance(data_final, pd.DataFrame)
-
-        self.assertTrue(test)
-
 if __name__ == '__main__':
     unittest.main()
\ No newline at end of file
diff --git a/tests/macrodata/test_pynsee_macrodata.py b/tests/macrodata/test_pynsee_macrodata.py
index 7f231a52..4e663415 100644
--- a/tests/macrodata/test_pynsee_macrodata.py
+++ b/tests/macrodata/test_pynsee_macrodata.py
@@ -8,6 +8,7 @@
 import sys
 from datetime import datetime
 from datetime import timedelta
+import re
 
 from pynsee.macrodata._get_insee import _get_insee
 from pynsee.macrodata._get_date import _get_date
@@ -36,12 +37,18 @@
 
 future_date = datetime.now() + timedelta(days=91)
 
+# manual commands for testing only on the macrodata module
+# coverage run -m unittest tests/macrodata/test_pynsee_macrodata.py
+# coverage report --omit=*/utils/*,*/geodata/*,*/localdata/*,*/download/*,*/sirene/*,*/metadata/* -m
+
 
 class TestFunction(TestCase):
 
-    version_3_8 = (sys.version_info[0] == 3) & (sys.version_info[1] == 8)
+    version = (sys.version_info[0] == 3) & (sys.version_info[1] == 12)
 
-    if not version_3_8:
+    test_onyxia = re.match(".*onyxia.*", os.getcwd())
+    version = version or test_onyxia
+
+    if version:
 
         def test_get_dataset_list_internal(self):
             df = _get_dataset_list_internal()
@@ -54,7 +61,7 @@ def test_download_series_list(self):
 
     def test_get_series_title(self):
         series = search_macrodata()
-        series = series.loc[:420, "IDBANK"].to_list()
+        series = series.loc[:50, "IDBANK"].to_list()
         titles = get_series_title(series)
         self.assertTrue(isinstance(titles, pd.DataFrame))
@@ -67,7 +74,7 @@ def test_get_column_title_1(self):
         data1 = get_column_title()
         test1 = isinstance(data1, pd.DataFrame)
 
-        data2 = get_column_title(['CLIMAT-AFFAIRES', 'IPC-2015'])
+        data2 = get_column_title(['CLIMAT-AFFAIRES'])
         test2 = isinstance(data2, pd.DataFrame)
 
         self.assertTrue(test1 & test2)
@@ -79,9 +86,6 @@ def test_get_series_list_1(self):
         data = get_series_list('CLIMAT-AFFAIRES')
         test = test & isinstance(data, pd.DataFrame)
 
-        data = get_series_list("IPPI-2015", update=True)
-        test = test & isinstance(data, pd.DataFrame)
-
         data = get_series_list("CHOMAGE-TRIM-NATIONAL", update=True)
         test = test & isinstance(data, pd.DataFrame)
@@ -91,7 +95,7 @@ def test_get_series_list_2(self):
         self.assertRaises(ValueError, get_series_list, 'a')
 
     def test_get_series_1(self):
-        idbank_list = get_series_list('IPC-2015').iloc[:900]
+        idbank_list = get_series_list('IPC-2015').iloc[:50]
         data = get_series(idbank_list.IDBANK)
         self.assertTrue(isinstance(data, pd.DataFrame))
diff --git a/tests/metadata/test_pynsee_metadata.py b/tests/metadata/test_pynsee_metadata.py
index 1d3abc87..417d5b2c 100644
--- a/tests/metadata/test_pynsee_metadata.py
+++ b/tests/metadata/test_pynsee_metadata.py
@@ -4,15 +4,25 @@
 from unittest import TestCase
 from pandas import pandas as pd
 import sys
+import os
+import re
 
 from pynsee.metadata.get_definition_list import get_definition_list
 from pynsee.metadata.get_definition import get_definition
 from pynsee.metadata.get_activity_list import get_activity_list
 from pynsee.metadata.get_legal_entity import get_legal_entity
 
+# manual commands for testing only on the metadata module
+# coverage run -m unittest tests/metadata/test_pynsee_metadata.py
+# coverage report --omit=*/utils/*,*/geodata/*,*/localdata/*,*/download/*,*/sirene/*,*/macrodata/* -m
+
+
 class TestFunction(TestCase):
 
-    version = (sys.version_info[0] == 3) & (sys.version_info[1] == 8)
+    version = (sys.version_info[0] == 3) & (sys.version_info[1] == 11)
+
+    test_onyxia = re.match(".*onyxia.*", os.getcwd())
+    version = version or test_onyxia
 
     if version:
diff --git a/tests/sirene/test_pynsee_sirene.py b/tests/sirene/test_pynsee_sirene.py
index 78eb5583..3a095141 100644
--- a/tests/sirene/test_pynsee_sirene.py
+++ b/tests/sirene/test_pynsee_sirene.py
@@ -3,6 +3,8 @@
 from unittest import TestCase
 from pandas import pandas as pd
 import sys
+import os
+import re
 
 from shapely.geometry import (
     Point, Polygon, MultiPolygon, LineString, MultiLineString, MultiPoint)
@@ -13,11 +15,17 @@
     get_sirene_relatives, search_sirene)
 from pynsee.sirene._request_sirene import _request_sirene
 
+# manual commands for testing only on the sirene module
+# coverage run -m unittest tests/sirene/test_pynsee_sirene.py
+# coverage report --omit=*/utils/*,*/geodata/*,*/localdata/*,*/download/*,*/macrodata/*,*/metadata/* -m
+
 
 class TestFunction(TestCase):
 
     version = (sys.version_info[0] == 3) & (sys.version_info[1] == 11)
 
+    test_onyxia = re.match(".*onyxia.*", os.getcwd())
+    version = version or test_onyxia
+
     if version:
 
         def test_get_sirene_relatives(self):
@@ -166,7 +174,7 @@ def test_search_sirene(self):
         df = search_sirene(variable=['activitePrincipaleEtablissement',
                                      'codePostalEtablissement'],
                            pattern=['56.30Z', '83*'],
-                           number=5000)
+                           number=100)
         test = test & isinstance(df, pd.DataFrame)
 
         df = search_sirene(variable = ["denominationUniteLegale", 'categorieEntreprise'],
diff --git a/tests/utils/test_pynsee_utils.py b/tests/utils/test_pynsee_utils.py
index ec7436d6..16097406 100644
--- a/tests/utils/test_pynsee_utils.py
+++ b/tests/utils/test_pynsee_utils.py
@@ -4,16 +4,17 @@
 import unittest
 from unittest import TestCase
 import requests
-
+import re
 import os
 import sys
 
+from requests.exceptions import RequestException
+
 from pynsee.utils._get_credentials import _get_credentials
 from pynsee.utils._request_insee import _request_insee
 from pynsee.utils.clear_all_cache import clear_all_cache
 from pynsee.utils.init_conn import init_conn
 
-
 test_SDMX = True
 
@@ -21,20 +22,26 @@
 class TestFunction(TestCase):
 
     version = (sys.version_info[0] == 3) & (sys.version_info[1] == 9)
 
+    test_onyxia = re.match(".*onyxia.*", os.getcwd())
+    version = version or test_onyxia
+
     if version:
 
         StartKeys = _get_credentials()
 
         def test_request_insee_1(self):
+            # test both api and sdmx queries fail but token is not none
             sdmx_url = "https://bdm.insee.fr/series/sdmx/data/SERIES_BDM/test"
             api_url = "https://api.insee.fr/series/BDM/V1/data/SERIES_BDM/test"
 
-            def request_insee_test(sdmx_url=sdmx_url, api_url=api_url):
+            fail = False
+
+            try:
                 _request_insee(sdmx_url=sdmx_url, api_url=api_url)
+            except requests.exceptions.RequestException:
+                fail = True
 
-            self.assertRaises(
-                requests.exceptions.RequestException, request_insee_test
-            )
+            self.assertTrue(fail)
 
         if test_SDMX:
 
@@ -50,45 +57,6 @@ def test_request_insee_2(self):
             test = results.status_code == 200
             self.assertTrue(test)
 
-        def test_request_insee_3(self):
-            # token is none and sdmx query fails
-            def init_conn_foo():
-                init_conn(sirene_key="test")
-
-            self.assertRaises(ValueError, init_conn_foo)
-
-            os.environ["sirene_key"] = "key"
-            sdmx_url = "https://bdm.insee.fr/series/sdmx/data/SERIES_BDM/test"
-            api_url = "https://api.insee.fr/series/BDM/V1/data/SERIES_BDM/test"
-
-            def request_insee_test(sdmx_url=sdmx_url, api_url=api_url):
-                _request_insee(sdmx_url=sdmx_url, api_url=api_url)
-
-            self.assertRaises(ValueError, request_insee_test)
-
-        def test_request_insee_4(self):
-            # token is none and sdmx query is None
-            # _get_token.cache_clear()
-            # _get_envir_token.cache_clear()
-            clear_all_cache()
-
-            os.environ["sirene_key"] = "key"
-            api_url = "https://api.insee.fr/series/BDM/V1/data/SERIES_BDM/test"
-
-            def request_insee_test(sdmx_url=None, api_url=api_url):
-                _request_insee(sdmx_url=sdmx_url, api_url=api_url)
-
-            self.assertRaises(ValueError, request_insee_test)
-
-        def test_request_insee_5(self):
-            # api query is none and sdmx query fails
-            sdmx_url = "https://bdm.insee.fr/series/sdmx/data/SERIES_BDM/test"
-
-            def request_insee_test(sdmx_url=sdmx_url, api_url=None):
-                _request_insee(sdmx_url=sdmx_url, api_url=api_url)
-
-            self.assertRaises(ValueError, request_insee_test)
-
         def test_clear_all_cache(self):
             test = True
             try: