Skip to content

Commit

Permalink
Clean up code, move to early-return pattern
Browse files Browse the repository at this point in the history
  • Loading branch information
Tanguy Fardet committed Nov 19, 2023
1 parent 0afbd0f commit 977b67d
Show file tree
Hide file tree
Showing 7 changed files with 186 additions and 211 deletions.
2 changes: 1 addition & 1 deletion pynsee/localdata/_find_latest_local_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def _find_latest_local_dataset(dataset_version, variables, nivgeo, codegeo, upda

filename = _hash("".join([dataset_version] + ['_find_latest_local_dataset']))
insee_folder = _create_insee_folder()
file_localdata = insee_folder + "/" + filename
file_localdata = os.path.join(insee_folder, filename)

if (not os.path.exists(file_localdata)) or update:

Expand Down
13 changes: 11 additions & 2 deletions pynsee/macrodata/_load_dataset_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,22 @@ def _del_dataset_files():
for f in list_dataset_files:
os.remove(f)


def _get_dataset_files():
    """Return the dataset-metadata cache files currently present on disk.

    For every dataset id returned by ``get_dataset_list``, build the expected
    cache-file path inside the pynsee folder, then keep only the paths that
    actually exist.

    Returns
    -------
    list of str
        Paths of the existing cached metadata files.
    """
    list_dataset = list(get_dataset_list().id.unique())
    insee_folder = _create_insee_folder()

    # os.path.join keeps the paths portable across operating systems
    # (avoids hard-coded "/" separators)
    file_dataset_metadata_list = [
        os.path.join(insee_folder, _hash("idbank_list" + dt))
        for dt in list_dataset
    ]

    return [f for f in file_dataset_metadata_list if os.path.exists(f)]


def _load_dataset_data():
list_dataset_files = _get_dataset_files()
if len(list_dataset_files) > 0:
Expand Down
164 changes: 82 additions & 82 deletions pynsee/metadata/get_definition_list.py
Original file line number Diff line number Diff line change
@@ -1,82 +1,82 @@
# -*- coding: utf-8 -*-
# Copyright : INSEE, 2021

from functools import lru_cache
from pynsee.utils._request_insee import _request_insee
from pynsee.utils._create_insee_folder import _create_insee_folder
from pynsee.utils._make_dataframe_from_dict import _make_dataframe_from_dict

import zipfile
import os
import pkg_resources
import pandas as pd

import logging
logger = logging.getLogger(__name__)

@lru_cache(maxsize=None)
def _warning_definition_internal_data():
    """Log a notice that the bundled package data was used.

    The ``lru_cache`` decorator makes repeated calls no-ops, so the
    message is emitted at most once per interpreter session.
    """
    logger.info(
        "Internal package data has been used !\n"
        "If some data is missing, please use get_definition !"
    )


@lru_cache(maxsize=None)
def get_definition_list():
    """Get a list of concept definitions

    Returns a pandas DataFrame with columns ``ID``, ``URI`` and ``TITLE_FR``,
    enriched with extra columns from the bundled ``all_definitions.csv``
    file when it is available on disk.

    Examples:
        >>> from pynsee.metadata import get_definition_list
        >>> definition = get_definition_list()
    """

    insee_folder = _create_insee_folder()

    # local sub-folder that holds the definition files extracted from the
    # archive shipped with the package
    insee_folder_local_def = insee_folder + "/" + "definition"

    if not os.path.exists(insee_folder_local_def):
        os.mkdir(insee_folder_local_def)

    list_expected_files = ["all_definitions.csv"]

    list_expected_files = [
        insee_folder + "/definition/" + f for f in list_expected_files
    ]

    # True for each expected file that is *missing* on disk
    # (despite the name, this flags unavailable files)
    list_available_file = [not os.path.exists(f) for f in list_expected_files]

    # unzipping raw files
    if any(list_available_file):

        zip_file = pkg_resources.resource_stream(__name__, "data/definition.zip")

        with zipfile.ZipFile(zip_file, "r") as zip_ref:
            zip_ref.extractall(insee_folder)

    link = "https://api.insee.fr/metadonnees/V1/concepts/definitions"

    # query the INSEE metadata API for the full list of concept definitions
    request = _request_insee(api_url=link, file_format="application/json")

    data_request = request.json()

    list_data = []

    # one small dataframe per concept; keep only the first three columns
    for i in range(len(data_request)):
        df = _make_dataframe_from_dict(data_request[i])
        df = df.iloc[:, 0:3].reset_index(drop=True).drop_duplicates()
        list_data.append(df)

    data = pd.concat(list_data, axis=0)
    data = data.reset_index(drop=True)
    data.columns = ["ID", "URI", "TITLE_FR"]

    # enrich the API data with the metadata bundled in the package,
    # dropping the duplicated URI/TITLE_FR columns before the merge
    if os.path.exists(list_expected_files[0]):
        all_data = pd.read_csv(list_expected_files[0])
        all_data = all_data.iloc[:, 1:10]
        all_data = all_data.drop(columns={"URI", "TITLE_FR"})
        data = data.merge(all_data, on="ID", how="left")

        _warning_definition_internal_data()

    return data
# -*- coding: utf-8 -*-
# Copyright : INSEE, 2021

from functools import lru_cache
from pynsee.utils._request_insee import _request_insee
from pynsee.utils._create_insee_folder import _create_insee_folder
from pynsee.utils._make_dataframe_from_dict import _make_dataframe_from_dict

import zipfile
import os
import pkg_resources
import pandas as pd

import logging
logger = logging.getLogger(__name__)

@lru_cache(maxsize=None)
def _warning_definition_internal_data():
    """Emit a one-time notice that the bundled package data was used."""
    # The unbounded lru_cache turns every call after the first into a
    # no-op, so this message appears at most once per session.
    message = (
        "Internal package data has been used !\n"
        "If some data is missing, please use get_definition !"
    )
    logger.info(message)


@lru_cache(maxsize=None)
def get_definition_list():
    """Get a list of concept definitions

    Returns a pandas DataFrame with columns ``ID``, ``URI`` and ``TITLE_FR``,
    enriched with extra columns from the bundled ``all_definitions.csv``
    file when it is available on disk.

    Examples:
        >>> from pynsee.metadata import get_definition_list
        >>> definition = get_definition_list()
    """

    insee_folder = _create_insee_folder()

    # local sub-folder that holds the definition files extracted from the
    # archive shipped with the package
    insee_folder_local_def = os.path.join(insee_folder, "definition")

    if not os.path.exists(insee_folder_local_def):
        os.mkdir(insee_folder_local_def)

    # expected files inside the definition sub-folder; os.path.join keeps
    # the paths portable, consistent with insee_folder_local_def above
    # (the previous "/"-concatenation produced mixed separators on Windows)
    list_expected_files = [
        os.path.join(insee_folder_local_def, f)
        for f in ["all_definitions.csv"]
    ]

    # unzip the raw files shipped with the package if any of them is missing
    if any(not os.path.exists(f) for f in list_expected_files):

        zip_file = pkg_resources.resource_stream(__name__, "data/definition.zip")

        with zipfile.ZipFile(zip_file, "r") as zip_ref:
            zip_ref.extractall(insee_folder)

    link = "https://api.insee.fr/metadonnees/V1/concepts/definitions"

    # query the INSEE metadata API for the full list of concept definitions
    request = _request_insee(api_url=link, file_format="application/json")

    data_request = request.json()

    # one small dataframe per concept; keep only the first three columns
    list_data = []

    for item in data_request:
        df = _make_dataframe_from_dict(item)
        df = df.iloc[:, 0:3].reset_index(drop=True).drop_duplicates()
        list_data.append(df)

    data = pd.concat(list_data, axis=0)
    data = data.reset_index(drop=True)
    data.columns = ["ID", "URI", "TITLE_FR"]

    # enrich the API data with the metadata bundled in the package,
    # dropping the duplicated URI/TITLE_FR columns before the merge
    if os.path.exists(list_expected_files[0]):
        all_data = pd.read_csv(list_expected_files[0])
        all_data = all_data.iloc[:, 1:10]
        all_data = all_data.drop(columns={"URI", "TITLE_FR"})
        data = data.merge(all_data, on="ID", how="left")

        _warning_definition_internal_data()

    return data
2 changes: 1 addition & 1 deletion pynsee/sirene/search_sirene.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ def search_sirene(

filename = _hash(query + string)
insee_folder = _create_insee_folder()
file_sirene = insee_folder + "/" + filename
file_sirene = os.path.join(insee_folder, filename)

if (not os.path.exists(file_sirene)) or update:
data_final = _request_sirene(query=query, kind=kind, number=number)
Expand Down
10 changes: 2 additions & 8 deletions pynsee/utils/_create_insee_folder.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,24 +14,18 @@ def _create_insee_folder():
try:
# find local folder
local_appdata_folder = platformdirs.user_cache_dir()
insee_folder = local_appdata_folder + "/pynsee"
insee_folder = os.path.join(local_appdata_folder, "pynsee")

# create insee folder
if not os.path.exists(insee_folder):
os.mkdir(insee_folder)

insee_folder = insee_folder + "/pynsee"
insee_folder = os.path.join(insee_folder, "pynsee")

# create insee folder
if not os.path.exists(insee_folder):
os.mkdir(insee_folder)

# create internal folder
# if folder is not None:
# insee_folder = insee_folder + '/' + folder
# if not os.path.exists(insee_folder):
# os.mkdir(insee_folder)

# test if saving a file is possible
test_file = os.path.join(insee_folder, _hash("test_file"))
with open(test_file, "w") as f:
Expand Down
85 changes: 38 additions & 47 deletions pynsee/utils/_request_insee.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,6 @@
def _request_insee(
api_url=None, sdmx_url=None, file_format="application/xml", print_msg=True
):
# sdmx_url = "https://bdm.insee.fr/series/sdmx/data/SERIES_BDM/001688370"
# api_url = "https://api.insee.fr/series/BDM/V1/data/SERIES_BDM/001688370"
# api_url = 'https://api.insee.fr/series/BDM/V1/data/CLIMAT-AFFAIRES/?firstNObservations=4&lastNObservations=1'
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

if api_url is not None:
Expand Down Expand Up @@ -67,14 +64,10 @@ def _request_insee(
# 2- if the api request fails

# if api url is missing sdmx url is used

if api_url is not None:
token = pynsee.get_config("insee_token")

try:
username = os.environ['USERNAME']
except Exception:
username = "username"
username = os.environ.get("USERNAME", "username")

if token:
headers = {
Expand Down Expand Up @@ -113,54 +106,52 @@ def _request_insee(

if success is True:
return results
else:
msg = (
"An error occurred !\n"
"Query : {api_url}\n"
f"{results.text}\n"
"Make sure you have subscribed to all APIs !\n"
"Click on all APIs' icons one by one, select your "
"application, and click on Subscribe"
)
raise requests.exceptions.RequestException(msg)

else:
# token is None
commands = "\n\ninit_conn(insee_key='my_insee_key', insee_secret='my_insee_secret')\n"
msg = (
"Token missing, please check your credentials "
"on api.insee.fr !\n"
"Please do the following to use your "
f"credentials: {commands}\n\n"
"If your token still does not work, please try to clear "
"the cache :\n "
"from pynsee.utils import clear_all_cache; clear_all_cache()\n"
"An error occurred !\n"
"Query : {api_url}\n"
f"{results.text}\n"
"Make sure you have subscribed to all APIs !\n"
"Click on all APIs' icons one by one, select your "
"application, and click on Subscribe"
)

if sdmx_url is not None:
msg2 = "\nSDMX web service used instead of API"
if print_msg:
logger.critical(msg + msg2)
raise requests.exceptions.RequestException(msg)

results = requests.get(sdmx_url, proxies=proxies, verify=False)
# token is None
commands = "\n\ninit_conn(insee_key='my_insee_key', insee_secret='my_insee_secret')\n"
msg = (
"Token missing, please check your credentials "
"on api.insee.fr !\n"
"Please do the following to use your "
f"credentials: {commands}\n\n"
"If your token still does not work, please try to clear "
"the cache :\n "
"from pynsee.utils import clear_all_cache; clear_all_cache()\n"
)

if results.status_code == 200:
return results
else:
raise ValueError(results.text + "\n" + sdmx_url)

else:
raise ValueError(msg)
else:
# api_url is None
if sdmx_url is not None:
msg2 = "\nSDMX web service used instead of API"
if print_msg:
logger.critical(msg + msg2)

results = requests.get(sdmx_url, proxies=proxies, verify=False)
print(sdmx_url, results.status_code)

if results.status_code == 200:
return results
else:
raise ValueError(results.text + "\n" + sdmx_url)

else:
raise ValueError("!!! Error : urls are missing")
raise ValueError(results.text + "\n" + sdmx_url)

raise ValueError(msg)

# api_url is None
if sdmx_url is not None:
results = requests.get(sdmx_url, proxies=proxies, verify=False)
logger.debug(f"{sdmx_url}: {results.status_code}")

if results.status_code == 200:
return results

raise ValueError(results.text + "\n" + sdmx_url)

raise ValueError("URLs are missing!")
Loading

0 comments on commit 977b67d

Please sign in to comment.