From 5b468f9438c9d93d0dc8e5e0c7d827d8e1c21052 Mon Sep 17 00:00:00 2001 From: lukas Date: Thu, 19 Oct 2023 09:43:44 +0200 Subject: [PATCH 1/3] adding `pyotp` for programmatic 2FA (#79) --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index a1ccd42..d392e04 100644 --- a/requirements.txt +++ b/requirements.txt @@ -25,3 +25,4 @@ alive_progress rasterstats pyyaml myst_parser +pyotp From 5041c34d4d972927c8cdeaf3d0ceda9b11b63eaf Mon Sep 17 00:00:00 2001 From: lukas Date: Thu, 19 Oct 2023 09:44:03 +0200 Subject: [PATCH 2/3] adding `CREODIAS_TOTP_SECRET` for 2FA (#79) --- eodal/config/settings.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/eodal/config/settings.py b/eodal/config/settings.py index eb2a340..444fc25 100644 --- a/eodal/config/settings.py +++ b/eodal/config/settings.py @@ -52,6 +52,11 @@ class Settings(BaseSettings): # define CREODIAS username and password CREODIAS_USER: str = "" CREODIAS_PASSWORD: str = "" + # define CREODIAS TOTP secret for 2FA + CREODIAS_TOTP_SECRET: str = "" + # CREODIAS API endpoint for downloading zipped datasets + CREODIAS_ZIPPER_URL: str = "https://zipper.creodias.eu/download" + # maximum number of records per request: 2000 (CREODIAS currently does not allow # more) CREODIAS_MAX_RECORDS: int = 2000 From 75c1ef0c8e049d7f468ecdddf91a9e13909d4ac2 Mon Sep 17 00:00:00 2001 From: lukas Date: Thu, 19 Oct 2023 10:08:36 +0200 Subject: [PATCH 3/3] stabilized download by try-excepts and new 2FA enabled --- eodal/downloader/utils/creodias.py | 144 ++++++++++++++++++++++------- 1 file changed, 112 insertions(+), 32 deletions(-) diff --git a/eodal/downloader/utils/creodias.py b/eodal/downloader/utils/creodias.py index 5e51e97..2c76a3d 100644 --- a/eodal/downloader/utils/creodias.py +++ b/eodal/downloader/utils/creodias.py @@ -20,7 +20,10 @@ import os import pandas as pd +import pyotp import requests +import shutil +import time from pathlib import Path from typing import Optional, Union @@ -33,38 +36,73 @@ CHUNK_SIZE = 2096 -def get_keycloak() -> str: +def get_totp() -> str: + """ + Get one-time access code using the TOTP algorithm to + login into CREODIAS. The TOTP secret must have been + set in the EOdal configurations using the `CREODIAS_TOTP_SECRET` + variable. + + :return: one-time access key valid for a maximum of 30 seconds. """ - Returns the CREODIAS keycloak token for a valid - (i.e., registered) CREODIAS user. Takes the username - and password from either config/settings.py, a .env - file or environment variables. + return pyotp.TOTP(Settings.CREODIAS_TOTP_SECRET).now() - The token is required for downloading data from - CREODIAS. - Function taken from: - https://creodias.eu/-/how-to-generate-keycloak-token-using-web-browser-console- - (2021-09-23) +def _get_keycloak(username: str, password: str) -> str: """ + Gets the keycloak token required by CREODIAS to sign requests. + It uses the CREODIAS username, password and one-time access code + (2FA) to authenticate at the Cloudferro Openid connector. + :param username: CREODIAS username + :param password: CREODIAS password + :return: keycloak access token + """ + # get one-time access code + totp = get_totp() + + # prepare data for HTTP POST request data = { "client_id": "CLOUDFERRO_PUBLIC", - "username": Settings.CREODIAS_USER, - "password": Settings.CREODIAS_PASSWORD, + "username": username, + "password": password, + "totp": int(totp), "grant_type": "password", } + # request the token + r = requests.post( + "https://identity.cloudferro.com/auth/realms/Creodias-new/protocol/openid-connect/token", + data=data, + ) + r.raise_for_status() + return r.json()["access_token"] + + +def get_keycloak() -> str: + """ + Gets the keycloak token required by CREODIAS to sign requests. + It uses the CREODIAS username, password and one-time access code + (2FA) to authenticate at the Cloudferro Openid connector. + + NOTE: + A one-time access code is valid for 30 seconds. It can + be only used once. This means, if one makes multiple requests + for a keycloak token within <= 30 seconds, there will be a + server error as the one-time access code has been consumed already. + To make this behavior more stable, the application waits 31 seconds + on error to request a new one-time access code (recursive call). + + :return: keycloak access token + """ + # set user name and password from EOdal configuration + username = Settings.CREODIAS_USER + password = Settings.CREODIAS_PASSWORD try: - r = requests.post( - "https://auth.creodias.eu/auth/realms/DIAS/protocol/openid-connect/token", - data=data, - ) - r.raise_for_status() + return _get_keycloak(username, password) except Exception: - raise Exception( - f"Keycloak token creation failed. Reponse from the server was: {r.json()}" - ) - return r.json()["access_token"] + # on error, sleep 31 seconds and then try again + time.sleep(31) + return get_keycloak() def download_datasets( @@ -90,9 +128,6 @@ def download_datasets( the existing zips are complete! """ - # get API token from CREODIAS - keycloak_token = get_keycloak() - # change into download directory os.chdir(str(download_dir)) @@ -100,7 +135,10 @@ def download_datasets( scene_counter = 1 for _, dataset in datasets.iterrows(): try: - dataset_url = dataset.properties["services"]["download"]["url"] + # get API token from CREODIAS (only valid for a limited time) + keycloak_token = get_keycloak() + dataset_temp_url = dataset.properties["services"]["download"]["url"].split('/')[-1] + dataset_url = f'{Settings.CREODIAS_ZIPPER_URL}/{dataset_temp_url}' response = requests.get( dataset_url, headers={"Authorization": f"Bearer {keycloak_token}"}, @@ -108,19 +146,22 @@ def download_datasets( ) response.raise_for_status() except Exception as e: - logger.error(f"Could not download {dataset.product_uri}: {e}") + logger.error(f"Could not download {dataset_url}: {e}") continue # download the data using the iter_content method (writes chunks to disk) # check if the dataset exists already and overwrite it only if # defined by the user fname = dataset.dataset_name.replace("SAFE", "zip") + if not fname.endswith("zip"): + fname = fname + ".zip" if Path(fname).exists(): if not overwrite_existing_zips: logger.info( f"{dataset.dataset_name} already downloaded - " + "continue with next dataset" ) + scene_counter += 1 continue else: logger.warning(f"Overwriting {dataset.dataset_name}") @@ -128,10 +169,49 @@ def download_datasets( logger.info( f"Starting downloading {fname} ({scene_counter}/{datasets.shape[0]})" ) - with open(fname, "wb") as fd: - for chunk in response.iter_content(chunk_size=CHUNK_SIZE): - fd.write(chunk) - logger.info( - f"Finished downloading {fname} ({scene_counter}/{datasets.shape[0]})" - ) + try: + with open(fname, "wb") as fd: + for chunk in response.iter_content(chunk_size=CHUNK_SIZE): + fd.write(chunk) + logger.info( + f"Finished downloading {fname} ({scene_counter}/{datasets.shape[0]})" + ) + except Exception as e: + # on error (e.g., broken network connection) delete the incomplete + # zip archive and continue. + shutil.rmtree(fname) + logger.error( + f'Downloading {fname} ({scene_counter}/{datasets.shape[0]}) ' + + f'was interrupted.\n{e}\nRemoved broken zip.\n' + + 'Consider re-running the download.') + scene_counter += 1 + + +# # unit test (requires credentials for CREODIAS) +# if __name__ == '__main__': +# +# from datetime import date +# from eodal.downloader.sentinel2.creodias import query_creodias +# from eodal.utils.constants.sentinel2 import ProcessingLevels +# from shapely.geometry import box +# +# bbox = box(*[8, 49, 9, 50]) +# start_date = date(2017, 10, 5) +# end_date = date(2017, 10, 31) +# max_records = 100 +# processing_level = ProcessingLevels.L2A +# datasets = query_creodias( +# start_date=start_date, +# end_date=end_date, +# max_records=max_records, +# processing_level=processing_level, +# bounding_box=bbox +# ) +# +# download_dir = Path('/mnt/ides/Lukas/03_Debug/SAT/') +# download_datasets(datasets=datasets, download_dir=download_dir, overwrite_existing_zips=False) +# +# from eodal.downloader.utils.unzip_datasets import unzip_datasets +# +# unzip_datasets(download_dir=download_dir, platform='S2', remove_zips=True)