From e5bacb89d70049e9b2af98ec4ac154ef8ba8b42e Mon Sep 17 00:00:00 2001 From: MatthieuBarbet Date: Tue, 9 Jul 2024 18:14:27 +0200 Subject: [PATCH] Feat: add new download impl --- .../aproc/proc/download/download_process.py | 5 +- .../aproc/proc/download/drivers/driver.py | 3 +- .../proc/download/drivers/impl/met_file.py | 60 +++++++++++++++++-- .../proc/download/drivers/impl/simple_copy.py | 2 +- .../proc/download/drivers/impl/tif_file.py | 19 +++++- .../aproc/proc/download/drivers/impl/utils.py | 28 ++++++--- 6 files changed, 98 insertions(+), 19 deletions(-) diff --git a/extensions/aproc/proc/download/download_process.py b/extensions/aproc/proc/download/download_process.py index 7bf05d04..3af925f3 100644 --- a/extensions/aproc/proc/download/download_process.py +++ b/extensions/aproc/proc/download/download_process.py @@ -154,7 +154,7 @@ def get_resource_id(inputs: BaseModel): return hash_object.hexdigest() @shared_task(bind=True, track_started=True) - def execute(self, headers: dict[str, str], requests: list[dict[str, str]], crop_wkt: str, target_projection: str, target_format: str = "Geotiff") -> dict: + def execute(self, headers: dict[str, str], requests: list[dict[str, str]], crop_wkt: str, target_projection: str, target_format: str = "Geotiff", raw_archive:bool = True) -> dict: (send_to, user_id) = AprocProcess.__get_user_email__(headers.get("authorization")) LOGGER.debug("processing download requests from {}".format(send_to)) download_locations = [] @@ -195,7 +195,8 @@ def execute(self, headers: dict[str, str], requests: list[dict[str, str]], crop_ file_name=file_name, crop_wkt=crop_wkt, target_projection=target_projection, - target_format=target_format) + target_format=target_format, + raw_archive=raw_archive) Notifications.report(item, Configuration.settings.email_subject_user, Configuration.settings.email_content_user, to=[send_to], context=mail_context, outcome="success") Notifications.report(item, Configuration.settings.email_subject_admin, Configuration.settings.email_content_admin, Configuration.settings.notification_admin_emails.split(","), context=mail_context) LOGGER.info("Download success", extra={"event.kind": "event", "event.category": "file", "event.type": "user-action", "event.action": "download", "event.outcome": "success", "user.id": user_id, "user.email": send_to, "event.module": "aproc-download", "arlas.collection": collection, "arlas.item.id": item_id}) diff --git a/extensions/aproc/proc/download/drivers/driver.py b/extensions/aproc/proc/download/drivers/driver.py index b807b4ae..c55eb3ac 100644 --- a/extensions/aproc/proc/download/drivers/driver.py +++ b/extensions/aproc/proc/download/drivers/driver.py @@ -35,7 +35,7 @@ def supports(item: Item) -> bool: ... @abstractmethod - def fetch_and_transform(self, item: Item, target_directory: str, file_name: str, crop_wkt: str, target_projection: str, target_format: str): + def fetch_and_transform(self, item: Item, target_directory: str, file_name: str, crop_wkt: str, target_projection: str, target_format: str, raw_archive: bool): """ Fetch and transform the item, given the wanted projection, format and crop. The file must be placed in the provided target directory. Args: @@ -45,5 +45,6 @@ def fetch_and_transform(self, item: Item, target_directory: str, file_name: str, crop_wkt (str): geometry to crop the raster with target_projection (str): target projection target_format (str): target format + raw_archive (bool): if true fetch raw archive """ ... diff --git a/extensions/aproc/proc/download/drivers/impl/met_file.py b/extensions/aproc/proc/download/drivers/impl/met_file.py index b357b076..dc209ed9 100644 --- a/extensions/aproc/proc/download/drivers/impl/met_file.py +++ b/extensions/aproc/proc/download/drivers/impl/met_file.py @@ -1,8 +1,14 @@ import os -from airs.core.models.model import Item, Role +from airs.core.models.model import Item, Role, ItemFormat from aproc.core.settings import Configuration from extensions.aproc.proc.download.drivers.driver import Driver as DownloadDriver from datetime import datetime + +from extensions.aproc.proc.download.drivers.impl.utils import make_raw_archive_zip +import shutil +import xml.etree.ElementTree as ET +from zipfile import ZipFile + class Driver(DownloadDriver): # Implements drivers method @@ -21,11 +27,21 @@ def supports(item: Item) -> bool: return False # Implements drivers method - def fetch_and_transform(self, item: Item, target_directory: str, file_name: str, crop_wkt: str, target_projection: str, target_format: str): - from extensions.aproc.proc.download.drivers.impl.utils import extract - import pyproj + def fetch_and_transform(self, item: Item, target_directory: str, file_name: str, crop_wkt: str, target_projection: str, target_format: str, raw_archive: bool): asset = item.assets.get(Role.metadata.value) met_file = asset.href + if raw_archive: + make_raw_archive_zip(met_file, target_directory) + return + # If the projetion and the format are natives, just copy the file + if target_projection == target_format == 'native': + if item.properties.item_format == ItemFormat.dimap.value: + self.copy_from_dimap(met_file,target_directory) + elif item.properties.item_format == ItemFormat.terrasar.value: + self.copy_from_terrasarx(met_file,target_directory) + return + from extensions.aproc.proc.download.drivers.impl.utils import extract + import pyproj met_file_name = os.path.basename(met_file) epsg_target = pyproj.Proj(target_projection) # Default driver is GTiff @@ -42,4 +58,40 @@ def fetch_and_transform(self, item: Item, target_directory: str, file_name: str, target_projection) + def copy_from_dimap(self,href: str, target_directory: str): + dir_name = os.path.dirname(href) + file_name = os.path.basename(href) + tree = ET.parse(href) + root = tree.getroot() + files_elements = root.findall('./Raster_Data/Data_Access/Data_Files/Data_File/DATA_FILE_PATH') + files = list(map(lambda f: [os.path.join(dir_name, f.attrib["href"]),f.attrib["href"]], files_elements)) + self.copy_from_met(files,target_directory,file_name) + + def copy_from_terrasarx(self,href: str, target_directory: str): + dir_name = os.path.dirname(href) + file_name = os.path.basename(href) + tree = ET.parse(href) + root = tree.getroot() + files_elements = root.findall('.productComponents/imageData/file/location') + print(files_elements) + files = [] + for file in files_elements: + f = [str(file.find('path').text), str(file.find('filename').text)] + files.append([os.path.join(dir_name,f[0],f[1]),f[1]]) + self.copy_from_met(files,target_directory,file_name) + + def copy_from_met(self,files,target_directory,file_name): + # If the met_file reference only one file we copy it + if len(files) == 1: + shutil.copyfile(files[0][0], os.path.join(target_directory,files[0][1])) + return + # If the met_file reference several files we zip it in one zip file + elif len(files) > 1: + tif_zip_file = ZipFile(os.path.join(target_directory,file_name+".zip"), mode='a') + for f in files: + valid_and_exist = os.path.isfile(f[0]) and os.path.exists(f[0]) + if valid_and_exist: + tif_zip_file.write(f[0],f[1]) + tif_zip_file.close() + return diff --git a/extensions/aproc/proc/download/drivers/impl/simple_copy.py b/extensions/aproc/proc/download/drivers/impl/simple_copy.py index 7d28f583..b411efea 100644 --- a/extensions/aproc/proc/download/drivers/impl/simple_copy.py +++ b/extensions/aproc/proc/download/drivers/impl/simple_copy.py @@ -19,7 +19,7 @@ def supports(item: Item) -> bool: return data is not None and data.href is not None and (data.href.startswith("file://") or data.href.startswith("http://") or data.href.startswith("https://")) # Implements drivers method - def fetch_and_transform(self, item: Item, target_directory: str, file_name: str, crop_wkt: str, target_projection: str, target_format: str): + def fetch_and_transform(self, item: Item, target_directory: str, file_name: str, crop_wkt: str, target_projection: str, target_format: str, raw_archive: bool): data = item.assets.get(Role.data.value) if data is not None: if data.href.startswith("file://"): diff --git a/extensions/aproc/proc/download/drivers/impl/tif_file.py b/extensions/aproc/proc/download/drivers/impl/tif_file.py index faab32af..4817bceb 100644 --- a/extensions/aproc/proc/download/drivers/impl/tif_file.py +++ b/extensions/aproc/proc/download/drivers/impl/tif_file.py @@ -1,8 +1,14 @@ import os +import shutil + from airs.core.models.model import Item, Role from aproc.core.settings import Configuration from extensions.aproc.proc.download.drivers.driver import Driver as DownloadDriver from datetime import datetime + +from extensions.aproc.proc.download.drivers.impl.utils import make_raw_archive_zip + + class Driver(DownloadDriver): # Implements drivers method @@ -21,12 +27,19 @@ def supports(item: Item) -> bool: return False # Implements drivers method - def fetch_and_transform(self, item: Item, target_directory: str, file_name: str, crop_wkt: str, target_projection: str, target_format: str): - from extensions.aproc.proc.download.drivers.impl.utils import extract - import pyproj + def fetch_and_transform(self, item: Item, target_directory: str, file_name: str, crop_wkt: str, target_projection: str, target_format: str, raw_archive: bool): asset = item.assets.get(Role.data.value) tif_file = asset.href tif_file_name = os.path.basename(tif_file) + if raw_archive: + make_raw_archive_zip(tif_file, target_directory) + return + if target_projection == target_format == 'native': + # If the projetion and the format are natives, just copy the file + shutil.copyfile(tif_file, os.path.join(target_directory, tif_file_name)) + return + from extensions.aproc.proc.download.drivers.impl.utils import extract + import pyproj epsg_target = pyproj.Proj(target_projection) # Default driver is GTiff driver_target = "GTiff" diff --git a/extensions/aproc/proc/download/drivers/impl/utils.py b/extensions/aproc/proc/download/drivers/impl/utils.py index aebf7bd4..c0116e11 100644 --- a/extensions/aproc/proc/download/drivers/impl/utils.py +++ b/extensions/aproc/proc/download/drivers/impl/utils.py @@ -1,10 +1,8 @@ -import pyproj -import rasterio.mask -import shapely.wkt -from osgeo import osr -from rasterio.warp import calculate_default_transform -from shapely.ops import transform - +from datetime import datetime +import shutil +import os +import xml.etree.ElementTree as ET +from zipfile import ZipFile def setup_gdal(): from osgeo import gdal @@ -15,6 +13,12 @@ def setup_gdal(): def extract(crop_wkt, file, driver_target, epsg_target, target_directory, target_file_name, target_projection): + import pyproj + import rasterio.mask + import shapely.wkt + from osgeo import osr + from rasterio.warp import calculate_default_transform + from shapely.ops import transform with rasterio.open(file) as src: epsg_4326 = pyproj.Proj('EPSG:4326') epsg_src = pyproj.Proj(src.crs) @@ -38,4 +42,12 @@ def extract(crop_wkt, file, driver_target, epsg_target, target_directory, target {"driver": driver_target, "nodata": 0, "height": height, "width": width, "transform": default_transform, "crs": {'init': target_projection}}) with rasterio.open(target_directory + "/" + target_file_name, "w", **out_meta) as dest: - dest.write(out_image) \ No newline at end of file + dest.write(out_image) + +def make_raw_archive_zip(href: str, target_directory: str): + file_name = os.path.basename(href) + # Get direct parent folder of href_file to zip + dir_name = os.path.dirname(href) + target_file_name = os.path.splitext(file_name)[0] + datetime.now().strftime("%d-%m-%Y-%H-%M-%S") + shutil.make_archive(target_directory + "/" + target_file_name, 'zip', dir_name) + return