Skip to content

Commit

Permalink
Feat: add new download impl
Browse files Browse the repository at this point in the history
  • Loading branch information
mbarbet committed Jul 10, 2024
1 parent 9f60577 commit 6650689
Show file tree
Hide file tree
Showing 6 changed files with 98 additions and 19 deletions.
5 changes: 3 additions & 2 deletions extensions/aproc/proc/download/download_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def get_resource_id(inputs: BaseModel):
return hash_object.hexdigest()

@shared_task(bind=True, track_started=True)
def execute(self, headers: dict[str, str], requests: list[dict[str, str]], crop_wkt: str, target_projection: str, target_format: str = "Geotiff") -> dict:
def execute(self, headers: dict[str, str], requests: list[dict[str, str]], crop_wkt: str, target_projection: str, target_format: str = "Geotiff", raw_archive:bool = True) -> dict:
(send_to, user_id) = AprocProcess.__get_user_email__(headers.get("authorization"))
LOGGER.debug("processing download requests from {}".format(send_to))
download_locations = []
Expand Down Expand Up @@ -195,7 +195,8 @@ def execute(self, headers: dict[str, str], requests: list[dict[str, str]], crop_
file_name=file_name,
crop_wkt=crop_wkt,
target_projection=target_projection,
target_format=target_format)
target_format=target_format,
raw_archive=raw_archive)
Notifications.report(item, Configuration.settings.email_subject_user, Configuration.settings.email_content_user, to=[send_to], context=mail_context, outcome="success")
Notifications.report(item, Configuration.settings.email_subject_admin, Configuration.settings.email_content_admin, Configuration.settings.notification_admin_emails.split(","), context=mail_context)
LOGGER.info("Download success", extra={"event.kind": "event", "event.category": "file", "event.type": "user-action", "event.action": "download", "event.outcome": "success", "user.id": user_id, "user.email": send_to, "event.module": "aproc-download", "arlas.collection": collection, "arlas.item.id": item_id})
Expand Down
3 changes: 2 additions & 1 deletion extensions/aproc/proc/download/drivers/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def supports(item: Item) -> bool:
...

@abstractmethod
def fetch_and_transform(self, item: Item, target_directory: str, file_name: str, crop_wkt: str, target_projection: str, target_format: str):
def fetch_and_transform(self, item: Item, target_directory: str, file_name: str, crop_wkt: str, target_projection: str, target_format: str, raw_archive: bool):
""" Fetch and transform the item, given the wanted projection, format and crop. The file must be placed in the provided target directory.
Args:
Expand All @@ -45,5 +45,6 @@ def fetch_and_transform(self, item: Item, target_directory: str, file_name: str,
crop_wkt (str): geometry to crop the raster with
target_projection (str): target projection
target_format (str): target format
raw_archive (bool): if true fetch raw archive
"""
...
22 changes: 18 additions & 4 deletions extensions/aproc/proc/download/drivers/impl/met_file.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
import os
from airs.core.models.model import Item, Role
from airs.core.models.model import Item, Role, ItemFormat
from aproc.core.settings import Configuration
from extensions.aproc.proc.download.drivers.driver import Driver as DownloadDriver
from datetime import datetime

from extensions.aproc.proc.download.drivers.impl.utils import make_raw_archive_zip, copy_from_dimap, copy_from_terrasarx


class Driver(DownloadDriver):

# Implements drivers method
Expand All @@ -21,11 +25,21 @@ def supports(item: Item) -> bool:
return False

# Implements drivers method
def fetch_and_transform(self, item: Item, target_directory: str, file_name: str, crop_wkt: str, target_projection: str, target_format: str):
from extensions.aproc.proc.download.drivers.impl.utils import extract
import pyproj
def fetch_and_transform(self, item: Item, target_directory: str, file_name: str, crop_wkt: str, target_projection: str, target_format: str, raw_archive: bool):
asset = item.assets.get(Role.metadata.value)
met_file = asset.href
if raw_archive:
make_raw_archive_zip(met_file, target_directory)
return
# If the projetion and the format are natives, just copy the file
if target_projection == target_format == 'native':
if item.properties.item_format == ItemFormat.dimap.value:
copy_from_dimap(met_file,target_directory)
elif item.properties.item_format == ItemFormat.terrasar.value:
copy_from_terrasarx(met_file,target_directory)
return
from extensions.aproc.proc.download.drivers.impl.utils import extract
import pyproj
met_file_name = os.path.basename(met_file)
epsg_target = pyproj.Proj(target_projection)
# Default driver is GTiff
Expand Down
2 changes: 1 addition & 1 deletion extensions/aproc/proc/download/drivers/impl/simple_copy.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def supports(item: Item) -> bool:
return data is not None and data.href is not None and (data.href.startswith("file://") or data.href.startswith("http://") or data.href.startswith("https://"))

# Implements drivers method
def fetch_and_transform(self, item: Item, target_directory: str, file_name: str, crop_wkt: str, target_projection: str, target_format: str):
def fetch_and_transform(self, item: Item, target_directory: str, file_name: str, crop_wkt: str, target_projection: str, target_format: str, raw_archive: bool):
data = item.assets.get(Role.data.value)
if data is not None:
if data.href.startswith("file://"):
Expand Down
19 changes: 16 additions & 3 deletions extensions/aproc/proc/download/drivers/impl/tif_file.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
import os
import shutil

from airs.core.models.model import Item, Role
from aproc.core.settings import Configuration
from extensions.aproc.proc.download.drivers.driver import Driver as DownloadDriver
from datetime import datetime

from extensions.aproc.proc.download.drivers.impl.utils import make_raw_archive_zip


class Driver(DownloadDriver):

# Implements drivers method
Expand All @@ -21,12 +27,19 @@ def supports(item: Item) -> bool:
return False

# Implements drivers method
def fetch_and_transform(self, item: Item, target_directory: str, file_name: str, crop_wkt: str, target_projection: str, target_format: str):
from extensions.aproc.proc.download.drivers.impl.utils import extract
import pyproj
def fetch_and_transform(self, item: Item, target_directory: str, file_name: str, crop_wkt: str, target_projection: str, target_format: str, raw_archive: bool):
asset = item.assets.get(Role.data.value)
tif_file = asset.href
tif_file_name = os.path.basename(tif_file)
if raw_archive:
make_raw_archive_zip(tif_file, target_directory)
return
if target_projection == target_format == 'native':
# If the projetion and the format are natives, just copy the file
shutil.copyfile(tif_file, os.path.join(target_directory, tif_file_name))
return
from extensions.aproc.proc.download.drivers.impl.utils import extract
import pyproj
epsg_target = pyproj.Proj(target_projection)
# Default driver is GTiff
driver_target = "GTiff"
Expand Down
66 changes: 58 additions & 8 deletions extensions/aproc/proc/download/drivers/impl/utils.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
import pyproj
import rasterio.mask
import shapely.wkt
from osgeo import osr
from rasterio.warp import calculate_default_transform
from shapely.ops import transform

from datetime import datetime
import shutil
import os
import xml.etree.ElementTree as ET
from zipfile import ZipFile

def setup_gdal():
from osgeo import gdal
Expand All @@ -15,6 +13,12 @@ def setup_gdal():

def extract(crop_wkt, file, driver_target, epsg_target, target_directory, target_file_name,
target_projection):
import pyproj
import rasterio.mask
import shapely.wkt
from osgeo import osr
from rasterio.warp import calculate_default_transform
from shapely.ops import transform
with rasterio.open(file) as src:
epsg_4326 = pyproj.Proj('EPSG:4326')
epsg_src = pyproj.Proj(src.crs)
Expand All @@ -38,4 +42,50 @@ def extract(crop_wkt, file, driver_target, epsg_target, target_directory, target
{"driver": driver_target, "nodata": 0, "height": height, "width": width, "transform": default_transform,
"crs": {'init': target_projection}})
with rasterio.open(target_directory + "/" + target_file_name, "w", **out_meta) as dest:
dest.write(out_image)
dest.write(out_image)

def make_raw_archive_zip(href: str, target_directory: str):
file_name = os.path.basename(href)
# Get direct parent folder of href_file to zip
dir_name = os.path.dirname(href)
target_file_name = os.path.splitext(file_name)[0] + datetime.now().strftime("%d-%m-%Y-%H-%M-%S")
shutil.make_archive(target_directory + "/" + target_file_name, 'zip', dir_name)
return

def copy_from_dimap(href: str, target_directory: str):
dir_name = os.path.dirname(href)
file_name = os.path.basename(href)
tree = ET.parse(href)
root = tree.getroot()
files_elements = root.findall('./Raster_Data/Data_Access/Data_Files/Data_File/DATA_FILE_PATH')
files = list(map(lambda f: [os.path.join(dir_name, f.attrib["href"]),f.attrib["href"]], files_elements))
copy_from_met(files,target_directory,file_name)

def copy_from_terrasarx(href: str, target_directory: str):
dir_name = os.path.dirname(href)
file_name = os.path.basename(href)
tree = ET.parse(href)
root = tree.getroot()
files_elements = root.findall('.productComponents/imageData/file/location')
print(files_elements)
files = []
for file in files_elements:
f = [str(file.find('path').text), str(file.find('filename').text)]
files.append([os.path.join(dir_name,f[0],f[1]),f[1]])
copy_from_met(files,target_directory,file_name)


def copy_from_met(files,target_directory,file_name):
# If the met_file reference only one file we copy it
if len(files) == 1:
shutil.copyfile(files[0][0], os.path.join(target_directory,files[0][1]))
return
# If the met_file reference several files we zip it in one zip file
elif len(files) > 1:
tif_zip_file = ZipFile(os.path.join(target_directory,file_name+".zip"), mode='a')
for f in files:
valid_and_exist = os.path.isfile(f[0]) and os.path.exists(f[0])
if valid_and_exist:
tif_zip_file.write(f[0],f[1])
tif_zip_file.close()
return

0 comments on commit 6650689

Please sign in to comment.