Skip to content

Commit

Permalink
Code cleanup/refactoring (#243)
Browse files Browse the repository at this point in the history
* Replaced use of api_url dict with StorageService instance or ID
* Did some refactoring to unify the AIP fetching code between the main
  app logic and the CLI tool
  • Loading branch information
mcantelon committed Dec 13, 2023
1 parent aa95805 commit 8a6e9a0
Show file tree
Hide file tree
Showing 12 changed files with 198 additions and 214 deletions.
12 changes: 7 additions & 5 deletions AIPscan/Aggregator/database_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,19 +203,20 @@ def create_storage_location_object(current_location, description, storage_servic
return storage_location


def create_or_update_storage_location(current_location, api_url, storage_service_id):
def create_or_update_storage_location(current_location, storage_service):
"""Create or update Storage Location and return it."""
storage_location = StorageLocation.query.filter_by(
current_location=current_location
).first()

request_url, request_url_without_api_key = get_storage_service_api_url(
api_url, current_location
storage_service, current_location
)
response = tasks.make_request(request_url, request_url_without_api_key)
description = response.get("description")
if not storage_location:
return create_storage_location_object(
current_location, description, storage_service_id
current_location, description, storage_service.id
)

if storage_location.description != description:
Expand All @@ -233,11 +234,12 @@ def create_pipeline_object(origin_pipeline, dashboard_url):
return pipeline


def create_or_update_pipeline(origin_pipeline, api_url):
def create_or_update_pipeline(origin_pipeline, storage_service):
"""Create or update Pipeline and return it."""
pipeline = Pipeline.query.filter_by(origin_pipeline=origin_pipeline).first()

request_url, request_url_without_api_key = get_storage_service_api_url(
api_url, origin_pipeline
storage_service, origin_pipeline
)
response = tasks.make_request(request_url, request_url_without_api_key)
dashboard_url = response.get("remote_name")
Expand Down
6 changes: 3 additions & 3 deletions AIPscan/Aggregator/mets_parse_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def get_aip_original_name(mets):
# ignore those.
TRANSFER_DIR_PREFIX = "%transferDirectory%"

NAMESPACES = {u"premis": u"http://www.loc.gov/premis/v3"}
NAMESPACES = {"premis": "http://www.loc.gov/premis/v3"}
ELEM_ORIGINAL_NAME_PATTERN = ".//premis:originalName"

original_name = ""
Expand All @@ -85,13 +85,13 @@ def get_aip_original_name(mets):


def download_mets(
api_url, package_uuid, relative_path_to_mets, timestamp, package_list_no
storage_service, package_uuid, relative_path_to_mets, timestamp, package_list_no
):
"""Download METS from the storage service."""

# Request the METS file.
mets_response = requests.get(
get_mets_url(api_url, package_uuid, relative_path_to_mets)
get_mets_url(storage_service, package_uuid, relative_path_to_mets)
)

# Create a directory to download the METS to.
Expand Down
48 changes: 27 additions & 21 deletions AIPscan/Aggregator/task_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""Collects a number of reusable components of tasks.py. Also ensures
the module remains clean and easy to refactor over time.
"""
import json
import os
from datetime import datetime

Expand All @@ -11,20 +12,17 @@
from AIPscan.Aggregator.types import StorageServicePackage


def format_api_url_with_limit_offset(api_url):
def format_api_url_with_limit_offset(storage_service):
"""Format the API URL here to make sure it is as correct as
possible.
"""
base_url = api_url.get("baseUrl", "").rstrip("/")
limit = int(api_url.get("limit", ""))
offset = api_url.get("offset", "")
user_name = api_url.get("userName")
api_key = api_url.get("apiKey", "")
base_url = storage_service.url.rstrip("/")

request_url_without_api_key = "{}/api/v2/file/?limit={}&offset={}".format(
base_url, limit, offset
base_url, storage_service.download_limit, storage_service.download_offset
)
request_url = "{}&username={}&api_key={}".format(
request_url_without_api_key, user_name, api_key
request_url_without_api_key, storage_service.user_name, storage_service.api_key
)
return base_url, request_url_without_api_key, request_url

Expand All @@ -36,6 +34,19 @@ def get_packages_directory(timestamp):
return os.path.join("AIPscan", "Aggregator", "downloads", timestamp, "packages")


def parse_package_list_file(filepath, logger=None, remove_after_parsing=False):
    """Load a package list from a JSON file and return the parsed data.

    :param filepath: Path to the JSON file holding the package list.
    :param logger: Optional logger. When provided, a warning is logged
        if the file cannot be deleted after parsing.
    :param remove_after_parsing: When True, delete the file once its
        contents have been parsed.

    :returns: The deserialized JSON content of the file.
    """
    # JSON is UTF-8 by specification, so do not depend on the locale's
    # default encoding when reading it back.
    with open(filepath, "r", encoding="utf-8") as packages_json:
        package_list = json.load(packages_json)

    if remove_after_parsing:
        try:
            os.remove(filepath)
        except OSError as err:
            # Deletion is best-effort: the parsed data is still returned
            # even if the file could not be removed.
            if logger:
                logger.warning("Unable to delete package JSON file: {}".format(err))

    return package_list


def process_package_object(package_obj):
"""Process a package object as retrieved from the storage service
and return a StorageServicePackage type to the caller for further
Expand Down Expand Up @@ -95,32 +106,27 @@ def _tz_neutral_date(date):
return date


def get_mets_url(api_url, package_uuid, path_to_mets):
def get_mets_url(storage_service, package_uuid, path_to_mets):
"""Construct a URL from which we can download the METS files that
we are interested in.
"""
am_url = "baseUrl"
user_name = "userName"
api_key = "apiKey"

mets_url = "{}/api/v2/file/{}/extract_file/?relative_path_to_file={}&username={}&api_key={}".format(
api_url[am_url].rstrip("/"),
storage_service.url.rstrip("/"),
package_uuid,
path_to_mets,
api_url[user_name],
api_url[api_key],
storage_service.user_name,
storage_service.api_key,
)
return mets_url


def get_storage_service_api_url(api_url, api_path):
def get_storage_service_api_url(storage_service, api_path):
"""Return URL to fetch location info from Storage Service."""
base_url = api_url.get("baseUrl", "").rstrip("/")
base_url = storage_service.url.rstrip("/")
request_url_without_api_key = "{}{}".format(base_url, api_path).rstrip("/")
user_name = api_url.get("userName")
api_key = api_url.get("apiKey", "")

request_url = "{}?username={}&api_key={}".format(
request_url_without_api_key, user_name, api_key
request_url_without_api_key, storage_service.user_name, storage_service.api_key
)
return request_url, request_url_without_api_key

Expand Down
Loading

0 comments on commit 8a6e9a0

Please sign in to comment.