Skip to content

Commit

Permalink
Add test to troubleshoot audit failure; #22 (#23)
Browse files Browse the repository at this point in the history
* Add test to troubleshoot #22
* additional logging information
* lint
* Tweak container build GitHub Action
  • Loading branch information
jefferya authored Aug 30, 2024
1 parent 4d4d3c5 commit b93a053
Show file tree
Hide file tree
Showing 7 changed files with 64 additions and 17 deletions.
13 changes: 10 additions & 3 deletions .github/workflows/push.yml
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
name: Build and publish

on:
# https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#onpushpull_requestbranchestags
push:
branches: [ "main" ]
branches:
- '**' # all branches; with or without '/'
- '!main' # exclude main
# Publish semver tags as releases.
tags: [ 'v*.*.*' ]
pull_request:
branches: [ "main" ]
# pull_request:
# branches: [ "main" ]

env:
# Use docker.io for Docker Hub if empty
Expand Down Expand Up @@ -39,6 +42,10 @@ jobs:
# with:
# cosign-release: 'v2.1.1'

# https://github.com/docker/setup-qemu-action
- name: Set up QEMU
uses: docker/setup-qemu-action@v3

# Set up BuildKit Docker container builder to be able to build
# multi-platform images and export cache
# https://github.com/docker/setup-buildx-action
Expand Down
10 changes: 5 additions & 5 deletions rootfs/var/www/leaf-isle-bagger/drupal/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@
def init_session(args, username, password):

session = requests.Session()
#session.auth = (username, password)
# session.auth = (username, password)

auth_endpoint = 'user/login?_format=json'
auth_endpoint = "user/login?_format=json"
response = session.post(
urljoin(args.server, auth_endpoint),
json={"name": username, 'pass': password},
headers={'Content-Type': 'application/json'}
urljoin(args.server, auth_endpoint),
json={"name": username, "pass": password},
headers={"Content-Type": "application/json"},
)
response.raise_for_status()

Expand Down
3 changes: 3 additions & 0 deletions rootfs/var/www/leaf-isle-bagger/drupal/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,15 +75,18 @@ def id_list_merge_with_media(session, args, node_list):
else:
for media in media_json:
media_of = None

if (
"field_media_of" in media
and len(media["field_media_of"]) >= 1
and "target_id" in media["field_media_of"][0]
):
media_of = media["field_media_of"][0]["target_id"]

media_changed = (
media["changed"][0]["value"] if ("changed" in media) else None
)

if (
media_of is not None
and media_changed is not None
Expand Down
4 changes: 2 additions & 2 deletions rootfs/var/www/leaf-isle-bagger/leaf-bagger-audit.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,13 @@ def process(args, session, output_file):

# get a list of Drupal Node IDs changed since a given optional date
node_list = drupalUtilities.id_list_from_nodes(session, args)
logging.info(node_list)
logging.info(f"Audit: Drupal nodes before media inclusion - {node_list}")

# inspect Drupal Media for changes
# a Media change does not transitively change the associated Node change timestamp
# if Media changed then add associated Node ID to the list
drupalUtilities.id_list_merge_with_media(session, args, node_list)
logging.info(node_list)
logging.info(f"Audit: Drupal nodes with media changes - {node_list}")

# audit archival information packages
swiftUtilities.audit(output_file, node_list, args.bagger_app_dir, args.container)
Expand Down
5 changes: 3 additions & 2 deletions rootfs/var/www/leaf-isle-bagger/leaf-bagger.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,13 @@ def process(args, session):
# get a list of Drupal Node IDs changed since a given optional date
# or a single node then force update
node_list = drupalUtilities.id_list_from_nodes(session, args)
logging.info(node_list)
logging.info(f"AIP: Drupal nodes before media inclusion - {node_list}")

# inspect Drupal Media for changes
# a Media change does not transitively change the associated Node change timestamp
# if Media changed then add associated Node ID to the list
drupalUtilities.id_list_merge_with_media(session, args, node_list)
logging.info(f"Drupal nodes with media changes - {node_list}")
logging.info(f"AIP: Drupal nodes with media changes - {node_list}")

# create archival information packages
logging.info("Create AIPs")
Expand Down
12 changes: 7 additions & 5 deletions rootfs/var/www/leaf-isle-bagger/swift/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def validate(node_list, swift_container):
logging.error(
(
f"id:[{aip_id}] - mismatched modification timestamp [{src_value['changed']}]"
" : {dst['headers']['x-object-meta-last-mod-timestamp']}"
f" : {dst['headers']['x-object-meta-last-mod-timestamp']}"
)
)
break
Expand Down Expand Up @@ -167,7 +167,9 @@ def upload(swift_conn_dst, dst_objs, container_dst, db_writer=None):
if not dst_item["success"]:
if "object" in dst_item:
logging.error(f"{dst_item}")
raise SwiftError(dst_item["error"], container_dst, dst_item["object"])
raise SwiftError(
dst_item["error"], container_dst, dst_item["object"]
)
# Swift segmented object
elif "for_object" in dst_item:
logging.error(f"{dst_item}")
Expand All @@ -178,7 +180,9 @@ def upload(swift_conn_dst, dst_objs, container_dst, db_writer=None):
dst_item["segment_index"],
)

if dst_item["action"] == "upload_object" and os.path.isfile(dst_item["path"]):
if dst_item["action"] == "upload_object" and os.path.isfile(
dst_item["path"]
):
# test upload file against Swift header etag to verify
checksums = validate_checksum(
dst_item["path"],
Expand All @@ -200,8 +204,6 @@ def upload(swift_conn_dst, dst_objs, container_dst, db_writer=None):
logging.error(f"{e}")




#
def swift_timestamp_to_iso8601(ts):
return datetime.strptime(ts, "%a, %d %b %Y %H:%M:%S %Z").strftime(
Expand Down
34 changes: 34 additions & 0 deletions rootfs/var/www/leaf-isle-bagger/tests/test_drupal.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,3 +118,37 @@ def test_drupal_media_change_without_node(mocker):
drupalUtilities.id_list_merge_with_media(_session, args, node_list)
assert node_list[1]
assert node_list[1]["changed"] == "2025-01-01"


# When a node is updated but its associated media is not,
# test that the date list captures the node date, not the (older) media date
def test_drupal_node_change_without_media(mocker):
    """Check that a stale media timestamp does not replace a newer node one.

    The mocked node view reports node 1 as changed on 2025-01-02, while the
    mocked media view reports a media item attached to node 1 (via
    ``field_media_of``) as changed on 2024-01-01. After the media merge, the
    entry for node 1 must still carry the node's own date.
    """
    fake_cli_args = argparse.Namespace(
        date="2023-01-01",
        server="http://example.com",
    )
    mocker.patch(
        "argparse.ArgumentParser.parse_args",
        return_value=fake_cli_args,
    )
    args = argparse.ArgumentParser.parse_args()

    # (endpoint builder, page, response body); for each view, page 1 returns
    # an empty list.
    stubbed_pages = (
        (
            drupalApi.node_view_endpoint,
            "0",
            '[ { "nid" : [{"value": 1}], "changed" : [{"value": "2025-01-02"}] } ]',
        ),
        (drupalApi.node_view_endpoint, "1", "[]"),
        (
            drupalApi.media_view_endpoint,
            "0",
            '[ { "changed": [{"value": "2024-01-01"}], "field_media_of": [{"target_id": 1}] } ]',
        ),
        (drupalApi.media_view_endpoint, "1", "[]"),
    )
    for build_endpoint, page_no, body in stubbed_pages:
        endpoint = build_endpoint(page=page_no, date_filter=args.date)
        _adapter.register_uri("GET", f"{args.server}/{endpoint}", text=body)

    node_list = drupalUtilities.id_list_from_nodes(_session, args)
    drupalUtilities.id_list_merge_with_media(_session, args, node_list)

    node_entry = node_list[1]
    assert node_entry
    assert node_entry["changed"] == "2025-01-02"

0 comments on commit b93a053

Please sign in to comment.