-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add node set gathering plus AIP creation
- Loading branch information
Showing
9 changed files
with
378 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
Dockerfile | ||
README.md | ||
tests | ||
tests/**/* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
# This workflow uses actions that are not certified by GitHub. | ||
# They are provided by a third-party and are governed by | ||
# separate terms of service, privacy policy, and support | ||
# documentation. | ||
|
||
# This workflow checks out code, builds an image, performs a container image | ||
# vulnerability scan with Anchore's Grype tool, and integrates the results with GitHub Advanced Security | ||
# code scanning feature. For more information on the Anchore scan action usage | ||
# and parameters, see https://github.com/anchore/scan-action. For more | ||
# information on Anchore's container image scanning tool Grype, see | ||
# https://github.com/anchore/grype | ||
name: Anchore Grype vulnerability scan | ||
|
||
on: | ||
push: | ||
branches: [ "main" ] | ||
pull_request: | ||
# The branches below must be a subset of the branches above | ||
branches: [ "main" ] | ||
schedule: | ||
- cron: '39 23 * * 1' | ||
|
||
permissions: | ||
contents: read | ||
|
||
jobs: | ||
Anchore-Build-Scan: | ||
permissions: | ||
contents: read # for actions/checkout to fetch code | ||
security-events: write # for github/codeql-action/upload-sarif to upload SARIF results | ||
actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status | ||
runs-on: ubuntu-latest | ||
steps: | ||
- name: Check out the code | ||
uses: actions/checkout@v4 | ||
- name: Build the Docker image | ||
run: docker build . --file Dockerfile --tag localbuild/testimage:latest | ||
- name: Run the Anchore Grype scan action | ||
uses: anchore/scan-action@3343887d815d7b07465f6fdcd395bd66508d486a #v3.6.4 | ||
id: scan | ||
with: | ||
image: "localbuild/testimage:latest" | ||
# fail-build: true | ||
fail-build: false | ||
severity-cutoff: critical | ||
- name: Upload vulnerability report | ||
uses: github/codeql-action/upload-sarif@v3 | ||
if: always() | ||
with: | ||
sarif_file: ${{ steps.scan.outputs.sarif }} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
name: Build and publish | ||
|
||
on: | ||
push: | ||
branches: [ "main" ] | ||
# Publish semver tags as releases. | ||
tags: [ 'v*.*.*' ] | ||
pull_request: | ||
branches: [ "main" ] | ||
|
||
env: | ||
# Use docker.io for Docker Hub if empty | ||
REGISTRY: ghcr.io | ||
# github.repository as <account>/<repo> | ||
IMAGE_NAME: ${{ github.repository }} | ||
|
||
jobs: | ||
build: | ||
|
||
runs-on: ubuntu-latest | ||
|
||
permissions: | ||
contents: read | ||
packages: write | ||
# This is used to complete the identity challenge | ||
# with sigstore/fulcio when running outside of PRs. | ||
id-token: write | ||
attestations: write | ||
|
||
steps: | ||
- name: Checkout repository | ||
uses: actions/checkout@v4 | ||
|
||
# Install the cosign tool except on PR | ||
# https://github.com/sigstore/cosign-installer | ||
#- name: Install cosign | ||
# if: github.event_name != 'pull_request' | ||
# uses: sigstore/cosign-installer@6e04d228eb30da1757ee4e1dd75a0ec73a653e06 #v3.1.1 | ||
# with: | ||
# cosign-release: 'v2.1.1' | ||
|
||
# Set up BuildKit Docker container builder to be able to build | ||
# multi-platform images and export cache | ||
# https://github.com/docker/setup-buildx-action | ||
- name: Set up Docker Buildx | ||
uses: docker/setup-buildx-action@d70bba72b1f3fd22344832f00baa16ece964efeb # v3.3.0 | ||
|
||
# Login against a Docker registry except on PR | ||
# https://github.com/docker/login-action | ||
- name: Log into registry ${{ env.REGISTRY }} | ||
# if: github.event_name != 'pull_request' | ||
uses: docker/login-action@e92390c5fb421da1463c202d546fed0ec5c39f20 # v3.1.0 | ||
with: | ||
registry: ${{ env.REGISTRY }} | ||
username: ${{ github.actor }} | ||
password: ${{ secrets.GITHUB_TOKEN }} | ||
|
||
# Extract metadata (tags, labels) for Docker | ||
# https://github.com/docker/metadata-action | ||
- name: Extract Docker metadata | ||
id: meta | ||
uses: docker/metadata-action@8e5442c4ef9f78752691e2d8f8d19755c6f78e81 # v5.5.1 | ||
with: | ||
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} | ||
|
||
# Build and push Docker image with Buildx (don't push on PR) | ||
# https://github.com/docker/build-push-action | ||
- name: Build and push Docker image | ||
id: build-and-push | ||
uses: docker/build-push-action@2cdde995de11925a030ce8070c3d77a52ffcf1c0 # v5.3.0 | ||
with: | ||
context: . | ||
# push: ${{ github.event_name != 'pull_request' }} | ||
push: true | ||
tags: ${{ steps.meta.outputs.tags }} | ||
labels: ${{ steps.meta.outputs.labels }} | ||
cache-from: type=gha | ||
cache-to: type=gha,mode=max | ||
|
||
# Sign the resulting Docker image digest except on PRs. | ||
# This will only write to the public Rekor transparency log when the Docker | ||
# repository is public to avoid leaking data. If you would like to publish | ||
# transparency data even for private images, pass --force to cosign below. | ||
# https://github.com/sigstore/cosign | ||
#- name: Sign the published Docker image | ||
# if: ${{ github.event_name != 'pull_request' }} | ||
# env: | ||
# # https://docs.github.com/en/actions/security-guides/security-hardening-for-github-actions#using-an-intermediate-environment-variable | ||
# TAGS: ${{ steps.meta.outputs.tags }} | ||
# DIGEST: ${{ steps.build-and-push.outputs.digest }} | ||
# # This step uses the identity token to provision an ephemeral certificate | ||
# # against the sigstore community Fulcio instance. | ||
# run: echo "${TAGS}" | xargs -I {} cosign sign --yes {}@${DIGEST} | ||
|
||
# https://docs.github.com/en/actions/publishing-packages/publishing-docker-images | ||
# https://docs.github.com/en/actions/security-guides/using-artifact-attestations-to-establish-provenance-for-builds | ||
- name: Generate artifact attestation | ||
uses: actions/attest-build-provenance@v1 | ||
with: | ||
subject-name: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME}} | ||
subject-digest: ${{ steps.build-and-push.outputs.digest }} | ||
push-to-registry: true |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# syntax=docker/dockerfile:1.7 | ||
ARG BAGGER_REPOSITORY | ||
ARG BAGGER_TAG | ||
|
||
FROM --platform=$BUILDPLATFORM ${BAGGER_REPOSITORY:-ghcr.io/cwrc}/isle-bagger:${BAGGER_TAG:-v0.0.1} | ||
|
||
# Install packages and tools that allow for basic downloads. | ||
RUN --mount=type=cache,id=bagger-apk-${TARGETARCH},sharing=locked,target=/var/cache/apk \ | ||
apk add --no-cache \ | ||
python3 \ | ||
py-pip \ | ||
py3-requests \ | ||
&& \ | ||
echo '' > /root/.ash_history | ||
|
||
WORKDIR /var/www/ | ||
|
||
# requires v24+ of Docker | ||
# https://github.com/docker/build-push-action/issues/761 | ||
#COPY --chown=nginx:nginx --link rootfs / | ||
COPY --chown=nginx:nginx rootfs / | ||
|
||
#RUN find /var/www/bagger ! -user nginx -exec chown nginx:ng | ||
|
||
#RUN pip install -r requirements.txt --user |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
""" | ||
Drupal API utility functions | ||
""" | ||
|
||
import requests | ||
|
||
from urllib.parse import urljoin | ||
|
||
|
||
# initialize a session with API endpoint | ||
def init_session(args, username, password):
    """Create a requests session that carries the given credentials.

    The credentials are attached as the session's ``auth`` pair, so they are
    sent with every request made through the returned session.

    Args:
        args: Parsed command-line arguments; not read by this function.
        username: Account name for the Drupal site.
        password: Password for that account.

    Returns:
        The configured ``requests.Session``.
    """
    drupal_session = requests.Session()
    drupal_session.auth = (username, password)
    return drupal_session
|
||
|
||
# | ||
def get_node_list(session, server, page=0, date_filter=''):
    """Fetch one page of the ``preservation_show_node_timestamps`` view.

    Args:
        session: Object exposing ``get(url)``; presumably the
            ``requests.Session`` built by ``init_session``.
        server: Base URL the view path is joined onto.
        page: Zero-based page number placed in the ``page`` query argument.
        date_filter: Value for the ``changed`` query argument. ``None`` is
            treated like the empty string (no filter).

    Returns:
        The response object returned by ``session.get``.

    Raises:
        Whatever ``response.raise_for_status()`` raises on an HTTP error.
    """
    # The --date option is optional, so callers may pass None here; without
    # this guard the query string would contain the literal text "None".
    changed = '' if date_filter is None else date_filter
    node_view_endpoint = f"views/preservation_show_node_timestamps?page={page}&changed={changed}"
    response = session.get(urljoin(server, node_view_endpoint))
    response.raise_for_status()
    return response
|
||
# | ||
def get_media_list(session, server, page=0, date_filter=''):
    """Fetch one page of the ``preservation_show_media_timestamps`` view.

    Args:
        session: Object exposing ``get(url)``; presumably the
            ``requests.Session`` built by ``init_session``.
        server: Base URL the view path is joined onto.
        page: Zero-based page number placed in the ``page`` query argument.
        date_filter: Value for the ``changed`` query argument. ``None`` is
            treated like the empty string (no filter).

    Returns:
        The response object returned by ``session.get``.

    Raises:
        Whatever ``response.raise_for_status()`` raises on an HTTP error.
    """
    # The --date option is optional, so callers may pass None here; without
    # this guard the query string would contain the literal text "None".
    changed = '' if date_filter is None else date_filter
    media_view_endpoint = f"views/preservation_show_media_timestamps?page={page}&changed={changed}"
    response = session.get(urljoin(server, media_view_endpoint))
    response.raise_for_status()
    return response
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
""" | ||
Script utility functions | ||
""" | ||
|
||
import json | ||
import subprocess | ||
|
||
from drupal import api as drupalApi | ||
|
||
# build list of ids from Drupal Nodes | ||
def id_list_from_nodes(session, args):
    """Collect node IDs and their change timestamps from the node view.

    Pages through ``drupalApi.get_node_list`` starting at page 0 and stops
    at the first page whose decoded JSON body is empty.

    Args:
        session: Session handed through to ``drupalApi.get_node_list``.
        args: Object providing ``server`` and ``date`` attributes.

    Returns:
        A dict keyed by each row's ``nid[0].value`` whose values are
        ``{"changed": <row's changed[0].value>}``.
    """
    changed_by_nid = {}
    page = 0

    while True:
        response = drupalApi.get_node_list(session, args.server, page, args.date)
        rows = json.loads(response.content)

        # An empty page marks the end of the result set.
        if len(rows) == 0:
            return changed_by_nid

        changed_by_nid.update(
            (row["nid"][0]['value'], {"changed": row['changed'][0]["value"]})
            for row in rows
        )
        page += 1
|
||
|
||
# query media as media changes are not reflected as node revisions | ||
# exclude Drupal Media not attached to a Drupal Node | ||
def id_list_merge_with_media(session, args, node_list):
    """Fold media change timestamps into ``node_list`` in place.

    Pages through ``drupalApi.get_media_list`` until an empty page is
    returned. For each media row that references a parent node (first
    ``field_media_of`` entry's ``target_id``) and has a ``changed`` value,
    the parent is added to ``node_list`` if missing, or its timestamp is
    replaced when the media timestamp compares greater. Media without a
    parent reference are ignored.

    Args:
        session: Session handed through to ``drupalApi.get_media_list``.
        args: Object providing ``server`` and ``date`` attributes.
        node_list: Dict of ``{node_id: {"changed": timestamp}}``; mutated.

    Returns:
        None. The result is delivered through ``node_list``.
    """
    page = 0
    while True:
        response = drupalApi.get_media_list(session, args.server, page, args.date)
        rows = json.loads(response.content)

        # An empty page marks the end of the result set.
        if len(rows) == 0:
            break

        for row in rows:
            parent_nid = None
            if "field_media_of" in row:
                parents = row["field_media_of"]
                if len(parents) >= 1 and "target_id" in parents[0]:
                    parent_nid = parents[0]['target_id']

            changed = row['changed'][0]["value"] if "changed" in row else None

            # Nothing to merge without both a parent node and a timestamp.
            if parent_nid is None or changed is None:
                continue

            # Either the parent node itself did not change, or the media
            # changed more recently than the recorded timestamp.
            if parent_nid not in node_list or node_list[parent_nid]["changed"] < changed:
                node_list[parent_nid] = {"changed": changed}

        page += 1
|
||
# create archival information package | ||
def create_aip(node_list, bagger_app_path):
    """Create an archival information package (AIP) for each listed node.

    Runs, once per node ID and from ``bagger_app_path``, the equivalent of::

        ./bin/console app:islandora_bagger:create_bag -vvv \
            --settings=var/sample_per_bag_config.yaml --node=<id>

    Args:
        node_list: Dict (or other iterable) whose items are node IDs; when a
            dict is given only its keys are used.
        bagger_app_path: Working directory for the command; ``./bin/console``
            is resolved relative to it.

    Raises:
        subprocess.CalledProcessError: If a command exits non-zero
            (``check=True``).
    """
    # Iterating a dict yields its keys directly, so the loop variable is
    # already the node ID (it has no ``.key`` attribute).
    for node_id in node_list:
        subprocess.run(
            [
                './bin/console',
                'app:islandora_bagger:create_bag',
                '-vvv',
                '--settings=var/sample_per_bag_config.yaml',
                f'--node={node_id}',
            ],
            stdout=subprocess.PIPE,
            check=True,
            cwd=bagger_app_path,
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
############################################################################################## | ||
# desc: connect to a Drupal instance, get a list of Drupal Nodes and Media that have changed | ||
# since a supplied date and return a list of Drupal Nodes (e.g., to preserve in an | ||
# AIP - archival information package) | ||
# usage: python3 get_node_id.py --server ${server_name} --output ${output_path} --date '2024-05-16T16:51:52' | ||
# license: CC0 1.0 Universal (CC0 1.0) Public Domain Dedication | ||
# date: June 15, 2022 | ||
############################################################################################## | ||
|
||
from getpass import getpass | ||
from time import sleep | ||
import argparse | ||
import json | ||
import logging | ||
import os | ||
|
||
from drupal import api as drupalApi | ||
from drupal import utilities as drupalUtilities | ||
|
||
# | ||
def parse_args():
    """Parse the script's command-line options.

    Returns:
        An ``argparse.Namespace`` with ``server`` and ``output`` (both
        required), ``date`` (optional), ``wait`` (float, default 0.1) and
        ``logging_level`` (default ``logging.WARNING``).
    """
    option_table = (
        ('--server', {'required': True, 'help': 'Servername.'}),
        ('--output', {'required': True, 'help': 'Location to store JSON (like) output file.'}),
        ('--date', {'required': False, 'help': 'Items changed after the given date.'}),
        ('--wait', {'required': False, 'help': 'Time to wait between API calls.', 'type': float, 'default': 0.1}),
        ('--logging_level', {'required': False, 'help': 'Logging level.', 'default': logging.WARNING}),
    )

    cli = argparse.ArgumentParser()
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)
    return cli.parse_args()
|
||
|
||
# | ||
def process(args, session, output_file):
    """Gather the changed node set and create an AIP for each node.

    Args:
        args: Parsed arguments; passed on to the ``drupalUtilities`` helpers,
            and ``args.BAGGER_APP_PATH`` is read here.
        session: Session passed on to the ``drupalUtilities`` helpers.
        output_file: Open output file. NOTE(review): currently unused here.
    """
    # Resources to preserve: Drupal Node IDs changed since the optional date.
    node_list = drupalUtilities.id_list_from_nodes(session, args)
    print(node_list)

    # A Media change does not transitively update the change timestamp of
    # the associated Node, so Media are inspected separately and the Node
    # IDs of changed Media are merged into the list.
    drupalUtilities.id_list_merge_with_media(session, args, node_list)
    print(node_list)

    # Create the archival information packages.
    drupalUtilities.create_aip(node_list, args.BAGGER_APP_PATH)
|
||
# upload archival information packages | ||
# | ||
def main():
    """Script entry point: parse options, prompt for credentials, run.

    Reads the bagger application directory from the ``BAGGER_APP_PATH``
    environment variable, prompts for a username and password, opens the
    ``--output`` file for writing and hands everything to ``process``.
    """
    args = parse_args()
    # argparse.Namespace does not support item assignment (args['x'] = ...
    # raises TypeError); store the value as an attribute, which is also how
    # process() reads it. The value is None when the variable is unset.
    args.BAGGER_APP_PATH = os.getenv('BAGGER_APP_PATH')

    username = input('Username:')
    password = getpass('Password:')

    session = drupalApi.init_session(args, username, password)

    with open(args.output, 'wt', encoding="utf-8", newline='') as output_file:
        process(args, session, output_file)
|
||
|
||
if __name__ == "__main__": | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
requests>=2.31 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
""" Very quickly written unit tests for a one-time script | ||
""" | ||
|
||
|
||
import csv | ||
import os | ||
import pytest | ||
import pytest_mock | ||
import shutil | ||
import sys | ||
|
||
from swiftclient.service import ClientException, SwiftError, SwiftService, SwiftUploadObject | ||
|
||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||
|