forked from NMAAHC/nmaahcmm
-
Notifications
You must be signed in to change notification settings - Fork 0
/
verifySIP
executable file
·178 lines (167 loc) · 11.5 KB
/
verifySIP
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
#!/bin/bash
# verifySIP creates a temporary XML of a SIP and compares it against an expected directory structure, using XML Starlet
# verifySIP checks for key files in each SIP: the master file, the derivative file, and the MD5 file (if expected within that package)
# load nmaahcmmfunctions into this script
# Directory that holds this script. dirname is used instead of "${0%/*}" because it
# also yields "." when the script is invoked with no slash in its path
# (e.g. "bash verifySIP"), where "${0%/*}" would return the script name itself.
SCRIPT_PATH="$(dirname "${0}")"
# confirm the shared function library exists BEFORE trying to source it
[[ -f "${SCRIPT_PATH}/nmaahcmmfunctions" ]] || { echo "Missing '${SCRIPT_PATH}/nmaahcmmfunctions'. Exiting." ; exit 1 ;};
. "${SCRIPT_PATH}/nmaahcmmfunctions"
_initialize_make # safe script termination process defined in nmaahcmmfunctions
# list dependencies required by script; tree is included because it builds the XML listing of every package
DEPENDENCIES=(xmlstarlet tree grep egrep)
_check_dependencies "${DEPENDENCIES[@]}" # defined in nmaahcmmfunctions
## USAGE
# Print the help text for this script and terminate.
# Arguments: none
# Outputs:   the help text on stdout
# Exits:     always; "exit" is called without a status, so the script ends with
#            the status of the last command run here (the final echo)
_usage(){
  echo
  echo "$(basename "${0}")"
  echo
  echo "This application will create a temporary tree of a SIP and compare it against an expected directory structure, using a series of xpath statements. It outputs mismatches and unexpected items found in AIPs into the terminal window."
  echo
  # ${0} is quoted so a script path containing spaces is not split into several basename arguments
  echo "Usage: $(basename "${0}") [ -x | -f | -v | -d | -u ] /path/to/SIP"
  echo " "
  echo " -x Package type: Digitized Film (DPX package)"
  echo " -f Package type: Digitized Film (MOV, MKV, MP4 master files)"
  echo " -v Package type: Digitized Analog Video (vrecord package)"
  echo " -d Package type: Transferred DV (MOV, MKV, MP4 master files)"
  echo " -u Package type: Other/Unknown"
  echo " -h display this help"
  echo
  echo
  echo "If you specify 'Other/Unknown', or do not supply a package type, the script will check for the following files or directories and apply rules based on their presence:"
  echo " * .dpx files -> script assumes Digitized Film (DPX package)"
  echo " * directories named after derivatives, specifically 'MP4_2048x1152' or 'ProRes_2048x1536' -> script assumes Digitized Film (MOV, MKV, MP4 master files)"
  echo " * capture_options.log and/or qctools.xml.gz files -> script assumes Digitized Analog Video (vrecord package)"
  echo " * string 'DV' in filenames -> script assumes Transferred DV (MOV, MKV, MP4 master files)"
  echo " * a file whose name ends in 'generalNotes.txt' -> script assumes a Great Migration package"
  echo "If your package does not fall into any of the above categories, the script will apply a simpler baseline set of comparisons."
  echo
  echo "See the nmaahcmm README for more information on package definitions."
  echo
  exit
}
# if the command is run with no arguments at all then _usage is called
[[ "${#}" -eq 0 ]] && _usage
# getopts loop: each package-type flag sets PACKAGETYPE; when several are given the last one wins
OPTIND=1
while getopts ":xfvduh" OPT ; do
  case "${OPT}" in
    x) PACKAGETYPE="FILM_DPX" ;;
    f) PACKAGETYPE="FILM_FILES" ;;
    v) PACKAGETYPE="VIDEO_VRECORD" ;;
    d) PACKAGETYPE="VIDEO_DV" ;;
    u) PACKAGETYPE="OTHER_UNKNOWN" ;;
    h) _usage ;; # if the operator runs "[scriptname] -h" then the _usage text above will display in the terminal
    *) echo "Invalid option -${OPTARG}" ; _usage ;; # if the operator tries to use an option other than the ones listed above, the _usage text will display in the terminal
  esac
done
# drop the parsed options so that only package paths remain in the positional parameters
shift $(( OPTIND - 1 ))
# options alone are not enough: without at least one package path nothing would be
# verified, yet the script would still finish with its "good to go" message
[[ "${#}" -eq 0 ]] && { echo "No package path supplied." ; _usage ; }
# define _runtest function
# Run one conformance check and report on its outcome.
# Usage:     _runtest [-i] LABEL COMMAND [ARG...]
# Options:   -i  invert the test: report LABEL when COMMAND prints nothing
# Arguments: LABEL    message reported when the test trips
#            COMMAND  command (with its arguments) whose stdout is examined
# Globals:   XMLRESULT (written) - stdout of COMMAND; deliberately left global
#            because the calling code inspects it after the call
# Outputs:   when the test trips, LABEL via "_report -r" followed by COMMAND's
#            output (if any); when COMMAND exits non-zero, LABEL plus an error
#            report via "_report -rt" and COMMAND's output
_runtest(){
  # OPT and OPTIND are local so that parsing "-i" here never disturbs the getopts state of the caller
  local OPT OPTIND=1
  # by default, the test reports positively that a specified file or directory was found
  # if the test is inverted, it will report positively that the file or directory is missing
  local INVERT_TEST="N"
  while getopts ":i" OPT ; do
    case "${OPT}" in
      i) INVERT_TEST="Y" ;;
    esac
  done
  shift $(( OPTIND - 1 ))
  local LABEL="${1}"
  local RUN_ERR=""
  shift
  XMLRESULT=$("${@}") # the result (stdout) of the xpath statement
  RUN_ERR="${?}" # the exit status of the xpath command
  if [[ "${RUN_ERR}" != 0 ]] ; then
    # the label is passed as data, never as the printf format, so a '%' or '\' in it is printed literally
    printf '%s' "${LABEL}"
    _report -rt "ERROR: Running: \"${*}\" gave Error Code ${RUN_ERR}"
    echo "${XMLRESULT}"
  fi
  # if the xpath statement finds a file that the test is looking for (default test); || OR if the xpath statement does NOT find a file and the test is NOT looking for one (inverted test);
  if [[ -n "${XMLRESULT}" && "${INVERT_TEST}" != "Y" ]] || [[ -z "${XMLRESULT}" && "${INVERT_TEST}" == "Y" ]] ; then
    _report -r "${LABEL}" # print the message associated with each xpath query
    if [[ -n "${XMLRESULT}" ]] ; then
      echo "${XMLRESULT}"
    fi
  fi
}
# log script beginning
_log -b
echo
# remember the package type requested on the command line (possibly empty) so that a type
# auto-detected for one package is never carried over to the packages that follow it
REQUESTED_PACKAGETYPE="${PACKAGETYPE}"
# input the package(s): every remaining argument is treated as the path of one package
while [[ "${#}" -gt 0 ]] ; do
  PACKAGE="${1}"
  shift
  PACKAGETYPE="${REQUESTED_PACKAGETYPE}"
  if [[ ! -d "${PACKAGE}" ]] ; then
    _report -rt "The package you supplied is not a directory! Moving to next package if present..."
  else
    MEDIAID=$(basename "${PACKAGE}") # the name of the package being verified
    echo "running verifySIP on ${PACKAGE}..."
    # make a tree of the package files and directories
    # NOTE(review): SCRIPTDIR is not set anywhere in this script (only SCRIPT_PATH is); presumably nmaahcmmfunctions defines it - confirm
    "${SCRIPTDIR}/removeDSStore" "${PACKAGE}"
    TEMPTREE=$(_maketemp)
    tree -DaNXs --du --timefmt "%Y-%m-%dT%H:%M:%SZ" -I "tree.xml" "${PACKAGE}" > "${TEMPTREE}"
    # if no package type is assigned, look for telltale files and try to assign type
    # (grep patterns are quoted so that "\." reaches grep as a literal dot instead of being unescaped by the shell)
    if [[ -z "${PACKAGETYPE}" || "${PACKAGETYPE}" == "OTHER_UNKNOWN" ]] ; then
      echo "Package type unknown; trying to determine package type based on files."
      if grep -qi '\.dpx' "${TEMPTREE}" ; then # look for DPX files
        echo "Based on the presence of .dpx files, this SIP is a DPX package."
        PACKAGETYPE="FILM_DPX"
      elif grep -qiE "ProRes_2048x1536|MP4_2048x1152" "${TEMPTREE}" ; then # look for directories named after derivatives
        echo "Based on directory names, this SIP is digitized film in non-DPX format."
        PACKAGETYPE="FILM_FILES"
      elif grep -qiE 'capture_options\.log|qctools\.xml\.gz' "${TEMPTREE}" ; then # look for sidecar files generated in the vrecord process
        echo "Based on the presence of a vrecord log, this SIP is digitized video."
        PACKAGETYPE="VIDEO_VRECORD"
      elif grep -qi "dv" "${TEMPTREE}" ; then # look for filenames incorporating the string 'DV'
        echo "Based on the presence of a file with 'DV' in the filename, this SIP represents a transferred DV tape."
        PACKAGETYPE="VIDEO_DV"
      elif grep -qF "generalNotes.txt" "${TEMPTREE}" ; then # look for the note file ending in "generalNotes.txt" generated by the makegm process
        echo "Based on the presence of a filename ending in 'generalNotes.txt', this SIP represents a Great Migration appointment."
        PACKAGETYPE="GM"
      else # packages that don't fit into these categories
        _report -r "Cannot identify SIP type! Running generic package tests."
      fi
    fi
    # run package conformance tests
    case "${PACKAGETYPE}" in
      FILM_DPX)
        echo "Checking for DPX package conformance..."
        grep -qi '\.wav' "${TEMPTREE}" || _report -r "This package is missing one or more WAV files!"
        grep -qi '\.mov' "${TEMPTREE}" || _report -r "This package is missing one or more MOV files!"
        grep -qi '\.md5' "${TEMPTREE}" || _report -r "This package is missing checksums!"
        ;;
      FILM_FILES)
        echo "Checking for non-DPX film package conformance..."
        _runtest -i "This package is missing a ProRes master file!" xmlstarlet sel -t -v "/tree/directory/directory/directory/directory/directory/directory[@name='ProRes_2048x1536']/file[substring(@name,string-length(@name)-2)='mov']/@name" -n "${TEMPTREE}"
        _runtest -i "This package is missing an MP4 derivative file!" xmlstarlet sel -t -v "/tree/directory/directory/directory/directory/directory/directory[@name='MP4_2048x1152']/file[substring(@name,string-length(@name)-2)='mp4']/@name" -n "${TEMPTREE}"
        ;;
      VIDEO_VRECORD)
        echo "Checking for digitized video package conformance..."
        _runtest -i "This package is missing an MKV master file!" xmlstarlet sel -t -v "/tree/directory/file[substring(@name,string-length(@name)-2)='mkv']/@name" -n "${TEMPTREE}"
        _runtest -i "No MP4 file in top directory, checking for MP4 file in subdirectory..." xmlstarlet sel -t -v "/tree/directory/file[substring(@name,string-length(@name)-2)='mp4']/@name" -n "${TEMPTREE}"
        # _runtest leaves the xpath output in XMLRESULT; only fall back to the subdirectory
        # check when the top-level check found nothing, so that a package whose MP4 sits in
        # the top directory is not reported as missing its derivative
        if [[ -z "${XMLRESULT}" ]] ; then
          _runtest -i "This package is missing an MP4 derivative file!" xmlstarlet sel -t -v "/tree/directory/directory/file[substring(@name,string-length(@name)-2)='mp4']/@name" -n "${TEMPTREE}"
          if [[ -n "${XMLRESULT}" ]] ; then _report -g "MP4 derivative file found. Disregard line above." ; fi
        fi
        _runtest -i "This package is missing a frameMD5 file!" xmlstarlet sel -t -v "/tree/directory/file[substring(@name,string-length(@name)-7)='framemd5']/@name" -n "${TEMPTREE}"
        ;;
      VIDEO_DV)
        echo "Checking for DV package conformance..."
        _runtest -i "This package is missing an MOV master file!" xmlstarlet sel -t -v "/tree/directory/file[substring(@name,string-length(@name)-2)='mov']/@name" -n "${TEMPTREE}"
        _runtest -i "This package is missing an MP4 derivative file!" xmlstarlet sel -t -v "/tree/directory/directory[@name='derivative']/file[substring(@name,string-length(@name)-2)='mp4']/@name" -n "${TEMPTREE}"
        _runtest -i "This package is missing an MD5 file!" xmlstarlet sel -t -v "/tree/directory/file[substring(@name,string-length(@name)-2)='md5']/@name" -n "${TEMPTREE}"
        ;;
      GM)
        echo "Checking for Great Migration package conformance..."
        _runtest -i "This package is missing one or more ACCESS directories!" xmlstarlet sel -t -v "/tree/directory/directory/directory/directory[@name='ACCESS']/@name" -n "${TEMPTREE}"
        _runtest -i "This package is missing one or more PRESERVATION directories!" xmlstarlet sel -t -v "/tree/directory/directory/directory/directory[@name='PRESERVATION']/@name" -n "${TEMPTREE}"
        _runtest -i "This package is missing a general notes file!" xmlstarlet sel -t -v "/tree/directory/directory/file[substring(@name,string-length(@name)-15)='generalNotes.txt']/@name" -n "${TEMPTREE}"
        ;;
    esac
    # FOR ALL PACKAGES:
    #look for unexpected directories within the package
    if [[ "${PACKAGETYPE}" != "FILM_FILES" && "${PACKAGETYPE}" != "GM" ]] ; then # the non-DPX digitized film packages have an extra subdirectory with an unpredictable name
      _runtest "There are unexpected subdirectories in your package!" xmlstarlet sel -t -v "/tree/directory/directory[@name!='derivative' and @name!='MP4_2048x1152' and @name!='ProRes_2048x1536' and @name!='DPX' and @name!='MOV' and @name!='WAV' and @name!='ACCESS' and @name!='PRESERVATION']/@name" -n "${TEMPTREE}"
    fi
    #find any empty files
    _runtest "There are empty files in your package!:" xmlstarlet sel -t -v "//file[@size='0']/@name" -n "${TEMPTREE}"
    #make sure no file names contain colons
    _runtest "There are improperly named files in this package!" xmlstarlet sel -t -v "//file[contains(@name,':')]/@name" -n "${TEMPTREE}"
    #find any hidden files
    _runtest "There are hidden files in this package!" xmlstarlet sel -t -v "//file[starts-with(@name,'.')]/@name" -n "${TEMPTREE}"
  fi
done
# log script ending
_log -e
echo
_report -g "verifySIP process complete."
_report -g "If there was no output during this test after the 'Checking for conformance...' message, your package is good to go."