-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathocrd-im6convert
executable file
·102 lines (84 loc) · 3.11 KB
/
ocrd-im6convert
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/usr/bin/env bash
# shellcheck disable=SC2086
#
# ocrd-im6convert - top-level setup.
#
# Enables strict mode, verifies that the `ocrd` command and a sufficiently
# recent bash are available, and fetches the constants that main() reads.

set -eu
set -o pipefail

# `command -v` is a shell builtin; unlike `which` it cannot itself be missing,
# so a failure here really means that ocrd is not reachable.
command -v ocrd >/dev/null 2>&1 || { echo >&2 "ocrd not in \$PATH. Panicking"; exit 1; }

# Require bash >= 4.4.
if ((BASH_VERSINFO[0] < 4 || (BASH_VERSINFO[0] == 4 && BASH_VERSINFO[1] < 4))); then
    echo >&2 "bash $BASH_VERSION is too old. Please install bash 4.4 or newer."
    exit 1
fi

# Directory containing this script; main() looks for ocrd-tool.json in it.
SHAREDIR="$(cd "$(dirname "$0")" >/dev/null && pwd)"

# NOTE(review): MIMETYPE_PAGE is not referenced anywhere else in this script;
# presumably kept for code sourced later - confirm before removing.
MIMETYPE_PAGE=$(ocrd bashlib constants MIMETYPE_PAGE)

# Lookup table from MIME type to file name extension, used by main().
declare -A MIME_TO_EXT
# NOTE(review): this evaluates the output of `ocrd bashlib constants` as shell
# code (presumably a list of [mime]=ext pairs - confirm). That is acceptable
# only as long as the `ocrd` executable found in $PATH is trusted.
eval "MIME_TO_EXT=( $(ocrd bashlib constants MIME_TO_EXT) )"
#######################################
# Convert every image file of the input file group with ImageMagick's
# `convert` and register each result in the output file group.
# Globals:
#   SHAREDIR     (read) directory that contains ocrd-tool.json
#   MIME_TO_EXT  (read) maps the requested output MIME type to an extension
#   ocrd__argv   (read) keys used: working_dir, page_id, input_file_grp,
#                output_file_grp, overwrite, mets_server_url
#   params       (read) keys used: output-format, input-options, output-options
#   NOTE(review): ocrd__argv and params are not set in this file; presumably
#   ocrd__wrap (from the sourced lib.bash) fills them - confirm.
# Arguments:
#   "$@" - the script's command line, passed on unchanged to ocrd__wrap
# Outputs:
#   Writes converted files to <output_file_grp>/ below the working directory
#   and logs through `ocrd log`.
# Returns:
#   Status of the last command; under the caller's `set -e` any failing
#   step aborts the script.
#######################################
main () {
    # Load ocrd bashlib functions
    # shellcheck source=../core/ocrd/bashlib/lib.bash
    source "$(ocrd bashlib filename)"
    ocrd__minversion 2.58.1

    # Describe calling script to lib.bash
    ocrd__wrap "$SHAREDIR/ocrd-tool.json" "ocrd-im6convert" "$@"

    cd "${ocrd__argv[working_dir]}"
    local page_id in_file_grp out_file_grp
    page_id=${ocrd__argv[page_id]:-}
    in_file_grp=${ocrd__argv[input_file_grp]}
    out_file_grp=${ocrd__argv[output_file_grp]}
    mkdir -p "$out_file_grp"

    # Set output extension
    local output_extension
    output_extension="${MIME_TO_EXT[${params['output-format']}]}"

    # Set input and output options. The parameter values are deliberately
    # word-split so that one string can carry several convert arguments.
    local -a input_options output_options
    # shellcheck disable=SC2206
    input_options=(${params['input-options']})
    # shellcheck disable=SC2206
    output_options=(${params['output-options']})

    # Options for `ocrd workspace find`; the page filter is only passed
    # when a page ID was requested.
    local -a find_options=()
    if [[ -n "$page_id" ]]; then
        find_options+=( -g "$page_id" )
    fi
    find_options+=( -G "$in_file_grp" -k local_filename -k ID -k pageId --download )

    # Download the files and collect one record per line. The output is
    # captured first so that a failing `ocrd workspace find` is not masked,
    # and read with mapfile so that no pathname expansion is applied to it.
    local listing
    listing=$(ocrd workspace find "${find_options[@]}")
    local -a files=()
    if [[ -n "$listing" ]]; then
        mapfile -t files <<<"$listing"
    fi

    # Options placed before the `add` subcommand. They are the same for every
    # file, so build them once here; re-declaring the array inside the loop
    # does not reset it and made -U pile up on every iteration.
    local -a workspace_options=()
    if [[ -n "${ocrd__argv[mets_server_url]:-}" ]]; then
        workspace_options+=( -U "${ocrd__argv[mets_server_url]}" )
    fi

    local n=0 csv in_file in_id pageid out_id out_file
    local -a fields add_options
    for csv in "${files[@]}"; do
        # Blank lines carry no record
        [[ -n "$csv" ]] || continue
        n=$((n + 1))

        # Parse tab-separated fields: local_filename, ID, pageId
        IFS=$'\t' read -r -a fields <<<"$csv"
        in_file="${fields[0]}"
        in_id="${fields[1]}"
        pageid="${fields[2]:-}"

        if [[ ! -f "$in_file" ]]; then
            ocrd log error "input file \"$in_file\" ID=${in_id} (pageId=${pageid}) is not on disk"
            continue
        fi

        # Output file ID: the input ID with the input group name replaced
        # (matched literally) by the output group name ...
        out_id="${in_id//"$in_file_grp"/$out_file_grp}"
        if [[ "$out_id" == "$in_id" ]]; then
            # ... or, if that changed nothing, the output group name plus the
            # running number padded to at least four digits. printf keeps
            # numbers of five or more digits intact.
            printf -v out_id '%s_%04d' "$out_file_grp" "$n"
        fi
        out_file="$out_file_grp/${out_id}$output_extension"

        # Actual conversion
        ocrd log info "processing image input file $in_id ($pageid)"
        convert "${input_options[@]}" "$in_file" "${output_options[@]}" "$out_file"

        # Add the output file to the workspace
        add_options=()
        if [[ -n "$pageid" ]]; then
            add_options+=( -g "$pageid" )
        fi
        if [[ "${ocrd__argv[overwrite]:-}" == true ]]; then
            add_options+=( --force )
        fi
        add_options+=(
            -G "$out_file_grp"
            -m "${params['output-format']}"
            -i "$out_id"
            "$out_file"
        )
        ocrd workspace "${workspace_options[@]}" add "${add_options[@]}"
    done
}
# Run the processor, forwarding the script's command-line arguments unchanged.
main "$@"