-
Notifications
You must be signed in to change notification settings - Fork 2
/
diff-pdf-page-statistics.py
397 lines (344 loc) · 21.4 KB
/
diff-pdf-page-statistics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
#!/usr/bin/python3
# NOTE: you probably need to raise ImageMagick's disk cache limit in /etc/ImageMagick-*/policy.xml, for example:
# <policy domain="resource" name="disk" value="16GiB"/>
# find docx/ -name "*.docx" -execdir basename {} \; | xargs -L1 -I{} python3 ../diff-pdf-page-statistics.py --save_overlay=False --base_file="{}"
# - delete any CSV files. Delete the import/export folders in converted
# - rename converted DOC/PPT/XLS pdfs from .docx_mso.pdf to .doc_mso.pdf, etc.
# WARNING: this is not a tool for administrative statistics; there are just too many false positives for counts of red dots to be meaningful.
# It is only meant to be useful for QA in identifying regressions.
# Given:
# - a document (in the "download/file_type/" folder)
# - an "authoritative" _mso.PDF of how the document should look (also in the "download/file_type/" folder)
# - a Collabora .PDF of the document (in the "converted/original_file_type" folder)
# - a Microsoft .original_file_type_mso.PDF of the Collabora-round-tripped file (in the "converted/original_file_type" folder)
# - history folder: a copy of the converted folder from a previous Collabora version (identifying the commit range to search for regressions)
#
# Running the tool:
# - cd into the history folder (or specify the folder with --history_dir=)
# - ../diff-pdf-page-statistics.py --base_file=document_name.ext
# - look at the import/export overlay PNG results in the converted folder
#
# False positives:
# - automatically updating fields: dates, =rand(), slide date/time ...
# -
import argparse
import os
import wand # pip install wand && OS_INSTALLER install imagemagick
from wand.image import Image
from wand.exceptions import PolicyError
from wand.exceptions import CacheError
import time
# Upper bound on the number of pages compared per document: main() clamps its
# page count to this value and also uses it as the stand-in page count for a
# history PDF that does not exist.
MAX_PAGES = 10 # limit PDF comparison to the first ten pages
def printdebug(debug, *args, **kwargs):
    """
    Forward a message to print() only when debugging is switched on.

    Parameters:
    debug: Truthy to emit the message; falsy to stay silent.
    *args: Positional arguments handed to print() unchanged.
    **kwargs: Keyword arguments handed to print() unchanged.
    """
    if not debug:
        return
    print(*args, **kwargs)
def _str_to_bool(value):
    """Convert a command-line value such as "False" or "yes" into a bool.

    argparse delivers option values as strings and every non-empty string is
    truthy, so without this conversion "--debug=False" would enable debugging.

    Raises:
        argparse.ArgumentTypeError: if the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ("1", "true", "t", "yes", "y", "on"):
        return True
    if text in ("0", "false", "f", "no", "n", "off"):
        return False
    raise argparse.ArgumentTypeError("expected a boolean (true/false), got %r" % (value,))


def _acquire_lock(lock_file, owner, debug):
    """Block until this process has created lock_file, then store owner in it."""
    while True:
        try:
            # O_EXCL turns "does it exist? then create it" into one atomic
            # step, so two parallel runs can never both believe they own it.
            fd = os.open(lock_file, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
        except FileExistsError:
            printdebug(debug, "DEBUG: waiting for file to unlock")
            time.sleep(1)  # second
            continue
        with os.fdopen(fd, 'w') as f:
            f.write(owner)
        printdebug(debug, "DEBUG LOCK[", owner, "]")
        return


def _any_page_worse(current, previous):
    """Return True if any page has more red pixels now than in the previous run."""
    return any(now > before for now, before in zip(current, previous))


def _overlay_page(target_image, overlay_pdf, pgnum):
    """Composite page pgnum of overlay_pdf (made gray, background transparent) onto target_image."""
    with target_image.sequence[pgnum] as page:
        overlay_pdf.transparent_color(overlay_pdf.background_color, 0, fuzz=overlay_pdf.quantum_range * 0.05)
        overlay_pdf.sequence[pgnum].transform_colorspace('gray')
        page.composite(overlay_pdf.sequence[pgnum])
        page.merge_layers('flatten')
        page.alpha_channel = 'remove'


def _count_red(image, pgnum, red_color, debug, label):
    """Return the histogram count of red_color on page pgnum, or 0 if it cannot be read."""
    try:
        return image.sequence[pgnum].histogram[red_color]
    except Exception as err:  # best effort: typically the colour is simply absent from the histogram
        printdebug(debug, label + " EXCEPTION: could not get red color from page ", pgnum, err)
        return 0


def main():
    """Compare the PDF renderings of one document and append per-page statistics.

    Steps, for the document named by --base_file:
      1. Locate the reference PDF, the Collabora PDF and the round-tripped PDF
         (plus optional copies of the latter two under --history_dir).
      2. Paint the content of every reference page red and overlay the other
         renderings on top; red pixels that remain visible are counted.
      3. Save the overlay PNGs when --save_overlay is true, or when any page
         shows more red than the history run did.
      4. Append rows to the import/export/anomalies CSV files in the current
         directory while holding a lock file.

    Raises:
        SystemExit: 0 for a deliberately skipped file; 1 when a required PDF is
            missing or ImageMagick refuses to read it; 2 (from argparse) for
            invalid command-line arguments.
    """
    parser = argparse.ArgumentParser(description="Look for import and export regressions.")
    parser.add_argument("--base_file", default="lorem ipsum.docx")
    parser.add_argument("--history_dir", default=".")
    # Parse real booleans; a bare "--debug" / "--save_overlay" means True.
    parser.add_argument("--save_overlay", type=_str_to_bool, nargs="?", const=True, default=True)
    parser.add_argument("--debug", type=_str_to_bool, nargs="?", const=True, default=False)
    args = parser.parse_args()

    DEBUG = args.debug

    if (
        args.base_file == 'forum-mso-de-108371.xlsx'  # =rand()
    ):
        print("SKIPPING FILE", args.base_file, ": determined to be unusable for testing...")
        raise SystemExit(0)

    print("Processing: ", args.base_file)

    # When run from inside the history folder, the data folders live one level up.
    base_dir = "./"
    if args.history_dir == "." and not os.path.isdir('download') and os.path.isdir(os.path.join("..", 'download')):
        base_dir = "../"

    file_ext = os.path.splitext(args.base_file)
    stem = file_ext[0]
    ext = file_ext[1][1:]  # extension without the dot; also used as the per-type folder name

    MS_ORIG = os.path.join(base_dir, "download", ext, args.base_file + "_mso.pdf")
    LO_ORIG = os.path.join(base_dir, "converted", ext, stem + ".pdf")
    MS_CONV = os.path.join(base_dir, "converted", ext, args.base_file + "_mso.pdf")
    for label, path in (
        ("original PDF file", MS_ORIG),
        ("Collabora PDF file", LO_ORIG),
        ("MS converted PDF file", MS_CONV),
    ):
        if not os.path.isfile(path):
            print(label + " [" + path + "] not found")
            raise SystemExit(1)

    # This tool is not very useful without having a previous run to compare against.
    # However, it still can create overlay images, which might be of some value.
    LO_PREV = os.path.join(args.history_dir, ext, stem + ".pdf")
    IS_FILE_LO_PREV = os.path.isfile(LO_PREV)
    MS_PREV = os.path.join(args.history_dir, ext, args.base_file + "_mso.pdf")
    IS_FILE_MS_PREV = os.path.isfile(MS_PREV)

    # exist_ok avoids a crash when a parallel run creates the folder first.
    IMPORT_DIR = os.path.join(base_dir, "converted", "import", ext)
    os.makedirs(IMPORT_DIR, exist_ok=True)
    IMPORT = os.path.join(IMPORT_DIR, args.base_file + "_import.png")
    PREV_IMPORT = os.path.join(IMPORT_DIR, args.base_file + "_prev-import.png")

    EXPORT_DIR = os.path.join(base_dir, "converted", "export", ext)
    os.makedirs(EXPORT_DIR, exist_ok=True)
    EXPORT = os.path.join(EXPORT_DIR, args.base_file + "_export.png")
    PREV_EXPORT = os.path.join(EXPORT_DIR, args.base_file + "_prev-export.png")

    IMPORT_COMPARE_DIR = os.path.join(base_dir, "converted", "import-compare", ext)
    os.makedirs(IMPORT_COMPARE_DIR, exist_ok=True)
    IMPORT_COMPARE = os.path.join(IMPORT_COMPARE_DIR, args.base_file + "_import-compare.png")

    EXPORT_COMPARE_DIR = os.path.join(base_dir, "converted", "export-compare", ext)
    os.makedirs(EXPORT_COMPARE_DIR, exist_ok=True)
    EXPORT_COMPARE = os.path.join(EXPORT_COMPARE_DIR, args.base_file + "_export-compare.png")

    try:
        # The "correct" PDF: created by MS Word of the original file
        MS_ORIG_PDF = Image(filename=MS_ORIG, resolution=150)

        # A PDF of how it is displayed in Writer - to be compared to MS_ORIG
        LO_ORIG_PDF = Image(filename=LO_ORIG, resolution=150)

        # A PDF of how MS Word displays Writer's round-tripped file - to be compared to MS_ORIG
        MS_CONV_PDF = Image(filename=MS_CONV, resolution=150)

        # A historical version of how it was displayed in Writer
        LO_PREV_PDF = Image()
        LO_PREV_PAGES = MAX_PAGES  # neutral value for min() below when there is no history
        if IS_FILE_LO_PREV:
            LO_PREV_PDF = Image(filename=LO_PREV, resolution=150)
            LO_PREV_PAGES = len(LO_PREV_PDF.sequence)

        # A historical version of how the round-tripped file was displayed in Word
        MS_PREV_PDF = Image()
        MS_PREV_PAGES = MAX_PAGES
        if IS_FILE_MS_PREV:
            MS_PREV_PDF = Image(filename=MS_PREV, resolution=150)
            MS_PREV_PAGES = len(MS_PREV_PDF.sequence)
    except PolicyError:
        print("Warning: Operation not allowed due to security policy restrictions for PDF files.")
        print("Please modify the '/etc/ImageMagick-6/policy.xml' file to allow PDF processing.")
        print("<policy domain=\"coder\" rights=\"read\" pattern=\"PDF\" />")
        raise SystemExit(1)
    except CacheError as e:
        print("Exception message: ", str(e))
        print("You probably need to increase the cache allowed in /etc/ImageMagick-6/policy.xml")
        print("<policy domain=\"resource\" name=\"disk\" value=\"16GiB\"/>")
        raise SystemExit(1)

    # Only pages present in every rendering can be compared.
    pages = min(MAX_PAGES, len(MS_ORIG_PDF.sequence), len(LO_ORIG_PDF.sequence), len(MS_CONV_PDF.sequence), LO_PREV_PAGES, MS_PREV_PAGES)
    printdebug(DEBUG, "DEBUG ", args.base_file, " pages[", pages, "] ", MAX_PAGES, len(MS_ORIG_PDF.sequence), len(LO_ORIG_PDF.sequence), len(MS_CONV_PDF.sequence), len(LO_PREV_PDF.sequence), len(MS_PREV_PDF.sequence))

    MS_ORIG_SIZE = []     # total number of pixels on the page
    MS_ORIG_CONTENT = []  # the number of non-background pixels
    LO_ORIG_SIZE = []
    LO_ORIG_CONTENT = []
    MS_CONV_SIZE = []
    MS_CONV_CONTENT = []
    LO_PREV_SIZE = []
    LO_PREV_CONTENT = []
    MS_PREV_SIZE = []
    MS_PREV_CONTENT = []

    RED_COLOR = []        # the exact color of red: used as the histogram key
    IMPORT_RED = []       # the number of red pixels on the page
    EXPORT_RED = []
    PREV_IMPORT_RED = []
    PREV_EXPORT_RED = []

    for pgnum in range(pages):
        # need this 'with' clause so that MS_ORIG_PDF is actually updated with the following changes
        with MS_ORIG_PDF.sequence[pgnum] as page:
            MS_ORIG_SIZE.append(page.height * page.width)
            page.alpha_channel = 'remove'  # so that 'red' will be painted as 'red' and not some transparent-ized shade of red
            page.quantize(2)  # reduced to two colors (assume background and non-background)
            page.opaque_paint('black', 'red', fuzz=MS_ORIG_PDF.quantum_range * 0.95)

            hist = page.histogram  # read once so keys and values come from the same snapshot
            HIST_COLORS = list(hist.keys())
            HIST_PIXELS = list(hist.values())
            MS_ORIG_CONTENT.append(min(HIST_PIXELS))
            printdebug(DEBUG, "DEBUG MS_WORD_ORIG ", args.base_file, " colorspace[" + page.colorspace +"] size[", MS_ORIG_SIZE[pgnum], "] content[", MS_ORIG_CONTENT[pgnum], "] PIXELS ", HIST_PIXELS, " COLORS ", HIST_COLORS)

            # assuming that the background is at least 50%. Might be a bad assumption - especially with presentations.
            # NOTE: this logic might not be necessary any more. I used it before removing the transparency - so perhaps I can always trust that 'red' will be 'red' now
            if MS_ORIG_CONTENT[pgnum] == HIST_PIXELS[0]:
                RED_COLOR.append(HIST_COLORS[0])
            else:
                RED_COLOR.append(HIST_COLORS[1])
            printdebug(DEBUG, "DEBUG: RED_COLOR ", RED_COLOR[pgnum].normalized_string, " pixels[",MS_ORIG_CONTENT[pgnum] == HIST_PIXELS[0],"][", MS_ORIG_CONTENT[pgnum],"][",HIST_PIXELS[0],"]")

    # Composed image: overlay red MS_ORIG with LO_ORIG
    IMPORT_IMAGE = MS_ORIG_PDF.clone()
    # Composed image: overlay red MS_ORIG with MS_CONV
    EXPORT_IMAGE = MS_ORIG_PDF.clone()
    # Composed image: overlay red MS_ORIG with LO_PREV
    PREV_IMPORT_IMAGE = MS_ORIG_PDF.clone()
    # Composed image: overlay red MS_ORIG with MS_PREV
    PREV_EXPORT_IMAGE = MS_ORIG_PDF.clone()

    # Composed image: overlay red LO_ORIG with LO_PREV
    # This is the visual key to the whole tool. The overlay should be identical except for import fixes or regressions
    IMPORT_COMPARE_IMAGE = LO_ORIG_PDF.clone()
    # Composed image: overlay red MS_CONV with MS_PREV
    # This is the visual key to the whole tool. The overlay should be identical except for export fixes or regressions
    EXPORT_COMPARE_IMAGE = MS_CONV_PDF.clone()

    for pgnum in range(pages):
        # Statistics are taken on throw-away clones so the PDFs themselves are
        # not quantized; 'with' releases each clone as soon as it was measured.
        with LO_ORIG_PDF.clone() as tmp, tmp.sequence[pgnum] as page:
            page.quantize(2)
            hist = page.histogram
            hist_pixels = list(hist.values())
            LO_ORIG_SIZE.append(page.height * page.width)
            LO_ORIG_CONTENT.append(min(hist_pixels))  # assuming that the background is more than 50%
            printdebug(DEBUG, "DEBUG LO_ORIG[", pgnum, "] size[", LO_ORIG_SIZE[pgnum], "] content[", LO_ORIG_CONTENT[pgnum], "] percent[", (LO_ORIG_CONTENT[pgnum] / LO_ORIG_SIZE[pgnum]), "] colorspace[", page.colorspace, "] background[", page.background_color, "] ", hist_pixels, list(hist.keys()))

        with MS_CONV_PDF.clone() as tmp, tmp.sequence[pgnum] as page:
            page.quantize(2)
            hist = page.histogram
            hist_pixels = list(hist.values())
            MS_CONV_SIZE.append(page.height * page.width)
            MS_CONV_CONTENT.append(min(hist_pixels))
            printdebug(DEBUG, "DEBUG MS_CONV[", pgnum, "] size[", MS_CONV_SIZE[pgnum], "] content[", MS_CONV_CONTENT[pgnum], "] ", hist_pixels, list(hist.keys()), " percent[", MS_CONV_CONTENT[pgnum] / MS_CONV_SIZE[pgnum], "]")

        if IS_FILE_LO_PREV:
            with LO_PREV_PDF.clone() as tmp, tmp.sequence[pgnum] as page:
                page.quantize(2)
                hist = page.histogram
                hist_pixels = list(hist.values())
                LO_PREV_SIZE.append(page.height * page.width)
                LO_PREV_CONTENT.append(min(hist_pixels))
                printdebug(DEBUG, "DEBUG LO_PREV[", pgnum, "] size[", LO_PREV_SIZE[pgnum], "] content[", LO_PREV_CONTENT[pgnum], "] ", hist_pixels, list(hist.keys()), " percent[", LO_PREV_CONTENT[pgnum] / LO_PREV_SIZE[pgnum], "]")

        if IS_FILE_MS_PREV:
            with MS_PREV_PDF.clone() as tmp, tmp.sequence[pgnum] as page:
                page.quantize(2)
                hist = page.histogram
                hist_pixels = list(hist.values())
                MS_PREV_SIZE.append(page.height * page.width)
                MS_PREV_CONTENT.append(min(hist_pixels))
                printdebug(DEBUG, "DEBUG MS_PREV[", pgnum, "] size[", MS_PREV_SIZE[pgnum], "] content[", MS_PREV_CONTENT[pgnum], "] ", hist_pixels, list(hist.keys()), " percent[", MS_PREV_CONTENT[pgnum] / MS_PREV_SIZE[pgnum], "]")

        # overlay (red) MS_ORIG with LO_ORIG
        _overlay_page(IMPORT_IMAGE, LO_ORIG_PDF, pgnum)
        IMPORT_RED.append(_count_red(IMPORT_IMAGE, pgnum, RED_COLOR[pgnum], DEBUG, "IMPORT"))

        # overlay (red) MS_ORIG with MS_CONV
        _overlay_page(EXPORT_IMAGE, MS_CONV_PDF, pgnum)
        EXPORT_RED.append(_count_red(EXPORT_IMAGE, pgnum, RED_COLOR[pgnum], DEBUG, "EXPORT"))

        PREV_IMPORT_RED.append(0)
        if IS_FILE_LO_PREV:
            # overlay (red) MS_ORIG with LO_PREV
            _overlay_page(PREV_IMPORT_IMAGE, LO_PREV_PDF, pgnum)
            PREV_IMPORT_RED[pgnum] = _count_red(PREV_IMPORT_IMAGE, pgnum, RED_COLOR[pgnum], DEBUG, "PREV_IMPORT")

            with IMPORT_COMPARE_IMAGE.sequence[pgnum] as page:
                page.quantize(2)
                page.opaque_paint('black', 'red', fuzz=LO_ORIG_PDF.quantum_range * 0.95)
                page.composite(LO_PREV_PDF.sequence[pgnum])  # overlay (red) LO_ORIG with LO_PREV
                page.merge_layers('flatten')
                page.alpha_channel = 'remove'

        PREV_EXPORT_RED.append(0)
        if IS_FILE_MS_PREV:
            # overlay (red) MS_ORIG with MS_PREV
            _overlay_page(PREV_EXPORT_IMAGE, MS_PREV_PDF, pgnum)
            PREV_EXPORT_RED[pgnum] = _count_red(PREV_EXPORT_IMAGE, pgnum, RED_COLOR[pgnum], DEBUG, "PREV_EXPORT")

            # NOTE(review): unlike the import comparison above, the alpha channel
            # is not removed here - confirm whether that difference is intended.
            with EXPORT_COMPARE_IMAGE.sequence[pgnum] as page:
                page.quantize(2)
                page.opaque_paint('black', 'red', fuzz=MS_CONV_PDF.quantum_range * 0.95)
                page.composite(MS_PREV_PDF.sequence[pgnum])  # overlay (red) MS_CONV with MS_PREV
                page.merge_layers('flatten')

    # Always keep the overlays when any page got worse than in the history run.
    # Comparing page by page matters: comparing the lists directly would let an
    # improvement on an early page hide a regression on a later one.
    FORCE_SAVE = (
        (IS_FILE_LO_PREV and _any_page_worse(IMPORT_RED, PREV_IMPORT_RED))
        or (IS_FILE_MS_PREV and _any_page_worse(EXPORT_RED, PREV_EXPORT_RED))
    )

    if args.save_overlay or FORCE_SAVE:
        printdebug(DEBUG, "DEBUG saving " + args.base_file +" IMPORT["+ str(IMPORT_RED)+ "] PREV["+str(PREV_IMPORT_RED) +"] EXPORT["+str(EXPORT_RED)+"] PREV["+str(PREV_EXPORT_RED)+"]")
        IMPORT_IMAGE.save(filename=IMPORT)
        EXPORT_IMAGE.save(filename=EXPORT)
        if IS_FILE_LO_PREV:
            PREV_IMPORT_IMAGE.save(filename=PREV_IMPORT)
            IMPORT_COMPARE_IMAGE.save(filename=IMPORT_COMPARE)
        if IS_FILE_MS_PREV:
            PREV_EXPORT_IMAGE.save(filename=PREV_EXPORT)
            EXPORT_COMPARE_IMAGE.save(filename=EXPORT_COMPARE)

    # allow the script to run in parallel - wait for lock on report to be released.
    LOCK_FILE = "diff-pdf-" + ext + "-statistics.lock"
    _acquire_lock(LOCK_FILE, args.base_file, DEBUG)
    try:
        with open('diff-pdf-' + ext + '-import-statistics.csv', 'a') as f:
            for pgnum in range(pages):
                OUT_STRING = [
                    args.base_file,
                    str(pgnum),
                    str(MS_ORIG_SIZE[pgnum]),
                    str(MS_ORIG_CONTENT[pgnum]),
                    str(MS_ORIG_CONTENT[pgnum] / MS_ORIG_SIZE[pgnum]),
                    str(LO_ORIG_SIZE[pgnum]),
                    str(LO_ORIG_CONTENT[pgnum]),
                    str(LO_ORIG_CONTENT[pgnum] / LO_ORIG_SIZE[pgnum]),
                    str(IMPORT_RED[pgnum]),
                    str(IMPORT_RED[pgnum] / LO_ORIG_CONTENT[pgnum]),
                ]
                if IS_FILE_LO_PREV:
                    OUT_STRING += [
                        str(LO_PREV_SIZE[pgnum]),
                        str(LO_PREV_CONTENT[pgnum]),
                        str(LO_PREV_CONTENT[pgnum] / LO_PREV_SIZE[pgnum]),
                        str(PREV_IMPORT_RED[pgnum]),
                        str(PREV_IMPORT_RED[pgnum] / LO_PREV_CONTENT[pgnum]),
                    ]
                f.write(','.join(OUT_STRING) + '\n')

        with open('diff-pdf-' + ext + '-export-statistics.csv', 'a') as f:
            for pgnum in range(pages):
                OUT_STRING = [
                    args.base_file,
                    str(pgnum),
                    str(MS_ORIG_SIZE[pgnum]),
                    str(MS_ORIG_CONTENT[pgnum]),
                    str(MS_ORIG_CONTENT[pgnum] / MS_ORIG_SIZE[pgnum]),
                    str(MS_CONV_SIZE[pgnum]),
                    str(MS_CONV_CONTENT[pgnum]),
                    str(MS_CONV_CONTENT[pgnum] / MS_CONV_SIZE[pgnum]),
                    str(EXPORT_RED[pgnum]),
                    str(EXPORT_RED[pgnum] / MS_CONV_CONTENT[pgnum]),
                ]
                if IS_FILE_MS_PREV:
                    OUT_STRING += [
                        str(MS_PREV_SIZE[pgnum]),
                        str(MS_PREV_CONTENT[pgnum]),
                        str(MS_PREV_CONTENT[pgnum] / MS_PREV_SIZE[pgnum]),
                        str(PREV_EXPORT_RED[pgnum]),
                        str(PREV_EXPORT_RED[pgnum] / MS_PREV_CONTENT[pgnum]),
                    ]
                f.write(','.join(OUT_STRING) + '\n')

        with open('diff-pdf-' + ext + '-statistics-anomalies.csv', 'a') as f:
            if IS_FILE_LO_PREV and len(LO_ORIG_PDF.sequence) != LO_PREV_PAGES:
                f.write(args.base_file + ",import,page count different from " + args.history_dir + '\n')
            if IS_FILE_MS_PREV and len(MS_CONV_PDF.sequence) != MS_PREV_PAGES:
                f.write(args.base_file + ",export,page count different from " + args.history_dir + '\n')

            pure_red = wand.color.Color('red')
            for pgnum, red in enumerate(RED_COLOR):
                printdebug(DEBUG, "DEBUG: red[", red, "] compared to wand.color.Color('red') on page " + str(pgnum))
                if red != pure_red:
                    if MS_ORIG_SIZE[pgnum] != MS_ORIG_CONTENT[pgnum]:  # false positive: blank page
                        # The colour is written as text and quoted because its
                        # string form may itself contain commas.
                        f.write(args.base_file + ",red color,\"" + str(red) + "\"\n")
    finally:
        # Release the lock even if writing a report failed, so parallel runs
        # are not left waiting forever.
        os.remove(LOCK_FILE)
    return
# Script entry point: run the comparison only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()