From d408eba679619b4b4cc0e75f383098172c7ce75e Mon Sep 17 00:00:00 2001 From: Alexandre Dulaunoy Date: Tue, 28 Dec 2021 10:45:23 +0100 Subject: [PATCH] chg: [hashlookup] if a file cannot be read (e.g. Permission denied) for hashing Those are skipped, accounted in the nonanalysed files. --- Note: the outer handler ends with `continue` rather than `pass`; `h` is assigned only inside the `try`, so falling through would build the cache paths from an unbound or stale hash. bin/hashlookup-analyser.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/bin/hashlookup-analyser.py b/bin/hashlookup-analyser.py index d8da250..1397460 100644 --- a/bin/hashlookup-analyser.py +++ b/bin/hashlookup-analyser.py @@ -167,18 +167,24 @@ def lookup(value=None): continue sha1 = hashlib.sha1() - with open(fn, 'rb') as f: - try: - size = os.fstat(f.fileno()).st_size - except: - size = 0 - pass - while True: - data = f.read(BUF_SIZE) - if not data: - break - sha1.update(data) - h = sha1.hexdigest().upper() + try: + with open(fn, 'rb') as f: + try: + size = os.fstat(f.fileno()).st_size + except: + size = 0 + pass + while True: + data = f.read(BUF_SIZE) + if not data: + break + sha1.update(data) + h = sha1.hexdigest().upper() + except Exception as e: + sys.stderr.write(f'Unable to read {e} file {fn}\n') + notanalysed_files.append(f'{fn},{e}') + stats['excluded'] += 1 + continue knowncachefile = f'{CACHE_DIR}/known/{h}' cachefile = f'{CACHE_DIR}/unknown/{h}'