From 1aad2e8591ebb0ad00030d8c6b5540cfe6c4cc2a Mon Sep 17 00:00:00 2001 From: T2hhbmEK Date: Sat, 22 Sep 2018 00:16:22 +0800 Subject: [PATCH] add functions to get size from memory buffer --- get_image_size.py | 346 ++++++++++++++++++++++++++-------------------- 1 file changed, 198 insertions(+), 148 deletions(-) diff --git a/get_image_size.py b/get_image_size.py index ec1ef74..0239f63 100644 --- a/get_image_size.py +++ b/get_image_size.py @@ -19,6 +19,7 @@ import collections import json import os +import io import struct FILE_UNKNOWN = "Sorry, don't know how to get size for this file." @@ -72,6 +73,19 @@ def get_image_size(file_path): return (img.width, img.height) +def get_image_size_from_bytesio(input, size): + """ + Return (width, height) for a given img file content - no external + dependencies except the os and struct builtin modules + + Args: + input (io.IOBase): io object support read & seek + size (int): size of buffer in byte + """ + img = get_image_metadata_from_bytesio(input, size) + return (img.width, img.height) + + def get_image_metadata(file_path): """ Return an `Image` object for a given img file content - no external @@ -86,156 +100,172 @@ def get_image_metadata(file_path): size = os.path.getsize(file_path) # be explicit with open arguments - we need binary mode - with open(file_path, "rb") as input: - height = -1 - width = -1 - data = input.read(26) - msg = " raised while trying to decode as JPEG." - - if (size >= 10) and data[:6] in (b'GIF87a', b'GIF89a'): - # GIFs - imgtype = GIF - w, h = struct.unpack("= 10) and data[:6] in (b'GIF87a', b'GIF89a'): + # GIFs + imgtype = GIF + w, h = struct.unpack("= 24) and data.startswith(b'\211PNG\r\n\032\n') + and (data[12:16] == b'IHDR')): + # PNGs + imgtype = PNG + w, h = struct.unpack(">LL", data[16:24]) + width = int(w) + height = int(h) + elif (size >= 16) and data.startswith(b'\211PNG\r\n\032\n'): + # older PNGs + imgtype = PNG + w, h = struct.unpack(">LL", data[8:16]) + width = int(w) + height = int(h) + elif (size >= 2) and data.startswith(b'\377\330'): + # JPEG + imgtype = JPEG + input.seek(0) + input.read(2) + b = input.read(1) + try: + while (b and ord(b) != 0xDA): + while (ord(b) != 0xFF): + b = input.read(1) + while (ord(b) == 0xFF): + b = input.read(1) + if (ord(b) >= 0xC0 and ord(b) <= 0xC3): + input.read(3) + h, w = struct.unpack(">HH", input.read(4)) + break + else: + input.read( + int(struct.unpack(">H", input.read(2))[0]) - 2) + b = input.read(1) width = int(w) height = int(h) - elif ((size >= 24) and data.startswith(b'\211PNG\r\n\032\n') - and (data[12:16] == b'IHDR')): - # PNGs - imgtype = PNG - w, h = struct.unpack(">LL", data[16:24]) + except struct.error: + raise UnknownImageFormat("StructError" + msg) + except ValueError: + raise UnknownImageFormat("ValueError" + msg) + except Exception as e: + raise UnknownImageFormat(e.__class__.__name__ + msg) + elif (size >= 26) and data.startswith(b'BM'): + # BMP + imgtype = 'BMP' + headersize = struct.unpack("= 16) and data.startswith(b'\211PNG\r\n\032\n'): - # older PNGs - imgtype = PNG - w, h = struct.unpack(">LL", data[8:16]) + elif headersize >= 40: + w, h = struct.unpack("= 2) and data.startswith(b'\377\330'): - # JPEG - imgtype = JPEG - input.seek(0) - input.read(2) - b = input.read(1) - try: - while (b and ord(b) != 0xDA): - while (ord(b) != 0xFF): - b = input.read(1) - while (ord(b) == 0xFF): - b = input.read(1) - if (ord(b) >= 0xC0 and ord(b) <= 0xC3): - input.read(3) - h, w = struct.unpack(">HH", input.read(4)) - break - else: - input.read( - int(struct.unpack(">H", input.read(2))[0]) - 2) - b = input.read(1) - width = int(w) - height = int(h) - except struct.error: - raise UnknownImageFormat("StructError" + msg) - except ValueError: - raise UnknownImageFormat("ValueError" + msg) - except Exception as e: - raise UnknownImageFormat(e.__class__.__name__ + msg) - elif (size >= 26) and data.startswith(b'BM'): - # BMP - imgtype = 'BMP' - headersize = struct.unpack("= 40: - w, h = struct.unpack("= 8) and data[:4] in (b"II\052\000", b"MM\000\052"): - # Standard TIFF, big- or little-endian - # BigTIFF and other different but TIFF-like formats are not - # supported currently - imgtype = TIFF - byteOrder = data[:2] - boChar = ">" if byteOrder == "MM" else "<" - # maps TIFF type id to size (in bytes) - # and python format char for struct - tiffTypes = { - 1: (1, boChar + "B"), # BYTE - 2: (1, boChar + "c"), # ASCII - 3: (2, boChar + "H"), # SHORT - 4: (4, boChar + "L"), # LONG - 5: (8, boChar + "LL"), # RATIONAL - 6: (1, boChar + "b"), # SBYTE - 7: (1, boChar + "c"), # UNDEFINED - 8: (2, boChar + "h"), # SSHORT - 9: (4, boChar + "l"), # SLONG - 10: (8, boChar + "ll"), # SRATIONAL - 11: (4, boChar + "f"), # FLOAT - 12: (8, boChar + "d") # DOUBLE - } - ifdOffset = struct.unpack(boChar + "L", data[4:8])[0] - try: - countSize = 2 - input.seek(ifdOffset) - ec = input.read(countSize) - ifdEntryCount = struct.unpack(boChar + "H", ec)[0] - # 2 bytes: TagId + 2 bytes: type + 4 bytes: count of values + 4 - # bytes: value offset - ifdEntrySize = 12 - for i in range(ifdEntryCount): - entryOffset = ifdOffset + countSize + i * ifdEntrySize - input.seek(entryOffset) - tag = input.read(2) - tag = struct.unpack(boChar + "H", tag)[0] - if(tag == 256 or tag == 257): - # if type indicates that value fits into 4 bytes, value - # offset is not an offset but value itself - type = input.read(2) - type = struct.unpack(boChar + "H", type)[0] - if type not in tiffTypes: - raise UnknownImageFormat( - "Unkown TIFF field type:" + - str(type)) - typeSize = tiffTypes[type][0] - typeChar = tiffTypes[type][1] - input.seek(entryOffset + 8) - value = input.read(typeSize) - value = int(struct.unpack(typeChar, value)[0]) - if tag == 256: - width = value - else: - height = value - if width > -1 and height > -1: - break - except Exception as e: - raise UnknownImageFormat(str(e)) - elif size >= 2: - # see http://en.wikipedia.org/wiki/ICO_(file_format) - imgtype = 'ICO' - input.seek(0) - reserved = input.read(2) - if 0 != struct.unpack(" 1: - import warnings - warnings.warn("ICO File contains more than one image") - # http://msdn.microsoft.com/en-us/library/ms997538.aspx - w = input.read(1) - h = input.read(1) - width = ord(w) - height = ord(h) + # as h is negative when stored upside down + height = abs(int(h)) else: + raise UnknownImageFormat( + "Unkown DIB header size:" + + str(headersize)) + elif (size >= 8) and data[:4] in (b"II\052\000", b"MM\000\052"): + # Standard TIFF, big- or little-endian + # BigTIFF and other different but TIFF-like formats are not + # supported currently + imgtype = TIFF + byteOrder = data[:2] + boChar = ">" if byteOrder == "MM" else "<" + # maps TIFF type id to size (in bytes) + # and python format char for struct + tiffTypes = { + 1: (1, boChar + "B"), # BYTE + 2: (1, boChar + "c"), # ASCII + 3: (2, boChar + "H"), # SHORT + 4: (4, boChar + "L"), # LONG + 5: (8, boChar + "LL"), # RATIONAL + 6: (1, boChar + "b"), # SBYTE + 7: (1, boChar + "c"), # UNDEFINED + 8: (2, boChar + "h"), # SSHORT + 9: (4, boChar + "l"), # SLONG + 10: (8, boChar + "ll"), # SRATIONAL + 11: (4, boChar + "f"), # FLOAT + 12: (8, boChar + "d") # DOUBLE + } + ifdOffset = struct.unpack(boChar + "L", data[4:8])[0] + try: + countSize = 2 + input.seek(ifdOffset) + ec = input.read(countSize) + ifdEntryCount = struct.unpack(boChar + "H", ec)[0] + # 2 bytes: TagId + 2 bytes: type + 4 bytes: count of values + 4 + # bytes: value offset + ifdEntrySize = 12 + for i in range(ifdEntryCount): + entryOffset = ifdOffset + countSize + i * ifdEntrySize + input.seek(entryOffset) + tag = input.read(2) + tag = struct.unpack(boChar + "H", tag)[0] + if(tag == 256 or tag == 257): + # if type indicates that value fits into 4 bytes, value + # offset is not an offset but value itself + type = input.read(2) + type = struct.unpack(boChar + "H", type)[0] + if type not in tiffTypes: + raise UnknownImageFormat( + "Unkown TIFF field type:" + + str(type)) + typeSize = tiffTypes[type][0] + typeChar = tiffTypes[type][1] + input.seek(entryOffset + 8) + value = input.read(typeSize) + value = int(struct.unpack(typeChar, value)[0]) + if tag == 256: + width = value + else: + height = value + if width > -1 and height > -1: + break + except Exception as e: + raise UnknownImageFormat(str(e)) + elif size >= 2: + # see http://en.wikipedia.org/wiki/ICO_(file_format) + imgtype = 'ICO' + input.seek(0) + reserved = input.read(2) + if 0 != struct.unpack(" 1: + import warnings + warnings.warn("ICO File contains more than one image") + # http://msdn.microsoft.com/en-us/library/ms997538.aspx + w = input.read(1) + h = input.read(1) + width = ord(w) + height = ord(h) + else: + raise UnknownImageFormat(FILE_UNKNOWN) return Image(path=file_path, type=imgtype, @@ -258,15 +288,35 @@ class Test_get_image_size(unittest.TestCase): def setUp(self): pass + def test_get_image_size_from_bytesio(self): + img = self.data[0] + p = img['path'] + with io.open(p, 'rb') as fp: + b = fp.read() + fp = io.BytesIO(b) + sz = len(b) + output = get_image_size_from_bytesio(fp, sz) + self.assertTrue(output) + self.assertEqual(output, + (img['width'], + img['height'])) + + def test_get_image_metadata_from_bytesio(self): + img = self.data[0] + p = img['path'] + with io.open(p, 'rb') as fp: + b = fp.read() + fp = io.BytesIO(b) + sz = len(b) + output = get_image_metadata_from_bytesio(fp, sz) + self.assertTrue(output) + for field in image_fields: + self.assertEqual(getattr(output, field), None if field == 'path' else img[field]) + def test_get_image_metadata(self): img = self.data[0] output = get_image_metadata(img['path']) self.assertTrue(output) - self.assertEqual(output.path, img['path']) - self.assertEqual(output.width, img['width']) - self.assertEqual(output.height, img['height']) - self.assertEqual(output.type, img['type']) - self.assertEqual(output.file_size, img['file_size']) for field in image_fields: self.assertEqual(getattr(output, field), img[field])