diff --git a/piexif/_exif.py b/piexif/_exif.py index c68d5ab..fc0200d 100644 --- a/piexif/_exif.py +++ b/piexif/_exif.py @@ -1,3 +1,6 @@ +from struct import calcsize + + class TYPES: Byte = 1 Ascii = 2 @@ -13,6 +16,29 @@ class TYPES: DFloat = 12 +TYPE_FORMAT = { + TYPES.Byte: 'B', + TYPES.Ascii: None, + TYPES.Short: 'H', + TYPES.Long: 'L', + TYPES.Rational: 'LL', + TYPES.SByte: 'b', + TYPES.Undefined: None, + TYPES.SShort: 'h', + TYPES.SLong: 'l', + TYPES.SRational: 'll', + TYPES.Float: 'f', + TYPES.DFloat: 'd', +} + + +TYPE_LENGTH = { + t: calcsize('=' + f) + for t, f in TYPE_FORMAT.items() + if f +} + + SIMPLE_NUMERICS = [ TYPES.Byte, TYPES.Short, diff --git a/piexif/_load.py b/piexif/_load.py index da91426..497ad51 100644 --- a/piexif/_load.py +++ b/piexif/_load.py @@ -34,8 +34,7 @@ def load(input_data, key_is_name=False): else: exifReader.endian_mark = ">" - pointer = unpack_from(exifReader.endian_mark + "L", - exifReader.tiftag, 4)[0] + pointer, = unpack_from(exifReader.endian_mark + "L", exifReader.tiftag, 4) exif_dict["0th"] = exifReader.get_ifd_dict(pointer, "0th") first_ifd_pointer = exif_dict["0th"].pop("first_ifd_pointer") if ImageIFD.ExifTag in exif_dict["0th"]: @@ -48,8 +47,7 @@ def load(input_data, key_is_name=False): pointer = exif_dict["Exif"][ExifIFD.InteroperabilityTag] exif_dict["Interop"] = exifReader.get_ifd_dict(pointer, "Interop") if first_ifd_pointer != b"\x00\x00\x00\x00": - pointer = unpack_from(exifReader.endian_mark + "L", - first_ifd_pointer)[0] + pointer, = unpack_from(exifReader.endian_mark + "L", first_ifd_pointer) exif_dict["1st"] = exifReader.get_ifd_dict(pointer, "1st") if (ImageIFD.JPEGInterchangeFormat in exif_dict["1st"] and ImageIFD.JPEGInterchangeFormatLength in exif_dict["1st"]): @@ -103,35 +101,71 @@ def __init__(self, data): else: raise InvalidImageDataError("Given file is neither JPEG nor TIFF.") + def _unpack_from(self, format, pointer): + return unpack_from(self.endian_mark + format, self.tiftag, pointer) + + def _read_tag(self, pointer): + tag, value_type, value_num = self._unpack_from("HHL", pointer) + if value_type not in TYPE_FORMAT: + return None + value_length = TYPE_LENGTH.get(value_type, 1) * value_num + if value_length > 4: + data_pointer, = self._unpack_from("L", pointer + 8) + else: + data_pointer = pointer + 8 + if data_pointer + value_length > len(self.tiftag): + return None + + format = TYPE_FORMAT.get(value_type, None) + + if format is None: + raw_value = self.tiftag[data_pointer:data_pointer+value_length] + # Ascii, Undefined and unknown types + if value_type == TYPES.Ascii: + # Crop ending zero + raw_value = raw_value.split(b'\0')[0] + values = (raw_value, ) + else: + # Unpacked types + values = self._unpack_from(format * value_num, data_pointer) + # Collate rationals + if len(format) > 1: + values = zip(*[iter(values)] * len(format)) + return tag, value_type, tuple(values) + def get_ifd_dict(self, pointer, ifd_name, read_unknown=False): ifd_dict = {} - tag_count = unpack_from(self.endian_mark + "H", - self.tiftag, pointer)[0] + if pointer > len(self.tiftag) - 2: + return {} + tag_count, = self._unpack_from("H", pointer) offset = pointer + 2 + tag_count = min(tag_count, (len(self.tiftag) - offset) // 12) if ifd_name in ["0th", "1st"]: t = "Image" else: t = ifd_name for x in range(tag_count): pointer = offset + 12 * x - tag, value_type, value_num = unpack_from( - self.endian_mark + "HHL", self.tiftag, pointer) - value = self.tiftag[pointer+8: pointer+12] - v_set = (value_type, value_num, value, tag) + read_result = self._read_tag(pointer) + if not read_result: + # Skip broken tags + continue + tag, value_type, values = read_result if tag in TAGS[t]: - converted = self.convert_value(v_set) expected_value_type = TAGS[t][tag]['type'] if value_type != expected_value_type: try: - converted = coerce(converted, value_type, expected_value_type) + values = coerce(values, value_type, expected_value_type) except ValueError: # Skip if coercion failed continue - if isinstance(converted, tuple) and (len(converted) == 1): - converted = converted[0] - ifd_dict[tag] = converted + if len(values) == 1: + values = values[0] + ifd_dict[tag] = values elif read_unknown: - ifd_dict[tag] = (v_set[0], v_set[1], v_set[2], self.tiftag) + value_num, = self._unpack_from("L", pointer + 4) + pointer_or_value = self.tiftag[pointer + 8: pointer + 12] + ifd_dict[tag] = value_type, value_num, pointer_or_value, self.tiftag else: pass @@ -140,96 +174,6 @@ def get_ifd_dict(self, pointer, ifd_name, read_unknown=False): ifd_dict["first_ifd_pointer"] = self.tiftag[pointer:pointer + 4] return ifd_dict - def convert_value(self, val): - data = None - t = val[0] - length = val[1] - value = val[2] - - if t == TYPES.Byte: # BYTE - if length > 4: - pointer = unpack_from(self.endian_mark + "L", value)[0] - data = unpack_from("B" * length, self.tiftag, pointer) - else: - data = unpack_from("B" * length, value) - elif t == TYPES.Ascii: # ASCII - if length > 4: - pointer = unpack_from(self.endian_mark + "L", value)[0] - data = self.tiftag[pointer: pointer+length - 1] - else: - data = value[0: length - 1] - elif t == TYPES.Short: # SHORT - if length > 2: - pointer = unpack_from(self.endian_mark + "L", value)[0] - data = unpack_from(self.endian_mark + "H" * length, - self.tiftag, pointer) - else: - data = unpack_from(self.endian_mark + "H" * length, value) - elif t == TYPES.Long: # LONG - if length > 1: - pointer = unpack_from(self.endian_mark + "L", value)[0] - data = unpack_from(self.endian_mark + "L" * length, - self.tiftag, pointer) - else: - data = unpack_from(self.endian_mark + "L" * length, value) - elif t == TYPES.Rational: # RATIONAL - pointer = unpack_from(self.endian_mark + "L", value)[0] - data = tuple( - unpack_from(self.endian_mark + "LL", - self.tiftag, pointer + x * 8) - for x in range(length) - ) - elif t == TYPES.SByte: # SIGNED BYTES - if length > 4: - pointer = unpack_from(self.endian_mark + "L", value)[0] - data = unpack_from("b" * length, self.tiftag, pointer) - else: - data = unpack_from("b" * length, value) - elif t == TYPES.Undefined: # UNDEFINED BYTES - if length > 4: - pointer = unpack_from(self.endian_mark + "L", value)[0] - data = self.tiftag[pointer: pointer+length] - else: - data = value[0: length] - elif t == TYPES.SShort: # SIGNED SHORT - if length > 2: - pointer = unpack_from(self.endian_mark + "L", value)[0] - data = unpack_from(self.endian_mark + "h" * length, - self.tiftag, pointer) - else: - data = unpack_from(self.endian_mark + "h" * length, value) - elif t == TYPES.SLong: # SLONG - if length > 1: - pointer = unpack_from(self.endian_mark + "L", value)[0] - data = unpack_from(self.endian_mark + "l" * length, - self.tiftag, pointer) - else: - data = unpack_from(self.endian_mark + "l" * length, value) - elif t == TYPES.SRational: # SRATIONAL - pointer = unpack_from(self.endian_mark + "L", value)[0] - data = tuple( - unpack_from(self.endian_mark + "ll", - self.tiftag, pointer + x * 8) - for x in range(length) - ) - elif t == TYPES.Float: # FLOAT - if length > 1: - pointer = unpack_from(self.endian_mark + "L", value)[0] - data = unpack_from(self.endian_mark + "f" * length, - self.tiftag, pointer) - else: - data = unpack_from(self.endian_mark + "f" * length, value) - elif t == TYPES.DFloat: # DOUBLE - pointer = unpack_from(self.endian_mark + "L", value)[0] - data = unpack_from(self.endian_mark + "d" * length, - self.tiftag, pointer) - else: - raise ValueError("Exif might be wrong. Got incorrect value " + - "type to decode.\n" + - "tag: " + str(val[3]) + "\ntype: " + str(t)) - - return data - def _get_key_name_dict(exif_dict): new_dict = { @@ -246,7 +190,7 @@ def coerce(value, type, target): if target == TYPES.Undefined: if type == TYPES.Byte: # Interpret numbers as byte values, to fit Pillow behaviour - return b''.join(min(x, 255).to_bytes(1, 'big') for x in value) + return ( bytes(value), ) elif target in SIMPLE_NUMERICS: if type in SIMPLE_NUMERICS: return value diff --git a/tests/s_test.py b/tests/s_test.py index 5d105de..1d1b619 100644 --- a/tests/s_test.py +++ b/tests/s_test.py @@ -589,14 +589,12 @@ def _compare_value(self, v1, v2): self.assertEqual(struct.pack("B", v2), v1) elif isinstance(v1, bytes) and isinstance(v2, str): try: - self.assertEqual(v1, v2.encode("latin1")) + # PIL does not crop at zero byte, do it here + self.assertEqual(v1, zero_crop(v2.encode("latin1"))) except: self.assertEqual(v1, v2) else: - try: - self.assertEqual(v1, v2.encode("latin1")) - except: - self.assertEqual(v1, v2) + assert False else: self.assertEqual(v1, v2) @@ -644,21 +642,32 @@ def _compare_piexifDict_PILDict(self, piexifDict, pilDict, p=True): class UTests(unittest.TestCase): def test_ExifReader_return_unknown(self): b1 = b"MM\x00\x2a\x00\x00\x00\x08" - b2 = b"\x00\x01" + b"\xff\xff\x00\x00\x00\x00" + b"\x00\x00\x00\x00" + b2 = b"\x00\x01" + b"\xff\xff\x00\x01\x00\x00\x00\x01" + b"\x00\x00\x00\x00" er = piexif._load._ExifReader(b1 + b2) if er.tiftag[0:2] == b"II": er.endian_mark = "<" else: er.endian_mark = ">" ifd = er.get_ifd_dict(8, "0th", True) - self.assertEqual(ifd[65535][0], 0) - self.assertEqual(ifd[65535][1], 0) - self.assertEqual(ifd[65535][2], b"\x00\x00") + self.assertEqual(ifd[65535][0], 1) + self.assertEqual(ifd[65535][1], 1) + self.assertEqual(ifd[65535][2], b"\x00\x00\x00\x00") - def test_ExifReader_convert_value_fail(self): - er = piexif._load._ExifReader(I1) - with self.assertRaises(ValueError): - er.convert_value((None, None, None, None)) + def test_truncated_ifd(self): + b1 = b"MM\x00\x2a\x00\x00\x00\x08" + b2 = b"\xff\xff" + b"\x00\x0b\x00\x02\x00\x00\x00\x04" + b"FOO\x00" + er = piexif._load._ExifReader(b1 + b2) + er.endian_mark = ">" + ifd = er.get_ifd_dict(8, "0th", True) + self.assertEqual(ifd[ImageIFD.ProcessingSoftware], b"FOO") + + def test_ascii_zero(self): + b1 = b"MM\x00\x2a\x00\x00\x00\x08" + b2 = b"\x00\x01" + b"\x00\x0b\x00\x02\x00\x00\x00\x04" + b"F\x00OO" + er = piexif._load._ExifReader(b1 + b2) + er.endian_mark = ">" + ifd = er.get_ifd_dict(8, "0th", True) + self.assertEqual(ifd[ImageIFD.ProcessingSoftware], b"F") def test_split_into_segments_fail1(self): with self.assertRaises(InvalidImageDataError): @@ -1052,5 +1061,8 @@ def suite(): return suite +def zero_crop(x): + return x.split(b'\0')[0] + if __name__ == '__main__': unittest.main()