Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RBT-273] Faulty exif fixes and code cleanup #4

Merged
merged 11 commits into from
Jan 21, 2022
26 changes: 26 additions & 0 deletions piexif/_exif.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from struct import calcsize


class TYPES:
Byte = 1
Ascii = 2
Expand All @@ -13,6 +16,29 @@ class TYPES:
DFloat = 12


# struct format code(s) describing ONE value of each TIFF/Exif value type.
# ``None`` marks the types that are not unpacked through struct here
# (Ascii and Undefined); rational types use two codes per value.
TYPE_FORMAT = dict((
    (TYPES.Byte, 'B'),
    (TYPES.Ascii, None),
    (TYPES.Short, 'H'),
    (TYPES.Long, 'L'),
    (TYPES.Rational, 'LL'),
    (TYPES.SByte, 'b'),
    (TYPES.Undefined, None),
    (TYPES.SShort, 'h'),
    (TYPES.SLong, 'l'),
    (TYPES.SRational, 'll'),
    (TYPES.Float, 'f'),
    (TYPES.DFloat, 'd'),
))


# Size in bytes of one value of every type that has a struct format.
# The '=' prefix asks struct for standard sizes without alignment padding,
# so the result does not depend on the platform. Types whose format is
# ``None`` are left out of this table.
TYPE_LENGTH = dict(
    (type_id, calcsize('=' + code))
    for type_id, code in TYPE_FORMAT.items()
    if code
)


SIMPLE_NUMERICS = [
TYPES.Byte,
TYPES.Short,
Expand Down
155 changes: 52 additions & 103 deletions piexif/_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,35 +103,74 @@ def __init__(self, data):
else:
raise InvalidImageDataError("Given file is neither JPEG nor TIFF.")

def _unpack_from(self, format, pointer):
    """Unpack ``format`` from ``self.tiftag`` starting at ``pointer``.

    ``self.endian_mark`` is prepended to ``format`` so the values are read
    with the byte order recorded on this reader.  Returns the tuple
    produced by ``unpack_from``.
    """
    layout = self.endian_mark + format
    return unpack_from(layout, self.tiftag, pointer)

def _read_tag(self, pointer):
    """Decode the 12-byte IFD entry that starts at ``pointer``.

    The entry is read as tag (2 bytes), value type (2 bytes), value count
    (4 bytes) and a trailing 4-byte field.  When the whole value fits in
    4 bytes it is stored in that field; otherwise the field holds the
    offset of the value inside ``self.tiftag``.

    Returns:
        ``(tag, value_type, values)`` where ``values`` is always a tuple:
        one ``bytes`` item for Ascii, Undefined and unknown types (Ascii
        is cut at its first NUL byte), the unpacked numbers for
        single-code types, and one tuple per value for multi-code
        (rational) types.

    Raises:
        Whatever ``self._unpack_from`` raises when the entry or its data
        does not fit in the buffer (presumably ``struct.error``, which the
        IFD loop in this file catches to skip broken tags).
    """
    tag, value_type, value_num = self._unpack_from("HHL", pointer)
    # Treat unknown types as `Undefined` (one byte per value).
    value_length = TYPE_LENGTH.get(value_type, 1) * value_num
    if value_length > 4:
        data_pointer = self._unpack_from("L", pointer + 8)[0]
    else:
        data_pointer = pointer + 8

    fmt = TYPE_FORMAT.get(value_type, None)

    if fmt is None:
        # Ascii, Undefined and unknown types are returned as raw bytes.
        raw_value = self.tiftag[data_pointer:data_pointer + value_length]
        if value_type == TYPES.Ascii:
            # Crop at the terminating zero.
            raw_value = raw_value.split(b'\0')[0]
        return tag, value_type, (raw_value, )

    stride = len(fmt)
    if len(set(fmt)) == 1:
        # ``value_num`` comes straight from the file and can be as large
        # as 2**32 - 1.  Repeating the code that many times would allocate
        # a multi-gigabyte format string (and may raise MemoryError, which
        # callers do not expect) before the unpack could even fail.  A
        # struct repeat count keeps the format a few characters long, so a
        # bogus count fails cheaply inside the unpack call instead.
        packed_format = "%d%s" % (stride * value_num, fmt[0])
    else:
        # Mixed-code formats cannot be expressed with one repeat count.
        # No such format exists in TYPE_FORMAT today; this path keeps the
        # plain repetition and is NOT protected against huge counts.
        packed_format = fmt * value_num
    values = self._unpack_from(packed_format, data_pointer)

    if stride > 1:
        # Collate rationals: regroup the flat numbers into one tuple per
        # value.
        values = tuple(
            values[start:start + stride]
            for start in range(0, len(values), stride)
        )
    return tag, value_type, tuple(values)

def get_ifd_dict(self, pointer, ifd_name, read_unknown=False):
ifd_dict = {}
tag_count = unpack_from(self.endian_mark + "H",
self.tiftag, pointer)[0]
if pointer > len(self.tiftag) - 2:
return {}
tag_count = self._unpack_from("H", pointer)[0]
ifd_length = 2 + 12 * tag_count
if pointer > len(self.tiftag) - ifd_length:
# Truncate IFD
tag_count = (len(self.tiftag) - 2) // 12
offset = pointer + 2
mikhail-iurkov marked this conversation as resolved.
Show resolved Hide resolved
if ifd_name in ["0th", "1st"]:
t = "Image"
else:
t = ifd_name
for x in range(tag_count):
pointer = offset + 12 * x
tag, value_type, value_num = unpack_from(
self.endian_mark + "HHL", self.tiftag, pointer)
value = self.tiftag[pointer+8: pointer+12]
v_set = (value_type, value_num, value, tag)
try:
tag, value_type, values = self._read_tag(pointer)
except struct.error:
# Skip broken tags
continue
if tag in TAGS[t]:
converted = self.convert_value(v_set)
expected_value_type = TAGS[t][tag]['type']
if value_type != expected_value_type:
try:
converted = coerce(converted, value_type, expected_value_type)
values = coerce(values, value_type, expected_value_type)
except ValueError:
# Skip if coercion failed
continue
if isinstance(converted, tuple) and (len(converted) == 1):
converted = converted[0]
ifd_dict[tag] = converted
if len(values) == 1:
values = values[0]
ifd_dict[tag] = values
elif read_unknown:
ifd_dict[tag] = (v_set[0], v_set[1], v_set[2], self.tiftag)
value_num, = self._unpack_from("L", pointer + 4)
pointer_or_value = self.tiftag[pointer + 8: pointer + 12]
ifd_dict[tag] = value_type, value_num, pointer_or_value, self.tiftag
else:
pass

Expand All @@ -140,96 +179,6 @@ def get_ifd_dict(self, pointer, ifd_name, read_unknown=False):
ifd_dict["first_ifd_pointer"] = self.tiftag[pointer:pointer + 4]
return ifd_dict

def convert_value(self, val):
    """Decode the value of a single IFD entry.

    ``val`` is indexed as ``(value_type, count, field, tag)`` where
    ``field`` is the entry's 4-byte value-or-offset field.  Depending on
    the type and count, the data is unpacked either directly from
    ``field`` or from ``self.tiftag`` at the offset stored in ``field``.

    Returns a tuple of numbers, a tuple of 2-tuples for rational types,
    or a bytes slice for Ascii (last byte dropped) and Undefined.

    Raises:
        ValueError: if ``value_type`` is none of the known ``TYPES``.
    """
    t, length, value = val[0], val[1], val[2]

    def stored_offset():
        # Interpret the 4-byte field as an offset into self.tiftag.
        return unpack_from(self.endian_mark + "L", value)[0]

    def numbers(code, inline_limit, with_mark=True):
        # ``inline_limit`` is the largest count still stored in the field
        # itself; ``None`` means the data is always read via the offset.
        if inline_limit is None or length > inline_limit:
            pointer = stored_offset()
            prefix = self.endian_mark if with_mark else ""
            return unpack_from(prefix + code * length, self.tiftag, pointer)
        prefix = self.endian_mark if with_mark else ""
        return unpack_from(prefix + code * length, value)

    def pairs(code):
        # Rationals are always read via the offset, 8 bytes per value.
        pointer = stored_offset()
        return tuple(
            unpack_from(self.endian_mark + code, self.tiftag, pointer + x * 8)
            for x in range(length)
        )

    def raw(trim):
        # Byte strings; ``trim`` bytes are dropped from the end.
        if length > 4:
            pointer = stored_offset()
            return self.tiftag[pointer: pointer + length - trim]
        return value[0: length - trim]

    # Checked in order with ``==``, first match wins.
    decoders = (
        (TYPES.Byte, lambda: numbers("B", 4, with_mark=False)),
        (TYPES.Ascii, lambda: raw(1)),
        (TYPES.Short, lambda: numbers("H", 2)),
        (TYPES.Long, lambda: numbers("L", 1)),
        (TYPES.Rational, lambda: pairs("LL")),
        (TYPES.SByte, lambda: numbers("b", 4, with_mark=False)),
        (TYPES.Undefined, lambda: raw(0)),
        (TYPES.SShort, lambda: numbers("h", 2)),
        (TYPES.SLong, lambda: numbers("l", 1)),
        (TYPES.SRational, lambda: pairs("ll")),
        (TYPES.Float, lambda: numbers("f", 1)),
        (TYPES.DFloat, lambda: numbers("d", None)),
    )
    for type_id, decode in decoders:
        if t == type_id:
            return decode()

    raise ValueError("Exif might be wrong. Got incorrect value " +
                     "type to decode.\n" +
                     "tag: " + str(val[3]) + "\ntype: " + str(t))


def _get_key_name_dict(exif_dict):
new_dict = {
Expand All @@ -246,7 +195,7 @@ def coerce(value, type, target):
if target == TYPES.Undefined:
if type == TYPES.Byte:
# Interpret numbers as byte values, to fit Pillow behaviour
return b''.join(min(x, 255).to_bytes(1, 'big') for x in value)
return ( b''.join(min(x, 255).to_bytes(1, 'big') for x in value), )
mikhail-iurkov marked this conversation as resolved.
Show resolved Hide resolved
elif target in SIMPLE_NUMERICS:
if type in SIMPLE_NUMERICS:
return value
Expand Down
23 changes: 17 additions & 6 deletions tests/s_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -644,7 +644,7 @@ def _compare_piexifDict_PILDict(self, piexifDict, pilDict, p=True):
class UTests(unittest.TestCase):
def test_ExifReader_return_unknown(self):
b1 = b"MM\x00\x2a\x00\x00\x00\x08"
b2 = b"\x00\x01" + b"\xff\xff\x00\x00\x00\x00" + b"\x00\x00\x00\x00"
b2 = b"\x00\x01" + b"\xff\xff\x00\x00\x00\x00\x00\x00" + b"\x00\x00\x00\x00"
er = piexif._load._ExifReader(b1 + b2)
if er.tiftag[0:2] == b"II":
er.endian_mark = "<"
Expand All @@ -653,12 +653,23 @@ def test_ExifReader_return_unknown(self):
ifd = er.get_ifd_dict(8, "0th", True)
self.assertEqual(ifd[65535][0], 0)
self.assertEqual(ifd[65535][1], 0)
self.assertEqual(ifd[65535][2], b"\x00\x00")
self.assertEqual(ifd[65535][2], b"\x00\x00\x00\x00")

def test_ExifReader_convert_value_fail(self):
    # An entry whose value type is not a known type must be rejected
    # with ValueError.
    reader = piexif._load._ExifReader(I1)
    bogus_entry = (None, None, None, None)
    self.assertRaises(ValueError, reader.convert_value, bogus_entry)
def test_truncated_ifd(self):
    # Big-endian TIFF header whose first IFD starts at offset 8.
    tiff_header = b"MM\x00\x2a\x00\x00\x00\x08"
    # The IFD claims 0xffff entries, but only one 12-byte entry follows:
    # tag 0x000b, type 2, count 4, inline value "FOO\0".
    entry_count = b"\xff\xff"
    only_entry = b"\x00\x0b\x00\x02\x00\x00\x00\x04" + b"FOO\x00"
    reader = piexif._load._ExifReader(tiff_header + entry_count + only_entry)
    reader.endian_mark = ">"
    parsed = reader.get_ifd_dict(8, "0th", True)
    self.assertEqual(parsed[ImageIFD.ProcessingSoftware], b"FOO")

def test_ascii_zero(self):
    # Big-endian TIFF header whose first IFD starts at offset 8.
    tiff_header = b"MM\x00\x2a\x00\x00\x00\x08"
    # One entry: tag 0x000b, type 2, count 4, with a NUL byte in the
    # middle of the inline value.  Only the part before the NUL is
    # expected back.
    entry_count = b"\x00\x01"
    only_entry = b"\x00\x0b\x00\x02\x00\x00\x00\x04" + b"F\x00OO"
    reader = piexif._load._ExifReader(tiff_header + entry_count + only_entry)
    reader.endian_mark = ">"
    parsed = reader.get_ifd_dict(8, "0th", True)
    self.assertEqual(parsed[ImageIFD.ProcessingSoftware], b"F")

def test_split_into_segments_fail1(self):
with self.assertRaises(InvalidImageDataError):
Expand Down