diff --git a/enzyme/mkv.py b/enzyme/mkv.py index 9dbce9b..752cb42 100644 --- a/enzyme/mkv.py +++ b/enzyme/mkv.py @@ -2,11 +2,13 @@ from .exceptions import ParserError, MalformedMKVError from .parsers import ebml from datetime import timedelta +from sys import getsizeof +import xml.etree.ElementTree as xml import logging - +import binascii __all__ = ['VIDEO_TRACK', 'AUDIO_TRACK', 'SUBTITLE_TRACK', 'MKV', 'Info', 'Track', 'VideoTrack', - 'AudioTrack', 'SubtitleTrack', 'Tag', 'SimpleTag', 'Chapter'] + 'AudioTrack', 'SubtitleTrack', 'Tag', 'Targets', 'SimpleTag', 'Chapter', 'Attachment'] logger = logging.getLogger(__name__) @@ -16,11 +18,14 @@ class MKV(object): """Matroska Video file - - :param stream: seekable file-like object - """ - def __init__(self, stream, recurse_seek_head=False): + def __init__(self, stream, recurse_seek_head=False, load_attachments=True): + """Load a :class:`MKV` from a stream (binary opened ('rb') file) + + :param stream: seekable file-like object + :param recurse_seek_head: Whether to load recursive SeekHead elements, default False + :param load_attachments: Whether to load attachments' data, default True + """ # default attributes self.info = None self.video_tracks = [] @@ -28,7 +33,7 @@ def __init__(self, stream, recurse_seek_head=False): self.subtitle_tracks = [] self.chapters = [] self.tags = [] - + self.attachments = [] # keep track of the elements parsed self.recurse_seek_head = recurse_seek_head self._parsed_positions = set() @@ -51,11 +56,11 @@ def __init__(self, stream, recurse_seek_head=False): if seek_head.name != 'SeekHead': raise MalformedMKVError('No SeekHead found') seek_head.load(stream, specs, ignore_element_names=['Void', 'CRC-32']) - self._parse_seekhead(seek_head, segment, stream, specs) + self._parse_seekhead(seek_head, segment, stream, specs, load_attachments) except ParserError as e: raise MalformedMKVError('Parsing error: %s' % e) - def _parse_seekhead(self, seek_head, segment, stream, specs): + def 
_parse_seekhead(self, seek_head, segment, stream, specs, load_attachments): for seek in seek_head: element_id = ebml.read_element_id(seek['SeekID'].data) element_name = specs[element_id][1] @@ -82,6 +87,10 @@ def _parse_seekhead(self, seek_head, segment, stream, specs): logger.info('Processing element %s from SeekHead at position %d', element_name, element_position) stream.seek(element_position) self.tags.extend([Tag.fromelement(t) for t in ebml.parse_element(stream, specs, True, ignore_element_names=['Void', 'CRC-32'])]) + elif element_name == 'Attachments': + logger.info('Processing element %s from SeekHead at position %d', element_name, element_position) + stream.seek(element_position) + self.attachments.extend([Attachment.fromelement(t, load_attachments) for t in ebml.parse_element(stream, specs, True, ignore_element_names=['Void', 'CRC-32'])]) elif element_name == 'SeekHead' and self.recurse_seek_head: logger.info('Processing element %s from SeekHead at position %d', element_name, element_position) stream.seek(element_position) @@ -90,18 +99,52 @@ def _parse_seekhead(self, seek_head, segment, stream, specs): logger.debug('Element %s ignored', element_name) self._parsed_positions.add(element_position) + def tags_to_xml(self): + """Return a :class:`xml.etree.ElementTree.Element` from all the tags in self + Useful for updating to the file changed tags with mkvpropedit + """ + root = xml.Element('Tags') + root.extend([tag.to_xml() for tag in self.tags]) + return root + + def tags_from_xml(self, tagsElement): + """Load all :class:`Tag` in self.tags from a :class:`xml.etree.ElementTree.Element` + Useful for creating MKV-like objects from non-mkv files or reading modified but not saved tags from xml files + + :param tagsElement: The XML element containing the tag structure + :type tagsElement: :class:`xml.etree.ElementTree.Element` + """ + for tagElement in tagsElement: + self.tags.append(Tag.fromXML(tagElement)) + def to_dict(self): - return {'info': 
self.info.__dict__, 'video_tracks': [t.__dict__ for t in self.video_tracks], + """Return a serializable dictionary from self + The returned dictionary is json-able. + """ + return {'info': self.info.to_dict(), 'video_tracks': [t.__dict__ for t in self.video_tracks], 'audio_tracks': [t.__dict__ for t in self.audio_tracks], 'subtitle_tracks': [t.__dict__ for t in self.subtitle_tracks], - 'chapters': [c.__dict__ for c in self.chapters], 'tags': [t.__dict__ for t in self.tags]} + 'chapters': [c.to_dict() for c in self.chapters], 'tags': [t.to_dict() for t in self.tags], 'attachments': [a.to_dict() for a in self.attachments]} + + def getTag(self, targettype): + """Return a list of all :class:`Tag` in self which have :class:`Targets` with the requested targettype or targettypevalue + Return a list even if there is only one or no corresponding tags. + + :param targettype: The TargetType or TargetTypeValue of tags to get + :type targettype: String or number + """ + if isinstance(targettype, str): # Then it is a targettype + return [tag for tag in self.tags if tag.targets.targettype == targettype] + else: # Then it is a targettypevalue + return [tag for tag in self.tags if tag.targets.targettypevalue == targettype] def __repr__(self): - return '<%s [%r, %r, %r, %r]>' % (self.__class__.__name__, self.info, self.video_tracks, self.audio_tracks, self.subtitle_tracks) + return '<%s [%r, %r, %r, %r, %d tags, %d attachments]>' % (self.__class__.__name__, self.info, self.video_tracks, self.audio_tracks, self.subtitle_tracks, len(self.tags), len(self.attachments)) class Info(object): """Object for the Info EBML element""" - def __init__(self, title=None, duration=None, date_utc=None, timecode_scale=None, muxing_app=None, writing_app=None): + def __init__(self, uid=None, title=None, duration=None, date_utc=None, timecode_scale=None, muxing_app=None, writing_app=None): + self.uid = uid self.title = title self.duration = timedelta(microseconds=duration * (timecode_scale or 1000000) // 
1000) if duration else None self.date_utc = date_utc @@ -116,13 +159,20 @@ def fromelement(cls, element): :type element: :class:`~enzyme.parsers.ebml.Element` """ + uid = binascii.hexlify(element.get('SegmentUID').getvalue()).decode().upper() title = element.get('Title') duration = element.get('Duration') date_utc = element.get('DateUTC') timecode_scale = element.get('TimecodeScale') muxing_app = element.get('MuxingApp') writing_app = element.get('WritingApp') - return cls(title, duration, date_utc, timecode_scale, muxing_app, writing_app) + return cls(uid, title, duration, date_utc, timecode_scale, muxing_app, writing_app) + + def to_dict(self): + info_dict = self.__dict__.copy() + info_dict['duration'] = self.duration.seconds + self.duration.microseconds/1e6 + info_dict['date_utc'] = ('{0:%Y}-{0:%m}-{0:%d} {0:%H}:{0:%M}:{0:%S}:{0:%f}'.format(self.date_utc))[:-3], # The official Matroska date standard + return info_dict def __repr__(self): return '<%s [title=%r, duration=%s, date=%s]>' % (self.__class__.__name__, self.title, self.duration, self.date_utc) @@ -134,7 +184,7 @@ def __str__(self): class Track(object): """Base object for the Tracks EBML element""" def __init__(self, type=None, number=None, name=None, language=None, enabled=None, default=None, forced=None, lacing=None, # @ReservedAssignment - codec_id=None, codec_name=None): + codec_id=None, codec_name=None, uid=None): self.type = type self.number = number self.name = name @@ -145,6 +195,7 @@ def __init__(self, type=None, number=None, name=None, language=None, enabled=Non self.lacing = lacing self.codec_id = codec_id self.codec_name = codec_name + self.uid = uid @classmethod def fromelement(cls, element): @@ -164,8 +215,10 @@ def fromelement(cls, element): lacing = bool(element.get('FlagLacing', 1)) codec_id = element.get('CodecID') codec_name = element.get('CodecName') + uid = element.get('TrackUID', 0) + return cls(type=type, number=number, name=name, language=language, enabled=enabled, 
default=default, - forced=forced, lacing=lacing, codec_id=codec_id, codec_name=codec_name) + forced=forced, lacing=lacing, codec_id=codec_id, codec_name=codec_name, uid=uid) def __repr__(self): return '<%s [%d, name=%r, language=%s]>' % (self.__class__.__name__, self.number, self.name, self.language) @@ -219,7 +272,7 @@ def fromelement(cls, element): def __repr__(self): return '<%s [%d, %dx%d, %s, name=%r, language=%s]>' % (self.__class__.__name__, self.number, self.width, self.height, - self.codec_id, self.name, self.language) + self.codec_id, self.name, self.language) def __str__(self): return str(self.__dict__) @@ -251,7 +304,7 @@ def fromelement(cls, element): def __repr__(self): return '<%s [%d, %d channel(s), %.0fHz, %s, name=%r, language=%s]>' % (self.__class__.__name__, self.number, self.channels, - self.sampling_frequency, self.codec_id, self.name, self.language) + self.sampling_frequency, self.codec_id, self.name, self.language) class SubtitleTrack(Track): @@ -262,7 +315,7 @@ class SubtitleTrack(Track): class Tag(object): """Object for the Tag EBML element""" def __init__(self, targets=None, simpletags=None): - self.targets = targets if targets is not None else [] + self.targets = targets self.simpletags = simpletags if simpletags is not None else [] @classmethod @@ -273,22 +326,122 @@ def fromelement(cls, element): :type element: :class:`~enzyme.parsers.ebml.Element` """ - targets = element['Targets'] if 'Targets' in element else [] + targets = Targets.fromelement(element['Targets']) if 'Targets' in element else Targets() simpletags = [SimpleTag.fromelement(s) for s in element if s.name == 'SimpleTag'] return cls(targets, simpletags) + @classmethod + def fromXML(cls, xmlElement): + """Load the :class:`Tag` from an :class:`xml.etree.ElementTree.Element` + + :param xmlElement: the Tag XML element + :type xmlElement: :class:`xml.etree.ElementTree.Element` + + """ + targets = Targets() if xmlElement.find('Targets') is None else 
Targets.fromXML(xmlElement.find('Targets')) + simpletags = [SimpleTag.fromXML(s) for s in xmlElement.iterfind('Simple')] + return cls(targets, simpletags) + + def __getitem__(self, tagName): + """Return a list of all :class:`SimpleTag` in self which have the requested name + Return a list even if there is only one or no corresponding SimpleTags. + + :param tagName: Name of the SimpleTags to return + :type tagName: string + """ + return [st for st in self.simpletags if st.name == tagName] + + def to_xml(self): + """Return a :class:`xml.etree.ElementTree.Element` from the :class:`Tag` + """ + root = xml.Element('Tag') + root.append(self.targets.to_xml()) + root.extend([simtag.to_xml() for simtag in self.simpletags]) + return root + + def to_dict(self): + """Return a serializable dict from the :class:`Tag` + """ + return {'targets': self.targets.__dict__, 'simpletags': [st.to_dict() for st in self.simpletags]} + def __repr__(self): return '<%s [targets=%r, simpletags=%r]>' % (self.__class__.__name__, self.targets, self.simpletags) -class SimpleTag(object): +class Targets(object): + """Object for the Targets EBML element""" + def __init__(self, targettypevalue=50, targettype=None, trackUIDs=None, chapterUIDs=None, attachmentUIDs=None, editionUIDs=None): + self.targettypevalue = int(targettypevalue) + self.targettype = targettype + self.trackUIDs = trackUIDs if trackUIDs is not None else [] + self.chapterUIDs = chapterUIDs if chapterUIDs is not None else [] + self.attachmentUIDs = attachmentUIDs if attachmentUIDs is not None else [] + self.editionUIDs = editionUIDs if editionUIDs is not None else [] + + @classmethod + def fromelement(cls, element): + """Load the :class:`Targets` from an :class:`~enzyme.parsers.ebml.Element` + + :param element: the Targets element + :type element: :class:`~enzyme.parsers.ebml.Element` + + """ + targettype = element.get('TargetType') + targettypevalue = element.get('TargetTypeValue', 50) + trackUIDs = element.get_all('TagTrackUID') + 
chapterUIDs = element.get_all('TagChapterUID') + attachmentUIDs = element.get_all('TagAttachmentUID') + editionUIDs = element.get_all('TagEditionUID') + return cls(targettypevalue, targettype, trackUIDs, chapterUIDs, attachmentUIDs, editionUIDs) + + @classmethod + def fromXML(cls, xmlElement): + """Load the :class:`Targets` from an :class:`xml.etree.ElementTree.Element` + + :param xmlElement: the Targets XML element + :type xmlElement: :class:`xml.etree.ElementTree.Element` + + """ + targettype = None if xmlElement.find('TargetType') is None else xmlElement.find('TargetType').text + targettypevalue = 50 if xmlElement.find('TargetTypeValue') is None else xmlElement.find('TargetTypeValue').text + trackUIDs = None if xmlElement.find('TrackUID') is None else xmlElement.find('TrackUID').text + chapterUIDs = None if xmlElement.find('ChapterUID') is None else xmlElement.find('ChapterUID').text + attachmentUIDs = None if xmlElement.find('AttachmentUID') is None else xmlElement.find('AttachmentUID').text + editionUIDs = None if xmlElement.find('EditionUID') is None else xmlElement.find('EditionUID').text + return cls(targettypevalue, targettype, trackUIDs, chapterUIDs, attachmentUIDs, editionUIDs) + + def to_xml(self): + """Return a :class:`xml.etree.ElementTree.Element` from the :class:`Targets` + """ + root = xml.Element('Targets') + if self.targettypevalue is not None: + xml.SubElement(root, 'TargetTypeValue').text = str(self.targettypevalue) + if self.targettype is not None: + xml.SubElement(root, 'TargetType').text = self.targettype + for uids in self.trackUIDs: + xml.SubElement(root, 'TrackUID').text = str(uids) + for uids in self.chapterUIDs: + xml.SubElement(root, 'ChapterUID').text = str(uids) + for uids in self.attachmentUIDs: + xml.SubElement(root, 'AttachmentUID').text = str(uids) + for uids in self.editionUIDs: + xml.SubElement(root, 'EditionUID').text = str(uids) + return root + + def __repr__(self): + return '<%s [%s, targettype=%s, %d target UIDs]>' % 
(self.__class__.__name__, str(self.targettypevalue), self.targettype, + sum([len(t) for t in [self.chapterUIDs, self.trackUIDs, self.editionUIDs, self.attachmentUIDs]])) + + +class SimpleTag(Tag): """Object for the SimpleTag EBML element""" - def __init__(self, name, language='und', default=True, string=None, binary=None): + def __init__(self, name, language='und', default=True, string=None, binary=None, simpletags=None): self.name = name self.language = language self.default = default self.string = string self.binary = binary + self.simpletags = simpletags if simpletags is not None else [] @classmethod def fromelement(cls, element): @@ -303,10 +456,51 @@ def fromelement(cls, element): default = element.get('TagDefault', True) string = element.get('TagString') binary = element.get('TagBinary') - return cls(name, language, default, string, binary) + simpletags = [SimpleTag.fromelement(t) for t in element.get_master_elements()] + return cls(name, language, default, string, binary, simpletags) + + @classmethod + def fromXML(cls, xmlElement): + """Load the :class:`SimpleTag` from an :class:`xml.etree.ElementTree.Element` + + :param xmlElement: the SimpleTag xml element + :type xmlElement: :class:`xml.etree.ElementTree.Element` + + """ + name = None if xmlElement.find('Name') is None else xmlElement.find('Name').text + language = 'und' if xmlElement.find('TagLanguage') is None else xmlElement.find('TagLanguage').text + default = True if xmlElement.find('DefaultLanguage') is None else xmlElement.find('DefaultLanguage').text + string = None if xmlElement.find('String') is None else xmlElement.find('String').text + binary = None if xmlElement.find('Binary') is None else xmlElement.find('Binary').text + simpletags = [SimpleTag.fromXML(s) for s in xmlElement.findall('Simple')] + return cls(name, language, default, string, binary, simpletags) + + def to_xml(self): + """Return a :class:`xml.etree.ElementTree.Element` from the :class:`SimpleTag` + """ + root = 
xml.Element('Simple') + xml.SubElement(root, 'Name').text = self.name + if self.language != 'und' : xml.SubElement(root, 'TagLanguage').text = self.language + if not self.default: xml.SubElement(root, 'DefaultLanguage').text = str(int(self.default)) + if self.string is not None: + xml.SubElement(root, 'String').text = self.string + if self.binary is not None: + xml.SubElement(root, 'Binary').text = str(int(self.binary)) + root.extend([simtag.to_xml() for simtag in self.simpletags]) + return root + + def to_dict(self): + """Return a serializable dict from the :class:`SimpleTag` + """ + stag_dict = self.__dict__.copy() + stag_dict['simpletags'] = [st.to_dict() for st in self.simpletags] + return stag_dict def __repr__(self): - return '<%s [%s, language=%s, default=%s, string=%s]>' % (self.__class__.__name__, self.name, self.language, self.default, self.string) + if len(self.simpletags) == 0: + return '<%s [%s, language=%s, default=%s, string=%s]>' % (self.__class__.__name__, self.name, self.language, self.default, self.string) + else: + return '<%s [%s, language=%s, default=%s, string=%s, simpletags=%r]>' % (self.__class__.__name__, self.name, self.language, self.default, self.string, self.simpletags) class Chapter(object): @@ -318,17 +512,18 @@ class Chapter(object): are merged into the :class:`Chapter` """ - def __init__(self, start, hidden=False, enabled=False, end=None, string=None, language=None): + def __init__(self, start, hidden=False, enabled=False, end=None, uid=None, string=None, language=None): self.start = start self.hidden = hidden self.enabled = enabled self.end = end self.string = string self.language = language + self.uid = uid @classmethod def fromelement(cls, element): - """Load the :class:`Chapter` from an :class:`~enzyme.parsers.ebml.Element` + """Load the :class:`Chapter` from a :class:`~enzyme.parsers.ebml.Element` :param element: the ChapterAtom element :type element: :class:`~enzyme.parsers.ebml.Element` @@ -338,14 +533,59 @@ def 
fromelement(cls, element): hidden = element.get('ChapterFlagHidden', False) enabled = element.get('ChapterFlagEnabled', True) end = element.get('ChapterTimeEnd') + uid = element.get('ChapterUID') chapterdisplays = [c for c in element if c.name == 'ChapterDisplay'] if len(chapterdisplays) > 1: logger.warning('More than 1 (%d) ChapterDisplay element in the ChapterAtom, using the first one', len(chapterdisplays)) if chapterdisplays: string = chapterdisplays[0].get('ChapString') language = chapterdisplays[0].get('ChapLanguage') - return cls(start, hidden, enabled, end, string, language) - return cls(start, hidden, enabled, end) + return cls(start, hidden, enabled, end, uid, string, language) + return cls(start, hidden, enabled, end, uid) + + def to_dict(self): + chap_dict = self.__dict__.copy() + chap_dict['start'] = self.start.seconds + self.start.microseconds/1e6 + return chap_dict def __repr__(self): return '<%s [%s, enabled=%s]>' % (self.__class__.__name__, self.start, self.enabled) + + +class Attachment(object): + """Object for the Attachedfile EBML element""" + def __init__(self, description=None, name=None, mime_type=None, uid=None, data=None): + self.description = description + self.name = name + self.mime_type = mime_type + self.uid = uid + self.data = data + + @classmethod + def fromelement(cls, element, load_attachment=True): + """Load the :class:`Attachment` from a :class:`~enzyme.parsers.ebml.Element` + + :param element: the Attachedfile element + :type element: :class:`~enzyme.parsers.ebml.Element` + :param load_attachment: whether to load the attachment data or not (default: True) + :type load_attachment: :class:`bool` + """ + description = element.get('FileDescription') + name = element.get('FileName') + mime_type = element.get('FileMimeType') + uid = element.get('FileUID') + if load_attachment: + data = element.get('FileData', None) + else: + data = None + return cls(description, name, mime_type, uid, data) + + def to_dict(self): + """Return a 
serializable dict from the :class:`Attachment` + """ + att_dict = self.__dict__.copy() + att_dict['data'] = None if self.data is None else self.data.read() + return att_dict + + def __repr__(self): + return '<%s [%s, type=%s, %d KB]>' % (self.__class__.__name__, self.description if not self.description == '' else self.name, self.mime_type, getsizeof(self.data)/1000) diff --git a/enzyme/parsers/ebml/core.py b/enzyme/parsers/ebml/core.py index ae025ac..4ca22f2 100644 --- a/enzyme/parsers/ebml/core.py +++ b/enzyme/parsers/ebml/core.py @@ -121,6 +121,24 @@ def get(self, name, default=None): raise ValueError('%s is a MasterElement' % name) return element.data + def get_all(self, name, default=None): + """Convenience method for ``[element.data for element in master_element if element.name == name]`` + + :param string name: the name of the children to get + :param default: default value if `name` is not in the :class:`MasterElement` + :return: list of the data of the children :class:`Element` or `default` + + """ + if name not in self: + return default if default is not None else [] + elements = [e for e in self.data if e.name == name] + if any([e.type == MASTER for e in elements]): + raise ValueError('%s elements contain MasterElement' % name) + return [e.data for e in elements] + + def get_master_elements(self): + return [e for e in self if e.type == MASTER] + def __getitem__(self, key): if isinstance(key, int): return self.data[key] diff --git a/enzyme/tests/test_mkv.py b/enzyme/tests/test_mkv.py index 2403661..3fc641c 100644 --- a/enzyme/tests/test_mkv.py +++ b/enzyme/tests/test_mkv.py @@ -1,11 +1,13 @@ # -*- coding: utf-8 -*- from datetime import timedelta, datetime -from enzyme.mkv import MKV, VIDEO_TRACK, AUDIO_TRACK, SUBTITLE_TRACK import io import os.path +from enzyme.mkv import MKV, VIDEO_TRACK, AUDIO_TRACK, SUBTITLE_TRACK, SimpleTag import requests import unittest import zipfile +import xml.etree.ElementTree as xml +from sys import getsizeof # Test directory @@ 
-14,9 +16,14 @@ def setUpModule(): if not os.path.exists(TEST_DIR): + # matroska test suite Wave 1 r = requests.get('http://downloads.sourceforge.net/project/matroska/test_files/matroska_test_w1_1.zip') with zipfile.ZipFile(io.BytesIO(r.content), 'r') as f: f.extractall(TEST_DIR) + # matroska cover_art test file + r = requests.get('http://downloads.sourceforge.net/project/matroska/test_files/cover_art.mkv') + with io.open(os.path.join(TEST_DIR, 'cover_art.mkv'), 'wb') as f: + f.write(r.content) class MKVTestCase(unittest.TestCase): @@ -24,6 +31,7 @@ def test_test1(self): with io.open(os.path.join(TEST_DIR, 'test1.mkv'), 'rb') as stream: mkv = MKV(stream) # info + self.assertTrue(mkv.info.uid == '922D19320F1E13C5B505630AAFD85336') self.assertTrue(mkv.info.title is None) self.assertTrue(mkv.info.duration == timedelta(minutes=1, seconds=27, milliseconds=336)) self.assertTrue(mkv.info.date_utc == datetime(2010, 8, 21, 7, 23, 3)) @@ -70,29 +78,41 @@ def test_test1(self): self.assertTrue(len(mkv.subtitle_tracks) == 0) # chapters self.assertTrue(len(mkv.chapters) == 0) - # tags - self.assertTrue(len(mkv.tags) == 1) - self.assertTrue(len(mkv.tags[0].simpletags) == 3) - self.assertTrue(mkv.tags[0].simpletags[0].name == 'TITLE') - self.assertTrue(mkv.tags[0].simpletags[0].default == True) - self.assertTrue(mkv.tags[0].simpletags[0].language == 'und') - self.assertTrue(mkv.tags[0].simpletags[0].string == 'Big Buck Bunny - test 1') - self.assertTrue(mkv.tags[0].simpletags[0].binary is None) - self.assertTrue(mkv.tags[0].simpletags[1].name == 'DATE_RELEASED') - self.assertTrue(mkv.tags[0].simpletags[1].default == True) - self.assertTrue(mkv.tags[0].simpletags[1].language == 'und') - self.assertTrue(mkv.tags[0].simpletags[1].string == '2010') - self.assertTrue(mkv.tags[0].simpletags[1].binary is None) - self.assertTrue(mkv.tags[0].simpletags[2].name == 'COMMENT') - self.assertTrue(mkv.tags[0].simpletags[2].default == True) - self.assertTrue(mkv.tags[0].simpletags[2].language == 
'und') - self.assertTrue(mkv.tags[0].simpletags[2].string == 'Matroska Validation File1, basic MPEG4.2 and MP3 with only SimpleBlock') - self.assertTrue(mkv.tags[0].simpletags[2].binary is None) + # tags to xml + with io.open(os.path.join(TEST_DIR, 'test1-tag.xml'), 'r') as xmlfile: + xmlElement = xml.fromstring(''.join([line.strip() for line in xmlfile.readlines()])) + expectedXML = xml.tostring(xmlElement) + actualXML = xml.tostring(mkv.tags_to_xml()) + self.assertEqual(expectedXML, actualXML) + # tags normal and fromXML + for i in range(2): + self.assertTrue(len(mkv.tags) == 1) + self.assertTrue(len(mkv.tags[0].simpletags) == 3) + self.assertTrue(mkv.tags[0].simpletags[0].name == 'TITLE') + self.assertTrue(mkv.tags[0].simpletags[0].default == True) + self.assertTrue(mkv.tags[0].simpletags[0].language == 'und') + self.assertTrue(mkv.tags[0].simpletags[0].string == 'Big Buck Bunny - test 1') + self.assertTrue(mkv.tags[0].simpletags[0].binary is None) + self.assertTrue(mkv.tags[0].simpletags[1].name == 'DATE_RELEASED') + self.assertTrue(mkv.tags[0].simpletags[1].default == True) + self.assertTrue(mkv.tags[0].simpletags[1].language == 'und') + self.assertTrue(mkv.tags[0].simpletags[1].string == '2010') + self.assertTrue(mkv.tags[0].simpletags[1].binary is None) + self.assertTrue(mkv.tags[0].simpletags[2].name == 'COMMENT') + self.assertTrue(mkv.tags[0].simpletags[2].default == True) + self.assertTrue(mkv.tags[0].simpletags[2].language == 'und') + self.assertTrue(mkv.tags[0].simpletags[2].string == 'Matroska Validation File1, basic MPEG4.2 and MP3 with only SimpleBlock') + self.assertTrue(mkv.tags[0].simpletags[2].binary is None) + self.assertTrue(mkv.getTag(50)[0]['TITLE'][0] == mkv.tags[0].simpletags[0]) + if i == 0: + mkv.tags = [] # Empty the tags + mkv.tags_from_xml(xmlElement) # Fill the tags with the xmlElement from file, loop def test_test2(self): with io.open(os.path.join(TEST_DIR, 'test2.mkv'), 'rb') as stream: mkv = MKV(stream) # info + 
self.assertTrue(mkv.info.uid == '92B2CE318A9650039C482D67AA55CB7B') self.assertTrue(mkv.info.title is None) self.assertTrue(mkv.info.duration == timedelta(seconds=47, milliseconds=509)) self.assertTrue(mkv.info.date_utc == datetime(2011, 6, 2, 12, 45, 20)) @@ -157,11 +177,18 @@ def test_test2(self): self.assertTrue(mkv.tags[0].simpletags[2].language == 'und') self.assertTrue(mkv.tags[0].simpletags[2].string == 'Matroska Validation File 2, 100,000 timecode scale, odd aspect ratio, and CRC-32. Codecs are AVC and AAC') self.assertTrue(mkv.tags[0].simpletags[2].binary is None) + # tags to xml + with io.open(os.path.join(TEST_DIR, 'test2-tag.xml'), 'r') as xmlfile: + xmlString = ''.join([line.strip() for line in xmlfile.readlines()]) + expectedXML = xml.tostring(xml.fromstring(xmlString)) + actualXML = xml.tostring(mkv.tags_to_xml()) + self.assertEqual(expectedXML, actualXML) def test_test3(self): with io.open(os.path.join(TEST_DIR, 'test3.mkv'), 'rb') as stream: mkv = MKV(stream) # info + self.assertTrue(mkv.info.uid == '99499582DFAF06D4A676D2E64C02A507') self.assertTrue(mkv.info.title is None) self.assertTrue(mkv.info.duration == timedelta(seconds=49, milliseconds=64)) self.assertTrue(mkv.info.date_utc == datetime(2010, 8, 21, 21, 43, 25)) @@ -226,11 +253,18 @@ def test_test3(self): self.assertTrue(mkv.tags[0].simpletags[2].language == 'und') self.assertTrue(mkv.tags[0].simpletags[2].string == 'Matroska Validation File 3, header stripping on the video track and no SimpleBlock') self.assertTrue(mkv.tags[0].simpletags[2].binary is None) + # tags to xml + with io.open(os.path.join(TEST_DIR, 'test3-tag.xml'), 'r') as xmlfile: + xmlString = ''.join([line.strip() for line in xmlfile.readlines()]) + expectedXML = xml.tostring(xml.fromstring(xmlString)) + actualXML = xml.tostring(mkv.tags_to_xml()) + self.assertEqual(expectedXML, actualXML) def test_test5(self): with io.open(os.path.join(TEST_DIR, 'test5.mkv'), 'rb') as stream: mkv = MKV(stream) # info + 
self.assertTrue(mkv.info.uid == '9D516A0F927A12D286E1502D23D0FDB0') self.assertTrue(mkv.info.title is None) self.assertTrue(mkv.info.duration == timedelta(seconds=46, milliseconds=665)) self.assertTrue(mkv.info.date_utc == datetime(2010, 8, 21, 18, 6, 43)) @@ -373,6 +407,7 @@ def test_test5(self): self.assertTrue(len(mkv.chapters) == 0) # tags self.assertTrue(len(mkv.tags) == 1) + self.assertTrue(mkv.tags[0].targets.targettypevalue == 50) self.assertTrue(len(mkv.tags[0].simpletags) == 3) self.assertTrue(mkv.tags[0].simpletags[0].name == 'TITLE') self.assertTrue(mkv.tags[0].simpletags[0].default == True) @@ -389,11 +424,18 @@ def test_test5(self): self.assertTrue(mkv.tags[0].simpletags[2].language == 'und') self.assertTrue(mkv.tags[0].simpletags[2].string == 'Matroska Validation File 8, secondary audio commentary track, misc subtitle tracks') self.assertTrue(mkv.tags[0].simpletags[2].binary is None) + # tags to xml + with io.open(os.path.join(TEST_DIR, 'test5-tag.xml'), 'r') as xmlfile: + xmlString = ''.join([line.strip() for line in xmlfile.readlines()]) + expectedXML = xml.tostring(xml.fromstring(xmlString)) + actualXML = xml.tostring(mkv.tags_to_xml()) + self.assertEqual(expectedXML, actualXML) def test_test6(self): with io.open(os.path.join(TEST_DIR, 'test6.mkv'), 'rb') as stream: mkv = MKV(stream) # info + self.assertTrue(mkv.info.uid == '84FA5B60972A165B852766E7E5B0A283') self.assertTrue(mkv.info.title is None) self.assertTrue(mkv.info.duration == timedelta(seconds=87, milliseconds=336)) self.assertTrue(mkv.info.date_utc == datetime(2010, 8, 21, 16, 31, 55)) @@ -458,11 +500,18 @@ def test_test6(self): self.assertTrue(mkv.tags[0].simpletags[2].language == 'und') self.assertTrue(mkv.tags[0].simpletags[2].string == 'Matroska Validation File 6, random length to code the size of Clusters and Blocks, no Cues for seeking') self.assertTrue(mkv.tags[0].simpletags[2].binary is None) + # tags to xml + with io.open(os.path.join(TEST_DIR, 'test6-tag.xml'), 'r') as xmlfile: 
+ xmlString = ''.join([line.strip() for line in xmlfile.readlines()]) + expectedXML = xml.tostring(xml.fromstring(xmlString)) + actualXML = xml.tostring(mkv.tags_to_xml()) + self.assertEqual(expectedXML, actualXML) def test_test7(self): with io.open(os.path.join(TEST_DIR, 'test7.mkv'), 'rb') as stream: mkv = MKV(stream) # info + self.assertTrue(mkv.info.uid == 'B9821FA651B1E247B679260DD2E7E371') self.assertTrue(mkv.info.title is None) self.assertTrue(mkv.info.duration == timedelta(seconds=37, milliseconds=43)) self.assertTrue(mkv.info.date_utc == datetime(2010, 8, 21, 17, 0, 23)) @@ -527,11 +576,18 @@ def test_test7(self): self.assertTrue(mkv.tags[0].simpletags[2].language == 'und') self.assertTrue(mkv.tags[0].simpletags[2].string == 'Matroska Validation File 7, junk elements are present at the beggining or end of clusters, the parser should skip it. There is also a damaged element at 451418') self.assertTrue(mkv.tags[0].simpletags[2].binary is None) + # tags to xml + with io.open(os.path.join(TEST_DIR, 'test7-tag.xml'), 'r') as xmlfile: + xmlString = ''.join([line.strip() for line in xmlfile.readlines()]) + expectedXML = xml.tostring(xml.fromstring(xmlString)) + actualXML = xml.tostring(mkv.tags_to_xml()) + self.assertEqual(expectedXML, actualXML) def test_test8(self): with io.open(os.path.join(TEST_DIR, 'test8.mkv'), 'rb') as stream: mkv = MKV(stream) # info + self.assertTrue(mkv.info.uid == '8A1E00BB51661380AF10D1FE09970B5D') self.assertTrue(mkv.info.title is None) self.assertTrue(mkv.info.duration == timedelta(seconds=47, milliseconds=341)) self.assertTrue(mkv.info.date_utc == datetime(2010, 8, 21, 17, 22, 14)) @@ -596,6 +652,197 @@ def test_test8(self): self.assertTrue(mkv.tags[0].simpletags[2].language == 'und') self.assertTrue(mkv.tags[0].simpletags[2].string == 'Matroska Validation File 8, audio missing between timecodes 6.019s and 6.360s') self.assertTrue(mkv.tags[0].simpletags[2].binary is None) + # tags to xml + with io.open(os.path.join(TEST_DIR, 
'test8-tag.xml'), 'r') as xmlfile: + xmlString = ''.join([line.strip() for line in xmlfile.readlines()]) + expectedXML = xml.tostring(xml.fromstring(xmlString)) + actualXML = xml.tostring(mkv.tags_to_xml()) + self.assertEqual(expectedXML, actualXML) + + def test_cover_art(self): + with io.open(os.path.join(TEST_DIR, 'cover_art.mkv'), 'rb') as stream: + mkv = MKV(stream) + # info + self.assertTrue(mkv.info.uid == 'A5FA3864646301558CD35CE3F7B1DADC') + self.assertTrue(mkv.info.title is None) + self.assertTrue(mkv.info.duration == timedelta(seconds=156)) + self.assertTrue(mkv.info.date_utc == datetime(2010, 9, 23, 19, 1, 57)) + self.assertTrue(mkv.info.muxing_app == 'libebml2 v0.12.1 + libmatroska2 v0.11.1') + self.assertTrue(mkv.info.writing_app == "mkclean 0.6.0 r from libebml v1.0.0 + libmatroska v1.0.0 + mkvmerge v4.1.1 ('Bouncin' Back') built on Jul 3 2010 22:54:08") + # video track + self.assertTrue(len(mkv.video_tracks) == 1) + self.assertTrue(mkv.video_tracks[0].type == VIDEO_TRACK) + self.assertTrue(mkv.video_tracks[0].number == 2) + self.assertTrue(mkv.video_tracks[0].name is None) + self.assertTrue(mkv.video_tracks[0].language == 'und') + self.assertTrue(mkv.video_tracks[0].enabled == True) + self.assertTrue(mkv.video_tracks[0].default == True) + self.assertTrue(mkv.video_tracks[0].forced == False) + self.assertTrue(mkv.video_tracks[0].lacing == False) + self.assertTrue(mkv.video_tracks[0].codec_id == 'V_MPEG4/ISO/AVC') + self.assertTrue(mkv.video_tracks[0].codec_name is None) + self.assertTrue(mkv.video_tracks[0].width == 1272) + self.assertTrue(mkv.video_tracks[0].height == 720) + self.assertTrue(mkv.video_tracks[0].interlaced == False) + self.assertTrue(mkv.video_tracks[0].stereo_mode is None) + self.assertTrue(mkv.video_tracks[0].crop == {}) + self.assertTrue(mkv.video_tracks[0].display_width is None) + self.assertTrue(mkv.video_tracks[0].display_height is None) + self.assertTrue(mkv.video_tracks[0].display_unit is None) + 
self.assertTrue(mkv.video_tracks[0].aspect_ratio_type is None) + # audio track + self.assertTrue(len(mkv.audio_tracks) == 1) + self.assertTrue(mkv.audio_tracks[0].type == AUDIO_TRACK) + self.assertTrue(mkv.audio_tracks[0].number == 1) + self.assertTrue(mkv.audio_tracks[0].name is None) + self.assertTrue(mkv.audio_tracks[0].language == 'und') + self.assertTrue(mkv.audio_tracks[0].enabled == True) + self.assertTrue(mkv.audio_tracks[0].default == True) + self.assertTrue(mkv.audio_tracks[0].forced == False) + self.assertTrue(mkv.audio_tracks[0].lacing == True) + self.assertTrue(mkv.audio_tracks[0].codec_id == 'A_AAC') + self.assertTrue(mkv.audio_tracks[0].codec_name is None) + self.assertTrue(mkv.audio_tracks[0].sampling_frequency == 44100.0) + self.assertTrue(mkv.audio_tracks[0].channels == 2) + self.assertTrue(mkv.audio_tracks[0].output_sampling_frequency is None) + self.assertTrue(mkv.audio_tracks[0].bit_depth is None) + # subtitle track + self.assertTrue(len(mkv.subtitle_tracks) == 0) + # chapters + self.assertTrue(len(mkv.chapters) == 0) + # tags + self.assertTrue(len(mkv.tags) == 3) + # tag 0 + self.assertTrue(mkv.tags[0].targets.targettypevalue == 70) + self.assertTrue(mkv.tags[0].targets.targettype is None) + self.assertTrue(len(mkv.tags[0].targets.chapterUIDs) == 0) + self.assertTrue(len(mkv.tags[0].targets.trackUIDs) == 0) + self.assertTrue(len(mkv.tags[0].targets.editionUIDs) == 0) + self.assertTrue(len(mkv.tags[0].targets.attachmentUIDs) == 0) + self.assertTrue(len(mkv.tags[0].simpletags) == 2) + self.assertTrue(mkv.tags[0].simpletags[0].name == 'TITLE') + self.assertTrue(mkv.tags[0].simpletags[0].default == True) + self.assertTrue(mkv.tags[0].simpletags[0].language == 'und') + self.assertTrue(mkv.tags[0].simpletags[0].string == 'Dexter') + self.assertTrue(mkv.tags[0].simpletags[0].binary is None) + self.assertTrue(mkv.tags[0].simpletags[1].name == 'COPYRIGHT') + self.assertTrue(mkv.tags[0].simpletags[1].default == True) + 
self.assertTrue(mkv.tags[0].simpletags[1].language == 'und') + self.assertTrue(mkv.tags[0].simpletags[1].string == 'ShowTime') + self.assertTrue(mkv.tags[0].simpletags[1].binary is None) + self.assertTrue(len(mkv.tags[0].simpletags[1].simpletags) == 1) + self.assertTrue(mkv.tags[0].simpletags[1].simpletags[0].name == 'URL') + self.assertTrue(mkv.tags[0].simpletags[1].simpletags[0].default == True) + self.assertTrue(mkv.tags[0].simpletags[1].simpletags[0].language == 'und') + self.assertTrue(mkv.tags[0].simpletags[1].simpletags[0].string == 'http://www.sho.com/') + self.assertTrue(mkv.tags[0].simpletags[1].simpletags[0].binary is None) + # tag 1 + self.assertTrue(mkv.tags[1].targets.targettypevalue == 60) + self.assertTrue(mkv.tags[1].targets.targettype is None) + self.assertTrue(len(mkv.tags[1].targets.chapterUIDs) == 0) + self.assertTrue(len(mkv.tags[1].targets.trackUIDs) == 0) + self.assertTrue(len(mkv.tags[1].targets.editionUIDs) == 0) + self.assertTrue(len(mkv.tags[1].targets.attachmentUIDs) == 0) + self.assertTrue(len(mkv.tags[1].simpletags) == 2) + self.assertTrue(mkv.tags[1].simpletags[0].name == 'PART_NUMBER') + self.assertTrue(mkv.tags[1].simpletags[0].default == True) + self.assertTrue(mkv.tags[1].simpletags[0].language == 'und') + self.assertTrue(mkv.tags[1].simpletags[0].string == '5') + self.assertTrue(mkv.tags[1].simpletags[0].binary is None) + self.assertTrue(mkv.tags[1].simpletags[1].name == 'DATE_RELEASE') + self.assertTrue(mkv.tags[1].simpletags[1].default == True) + self.assertTrue(mkv.tags[1].simpletags[1].language == 'und') + self.assertTrue(mkv.tags[1].simpletags[1].string == '2010') + self.assertTrue(mkv.tags[1].simpletags[1].binary is None) + # tag 2 + self.assertTrue(len(mkv.tags[2].simpletags) == 3) + self.assertTrue(mkv.tags[2].targets.targettypevalue == 50) + self.assertTrue(mkv.tags[2].targets.targettype is None) + self.assertTrue(mkv.tags[2].simpletags[0].name == 'SAMPLE') + self.assertTrue(mkv.tags[2].simpletags[0].default == True) + 
self.assertTrue(mkv.tags[2].simpletags[0].language == 'und') + self.assertTrue(mkv.tags[2].simpletags[0].string is None) + self.assertTrue(mkv.tags[2].simpletags[0].binary is None) + self.assertTrue(len(mkv.tags[2].simpletags[0].simpletags) == 2) + self.assertTrue(mkv.tags[2].simpletags[0].simpletags[0].name == 'PART_NUMBER') + self.assertTrue(mkv.tags[2].simpletags[0].simpletags[0].default == True) + self.assertTrue(mkv.tags[2].simpletags[0].simpletags[0].language == 'und') + self.assertTrue(mkv.tags[2].simpletags[0].simpletags[0].string == '0') + self.assertTrue(mkv.tags[2].simpletags[0].simpletags[0].binary is None) + self.assertTrue(mkv.tags[2].simpletags[0].simpletags[1].name == 'TITLE') + self.assertTrue(mkv.tags[2].simpletags[0].simpletags[1].default == True) + self.assertTrue(mkv.tags[2].simpletags[0].simpletags[1].language == 'und') + self.assertTrue(mkv.tags[2].simpletags[0].simpletags[1].string == 'Trailer') + self.assertTrue(mkv.tags[2].simpletags[0].simpletags[1].binary is None) + self.assertTrue(mkv.tags[2].simpletags[1].name == 'TITLE') + self.assertTrue(mkv.tags[2].simpletags[1].default == True) + self.assertTrue(mkv.tags[2].simpletags[1].language == 'und') + self.assertTrue(mkv.tags[2].simpletags[1].string == 'Dexter Season 5 trailer') + self.assertTrue(mkv.tags[2].simpletags[1].binary is None) + self.assertTrue(mkv.tags[2].simpletags[2].name == 'ORIGINAL') + self.assertTrue(mkv.tags[2].simpletags[2].default == True) + self.assertTrue(mkv.tags[2].simpletags[2].language == 'und') + self.assertTrue(mkv.tags[2].simpletags[2].string is None) + self.assertTrue(mkv.tags[2].simpletags[2].binary is None) + self.assertTrue(len(mkv.tags[2].simpletags[2].simpletags) == 1) + self.assertTrue(mkv.tags[2].simpletags[2].simpletags[0].name == 'URL') + self.assertTrue(mkv.tags[2].simpletags[2].simpletags[0].default == True) + self.assertTrue(mkv.tags[2].simpletags[2].simpletags[0].language == 'und') + self.assertTrue(mkv.tags[2].simpletags[2].simpletags[0].string == 
'http://www.youtube.com/watch?v=CUbCMbW-BRE') + self.assertTrue(mkv.tags[2].simpletags[2].simpletags[0].binary is None) + # attachments + self.assertTrue(len(mkv.attachments) == 4) + # attachment 0 + self.assertTrue(mkv.attachments[0].name == 'cover.jpg') + self.assertTrue(mkv.attachments[0].description is None) + self.assertTrue(mkv.attachments[0].mime_type == 'image/jpeg') + self.assertTrue(getsizeof(mkv.attachments[0].data)//1000 == 133) + # attachment 1 + self.assertTrue(mkv.attachments[1].name == 'small_cover.jpg') + self.assertTrue(mkv.attachments[1].description is None) + self.assertTrue(mkv.attachments[1].mime_type == 'image/jpeg') + self.assertTrue(getsizeof(mkv.attachments[1].data)//1000 == 18) + # attachment 2 + self.assertTrue(mkv.attachments[2].name == 'cover_land.jpg') + self.assertTrue(mkv.attachments[2].description is None) + self.assertTrue(mkv.attachments[2].mime_type == 'image/jpeg') + self.assertTrue(getsizeof(mkv.attachments[2].data)//1000 == 106) + # attachment 3 + self.assertTrue(mkv.attachments[3].name == 'small_cover_land.jpg') + self.assertTrue(mkv.attachments[3].description is None) + self.assertTrue(mkv.attachments[3].mime_type == 'image/jpeg') + self.assertTrue(getsizeof(mkv.attachments[3].data)//1000 == 16) + + # SimpleTag bracket-access + # Here, only testing some cases, not all values + self.assertTrue(mkv.getTag(50)[0]['SAMPLE'][0]['TITLE'][0].string == 'Trailer') + self.assertTrue(mkv.getTag(70)[0]['COPYRIGHT'][0]['URL'][0].default) + + def test_cover_art_no_attachments(self): + with io.open(os.path.join(TEST_DIR, 'cover_art.mkv'), 'rb') as stream: + mkv = MKV(stream, load_attachments=False) + # attachments + self.assertTrue(len(mkv.attachments) == 4) + # attachment 0 + self.assertTrue(mkv.attachments[0].name == 'cover.jpg') + self.assertTrue(mkv.attachments[0].description is None) + self.assertTrue(mkv.attachments[0].mime_type == 'image/jpeg') + self.assertTrue(mkv.attachments[0].data is None) + # attachment 1 + 
self.assertTrue(mkv.attachments[1].name == 'small_cover.jpg') + self.assertTrue(mkv.attachments[1].description is None) + self.assertTrue(mkv.attachments[1].mime_type == 'image/jpeg') + self.assertTrue(mkv.attachments[1].data is None) + # attachment 2 + self.assertTrue(mkv.attachments[2].name == 'cover_land.jpg') + self.assertTrue(mkv.attachments[2].description is None) + self.assertTrue(mkv.attachments[2].mime_type == 'image/jpeg') + self.assertTrue(mkv.attachments[2].data is None) + # attachment 3 + self.assertTrue(mkv.attachments[3].name == 'small_cover_land.jpg') + self.assertTrue(mkv.attachments[3].description is None) + self.assertTrue(mkv.attachments[3].mime_type == 'image/jpeg') + self.assertTrue(mkv.attachments[3].data is None) def suite(): @@ -604,4 +851,5 @@ def suite(): return suite if __name__ == '__main__': + print('running tests') unittest.TextTestRunner().run(suite())