Skip to content

Commit

Permalink
First pass of userdata implementation; added userdata to schema (may …
Browse files Browse the repository at this point in the history
…change)
  • Loading branch information
StokesMIDE committed Feb 23, 2024
1 parent 6420d42 commit 1a7e410
Show file tree
Hide file tree
Showing 2 changed files with 162 additions and 0 deletions.
21 changes: 21 additions & 0 deletions idelib/schemata/mide_ide.xml
Original file line number Diff line number Diff line change
Expand Up @@ -404,4 +404,25 @@
<BinaryElement name="ChannelDataMinMeanMax" id="0xBC" multiple="0" minver="1" precache="1">Statistical data for this block's payload consisting of 3 datapoints (min, mean, max) per subchannel. They are organized as [[sc0min] [sc1min] [sc2min] ...] [[sc0mean] [sc1mean] [sc2mean] ...] [[sc0max] [sc1max] [sc2max] ...]. The format and representation of the stat data exactly matches that of the input samples; that is, if the input samples are uint16_t, each stat entry is also a uint16_t.</BinaryElement>
<UIntegerElement name="MediaWriteLatency" id="0xBE" multiple="0" minver="2" precache="0">Super-optional diagnostic element indicating the latency between data acquisition and transfer to the output media. The exact meaning of this value is device-dependent, but may serve as a general indicator of excess activity load, retransmission or congestion (for transmission media) or media wear (for recording media).</UIntegerElement>
</MasterElement>

<!-- User-supplied metadata -->
<MasterElement name="UserData" id="0x10200000" multiple="0" mandatory="0">
<IntegerElement name="TimebaseOffset" id="0x10200010" multiple="0" mandatory="0" />
<BinaryElement name="WindowLayout" id="0x10200011" multiple="0" mandatory="0" />
<MasterElement name="AnnotationList" id="0x10200020" multiple="0" mandatory="0">
<MasterElement name="Annotation" id="0x10200021" multiple="1" mandatory="0">
<UnicodeElement name="AnnotationText" id="0x10200022" multiple="0" mandatory="1" />
<IntegerElement name="AnnotationStartTime" id="0x10200023" multiple="0" mandatory="1" />
<IntegerElement name="AnnotationEndTime" id="0x10200024" multiple="0" mandatory="0" />
<BinaryElement name="AnnotationStyle" id="0x10200025" multiple="0" mandatory="0" />
</MasterElement>
</MasterElement>

<!-- (reused existing elements) -->
<MasterElement name="CalibrationList" id="0x4B00" />
<MasterElement name="WarningList" id="0x5360" />
<MasterElement name="ChannelList" id="0x5270" />
</MasterElement>

<UIntegerElement name="UserDataOffset" id="0x10200001" multiple="0" mandatory="0">Position of the beginning of the user-defined metadata. This should be the last element in the file.</UIntegerElement>
</Schema>
141 changes: 141 additions & 0 deletions idelib/userdata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
"""
Functions for reading and writing application-specific data from/to the end
of IDE files. This data is intended primarily to retain user preferences for
the display of the `Dataset`.
"""

import os.path
import logging
from typing import Any, BinaryIO, Dict, Optional, Tuple, Union

from .dataset import Dataset

#===============================================================================
#
#===============================================================================

# Number of bytes reserved for the padding `Void` element itself when
# `writeUserData()` computes how much filler is needed after the user data.
# NOTE(review): presumably the Void's 1-byte ID plus the 8-byte length field
# (`lengthSize=8`) -- confirm against the EBML encoder.
MIN_VOID_SIZE = 9

# Module logger; uses the package-wide 'idelib' logger name.
logger = logging.getLogger('idelib')

#===============================================================================
#
#===============================================================================

def getUserDataPos(dataset: Dataset) -> Tuple[bool, int, int]:
    """ Get the offset of the start of the user data.

        The last few bytes of the file are searched for a `UserDataOffset`
        element, the value of which is the position of the user data. The
        stream's position is restored before returning.

        :param dataset: The `Dataset` in which to locate the user data.
        :return: A tuple containing a bool (whether or not data exists),
            the offset of the user data, and the total length of the file.
            If no user data was found, the offset equals the file length
            (i.e., the place at which new user data would be appended).
    """
    doc = dataset.ebmldoc
    fs = doc.stream
    hasdata = False

    oldpos = fs.tell()
    filesize = fs.seek(0, os.SEEK_END)
    offset = filesize

    # Encode a dummy `UserDataOffset` using the same fixed sizes used when
    # writing, to learn the element's total size and its header bytes
    # (everything preceding the 8-byte payload).
    example = doc.schema['UserDataOffset'].encode(1, length=8, lengthSize=8)
    header = example[:-8]

    try:
        # `UserDataOffset` should be the last thing in the file, but don't
        # assume: start somewhat further back and search for the header.
        # Clamp to zero; a file shorter than the search window would
        # otherwise yield a negative (invalid) absolute seek position.
        pos = max(0, filesize - int(len(example) * 1.5))
        fs.seek(pos, os.SEEK_SET)
        chunk = fs.read()

        idx = chunk.find(header)
        if idx >= 0:
            elpos = pos + idx
            fs.seek(elpos, os.SEEK_SET)
            el, _next = doc.parseElement(fs)
            value = el.value

            # The user data is written before its `UserDataOffset`, so a
            # valid offset must precede the element that stores it.
            # Anything else is a false match or a corrupt value.
            if 0 <= value < elpos:
                offset = value
                hasdata = True
            else:
                logger.debug('(userdata) Ignoring implausible UserDataOffset '
                             f'{value!r} found at {elpos}')

    except IndexError:
        # Problem with parsed chunk; shouldn't happen. Treated as
        # "no user data" rather than failing.
        logger.debug('(userdata) Could not parse UserDataOffset element')

    finally:
        fs.seek(oldpos, os.SEEK_SET)

    return hasdata, offset, filesize


#===============================================================================
#
#===============================================================================

def readUserData(dataset: Dataset) -> Union[Dict[str, Any], None]:
    """ Retrieve the application-specific user data stored at the end of
        an IDE file.

        The position of the dataset's stream is the same after the call
        as it was before.

        :param dataset: The `Dataset` from which to read the user data.
        :return: The user data as a dictionary, or `None` if the file
            does not contain any user data.
    """
    ebml = dataset.ebmldoc
    stream = ebml.stream
    startpos = stream.tell()

    found, datapos, _filesize = getUserDataPos(dataset)

    if found:
        try:
            # Parse the element at the recorded offset and convert it
            # to native Python data.
            stream.seek(datapos, os.SEEK_SET)
            element, _next = ebml.parseElement(stream)
            return element.dump()

        finally:
            stream.seek(startpos, os.SEEK_SET)

    logger.debug('No user data found')
    return None


#===============================================================================
#
#===============================================================================

def writeUserData(dataset: Dataset,
                  userdata: Optional[Dict[str, Any]]):
    """ Write user data to the end of an IDE file.

        The data is written as a `UserData` element, followed by a `Void`
        element (padding) and a `UserDataOffset` element. If the file
        already contains user data, the new data is written over it. If
        the dataset's stream is read-only, the file is reopened by name
        for writing; if the stream has no name, nothing is written. The
        position of the dataset's stream is restored afterwards.

        :param dataset: The `Dataset` to which to write the user data.
        :param userdata: A dictionary of user data, or `None` to remove
            existing user data. Note that the file will not get smaller if
            the user data is removed (or the new user data is smaller);
            it is just overwritten with null data (an EBML `Void` element).
    """
    schema = dataset.ebmldoc.schema
    fs = dataset.ebmldoc.stream
    oldpos = fs.tell()

    try:
        # `offset` is the start of any existing user data, or the end of
        # the file if there is none.
        _hasdata, offset, filesize = getUserDataPos(dataset)

        # NOTE(review): `userdata` is passed to the encoder as-is, even if
        # it is `None`; confirm `schema.encodes()` accepts that.
        dataBin = schema.encodes({'UserData': userdata})
        offsetBin = schema['UserDataOffset'].encode(offset, length=8, lengthSize=8)

        # Smallest possible end position of the new data (i.e., with an
        # empty `Void`). If that is short of the current end of the file,
        # the `Void` is grown to cover the difference, so that the
        # `UserDataOffset` still ends up as the last thing in the file.
        newsize = (len(offsetBin) + len(dataBin) + offset + MIN_VOID_SIZE)
        voidBin = schema['Void'].encode(None, length=max(0, filesize - newsize),
                                        lengthSize=8)

        userblob = dataBin + voidBin + offsetBin

        if '+' not in fs.mode and 'w' not in fs.mode:
            if not getattr(fs, 'name', None):
                logger.debug(f'(userdata) Dataset stream read only (mode {fs.mode!r}) '
                             'and has no name, not writing user data')
                return

            with open(fs.name, 'br+') as newfs:
                logger.debug(f'(userdata) Dataset stream read only (mode {fs.mode!r}), '
                             'using new stream')
                newfs.seek(offset, os.SEEK_SET)
                newfs.write(userblob)
        else:
            fs.seek(offset, os.SEEK_SET)
            fs.write(userblob)

        # The file never shrinks: if the new data is smaller than what it
        # replaced, the `Void` padding extends it to the old end of file.
        finalsize = max(filesize, newsize)
        logger.debug(f'(userdata) Wrote {len(userblob)} bytes to {dataset} '
                     f'(file was {filesize}, now {finalsize})')

    finally:
        fs.seek(oldpos, os.SEEK_SET)

0 comments on commit 1a7e410

Please sign in to comment.