Skip to content

Commit

Permalink
better error handling with invalid characters
Browse files (browse the repository at this point in the history)
- fixes issue #34
  • Loading branch information
kienerj committed Sep 16, 2023
1 parent 3441156 commit d64e2cb
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions pycdxml/cdxml_converter/chemdraw_types.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -118,9 +118,14 @@ def from_bytes(property_bytes: bytes, charset='iso-8859-1', fonttable=None) -> '
stream.seek(stream.tell() - text_length)
value = stream.read(text_length).decode('utf8')
except UnicodeDecodeError:
logger.warning("Found unsupported character. Retrying with 'utf8'.")
stream.seek(stream.tell() - text_length)
value = stream.read(text_length).decode('utf8')
if charset == 'utf8':
logger.warning("Found unsupported character for utf8. Retrying with errors=='replace'.")
else:
logger.warning(f"Found unsupported character for charset {charset}. "
f"Retrying with 'utf8' and errors=='replace'.")
value = stream.read(text_length).decode('utf8', errors="replace")

# Normalize to xml spec where all line breaks in attributes are represented by \n
value = value.replace("\r", "\n")
logger.debug(f"Read String '{value}' with {len(font_styles)} different styles.")
Expand Down

0 comments on commit d64e2cb

Please sign in to comment.