From 4b60005df383a8d8efc6ba60e48c578943a72d08 Mon Sep 17 00:00:00 2001 From: maxrosenblattl Date: Wed, 8 Mar 2017 16:36:16 +0100 Subject: [PATCH 1/2] Add HTML filter in addition to markdown filter --- secretary.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/secretary.py b/secretary.py index 723b81b..7177515 100644 --- a/secretary.py +++ b/secretary.py @@ -139,6 +139,7 @@ def __init__(self, environment=None, **kwargs): # Register filters self.environment.filters['pad'] = pad_string self.environment.filters['markdown'] = self.markdown_filter + self.environment.filters['html'] = self.html_filter self.environment.filters['image'] = self.image_filter self.media_path = kwargs.pop('media_path', '') @@ -351,8 +352,8 @@ def _prepare_document_tags(self, document): is_block = self._is_block_tag(content) scale_to = tag.getAttribute('text:description').strip().lower() - if content.lower().find('|markdown') > 0: - # Take whole paragraph when handling a markdown field + if content.lower().find('|markdown') > 0 or content.lower().find('|html') > 0: + # Take whole paragraph when handling a markdown or html field scale_to = 'text:p' if scale_to: @@ -716,16 +717,25 @@ def markdown_filter(self, markdown_text): if not isinstance(markdown_text, basestring): return '' - from xml.dom import Node - from markdown_map import transform_map - try: from markdown2 import markdown except ImportError: raise SecretaryError('Could not import markdown2 library. 
Install it using "pip install markdown2"') + return self.html_filter(markdown(markdown_text)) + + def html_filter(self, html_text): + """ + Convert an html text into an ODT formatted text + """ + + if not isinstance(html_text, basestring): + return '' + + from xml.dom import Node + from markdown_map import transform_map + styles_cache = {} # cache styles searching - html_text = markdown(markdown_text) xml_object = parseString('%s' % html_text.encode('ascii', 'xmlcharrefreplace')) # Transform HTML tags as specified in transform_map From 5f78e7cd09b80c01a2dd160c3381acde8d95d7cd Mon Sep 17 00:00:00 2001 From: maxrosenblattl Date: Wed, 8 Mar 2017 16:38:40 +0100 Subject: [PATCH 2/2] Take style names from ODT styles.xml --- secretary.py | 44 +++++++++++++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/secretary.py b/secretary.py index 7177515..7c580c5 100644 --- a/secretary.py +++ b/secretary.py @@ -29,6 +29,7 @@ import sys import logging import zipfile +import copy from os import path from mimetypes import guess_type, guess_extension from uuid import uuid4 @@ -733,7 +734,28 @@ def html_filter(self, html_text): return '' from xml.dom import Node - from markdown_map import transform_map + + try: + transform_map = self.transform_map + except AttributeError: + from markdown_map import transform_map + self.transform_map = copy.deepcopy (transform_map) + for definition in self.transform_map.values(): + style_name = definition.get('style_attributes', {}).get('style-name') + if style_name is not None: + pass + for style in self.styles.getElementsByTagName('style:style'): + if style.hasAttribute('style:family') and style.getAttribute('style:family') == 'paragraph': + if not style.hasAttribute('style:parent-style-name'): + self.transform_map['p']['style_attributes']['style-name'] = style.getAttribute('style:name') + for autostyle in self.styles.getElementsByTagName('office:automatic-styles'): + for liststyle in 
autostyle.getElementsByTagName('text:list-style'): + if liststyle.getElementsByTagName('text:list-level-style-bullet'): + definition = self.transform_map['ul'] + else: + definition = self.transform_map['ol'] + definition['style_attributes'] = definition.get('style_attributes', {}) + definition['style_attributes']['style-name'] = liststyle.getAttribute('style:name') styles_cache = {} # cache styles searching xml_object = parseString('%s' % html_text.encode('ascii', 'xmlcharrefreplace')) @@ -742,10 +764,10 @@ def html_filter(self, html_text): # Some tags may require extra attributes in ODT. # Additional attributes are indicated in the 'attributes' property - for tag in transform_map: + for tag in self.transform_map: html_nodes = xml_object.getElementsByTagName(tag) for html_node in html_nodes: - odt_node = xml_object.createElement(transform_map[tag]['replace_with']) + odt_node = xml_object.createElement(self.transform_map[tag]['replace_with']) # Transfer child nodes if html_node.hasChildNodes(): @@ -765,13 +787,13 @@ def html_filter(self, html_text): container.appendChild(child_node.cloneNode(True)) # Add style-attributes defined in transform_map - if 'style_attributes' in transform_map[tag]: - for k, v in transform_map[tag]['style_attributes'].items(): + if 'style_attributes' in self.transform_map[tag]: + for k, v in self.transform_map[tag]['style_attributes'].items(): odt_node.setAttribute('text:%s' % k, v) # Add defined attributes - if 'attributes' in transform_map[tag]: - for k, v in transform_map[tag]['attributes'].items(): + if 'attributes' in self.transform_map[tag]: + for k, v in self.transform_map[tag]['attributes'].items(): odt_node.setAttribute(k, v) # copy original href attribute in tag @@ -781,16 +803,16 @@ def html_filter(self, html_text): html_node.getAttribute('href')) # Does the node need to create an style? 
- if 'style' in transform_map[tag]: - name = transform_map[tag]['style']['name'] + if 'style' in self.transform_map[tag]: + name = self.transform_map[tag]['style']['name'] if not name in styles_cache: style_node = self.get_style_by_name(name) if style_node is None: # Create and cache the style node style_node = self.insert_style_in_content( - name, transform_map[tag]['style'].get('attributes', None), - **transform_map[tag]['style']['properties']) + name, self.transform_map[tag]['style'].get('attributes', None), + **self.transform_map[tag]['style']['properties']) styles_cache[name] = style_node html_node.parentNode.replaceChild(odt_node, html_node)