From d35559b731332d183d87005723840cacdb7ea208 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Wed, 25 Apr 2018 01:07:42 +0200 Subject: [PATCH] build page by api instead template --- ocrd/constants.py | 12 ------------ ocrd/model/ocrd_page.py | 28 +++++++++++++++++++--------- ocrd/model/ocrd_xml_base.py | 4 ++-- 3 files changed, 21 insertions(+), 23 deletions(-) diff --git a/ocrd/constants.py b/ocrd/constants.py index a12adec1d..6eed0577a 100644 --- a/ocrd/constants.py +++ b/ocrd/constants.py @@ -13,18 +13,6 @@ 'xsl': 'http://www.w3.org/1999/XSL/Transform#', } -PAGE_XML_EMPTY = ''' - - - ocrd/core - 2018-01-01T00:00:00Z - 2018-01-01T00:00:00Z - - - - -''' - MIMETYPE_PAGE = 'text/page+xml' DEFAULT_UPLOAD_FOLDER = '/tmp/uploads-pyocrd' diff --git a/ocrd/model/ocrd_page.py b/ocrd/model/ocrd_page.py index 1bfafa899..7a4068a9d 100644 --- a/ocrd/model/ocrd_page.py +++ b/ocrd/model/ocrd_page.py @@ -1,4 +1,5 @@ from io import StringIO +from datetime import datetime # pylint: disable=unused-import from ocrd.model.ocrd_page_generateds import ( @@ -6,13 +7,16 @@ parseString, CoordsType, OrderedGroupType, + PcGtsType, + PageType, + MetadataType, ReadingOrderType, RegionRefIndexedType, TextEquivType, TextRegionType, TextLineType, ) -from ocrd.constants import PAGE_XML_EMPTY, NAMESPACES +from ocrd.constants import NAMESPACES, VERSION from ocrd.model.ocrd_exif import OcrdExif def to_xml(el): @@ -24,12 +28,19 @@ def page_from_image(input_file): if input_file.local_filename is None: raise Exception("input_file must have 'local_filename' property") exif = OcrdExif.from_filename(input_file.local_filename) - content = PAGE_XML_EMPTY.replace('', '' % ( - exif.width, - exif.height, - input_file.url - )) - return content + now = datetime.now() + return PcGtsType( + Metadata=MetadataType( + Creator="OCR-D/core %s" % VERSION, + Created=now, + LastChange=now + ), + Page=PageType( + imageWidth=exif.width, + imageHeight=exif.height, + imageFilename=input_file.local_filename + ) + ) def from_file(input_file): """ @@ -37,8 +48,7 @@ def from_file(input_file): """ # print("PARSING PARSING '%s'" % input_file) if input_file.mimetype.startswith('image'): - content = page_from_image(input_file) - return parseString(content.encode('utf-8'), silence=True) + return page_from_image(input_file) elif input_file.mimetype == 'text/page+xml': return parse(input_file.local_filename, silence=True) else: diff --git a/ocrd/model/ocrd_xml_base.py b/ocrd/model/ocrd_xml_base.py index 4ae3a6a76..6624d5717 100644 --- a/ocrd/model/ocrd_xml_base.py +++ b/ocrd/model/ocrd_xml_base.py @@ -1,3 +1,5 @@ +from lxml import etree as ET + from ocrd.constants import ( NAMESPACES, TAG_PAGE_COORDS @@ -8,8 +10,6 @@ coordinate_string_from_xywh ) -from lxml import etree as ET - for curie in NAMESPACES: ET.register_namespace(curie, NAMESPACES[curie])