Skip to content

Commit

Permalink
build page by api instead of template
Browse files Browse the repository at this point in the history
  • Loading branch information
kba committed Apr 24, 2018
1 parent 03a8c31 commit d35559b
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 23 deletions.
12 changes: 0 additions & 12 deletions ocrd/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,6 @@
'xsl': 'http://www.w3.org/1999/XSL/Transform#',
}

PAGE_XML_EMPTY = '''<?xml version="1.0" encoding="UTF-8"?>
<PcGts xmlns="http://schema.primaresearch.org/PAGE/gts/pagecontent/2017-07-15" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://schema.primaresearch.org/PAGE/gts/pagecontent/2017-07-15 http://schema.primaresearch.org/PAGE/gts/pagecontent/2017-07-15/pagecontent.xsd">
<Metadata>
<Creator>ocrd/core</Creator>
<Created>2018-01-01T00:00:00Z</Created>
<LastChange>2018-01-01T00:00:00Z</LastChange>
</Metadata>
<Page>
</Page>
</PcGts>
'''

MIMETYPE_PAGE = 'text/page+xml'

DEFAULT_UPLOAD_FOLDER = '/tmp/uploads-pyocrd'
Expand Down
28 changes: 19 additions & 9 deletions ocrd/model/ocrd_page.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,22 @@
from io import StringIO
from datetime import datetime

# pylint: disable=unused-import
from ocrd.model.ocrd_page_generateds import (
parse,
parseString,
CoordsType,
OrderedGroupType,
PcGtsType,
PageType,
MetadataType,
ReadingOrderType,
RegionRefIndexedType,
TextEquivType,
TextRegionType,
TextLineType,
)
from ocrd.constants import PAGE_XML_EMPTY, NAMESPACES
from ocrd.constants import NAMESPACES, VERSION
from ocrd.model.ocrd_exif import OcrdExif

def to_xml(el):
Expand All @@ -24,21 +28,27 @@ def page_from_image(input_file):
if input_file.local_filename is None:
raise Exception("input_file must have 'local_filename' property")
exif = OcrdExif.from_filename(input_file.local_filename)
content = PAGE_XML_EMPTY.replace('<Page>', '<Page imageWidth="%d" imageHeight="%i" imageFilename="%s">' % (
exif.width,
exif.height,
input_file.url
))
return content
now = datetime.now()
return PcGtsType(
Metadata=MetadataType(
Creator="OCR-D/core %s" % VERSION,
Created=now,
LastChange=now
),
Page=PageType(
imageWidth=exif.width,
imageHeight=exif.height,
imageFilename=input_file.local_filename
)
)

def from_file(input_file):
"""
Create a new PAGE-XML from a METS file representing a PAGE-XML or an image.
"""
# print("PARSING PARSING '%s'" % input_file)
if input_file.mimetype.startswith('image'):
content = page_from_image(input_file)
return parseString(content.encode('utf-8'), silence=True)
return page_from_image(input_file)
elif input_file.mimetype == 'text/page+xml':
return parse(input_file.local_filename, silence=True)
else:
Expand Down
4 changes: 2 additions & 2 deletions ocrd/model/ocrd_xml_base.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from lxml import etree as ET

from ocrd.constants import (
NAMESPACES,
TAG_PAGE_COORDS
Expand All @@ -8,8 +10,6 @@
coordinate_string_from_xywh
)

from lxml import etree as ET

for curie in NAMESPACES:
ET.register_namespace(curie, NAMESPACES[curie])

Expand Down

0 comments on commit d35559b

Please sign in to comment.