Skip to content

Commit

Permalink
ocrd cli: use make_file_id and assert_file_grp_cardinality
Browse files: browse the repository at this point in the history
  • Loading branch information
kba committed Aug 7, 2020
1 parent be69c97 commit ccb0017
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 20 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ TAGS
.pytest_cache/
env/
env3/
test/assets
32 changes: 14 additions & 18 deletions ocrd_keraslm/wrapper/rate.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,16 @@
from ocrd import Processor
from ocrd_validators.page_validator import PageValidator, ConsistencyError
from ocrd_utils import (
getLogger, concat_padded,
xywh_from_points, points_from_xywh,
getLogger,
make_file_id,
assert_file_grp_cardinality,
MIMETYPE_PAGE
)
from ocrd_modelfactory import page_from_file
from ocrd_models.ocrd_page import (
to_xml, GlyphType,
to_xml,
MetadataItemType, LabelsType, LabelType,
CoordsType, TextEquivType
TextEquivType
)

import networkx as nx
Expand Down Expand Up @@ -57,14 +58,16 @@ def process(self):
... explain incremental page-wise processing here ...
"""
assert_file_grp_cardinality(self.input_file_grp, 1)
assert_file_grp_cardinality(self.output_file_grp, 1)

level = self.parameter['textequiv_level']
beam_width = self.parameter['beam_width']
lm_weight = self.parameter['lm_weight']

prev_traceback = None
prev_pcgts = None
prev_file_id = None
prev_page_id = None
prev_file = None
for (n, input_file) in enumerate(self.input_files):
page_id = input_file.pageId or input_file.ID
LOG.info("INPUT FILE %i / %s", n, page_id)
Expand Down Expand Up @@ -127,9 +130,7 @@ def process(self):
page_update_higher_textequiv_levels(level, pcgts)

# write back result
file_id = input_file.ID.replace(self.input_file_grp, self.output_file_grp)
if file_id == input_file.ID:
file_id = concat_padded(self.output_file_grp, n)
file_id = make_file_id(input_file, self.output_file_grp)
self.workspace.add_file(
ID=file_id,
pageId=input_file.pageId,
Expand All @@ -155,20 +156,17 @@ def process(self):
page_update_higher_textequiv_levels(level, prev_pcgts)

# write back result
file_id = prev_file_id.replace(self.input_file_grp, self.output_file_grp)
if file_id == prev_file_id:
file_id = concat_padded(self.output_file_grp, n - 1)
file_id = make_file_id(prev_file, self.output_file_grp)
self.workspace.add_file(
ID=file_id,
pageId=prev_page_id,
pageId=prev_file.pageId,
file_grp=self.output_file_grp,
local_filename=os.path.join(self.output_file_grp, file_id + '.xml'),
mimetype=MIMETYPE_PAGE,
content=to_xml(prev_pcgts),
)

prev_page_id = input_file.pageId
prev_file_id = input_file.ID
prev_file = input_file
prev_pcgts = pcgts
prev_traceback = traceback

Expand All @@ -180,9 +178,7 @@ def process(self):
page_update_higher_textequiv_levels(level, prev_pcgts)

# write back result
file_id = input_file.ID.replace(self.input_file_grp, self.output_file_grp)
if file_id == input_file.ID:
file_id = concat_padded(self.output_file_grp, n)
file_id = make_file_id(input_file, self.output_file_grp)
self.workspace.add_file(
ID=file_id,
pageId=input_file.pageId,
Expand Down
3 changes: 1 addition & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
ocrd >= 2.3.1
click
ocrd >= 2.13.1
keras == 2.3.*
numpy
tensorflow-gpu == 1.15.*
Expand Down

0 comments on commit ccb0017

Please sign in to comment.