Skip to content

Commit

Permalink
Merge pull request #10 from audricschiltknecht/release-1.8.1
Browse files Browse the repository at this point in the history
  • Loading branch information
ptudor committed Nov 15, 2017
2 parents 952fdc3 + be19737 commit c16a6ff
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 32 deletions.
79 changes: 51 additions & 28 deletions lgr/tools/cross_script_variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,61 +6,84 @@

import logging

from lgr.tools.merge_set import get_script
from lgr.utils import format_cp
from lgr.tools.utils import read_labels
from lgr.exceptions import LGRException

logger = logging.getLogger(__name__)


def _generate_variants(lgr, label):
    """
    Generate the cross-script variants of a label.

    A variant is generated only when at least one of its code points has a
    script that is not listed in the scripts of the LGR metadata.

    :param lgr: The LGR to use for variant generation.
    :param label: The label to process, as an array of code points.
    :return: Generator of (variant, disp, script_mapping), with:
        - variant: The generated variant.
        - disp: Variant disposition.
        - script_mapping: Mapping of CP -> script
                          (for scripts outside of the LGR).
             If an LGRException is raised while computing the variants,
             a final (label, exception, {}) tuple is generated and the
             iteration stops.
    :raises ValueError: If the LGR metadata does not define any script.
    """
    unidb = lgr.unicode_database
    lgr_scripts = frozenset(lgr.metadata.get_scripts())
    if not lgr_scripts:
        message = ("Cannot generate cross-scripts variants "
                   "for LGR without languages")
        logger.error(message)
        # ValueError is a subclass of Exception, so callers catching the
        # previous bare Exception still work, but now get a message.
        raise ValueError(message)

    try:
        for variant, variant_disp, _, _, _ in lgr.compute_label_disposition(label):
            # Collect the code points whose script is not part of the LGR.
            script_mapping = {}
            for var_cp in variant:
                char = lgr.get_char(var_cp)
                for cp in char.cp:
                    cp_script = unidb.get_script(cp, alpha4=True)
                    if cp_script not in lgr_scripts:
                        script_mapping[cp] = cp_script
            # NOTE(review): every mapped value is flattened element by
            # element; if get_script() returns a plain string this iterates
            # over its characters, which makes the test below equivalent to
            # "script_mapping is not empty" - TODO confirm this is intended.
            scripts = {s for sc in script_mapping.values() for s in sc}
            if not scripts <= lgr_scripts:
                yield variant, variant_disp, script_mapping
    except LGRException as ex:
        # Report the failure in place of a disposition, with no mapping.
        yield label, ex, {}


def cross_script_variants(lgr, labels_input):
    """
    Compute cross-script variants of labels.

    For each valid and eligible input label, the variants returned by
    _generate_variants are listed along with the code points that belong to
    a script outside of the LGR.

    :param lgr: The LGR to use for variant generation.
    :param labels_input: The file containing the labels
    :return: Generator of text chunks describing the cross-script variants.
    :raises ValueError: If the LGR has no metadata or no unicode database.
    """
    # ValueError is a subclass of Exception, so callers catching the
    # previous bare Exception still work, but now get a message.
    if lgr.metadata is None:
        message = ("Cannot generate cross-scripts variants "
                   "for LGR without metadata")
        logger.error(message)
        raise ValueError(message)
    if lgr.unicode_database is None:
        message = ("Cannot generate cross-scripts variants "
                   "for LGR without unicode database attached")
        logger.error(message)
        raise ValueError(message)

    found = False
    for label, valid, error in read_labels(labels_input, lgr.unicode_database):
        if not valid:
            yield "Input label {}: {}\n".format(label, error)
            continue

        label_cp = tuple(ord(c) for c in label)
        # Check that label is eligible in the LGR
        result, _, _, _, _, _ = lgr.test_label_eligible(label_cp)
        if not result:
            continue

        label_displayed = False
        for variant, disp, script_mapping in _generate_variants(lgr, label_cp):
            if not label_displayed:
                # Only display input label if it has x-variants
                yield "Input label {} ({}) has cross-script variants:\n".format(format_cp(label_cp),
                                                                                label)
                label_displayed = True
                found = True
            # unichr: this module targets Python 2.
            yield "\t- Cross-variant {} ({}), disposition {}:\n".format(format_cp(variant),
                                                                        ''.join(unichr(c) for c in variant),
                                                                        disp)
            # The mapping is empty when _generate_variants reports an
            # exception: do not emit a dangling bullet in that case.
            if script_mapping:
                # Sorted by code point so that the output is deterministic.
                yield ''.join("\t\t+ {} ({}): {}\n".format(format_cp(c), unichr(c), s)
                              for c, s in sorted(script_mapping.items()))

    if not found:
        yield 'No cross-script variants for input!'
4 changes: 1 addition & 3 deletions lgr/tools/merge_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,7 @@ def merge_description(lgr_set):

description_type = 'text/plain'
if all_html:
template = """
<pre>Script: '{script}' - MIME-type: '{type}'</pre>
{value}"""
template = """{value}"""
join_prefix = ''
description_type = 'text/html'
else:
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

setup(
name="lgr-core",
version='1.8',
version='1.8.1',
author='Viagénie and Wil Tan',
author_email='[email protected]',
description="API for manipulating Label Generation Rules",
Expand Down

0 comments on commit c16a6ff

Please sign in to comment.