diff --git a/lgr/tools/cross_script_variants.py b/lgr/tools/cross_script_variants.py index 1dc01e2..1e418de 100644 --- a/lgr/tools/cross_script_variants.py +++ b/lgr/tools/cross_script_variants.py @@ -6,7 +6,6 @@ import logging -from lgr.tools.merge_set import get_script from lgr.utils import format_cp from lgr.tools.utils import read_labels from lgr.exceptions import LGRException @@ -14,53 +13,77 @@ logger = logging.getLogger(__name__) -def _generate_variants(merged_lgr, lgr_set, label): +def _generate_variants(lgr, label): """ Generate the variants of a label. - :param merged_lgr: The merged LGR from the set. - :param lgr_set: The list of LGRs in the set. + :param lgr: The LGR to use for variant generation. :param label: The label to process, as an array of code points. :return: Generator of (variant, disp, scripts), with: - variant: The generated variant. - - disp: Variant disposition. Note: not sure disp will be != from "blocked". - - scripts: List of scripts used in the variants. + - disp: Variant disposition. + - script_mapping: Mapping of CP -> scripts + (for scripts outside of the LGR). """ + unidb = lgr.unicode_database + lgr_scripts = frozenset(lgr.metadata.get_scripts()) + if not lgr_scripts: + logger.error("Cannot generate cross-scripts variants " + "for LGR without languages") + raise Exception + try: - for variant, variant_disp, _, _, _ in merged_lgr.compute_label_disposition(label): - scripts = set() + for variant, variant_disp, _, _, _ in lgr.compute_label_disposition(label): + script_mapping = {} for var_cp in variant: - char = merged_lgr.get_char(var_cp) - for lgr in lgr_set: - if char in lgr.repertoire: - scripts.add(get_script(lgr)) - if scripts: - yield variant, variant_disp, scripts + char = lgr.get_char(var_cp) + for cp in char.cp: + cp_script = unidb.get_script(cp, alpha4=True) + if cp_script not in lgr_scripts: + script_mapping[cp] = cp_script + scripts = set([s for sc in script_mapping.values() for s in sc]) + if not scripts <= lgr_scripts: + yield variant, variant_disp, script_mapping except LGRException as ex: - yield label, ex, set() + yield label, ex, dict() -def cross_script_variants(merged_lgr, lgr_set, unidb, labels_input): +def cross_script_variants(lgr, labels_input): """ Compute cross-script variants of labels. - :param merged_lgr: The merged LGR from the set. - :param lgr_set: The list of LGRs in the set. - :param unidb: The unicode database to use. + :param lgr: The LGR to use for variant generation. :param labels_input: The file containing the labels """ - for label, valid, error in read_labels(labels_input, unidb): + if lgr.metadata is None: + logger.error("Cannot generate cross-scripts variants " + "for LGR without metadata") + raise Exception + if lgr.unicode_database is None: + logger.error("Cannot generate cross-scripts variants " + "for LGR without unicode database attached") + raise Exception + found = False + for label, valid, error in read_labels(labels_input, lgr.unicode_database): if not valid: yield "Input label {}: {}\n".format(label, error) else: label_cp = tuple([ord(c) for c in label]) - yield "Input label {} ({})\n".format(format_cp(label_cp), label) - # Check that label is eligible in the merged LGR - result, _, _, _, _, _ = merged_lgr.test_label_eligible(label_cp) + result, _, _, _, _, _ = lgr.test_label_eligible(label_cp) if not result: continue - for variant, disp, scripts in _generate_variants(merged_lgr, lgr_set, label_cp): - yield "\t- Variant {} ({}), disposition {}, from LGR: {}\n".format(format_cp(variant), - ''.join([unichr(c) for c in variant]), - disp, - ', '.join(scripts)) \ No newline at end of file + label_displayed = False + for variant, disp, script_mapping in _generate_variants(lgr, label_cp): + if not label_displayed: + # Only display input label if it has x-variants + yield "Input label {} ({}) has cross-script variants:\n".format(format_cp(label_cp), + label) + label_displayed = True + found = True + yield "\t- Cross-variant {} ({}), disposition {}:\n".format(format_cp(variant), + ''.join([unichr(c) for c in variant]), + disp) + yield '\t\t+ ' + '\t\t+ '.join(["{} ({}): {}\n".format(format_cp(c), unichr(c), s) for c, s in script_mapping.items()]) + + if not found: + yield 'No cross-script variants for input!' diff --git a/lgr/tools/merge_set.py b/lgr/tools/merge_set.py index 47516dd..87ee8bf 100644 --- a/lgr/tools/merge_set.py +++ b/lgr/tools/merge_set.py @@ -79,9 +79,7 @@ def merge_description(lgr_set): description_type = 'text/plain' if all_html: - template = """ -
Script: '{script}' - MIME-type: '{type}'-{value}""" + template = """{value}""" join_prefix = '' description_type = 'text/html' else: diff --git a/setup.py b/setup.py index 46cb39e..a6f6889 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ setup( name="lgr-core", - version='1.8', + version='1.8.1', author='Viagénie and Wil Tan', author_email='support@viagenie.ca', description="API for manipulating Label Generation Rules",