Skip to content

Commit

Permalink
Further isolate entities implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
lognaturel committed Nov 18, 2022
1 parent 935685c commit f6cd4a8
Show file tree
Hide file tree
Showing 8 changed files with 150 additions and 134 deletions.
2 changes: 1 addition & 1 deletion pyxform/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import re

from pyxform import file_utils, utils
from pyxform.entity_declaration import EntityDeclaration
from pyxform.entities.entity_declaration import EntityDeclaration
from pyxform.errors import PyXFormError
from pyxform.external_instance import ExternalInstance
from pyxform.question import (
Expand Down
5 changes: 5 additions & 0 deletions pyxform/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,3 +124,8 @@

# Prefix template for error messages that refer to a specific row; it is
# filled in with the row number via the % operator.
ROW_FORMAT_STRING: str = "[row : %s]"
# Tail of error messages about names that are not valid XML identifiers; the
# caller supplies the subject (e.g. "Dataset names ") in front of it.
XML_IDENTIFIER_ERROR_MESSAGE = "must begin with a letter, colon, or underscore. Other characters can include numbers, dashes, and periods."
# Appended to sheet-name misspelling warnings to explain how to silence them.
_MSG_SUPPRESS_SPELLING = (
    " If you do not mean to include a sheet, to suppress this message, "
    "prefix the sheet name with an underscore. For example 'setting' "
    "becomes '_setting'."
)
90 changes: 90 additions & 0 deletions pyxform/entities/entities_parsing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
from typing import Dict, List

from pyxform import constants
from pyxform.errors import PyXFormError
from pyxform.xlsparseutils import find_sheet_misspellings, is_valid_xml_tag


def get_entity_declaration(workbook_dict: Dict, warnings: List) -> Dict:
    """
    Build the entity declaration from the workbook's entities sheet.

    :param workbook_dict: The workbook contents, keyed by sheet name.
    :param warnings: List that collects warning messages. If there is no
      entities sheet but a similarly named sheet exists, a misspelling warning
      is appended to it.
    :return: The entity declaration dict, or an empty dict when the workbook
      has no entities sheet (or the sheet has no rows).
    :raises PyXFormError: If more than one entity is declared, if the dataset
      or label column is missing, or if the dataset name is invalid.
    """
    entities_sheet = workbook_dict.get(constants.ENTITIES, [])

    if not entities_sheet:
        similar = find_sheet_misspellings(
            key=constants.ENTITIES, keys=workbook_dict.keys()
        )
        if similar is not None:
            warnings.append(similar + constants._MSG_SUPPRESS_SPELLING)
        return {}

    if len(entities_sheet) > 1:
        raise PyXFormError(
            "This version of pyxform only supports declaring a single entity per form. Please make sure your entities sheet only declares one entity."
        )

    entity = entities_sheet[0]

    # Report a missing dataset column the same way as a missing label column
    # instead of letting a bare KeyError escape.
    if "dataset" not in entity:
        raise PyXFormError(
            "The entities sheet is missing the required dataset column."
        )

    dataset = entity["dataset"]
    # bytes objects have decode(), not encode(). Decode before any of the str
    # based checks below, which would otherwise fail with a TypeError.
    if isinstance(dataset, bytes):
        dataset = dataset.decode("utf-8")

    if dataset.startswith(constants.ENTITIES_RESERVED_PREFIX):
        raise PyXFormError(
            f"Invalid dataset name: '{dataset}' starts with reserved prefix {constants.ENTITIES_RESERVED_PREFIX}."
        )

    if not is_valid_xml_tag(dataset):
        raise PyXFormError(
            f"Invalid dataset name: '{dataset}'. Dataset names {constants.XML_IDENTIFIER_ERROR_MESSAGE}"
        )

    if "label" not in entity:
        raise PyXFormError("The entities sheet is missing the required label column.")

    return {
        "name": "entity",
        "type": "entity",
        "parameters": {
            "dataset": dataset,
            # Without a create_if condition the entity is always created.
            "create": entity.get("create_if", "1"),
            "label": entity["label"],
        },
    }


def validate_entity_saveto(row: Dict, row_number: int, entity_declaration: Dict):
    """
    Validate the entity property name a survey row is saved to, if any.

    Rows without a value under ``bind`` / ``entities:saveto`` are accepted
    without further checks.

    :param row: The survey row.
    :param row_number: The row number, used as the prefix of error messages.
    :param entity_declaration: The entity declaration for the form; empty when
      no entity has been declared.
    :raises PyXFormError: If no entity is declared, the row is a group or
      repeat, or the property name is reserved, uses the reserved prefix, or
      is not a valid XML identifier.
    """
    save_to = row.get("bind", {}).get("entities:saveto", "")
    if not save_to:
        return

    # bytes objects have decode(), not encode(). Decode before any of the str
    # based checks below, which would otherwise fail with a TypeError.
    if isinstance(save_to, bytes):
        save_to = save_to.decode("utf-8")

    if not entity_declaration:
        raise PyXFormError(
            "To save entity properties using the save_to column, you must add an entities sheet and declare an entity."
        )

    # A row without a type would make the membership tests below raise a
    # TypeError on None, so fall back to an empty string.
    row_type = row.get(constants.TYPE) or ""
    if constants.GROUP in row_type or constants.REPEAT in row_type:
        raise PyXFormError(
            f"{constants.ROW_FORMAT_STRING % row_number} Groups and repeats can't be saved as entity properties."
        )

    error_start = f"{constants.ROW_FORMAT_STRING % row_number} Invalid save_to name:"

    if save_to in ("name", "label"):
        raise PyXFormError(
            f"{error_start} the entity property name '{save_to}' is reserved."
        )

    if save_to.startswith(constants.ENTITIES_RESERVED_PREFIX):
        raise PyXFormError(
            f"{error_start} the entity property name '{save_to}' starts with reserved prefix {constants.ENTITIES_RESERVED_PREFIX}."
        )

    if not is_valid_xml_tag(save_to):
        raise PyXFormError(
            f"{error_start} '{save_to}'. Entity property names {constants.XML_IDENTIFIER_ERROR_MESSAGE}"
        )
File renamed without changes.
2 changes: 1 addition & 1 deletion pyxform/survey_element.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@
BRACKETED_TAG_REGEX,
INVALID_XFORM_TAG_REGEXP,
default_is_dynamic,
is_valid_xml_tag,
node,
)
from pyxform.xls2json import print_pyobj_to_json
from pyxform.xlsparseutils import is_valid_xml_tag

if TYPE_CHECKING:
from typing import List
Expand Down
12 changes: 0 additions & 12 deletions pyxform/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,6 @@

SEP = "_"

# http://www.w3.org/TR/REC-xml/
# First character of a tag: an ASCII letter, colon, or underscore.
TAG_START_CHAR = r"[a-zA-Z:_]"
# Any later character: additionally allows digits, hyphen, and period.
TAG_CHAR = r"[a-zA-Z:_0-9\-.]"
# One start character followed by zero or more tag characters (not anchored).
XFORM_TAG_REGEXP = "%(start)s%(char)s*" % {"start": TAG_START_CHAR, "char": TAG_CHAR}

# One character outside the start-character set, followed by zero or more
# characters outside the tag-character set.
INVALID_XFORM_TAG_REGEXP = r"[^a-zA-Z:_][^a-zA-Z:_0-9\-.]*"

LAST_SAVED_INSTANCE_NAME = "__last-saved"
Expand Down Expand Up @@ -67,13 +62,6 @@ def writexml(self, writer, indent="", addindent="", newl=""):
writer.write(data)


def is_valid_xml_tag(tag):
    """
    Use a regex to see if there are any invalid characters (i.e. spaces).

    :param tag: The candidate tag name.
    :return: A match object (truthy) when the whole string matches
      XFORM_TAG_REGEXP, otherwise None.
    """
    # fullmatch instead of search with "^...$": "$" also matches just before a
    # trailing newline, so a name ending in a newline used to pass as valid.
    return re.fullmatch(XFORM_TAG_REGEXP, tag)


def node(*args, **kwargs):
"""
args[0] -- a XML tag
Expand Down
128 changes: 8 additions & 120 deletions pyxform/xls2json.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,30 +8,29 @@
import re
import sys
from collections import Counter
from typing import Any, Dict, KeysView, List, Optional
from typing import Any, Dict, List

from pyxform import aliases, constants
from pyxform.constants import (
ENTITIES_RESERVED_PREFIX,
_MSG_SUPPRESS_SPELLING,
EXTERNAL_INSTANCE_EXTENSIONS,
ROW_FORMAT_STRING,
TYPE,
XML_IDENTIFIER_ERROR_MESSAGE,
)
from pyxform.entities.entities_parsing import (
get_entity_declaration,
validate_entity_saveto,
)
from pyxform.errors import PyXFormError
from pyxform.utils import default_is_dynamic, is_valid_xml_tag, levenshtein_distance
from pyxform.utils import default_is_dynamic
from pyxform.validators.pyxform import parameters_generic, select_from_file_params
from pyxform.validators.pyxform.missing_translations_check import (
missing_translations_check,
)
from pyxform.xls2json_backends import csv_to_dict, xls_to_dict, xlsx_to_dict
from pyxform.xlsparseutils import find_sheet_misspellings, is_valid_xml_tag

# Maps typographic (curly) single and double quote characters to their plain
# ASCII equivalents.
SMART_QUOTES = {"\u2018": "'", "\u2019": "'", "\u201c": '"', "\u201d": '"'}
# Appended to sheet-name misspelling warnings to explain how to silence them.
_MSG_SUPPRESS_SPELLING = (
    " If you do not mean to include a sheet, to suppress this message, "
    "prefix the sheet name with an underscore. For example 'setting' "
    "becomes '_setting'."
)


def print_pyobj_to_json(pyobj, path=None):
Expand Down Expand Up @@ -323,117 +322,6 @@ def process_image_default(default_value):
return default_value


def find_sheet_misspellings(key: str, keys: "KeysView") -> "Optional[str]":
    """
    Look for sheet names that may be misspellings of an expected sheet name.

    A sheet is reported when its lower-cased name is within a Levenshtein
    distance of 2 from ``key``, it is not one of the supported sheet names,
    and it does not start with an underscore.

    This may also warn about sheets that hold auxiliary metadata not meant
    for processing by pyxform. For example the "osm" sheet name may be similar
    to many other initialisms.

    :param key: The sheet name to look for.
    :param keys: The workbook sheet names.
    :return: A message listing the similar sheet names, or None if there are
      none.
    """
    similar_names = []
    for sheet_name in keys:
        if levenshtein_distance(sheet_name.lower(), key) > 2:
            continue
        if sheet_name in constants.SUPPORTED_SHEET_NAMES:
            continue
        if sheet_name.startswith("_"):
            continue
        similar_names.append(sheet_name)

    if not similar_names:
        return None

    quoted_names = ", ".join("'{}'".format(name) for name in similar_names)
    return (
        "When looking for a sheet named '{k}', the following sheets with "
        "similar names were found: {c}."
    ).format(k=key, c=quoted_names)


def get_entity_declaration(workbook_dict: Dict, warnings: List) -> Dict:
    """
    Build the entity declaration from the workbook's entities sheet.

    :param workbook_dict: The workbook contents, keyed by sheet name.
    :param warnings: List that collects warning messages. If there is no
      entities sheet but a similarly named sheet exists, a misspelling warning
      is appended to it.
    :return: The entity declaration dict, or an empty dict when the workbook
      has no entities sheet (or the sheet has no rows).
    :raises PyXFormError: If more than one entity is declared, if the dataset
      or label column is missing, or if the dataset name is invalid.
    """
    entities_sheet = workbook_dict.get(constants.ENTITIES, [])

    if not entities_sheet:
        similar = find_sheet_misspellings(
            key=constants.ENTITIES, keys=workbook_dict.keys()
        )
        if similar is not None:
            warnings.append(similar + _MSG_SUPPRESS_SPELLING)
        return {}

    if len(entities_sheet) > 1:
        raise PyXFormError(
            "This version of pyxform only supports declaring a single entity per form. Please make sure your entities sheet only declares one entity."
        )

    entity = entities_sheet[0]

    # Report a missing dataset column the same way as a missing label column
    # instead of letting a bare KeyError escape.
    if "dataset" not in entity:
        raise PyXFormError(
            "The entities sheet is missing the required dataset column."
        )

    dataset = entity["dataset"]
    # bytes objects have decode(), not encode(). Decode before any of the str
    # based checks below, which would otherwise fail with a TypeError.
    if isinstance(dataset, bytes):
        dataset = dataset.decode("utf-8")

    if dataset.startswith(ENTITIES_RESERVED_PREFIX):
        raise PyXFormError(
            f"Invalid dataset name: '{dataset}' starts with reserved prefix {ENTITIES_RESERVED_PREFIX}."
        )

    if not is_valid_xml_tag(dataset):
        raise PyXFormError(
            f"Invalid dataset name: '{dataset}'. Dataset names {XML_IDENTIFIER_ERROR_MESSAGE}"
        )

    if "label" not in entity:
        raise PyXFormError("The entities sheet is missing the required label column.")

    return {
        "name": "entity",
        "type": "entity",
        "parameters": {
            "dataset": dataset,
            # Without a create_if condition the entity is always created.
            "create": entity.get("create_if", "1"),
            "label": entity["label"],
        },
    }


def validate_entity_saveto(row: Dict, row_number: int, entity_declaration: Dict):
    """
    Validate the entity property name a survey row is saved to, if any.

    Rows without a value under ``bind`` / ``entities:saveto`` are accepted
    without further checks.

    :param row: The survey row.
    :param row_number: The row number, used as the prefix of error messages.
    :param entity_declaration: The entity declaration for the form; empty when
      no entity has been declared.
    :raises PyXFormError: If no entity is declared, the row is a group or
      repeat, or the property name is reserved, uses the reserved prefix, or
      is not a valid XML identifier.
    """
    save_to = row.get("bind", {}).get("entities:saveto", "")
    if not save_to:
        return

    # bytes objects have decode(), not encode(). Decode before any of the str
    # based checks below, which would otherwise fail with a TypeError.
    if isinstance(save_to, bytes):
        save_to = save_to.decode("utf-8")

    if not entity_declaration:
        raise PyXFormError(
            "To save entity properties using the save_to column, you must add an entities sheet and declare an entity."
        )

    # A row without a type would make the membership tests below raise a
    # TypeError on None, so fall back to an empty string.
    row_type = row.get(TYPE) or ""
    if constants.GROUP in row_type or constants.REPEAT in row_type:
        raise PyXFormError(
            f"{ROW_FORMAT_STRING % row_number} Groups and repeats can't be saved as entity properties."
        )

    error_start = f"{ROW_FORMAT_STRING % row_number} Invalid save_to name:"

    if save_to in ("name", "label"):
        raise PyXFormError(
            f"{error_start} the entity property name '{save_to}' is reserved."
        )

    if save_to.startswith(ENTITIES_RESERVED_PREFIX):
        raise PyXFormError(
            f"{error_start} the entity property name '{save_to}' starts with reserved prefix {ENTITIES_RESERVED_PREFIX}."
        )

    if not is_valid_xml_tag(save_to):
        raise PyXFormError(
            f"{error_start} '{save_to}'. Entity property names {XML_IDENTIFIER_ERROR_MESSAGE}"
        )


def workbook_to_json(
workbook_dict,
form_name=None,
Expand Down
45 changes: 45 additions & 0 deletions pyxform/xlsparseutils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import re
from pyxform import constants
from typing import KeysView, Optional

from pyxform.utils import levenshtein_distance

# http://www.w3.org/TR/REC-xml/
# First character of a tag: an ASCII letter, colon, or underscore.
TAG_START_CHAR = r"[a-zA-Z:_]"
# Any later character: additionally allows digits, hyphen, and period.
TAG_CHAR = r"[a-zA-Z:_0-9\-.]"
# One start character followed by zero or more tag characters (not anchored).
XFORM_TAG_REGEXP = "%(start)s%(char)s*" % {"start": TAG_START_CHAR, "char": TAG_CHAR}


def find_sheet_misspellings(key: str, keys: "KeysView") -> "Optional[str]":
    """
    Look for sheet names that may be misspellings of an expected sheet name.

    A sheet is reported when its lower-cased name is within a Levenshtein
    distance of 2 from ``key``, it is not one of the supported sheet names,
    and it does not start with an underscore.

    This may also warn about sheets that hold auxiliary metadata not meant
    for processing by pyxform. For example the "osm" sheet name may be similar
    to many other initialisms.

    :param key: The sheet name to look for.
    :param keys: The workbook sheet names.
    :return: A message listing the similar sheet names, or None if there are
      none.
    """
    similar_names = []
    for sheet_name in keys:
        if levenshtein_distance(sheet_name.lower(), key) > 2:
            continue
        if sheet_name in constants.SUPPORTED_SHEET_NAMES:
            continue
        if sheet_name.startswith("_"):
            continue
        similar_names.append(sheet_name)

    if not similar_names:
        return None

    quoted_names = ", ".join("'{}'".format(name) for name in similar_names)
    return (
        "When looking for a sheet named '{k}', the following sheets with "
        "similar names were found: {c}."
    ).format(k=key, c=quoted_names)


def is_valid_xml_tag(tag):
    """
    Use a regex to see if there are any invalid characters (i.e. spaces).

    :param tag: The candidate tag name.
    :return: A match object (truthy) when the whole string matches
      XFORM_TAG_REGEXP, otherwise None.
    """
    # fullmatch instead of search with "^...$": "$" also matches just before a
    # trailing newline, so a name ending in a newline used to pass as valid.
    return re.fullmatch(XFORM_TAG_REGEXP, tag)

0 comments on commit f6cd4a8

Please sign in to comment.