From a9ea0641be48d17b29ce807a2d27978a137f5034 Mon Sep 17 00:00:00 2001 From: Ben Jude Date: Mon, 13 May 2024 09:36:28 +0800 Subject: [PATCH 1/2] IdentifierConverter: Add the option to not escape underscores This allows for generating symbols with subscripts (possibly multiple layers), e.g. `x_2_i` becomes `x_{2_{i}}`. This option also creates the possibility of generating invalid latex if the identifier starts or ends with an underscore, or has a double underscore in it. In these cases we now raise a ValueError. --- src/latexify/codegen/identifier_converter.py | 51 +++++++++++- .../codegen/identifier_converter_test.py | 81 ++++++++++++++----- 2 files changed, 111 insertions(+), 21 deletions(-) diff --git a/src/latexify/codegen/identifier_converter.py b/src/latexify/codegen/identifier_converter.py index 19263f8..bca6a8e 100644 --- a/src/latexify/codegen/identifier_converter.py +++ b/src/latexify/codegen/identifier_converter.py @@ -17,8 +17,15 @@ class IdentifierConverter: _use_math_symbols: bool _use_mathrm: bool + _escape_underscores: bool - def __init__(self, *, use_math_symbols: bool, use_mathrm: bool = True) -> None: + def __init__( + self, + *, + use_math_symbols: bool, + use_mathrm: bool = True, + escape_underscores: bool = True, + ) -> None: r"""Initializer. Args: @@ -26,9 +33,12 @@ def __init__(self, *, use_math_symbols: bool, use_mathrm: bool = True) -> None: appropriate LaTeX command. use_mathrm: Whether to wrap the resulting expression by \mathrm, if applicable. + escape_underscores: Whether to prefix any underscores in identifiers with '\\', + disable to allow subscripts in generated latex """ self._use_math_symbols = use_math_symbols self._use_mathrm = use_mathrm + self._escape_underscores = escape_underscores def convert(self, name: str) -> tuple[str, bool]: """Converts Python identifier to LaTeX expression. @@ -41,14 +51,51 @@ def convert(self, name: str) -> tuple[str, bool]: - latex: Corresponding LaTeX expression. 
- is_single_character: Whether `latex` can be treated as a single character or not. + Raises: + ValueError: Resulting latex is not valid. This most likely occurs where the + symbol starts or ends with an underscore, and escape_underscores=False. """ + if not self._escape_underscores and "_" in name: + # Check if we are going to generate an invalid Latex string. Better to raise an + # exception here than have the resulting Latex fail to compile/display + if "__" in name: + raise ValueError( + f'Identifier "{name}" has a double underscore will result in ' + "invalid Latex when underscores are not escaped" + ) + if name.startswith("_"): + raise ValueError( + f'Identifier "{name}" starts with an underscore will result in ' + "invalid Latex when underscores are not escaped" + ) + if name.endswith("_"): + raise ValueError( + f'Identifier "{name}" ends with an underscore will result in ' + "invalid Latex when underscores are not escaped" + ) + elems = [ + IdentifierConverter( + use_math_symbols=self._use_math_symbols, + use_mathrm=False, + escape_underscores=True, + ).convert(n)[0] + for n in name.split("_") + ] + if name.count("_") == 1: + # One underscore, no need to wrap subsequent subscripts in braces + name = "_".join(elems) + else: + # Multi-layer subscripts require wrapping each subscript in nesting + # braces + name = "_{".join(elems) + "}" * (len(elems) - 1) + if self._use_math_symbols and name in expression_rules.MATH_SYMBOLS: return "\\" + name, True if len(name) == 1 and name != "_": return name, True - escaped = name.replace("_", r"\_") + escaped = name.replace("_", r"\_") if self._escape_underscores else name wrapped = rf"\mathrm{{{escaped}}}" if self._use_mathrm else escaped return wrapped, False diff --git a/src/latexify/codegen/identifier_converter_test.py b/src/latexify/codegen/identifier_converter_test.py index b46982d..bc65e3a 100644 --- a/src/latexify/codegen/identifier_converter_test.py +++ b/src/latexify/codegen/identifier_converter_test.py @@ 
-8,32 +8,75 @@ @pytest.mark.parametrize( - "name,use_math_symbols,use_mathrm,expected", + "name,use_math_symbols,use_mathrm,escape_underscores,expected", [ - ("a", False, True, ("a", True)), - ("_", False, True, (r"\mathrm{\_}", False)), - ("aa", False, True, (r"\mathrm{aa}", False)), - ("a1", False, True, (r"\mathrm{a1}", False)), - ("a_", False, True, (r"\mathrm{a\_}", False)), - ("_a", False, True, (r"\mathrm{\_a}", False)), - ("_1", False, True, (r"\mathrm{\_1}", False)), - ("__", False, True, (r"\mathrm{\_\_}", False)), - ("a_a", False, True, (r"\mathrm{a\_a}", False)), - ("a__", False, True, (r"\mathrm{a\_\_}", False)), - ("a_1", False, True, (r"\mathrm{a\_1}", False)), - ("alpha", False, True, (r"\mathrm{alpha}", False)), - ("alpha", True, True, (r"\alpha", True)), - ("foo", False, True, (r"\mathrm{foo}", False)), - ("foo", True, True, (r"\mathrm{foo}", False)), - ("foo", True, False, (r"foo", False)), + ("a", False, True, True, ("a", True)), + ("_", False, True, True, (r"\mathrm{\_}", False)), + ("aa", False, True, True, (r"\mathrm{aa}", False)), + ("a1", False, True, True, (r"\mathrm{a1}", False)), + ("a_", False, True, True, (r"\mathrm{a\_}", False)), + ("_a", False, True, True, (r"\mathrm{\_a}", False)), + ("_1", False, True, True, (r"\mathrm{\_1}", False)), + ("__", False, True, True, (r"\mathrm{\_\_}", False)), + ("a_a", False, True, True, (r"\mathrm{a\_a}", False)), + ("a__", False, True, True, (r"\mathrm{a\_\_}", False)), + ("a_1", False, True, True, (r"\mathrm{a\_1}", False)), + ("alpha", False, True, True, (r"\mathrm{alpha}", False)), + ("alpha", True, True, True, (r"\alpha", True)), + ("alphabet", True, True, True, (r"\mathrm{alphabet}", False)), + ("foo", False, True, True, (r"\mathrm{foo}", False)), + ("foo", True, True, True, (r"\mathrm{foo}", False)), + ("foo", True, False, True, (r"foo", False)), + ("aa", False, True, False, (r"\mathrm{aa}", False)), + ("a_a", False, True, False, (r"\mathrm{a_a}", False)), + ("a_1", False, True, False, 
(r"\mathrm{a_1}", False)), + ("alpha", True, False, False, (r"\alpha", True)), + ("alpha_1", True, False, False, (r"\alpha_1", False)), + ("x_alpha", True, False, False, (r"x_\alpha", False)), + ("x_alpha_beta", True, False, False, (r"x_{\alpha_{\beta}}", False)), + ("alpha_beta", True, False, False, (r"\alpha_\beta", False)), ], ) def test_identifier_converter( - name: str, use_math_symbols: bool, use_mathrm: bool, expected: tuple[str, bool] + name: str, + use_math_symbols: bool, + use_mathrm: bool, + escape_underscores: bool, + expected: tuple[str, bool], ) -> None: assert ( identifier_converter.IdentifierConverter( - use_math_symbols=use_math_symbols, use_mathrm=use_mathrm + use_math_symbols=use_math_symbols, + use_mathrm=use_mathrm, + escape_underscores=escape_underscores, ).convert(name) == expected ) + + +@pytest.mark.parametrize( + "name,use_math_symbols,use_mathrm,escape_underscores", + [ + ("_", False, True, False), + ("a_", False, True, False), + ("_a", False, True, False), + ("_1", False, True, False), + ("__", False, True, False), + ("a__", False, True, False), + ("alpha_", True, False, False), + ("_alpha", True, False, False), + ("x__alpha", True, False, False), + ], +) +def test_identifier_converter_failure( + name: str, + use_math_symbols: bool, + use_mathrm: bool, + escape_underscores: bool, +) -> None: + with pytest.raises(ValueError): + identifier_converter.IdentifierConverter( + use_math_symbols=use_math_symbols, + use_mathrm=use_mathrm, + escape_underscores=escape_underscores, + ).convert(name) From a15bfa7ab5965c4905a10c3d51f0152826a83a21 Mon Sep 17 00:00:00 2001 From: Ben Jude Date: Mon, 13 May 2024 09:36:45 +0800 Subject: [PATCH 2/2] CodeGen: Add the plumbing to allow the use of the new `escape_underscores` option in IdentifierConverter --- src/latexify/codegen/algorithmic_codegen.py | 23 ++++++++++++++++----- src/latexify/codegen/expression_codegen.py | 8 +++++-- src/latexify/codegen/function_codegen.py | 7 +++++-- src/latexify/config.py | 2 
++ src/latexify/generate_latex.py | 3 +++ 5 files changed, 34 insertions(+), 9 deletions(-) diff --git a/src/latexify/codegen/algorithmic_codegen.py b/src/latexify/codegen/algorithmic_codegen.py index 3c027c1..3504ad7 100644 --- a/src/latexify/codegen/algorithmic_codegen.py +++ b/src/latexify/codegen/algorithmic_codegen.py @@ -23,7 +23,11 @@ class AlgorithmicCodegen(ast.NodeVisitor): _indent_level: int def __init__( - self, *, use_math_symbols: bool = False, use_set_symbols: bool = False + self, + *, + use_math_symbols: bool = False, + use_set_symbols: bool = False, + escape_underscores: bool = True, ) -> None: """Initializer. @@ -33,11 +37,14 @@ def __init__( use_set_symbols: Whether to use set symbols or not. """ self._expression_codegen = expression_codegen.ExpressionCodegen( - use_math_symbols=use_math_symbols, use_set_symbols=use_set_symbols + use_math_symbols=use_math_symbols, + use_set_symbols=use_set_symbols, + escape_underscores=escape_underscores, ) self._identifier_converter = identifier_converter.IdentifierConverter( use_math_symbols=use_math_symbols, use_mathrm=False, + escape_underscores=escape_underscores, ) self._indent_level = 0 @@ -192,7 +199,11 @@ class IPythonAlgorithmicCodegen(ast.NodeVisitor): _indent_level: int def __init__( - self, *, use_math_symbols: bool = False, use_set_symbols: bool = False + self, + *, + use_math_symbols: bool = False, + use_set_symbols: bool = False, + escape_underscores: bool = True, ) -> None: """Initializer. @@ -202,10 +213,12 @@ def __init__( use_set_symbols: Whether to use set symbols or not. 
""" self._expression_codegen = expression_codegen.ExpressionCodegen( - use_math_symbols=use_math_symbols, use_set_symbols=use_set_symbols + use_math_symbols=use_math_symbols, + use_set_symbols=use_set_symbols, + escape_underscores=escape_underscores, ) self._identifier_converter = identifier_converter.IdentifierConverter( - use_math_symbols=use_math_symbols + use_math_symbols=use_math_symbols, escape_underscores=escape_underscores ) self._indent_level = 0 diff --git a/src/latexify/codegen/expression_codegen.py b/src/latexify/codegen/expression_codegen.py index 9239d72..f1acd12 100644 --- a/src/latexify/codegen/expression_codegen.py +++ b/src/latexify/codegen/expression_codegen.py @@ -18,7 +18,11 @@ class ExpressionCodegen(ast.NodeVisitor): _compare_ops: dict[type[ast.cmpop], str] def __init__( - self, *, use_math_symbols: bool = False, use_set_symbols: bool = False + self, + *, + use_math_symbols: bool = False, + use_set_symbols: bool = False, + escape_underscores: bool = True, ) -> None: """Initializer. @@ -28,7 +32,7 @@ def __init__( use_set_symbols: Whether to use set symbols or not. """ self._identifier_converter = identifier_converter.IdentifierConverter( - use_math_symbols=use_math_symbols + use_math_symbols=use_math_symbols, escape_underscores=escape_underscores ) self._bin_op_rules = ( diff --git a/src/latexify/codegen/function_codegen.py b/src/latexify/codegen/function_codegen.py index c9b01e2..5c89931 100644 --- a/src/latexify/codegen/function_codegen.py +++ b/src/latexify/codegen/function_codegen.py @@ -25,6 +25,7 @@ def __init__( use_math_symbols: bool = False, use_signature: bool = True, use_set_symbols: bool = False, + escape_underscores: bool = True, ) -> None: """Initializer. @@ -36,10 +37,12 @@ def __init__( use_set_symbols: Whether to use set symbols or not. 
""" self._expression_codegen = expression_codegen.ExpressionCodegen( - use_math_symbols=use_math_symbols, use_set_symbols=use_set_symbols + use_math_symbols=use_math_symbols, + use_set_symbols=use_set_symbols, + escape_underscores=escape_underscores, ) self._identifier_converter = identifier_converter.IdentifierConverter( - use_math_symbols=use_math_symbols + use_math_symbols=use_math_symbols, escape_underscores=escape_underscores ) self._use_signature = use_signature diff --git a/src/latexify/config.py b/src/latexify/config.py index 88ab7e0..e4c6cb1 100644 --- a/src/latexify/config.py +++ b/src/latexify/config.py @@ -35,6 +35,7 @@ class Config: use_math_symbols: bool use_set_symbols: bool use_signature: bool + escape_underscores: bool def merge(self, *, config: Config | None = None, **kwargs) -> Config: """Merge configuration based on old configuration and field values. @@ -75,4 +76,5 @@ def defaults() -> Config: use_math_symbols=False, use_set_symbols=False, use_signature=True, + escape_underscores=True, ) diff --git a/src/latexify/generate_latex.py b/src/latexify/generate_latex.py index 0a7615d..f1989ba 100644 --- a/src/latexify/generate_latex.py +++ b/src/latexify/generate_latex.py @@ -66,17 +66,20 @@ def get_latex( return codegen.AlgorithmicCodegen( use_math_symbols=merged_config.use_math_symbols, use_set_symbols=merged_config.use_set_symbols, + escape_underscores=merged_config.escape_underscores, ).visit(tree) elif style == Style.FUNCTION: return codegen.FunctionCodegen( use_math_symbols=merged_config.use_math_symbols, use_signature=merged_config.use_signature, use_set_symbols=merged_config.use_set_symbols, + escape_underscores=merged_config.escape_underscores, ).visit(tree) elif style == Style.IPYTHON_ALGORITHMIC: return codegen.IPythonAlgorithmicCodegen( use_math_symbols=merged_config.use_math_symbols, use_set_symbols=merged_config.use_set_symbols, + escape_underscores=merged_config.escape_underscores, ).visit(tree) raise ValueError(f"Unrecognized style: 
{style}")