Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Escape underscores #208

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 18 additions & 5 deletions src/latexify/codegen/algorithmic_codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,11 @@ class AlgorithmicCodegen(ast.NodeVisitor):
_indent_level: int

def __init__(
self, *, use_math_symbols: bool = False, use_set_symbols: bool = False
self,
*,
use_math_symbols: bool = False,
use_set_symbols: bool = False,
escape_underscores: bool = True,
) -> None:
"""Initializer.

Expand All @@ -33,11 +37,14 @@ def __init__(
use_set_symbols: Whether to use set symbols or not.
"""
self._expression_codegen = expression_codegen.ExpressionCodegen(
use_math_symbols=use_math_symbols, use_set_symbols=use_set_symbols
use_math_symbols=use_math_symbols,
use_set_symbols=use_set_symbols,
escape_underscores=escape_underscores,
)
self._identifier_converter = identifier_converter.IdentifierConverter(
use_math_symbols=use_math_symbols,
use_mathrm=False,
escape_underscores=escape_underscores,
)
self._indent_level = 0

Expand Down Expand Up @@ -192,7 +199,11 @@ class IPythonAlgorithmicCodegen(ast.NodeVisitor):
_indent_level: int

def __init__(
self, *, use_math_symbols: bool = False, use_set_symbols: bool = False
self,
*,
use_math_symbols: bool = False,
use_set_symbols: bool = False,
escape_underscores: bool = True,
) -> None:
"""Initializer.

Expand All @@ -202,10 +213,12 @@ def __init__(
use_set_symbols: Whether to use set symbols or not.
"""
self._expression_codegen = expression_codegen.ExpressionCodegen(
use_math_symbols=use_math_symbols, use_set_symbols=use_set_symbols
use_math_symbols=use_math_symbols,
use_set_symbols=use_set_symbols,
escape_underscores=escape_underscores,
)
self._identifier_converter = identifier_converter.IdentifierConverter(
use_math_symbols=use_math_symbols
use_math_symbols=use_math_symbols, escape_underscores=escape_underscores
)
self._indent_level = 0

Expand Down
8 changes: 6 additions & 2 deletions src/latexify/codegen/expression_codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,11 @@ class ExpressionCodegen(ast.NodeVisitor):
_compare_ops: dict[type[ast.cmpop], str]

def __init__(
self, *, use_math_symbols: bool = False, use_set_symbols: bool = False
self,
*,
use_math_symbols: bool = False,
use_set_symbols: bool = False,
escape_underscores: bool = True,
) -> None:
"""Initializer.

Expand All @@ -28,7 +32,7 @@ def __init__(
use_set_symbols: Whether to use set symbols or not.
"""
self._identifier_converter = identifier_converter.IdentifierConverter(
use_math_symbols=use_math_symbols
use_math_symbols=use_math_symbols, escape_underscores=escape_underscores
)

self._bin_op_rules = (
Expand Down
7 changes: 5 additions & 2 deletions src/latexify/codegen/function_codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def __init__(
use_math_symbols: bool = False,
use_signature: bool = True,
use_set_symbols: bool = False,
escape_underscores: bool = True,
) -> None:
"""Initializer.

Expand All @@ -36,10 +37,12 @@ def __init__(
use_set_symbols: Whether to use set symbols or not.
"""
self._expression_codegen = expression_codegen.ExpressionCodegen(
use_math_symbols=use_math_symbols, use_set_symbols=use_set_symbols
use_math_symbols=use_math_symbols,
use_set_symbols=use_set_symbols,
escape_underscores=escape_underscores,
)
self._identifier_converter = identifier_converter.IdentifierConverter(
use_math_symbols=use_math_symbols
use_math_symbols=use_math_symbols, escape_underscores=escape_underscores
)
self._use_signature = use_signature

Expand Down
51 changes: 49 additions & 2 deletions src/latexify/codegen/identifier_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,28 @@ class IdentifierConverter:

_use_math_symbols: bool
_use_mathrm: bool
_escape_underscores: bool

def __init__(self, *, use_math_symbols: bool, use_mathrm: bool = True) -> None:
def __init__(
self,
*,
use_math_symbols: bool,
use_mathrm: bool = True,
escape_underscores: bool = True,
) -> None:
r"""Initializer.

Args:
use_math_symbols: Whether to convert identifiers with math symbol names to
appropriate LaTeX command.
use_mathrm: Whether to wrap the resulting expression by \mathrm, if
applicable.
escape_underscores: Whether to prefix each underscore in identifiers with a
backslash. Disable to allow subscripts in the generated LaTeX.
"""
self._use_math_symbols = use_math_symbols
self._use_mathrm = use_mathrm
self._escape_underscores = escape_underscores

def convert(self, name: str) -> tuple[str, bool]:
"""Converts Python identifier to LaTeX expression.
Expand All @@ -41,14 +51,51 @@ def convert(self, name: str) -> tuple[str, bool]:
- latex: Corresponding LaTeX expression.
- is_single_character: Whether `latex` can be treated as a single
character or not.
Raises:
ValueError: The resulting LaTeX would be invalid. This occurs when
escape_underscores=False and the identifier starts or ends with an
underscore, or contains a double underscore.
"""
if not self._escape_underscores and "_" in name:
# Check if we are going to generate an invalid Latex string. Better to raise an
# exception here than have the resulting Latex fail to compile/display
if "__" in name:
raise ValueError(
f'Identifier "{name}" has a double underscore will result in '
"invalid Latex when underscores are not escaped"
)
if name.startswith("_"):
raise ValueError(
f'Identifier "{name}" starts with an underscore will result in '
"invalid Latex when underscores are not escaped"
)
if name.endswith("_"):
raise ValueError(
f'Identifier "{name}" ends with an underscore will result in '
"invalid Latex when underscores are not escaped"
)
elems = [
IdentifierConverter(
use_math_symbols=self._use_math_symbols,
use_mathrm=False,
escape_underscores=True,
).convert(n)[0]
for n in name.split("_")
]
if name.count("_") == 1:
# One underscore, no need to wrap subsequent subscripts in braces
name = "_".join(elems)
else:
# Multi-layer subscripts require wrapping each subscript in nesting
# braces
name = "_{".join(elems) + "}" * (len(elems) - 1)

if self._use_math_symbols and name in expression_rules.MATH_SYMBOLS:
return "\\" + name, True

if len(name) == 1 and name != "_":
return name, True

escaped = name.replace("_", r"\_")
escaped = name.replace("_", r"\_") if self._escape_underscores else name
wrapped = rf"\mathrm{{{escaped}}}" if self._use_mathrm else escaped

return wrapped, False
81 changes: 62 additions & 19 deletions src/latexify/codegen/identifier_converter_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,32 +8,75 @@


@pytest.mark.parametrize(
"name,use_math_symbols,use_mathrm,expected",
"name,use_math_symbols,use_mathrm,escape_underscores,expected",
[
("a", False, True, ("a", True)),
("_", False, True, (r"\mathrm{\_}", False)),
("aa", False, True, (r"\mathrm{aa}", False)),
("a1", False, True, (r"\mathrm{a1}", False)),
("a_", False, True, (r"\mathrm{a\_}", False)),
("_a", False, True, (r"\mathrm{\_a}", False)),
("_1", False, True, (r"\mathrm{\_1}", False)),
("__", False, True, (r"\mathrm{\_\_}", False)),
("a_a", False, True, (r"\mathrm{a\_a}", False)),
("a__", False, True, (r"\mathrm{a\_\_}", False)),
("a_1", False, True, (r"\mathrm{a\_1}", False)),
("alpha", False, True, (r"\mathrm{alpha}", False)),
("alpha", True, True, (r"\alpha", True)),
("foo", False, True, (r"\mathrm{foo}", False)),
("foo", True, True, (r"\mathrm{foo}", False)),
("foo", True, False, (r"foo", False)),
("a", False, True, True, ("a", True)),
("_", False, True, True, (r"\mathrm{\_}", False)),
("aa", False, True, True, (r"\mathrm{aa}", False)),
("a1", False, True, True, (r"\mathrm{a1}", False)),
("a_", False, True, True, (r"\mathrm{a\_}", False)),
("_a", False, True, True, (r"\mathrm{\_a}", False)),
("_1", False, True, True, (r"\mathrm{\_1}", False)),
("__", False, True, True, (r"\mathrm{\_\_}", False)),
("a_a", False, True, True, (r"\mathrm{a\_a}", False)),
("a__", False, True, True, (r"\mathrm{a\_\_}", False)),
("a_1", False, True, True, (r"\mathrm{a\_1}", False)),
("alpha", False, True, True, (r"\mathrm{alpha}", False)),
("alpha", True, True, True, (r"\alpha", True)),
("alphabet", True, True, True, (r"\mathrm{alphabet}", False)),
("foo", False, True, True, (r"\mathrm{foo}", False)),
("foo", True, True, True, (r"\mathrm{foo}", False)),
("foo", True, False, True, (r"foo", False)),
("aa", False, True, False, (r"\mathrm{aa}", False)),
("a_a", False, True, False, (r"\mathrm{a_a}", False)),
("a_1", False, True, False, (r"\mathrm{a_1}", False)),
("alpha", True, False, False, (r"\alpha", True)),
("alpha_1", True, False, False, (r"\alpha_1", False)),
("x_alpha", True, False, False, (r"x_\alpha", False)),
("x_alpha_beta", True, False, False, (r"x_{\alpha_{\beta}}", False)),
("alpha_beta", True, False, False, (r"\alpha_\beta", False)),
],
)
def test_identifier_converter(
name: str, use_math_symbols: bool, use_mathrm: bool, expected: tuple[str, bool]
name: str,
use_math_symbols: bool,
use_mathrm: bool,
escape_underscores: bool,
expected: tuple[str, bool],
) -> None:
assert (
identifier_converter.IdentifierConverter(
use_math_symbols=use_math_symbols, use_mathrm=use_mathrm
use_math_symbols=use_math_symbols,
use_mathrm=use_mathrm,
escape_underscores=escape_underscores,
).convert(name)
== expected
)


@pytest.mark.parametrize(
    ("name", "use_math_symbols", "use_mathrm", "escape_underscores"),
    [
        ("_", False, True, False),
        ("a_", False, True, False),
        ("_a", False, True, False),
        ("_1", False, True, False),
        ("__", False, True, False),
        ("a__", False, True, False),
        ("alpha_", True, False, False),
        ("_alpha", True, False, False),
        ("x__alpha", True, False, False),
    ],
)
def test_identifier_converter_failure(
    name: str,
    use_math_symbols: bool,
    use_mathrm: bool,
    escape_underscores: bool,
) -> None:
    """Checks that converting each parametrized identifier raises ValueError.

    Every case disables underscore escaping and uses a name that starts with,
    ends with, or contains consecutive underscores.
    """
    converter_options = {
        "use_math_symbols": use_math_symbols,
        "use_mathrm": use_mathrm,
        "escape_underscores": escape_underscores,
    }
    # Construction stays inside the raises-context so the guarded region is
    # the same as a single chained "construct and convert" expression.
    with pytest.raises(ValueError):
        converter = identifier_converter.IdentifierConverter(**converter_options)
        converter.convert(name)
2 changes: 2 additions & 0 deletions src/latexify/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class Config:
use_math_symbols: bool
use_set_symbols: bool
use_signature: bool
escape_underscores: bool

def merge(self, *, config: Config | None = None, **kwargs) -> Config:
"""Merge configuration based on old configuration and field values.
Expand Down Expand Up @@ -75,4 +76,5 @@ def defaults() -> Config:
use_math_symbols=False,
use_set_symbols=False,
use_signature=True,
escape_underscores=True,
)
3 changes: 3 additions & 0 deletions src/latexify/generate_latex.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,17 +66,20 @@ def get_latex(
return codegen.AlgorithmicCodegen(
use_math_symbols=merged_config.use_math_symbols,
use_set_symbols=merged_config.use_set_symbols,
escape_underscores=merged_config.escape_underscores,
).visit(tree)
elif style == Style.FUNCTION:
return codegen.FunctionCodegen(
use_math_symbols=merged_config.use_math_symbols,
use_signature=merged_config.use_signature,
use_set_symbols=merged_config.use_set_symbols,
escape_underscores=merged_config.escape_underscores,
).visit(tree)
elif style == Style.IPYTHON_ALGORITHMIC:
return codegen.IPythonAlgorithmicCodegen(
use_math_symbols=merged_config.use_math_symbols,
use_set_symbols=merged_config.use_set_symbols,
escape_underscores=merged_config.escape_underscores,
).visit(tree)

raise ValueError(f"Unrecognized style: {style}")