Skip to content

Commit

Permalink
fix: improve toml converter
Browse files Browse the repository at this point in the history
It is not perfect and is optimized for pyaptly config files, but it should be good enough. We also add JSON as another option.
  • Loading branch information
Jean-Louis Fuchs committed May 30, 2024
1 parent 581d556 commit b878fce
Show file tree
Hide file tree
Showing 10 changed files with 552 additions and 277 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ jobs:

- name: Run tests
run: |
make test
HYPOTHESIS_PROFILE=ci make test
package:
runs-on: ubuntu-latest
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pull-requests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,4 @@ jobs:

- name: Run tests
run: |
make test
HYPOTHESIS_PROFILE=ci make test
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ mypy: poetry-install

.PHONY: pytest
pytest: poetry-install ## run pytest
@docker compose exec testing poetry run pytest -vv --cov
@docker compose exec testing poetry run sh -c "HYPOTHESIS_PROFILE=$(HYPOTHESIS_PROFILE) pytest -vv --cov"

.PHONY: format
format: poetry-install ## format code with ruff
Expand Down
537 changes: 267 additions & 270 deletions poetry.lock

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions pyaptly/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
def init_hypothesis():
"""Initialize hypothesis profile if hypothesis is available."""
try: # pragma: no cover
if "HYPOTHESIS_PROFILE" in os.environ:
if "HYPOTHESIS_PROFILE" in os.environ and os.environ["HYPOTHESIS_PROFILE"]:
from hypothesis import settings

settings.register_profile("ci", settings(max_examples=10000))
settings.register_profile("ci", settings(max_examples=500))
settings.load_profile(os.getenv("HYPOTHESIS_PROFILE", "default"))
except (ImportError, AttributeError): # pragma: no cover
pass
Expand Down
5 changes: 4 additions & 1 deletion pyaptly/config_file.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
"""Handling pyaptly config-files."""

# TODO: remove this as soon as most people have converted their config.

from pathlib import Path

import tomli_w
import yaml

from pyaptly import tomli_w


def yaml_to_toml(yaml_path: Path, toml_path: Path, *, add_defaults: bool = False):
"""Convert pyaptly config files from yaml to toml.
Expand Down
35 changes: 35 additions & 0 deletions pyaptly/tests/test_config_file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import json

import tomli
from hypothesis import given
from hypothesis import strategies as st

from .. import tomli_w

try:
import tomllib
except ImportError: # pragma: no cover
tomllib = None # type: ignore

# Hypothesis strategy producing arbitrary nested values: the leaves are
# booleans, finite floats (no NaN, no infinities), text and integers; the
# containers are lists and string-keyed dictionaries of such values.
toml_strategy = st.recursive(
    st.booleans()
    | st.floats(allow_infinity=False, allow_nan=False)
    | st.text()
    | st.integers(),
    lambda children: st.lists(children) | st.dictionaries(st.text(), children),
    max_leaves=20,
)
# The document handed to the writer is always a string-keyed dictionary whose
# values are drawn from ``toml_strategy``.
table_strategy = st.dictionaries(st.text(), toml_strategy)


@given(data=table_strategy)
def test_convert(data):
    """Check that generated data is rendered to TOML that parsers accept.

    Only parseability of the output is checked; the parsed result is not
    compared against ``data``.
    """
    # Sanity check of the generated input: if it cannot be dumped as JSON the
    # test itself is broken, not the writer.
    json.dumps(data, indent=2)
    # The writer must be able to render the data structure ...
    rendered = tomli_w.dumps(data)
    # ... and the result must be valid TOML.
    tomli.loads(rendered)
    # If tomllib is available, let it parse the output as well.
    if tomllib is not None:  # pragma: no cover
        tomllib.loads(rendered)
6 changes: 6 additions & 0 deletions pyaptly/tomli_w/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
__all__ = ("dumps", "dump")
__version__ = "1.0.0" # DO NOT EDIT THIS LINE MANUALLY. LET bump2version UTILITY DO IT

from ._writer import dump, dumps

# TODO: remove this as soon as most people have converted their config.
232 changes: 232 additions & 0 deletions pyaptly/tomli_w/_writer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,232 @@
from __future__ import annotations

# TODO: remove this as soon as most people have converted their config.
import string
from collections.abc import Generator, Mapping
from datetime import date, datetime, time
from decimal import Decimal
from types import MappingProxyType
from typing import IO, Any, NamedTuple

# Every ASCII control character (code points 0-31) plus DEL (127).
ASCII_CTRL = frozenset(chr(i) for i in range(32)) | frozenset(chr(127))
# Characters that get escaped inside a basic string: the double quote, the
# backslash and every control character except tab. Note that "-" binds
# tighter than "|", so the tab is removed from ASCII_CTRL only.
ILLEGAL_BASIC_STR_CHARS = frozenset('"\\') | ASCII_CTRL - frozenset("\t")
# Characters a key may consist of to be written without quotes.
BARE_KEY_CHARS = frozenset(string.ascii_letters + string.digits + "-_")
# Sequence types that are rendered as arrays.
ARRAY_TYPES = (list, tuple)
# Indentation unit for the items of a multi-line array.
ARRAY_INDENT = " " * 4
# Longest rendered line for which a table is still written in inline style.
MAX_LINE_LENGTH = 100

# Short escape sequences; illegal characters not listed here are written in
# the generic \uXXXX form.
COMPACT_ESCAPES = MappingProxyType(
    {
        "\u0008": "\\b",  # backspace
        "\u000a": "\\n",  # linefeed
        "\u000c": "\\f",  # form feed
        "\u000d": "\\r",  # carriage return
        "\u0022": '\\"',  # quote
        "\u005c": "\\\\",  # backslash
    }
)


def dump(
    __obj: dict[str, Any], __fp: IO[bytes], *, multiline_strings: bool = False
) -> None:
    """Render ``__obj`` as TOML and write it to the binary file ``__fp``.

    The document is written chunk by chunk; every chunk is encoded with the
    default ``str.encode()`` encoding before it is passed to ``__fp.write``.

    Args:
        __obj: The top-level table to serialize.
        __fp: A binary file-like object providing ``write``.
        multiline_strings: Allow strings containing newlines to be written
            as multi-line basic strings.
    """
    context = Context(multiline_strings, {})
    chunks = gen_table_chunks(__obj, context, name="")
    for piece in chunks:
        __fp.write(piece.encode())


def dumps(__obj: dict[str, Any], *, multiline_strings: bool = False) -> str:
    """Render ``__obj`` as TOML and return the document as a string.

    Args:
        __obj: The top-level table to serialize.
        multiline_strings: Allow strings containing newlines to be written
            as multi-line basic strings.
    """
    context = Context(multiline_strings, {})
    pieces = gen_table_chunks(__obj, context, name="")
    return "".join(pieces)


class Context(NamedTuple):
    """State shared by all formatting helpers during one dump."""

    # Whether strings containing newlines may use the multi-line string form.
    allow_multiline: bool
    # Rendered inline tables, keyed by the ``id()`` of the source dict, so a
    # table that is rendered more than once is only formatted once.
    inline_table_cache: dict[int, str]


# Top-level sections whose children are each written as their own
# ``[section.child]`` table.
_block_names = {"repo", "mirror", "snapshot"}


def gen_table_chunks(
    table: Mapping[str, Any],
    ctx: Context,
    *,
    name: str,
    inside_aot: bool = False,
    inside_block: bool = False,
) -> Generator[str, None, None]:
    """Yield the TOML text for ``table`` piece by piece.

    The entries of ``table`` are sorted into three groups: literals (written
    as ``key = value`` lines), sub-tables (written recursively with their own
    header) and blocks (children of a top-level section listed in
    ``_block_names``, each written as ``[section.child]``). Literals are
    emitted first, then sub-tables, then blocks.

    Args:
        table: The mapping to render.
        ctx: Shared formatting state.
        name: Dotted, already formatted name of ``table``; empty for the
            document root.
        inside_aot: ``table`` is an element of an array of tables, so its
            header is written as ``[[name]]``.
        inside_block: ``table`` is not the document root. Every recursive
            call passes ``True``; below the root, dicts that fit on one line
            are written as inline tables.

    Yields:
        Chunks of TOML text; concatenated they form the rendered table.
    """
    yielded = False
    literals = []
    tables: list[tuple[str, Any, bool]] = []  # => [(key, value, inside_aot)]
    blocks = []
    for k, v in table.items():
        if isinstance(v, dict):
            if inside_block:
                if is_suitable_inline_table(v, ctx):
                    literals.append((k, v))
                else:
                    tables.append((k, v, False))
            elif (
                k in _block_names
                and v
                and all(isinstance(sv, dict) for sv in v.values())
            ):
                blocks.extend((k, sk, sv) for sk, sv in v.items())
            else:
                # Also taken for a block section that is empty or has a
                # non-table child: recursing into a scalar child would fail
                # and an empty section would vanish from the output, so such
                # sections are written as ordinary tables instead.
                tables.append((k, v, False))
        elif is_aot(v) and not all(is_suitable_inline_table(t, ctx) for t in v):
            tables.extend((k, t, True) for t in v)
        else:
            literals.append((k, v))

    # A header is needed for every array-of-tables element, and for a named
    # table that has literals of its own or no sub-tables at all (so that an
    # empty table is still visible in the output).
    if inside_aot or name and (literals or not tables):
        yielded = True
        yield f"[[{name}]]\n" if inside_aot else f"[{name}]\n"

    if literals:
        yielded = True
        for k, v in literals:
            yield f"{format_key_part(k)} = {format_literal(v, ctx)}\n"

    for k, v, in_aot in tables:
        # Separate consecutive sections with a blank line.
        if yielded:
            yield "\n"
        else:
            yielded = True
        key_part = format_key_part(k)
        display_name = f"{name}.{key_part}" if name else key_part
        yield from gen_table_chunks(
            v,
            ctx,
            name=display_name,
            inside_aot=in_aot,
            inside_block=True,
        )

    for k, sk, v in blocks:
        if yielded:
            yield "\n"
        else:
            yielded = True
        fsk = format_key_part(sk)
        # The section name comes from _block_names and is therefore a valid
        # bare key; only the child key needs formatting.
        display_name = f"{k}.{fsk}"
        yield from gen_table_chunks(
            v, ctx, name=display_name, inside_aot=inside_aot, inside_block=True
        )


def format_literal(obj: object, ctx: Context, *, nest_level: int = 0) -> str:
    """Render a single value as a TOML literal.

    Args:
        obj: The value to render.
        ctx: Shared formatting state.
        nest_level: Nesting depth of the surrounding arrays; only used to
            indent array items.

    Returns:
        The TOML text for ``obj``.

    Raises:
        ValueError: ``obj`` is a ``time`` carrying timezone information.
        TypeError: ``obj`` is of a type that cannot be serialized.
    """
    # bool is tested first because it is a subclass of int.
    if isinstance(obj, bool):
        rendered = "true" if obj else "false"
    elif isinstance(obj, (int, float, date, datetime)):
        rendered = str(obj)
    elif isinstance(obj, Decimal):
        rendered = format_decimal(obj)
    elif isinstance(obj, time):
        if obj.tzinfo:
            raise ValueError("TOML does not support offset times")
        rendered = str(obj)
    elif isinstance(obj, str):
        rendered = format_string(obj, allow_multiline=ctx.allow_multiline)
    elif isinstance(obj, ARRAY_TYPES):
        rendered = format_inline_array(obj, ctx, nest_level)
    elif isinstance(obj, dict):
        rendered = format_inline_table(obj, ctx)
    else:
        raise TypeError(f"Object of type {type(obj)} is not TOML serializable")
    return rendered


def format_decimal(obj: Decimal) -> str:
    """Render a ``Decimal`` as a TOML float literal.

    NaN and the two infinities are written as ``nan``, ``inf`` and ``-inf``;
    every other value is written as ``str(obj)``.
    """
    if obj.is_nan():
        return "nan"
    if obj.is_infinite():
        return "-inf" if obj.is_signed() else "inf"
    return str(obj)


def format_inline_table(obj: dict, ctx: Context) -> str:
    """Render ``obj`` as an inline table (``{ key = value, ... }``).

    The result is stored in ``ctx.inline_table_cache`` under ``id(obj)`` and
    returned from there when the same dict object is rendered again.
    """
    cache = ctx.inline_table_cache
    cache_key = id(obj)
    try:
        return cache[cache_key]
    except KeyError:
        pass

    if obj:
        pairs = (
            f"{format_key_part(k)} = {format_literal(v, ctx)}"
            for k, v in obj.items()
        )
        rendered = "{ " + ", ".join(pairs) + " }"
    else:
        rendered = "{}"
    cache[cache_key] = rendered
    return rendered


def format_inline_array(obj: tuple | list, ctx: Context, nest_level: int) -> str:
    """Render a sequence as a TOML array.

    An empty sequence becomes ``[]``. Otherwise every item is written on its
    own line, indented one level deeper than ``nest_level`` and followed by
    a comma; the closing bracket is indented by ``nest_level`` levels.
    """
    if not obj:
        return "[]"
    inner_pad = ARRAY_INDENT * (nest_level + 1)
    outer_pad = ARRAY_INDENT * nest_level
    lines = ["["]
    for item in obj:
        literal = format_literal(item, ctx, nest_level=nest_level + 1)
        lines.append(f"{inner_pad}{literal},")
    lines.append(f"{outer_pad}]")
    return "\n".join(lines)


def format_key_part(part: str) -> str:
    """Render one component of a (possibly dotted) key.

    A non-empty key made up only of ``BARE_KEY_CHARS`` is returned unchanged;
    any other key is written as a quoted single-line string.
    """
    is_bare = bool(part) and BARE_KEY_CHARS.issuperset(part)
    if is_bare:
        return part
    return format_string(part, allow_multiline=False)


def format_string(s: str, *, allow_multiline: bool) -> str:
    """Render ``s`` as a quoted TOML basic string.

    When ``allow_multiline`` is set and ``s`` contains a newline, the
    multi-line form (triple quotes) is used: ``\\r\\n`` is normalized to
    ``\\n`` and newlines are kept literally. In every other case the
    single-line form is used. Characters from ``ILLEGAL_BASIC_STR_CHARS`` are
    replaced by their compact escape if one exists, otherwise by a
    ``\\uXXXX`` escape.
    """
    do_multiline = allow_multiline and "\n" in s
    if do_multiline:
        s = s.replace("\r\n", "\n")
        closing = '"""'
        pieces = ['"""\n']
    else:
        closing = '"'
        pieces = ['"']

    # Copy runs of legal characters verbatim; only illegal ones interrupt a run.
    run_start = 0
    for index, char in enumerate(s):
        if char not in ILLEGAL_BASIC_STR_CHARS:
            continue
        pieces.append(s[run_start:index])
        if char not in COMPACT_ESCAPES:
            pieces.append("\\u" + format(ord(char), "x").rjust(4, "0"))
        elif do_multiline and char == "\n":
            # Newlines stay literal in the multi-line form.
            pieces.append("\n")
        else:
            pieces.append(COMPACT_ESCAPES[char])
        run_start = index + 1
    pieces.append(s[run_start:])
    pieces.append(closing)
    return "".join(pieces)


def is_aot(obj: Any) -> bool:
    """Tell whether ``obj`` behaves as an array of tables.

    That is the case for a non-empty list or tuple whose items are all dicts.
    """
    if not isinstance(obj, ARRAY_TYPES):
        return False
    if not obj:
        return False
    return all(isinstance(item, dict) for item in obj)


def is_suitable_inline_table(obj: dict, ctx: Context) -> bool:
    """Decide heuristically whether ``obj`` should be written inline.

    The table is rendered as it would appear as an array item (one indent in
    front, a comma behind); it qualifies when that line contains no newline
    and is at most ``MAX_LINE_LENGTH`` characters long.
    """
    candidate = f"{ARRAY_INDENT}{format_inline_table(obj, ctx)},"
    if "\n" in candidate:
        return False
    return len(candidate) <= MAX_LINE_LENGTH
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ python = "^3.11"
pyyaml = "^6.0.1"
click = "^8.0.0"
tomli = "^2.0.1"
tomli-w = "^1.0.0"
colorama = "^0.4.6"
frozendict = "^2.2.0"

Expand All @@ -53,6 +52,9 @@ ruff = "^0.3.2"
python-lsp-server = "^1.9.0"
python-lsp-ruff = "^2.2.0"

# TODO: remove this as soon as most people have converted their config.
[tool.coverage.run]
omit = ["pyaptly/tomli_w/_writer.py"]

[build-system]
requires = ["poetry-core"]
Expand Down

0 comments on commit b878fce

Please sign in to comment.