From 91183898bd7eec65bfd73834937d67c5e8487883 Mon Sep 17 00:00:00 2001 From: Marcel Bargull Date: Mon, 25 Mar 2024 21:20:33 +0100 Subject: [PATCH] Memoize .metadata.select_lines This is https://github.com/conda/conda-build/pull/5225/commits/31fefdb0ae9b556a9800a6b06acc2a2011263b68 which does not touch the original select_lines function body, but uses a much more complicated "try to make variant/selector namespace hashable". Signed-off-by: Marcel Bargull --- conda_build/metadata.py | 131 +++++++++++++++++++++++++--------------- 1 file changed, 81 insertions(+), 50 deletions(-) diff --git a/conda_build/metadata.py b/conda_build/metadata.py index 7a0f9da1a7..0922a1a314 100644 --- a/conda_build/metadata.py +++ b/conda_build/metadata.py @@ -36,7 +36,7 @@ ) if TYPE_CHECKING: - from typing import Any, Literal + from typing import Literal try: import yaml @@ -110,6 +110,10 @@ def remove_constructor(cls, tag): # used to avoid recomputing/rescanning recipe contents for used variables used_vars_cache = {} +# Placeholders singletons for os/os.environ used to memoize select_lines(). +_selector_placeholder_os = object() +_selector_placeholder_os_environ = object() + def get_selectors(config: Config) -> dict[str, bool]: """Aggregates selectors for use in recipe templating. @@ -268,64 +272,91 @@ def eval_selector(selector_string, namespace, variants_in_place): return eval_selector(next_string, namespace, variants_in_place) -@lru_cache(maxsize=None) -def _split_line_selector(text: str) -> tuple[tuple[str | None, str], ...]: - lines: list[tuple[str | None, str]] = [] - for line in text.splitlines(): - line = line.rstrip() +def select_lines(data, namespace, variants_in_place): + # Try to turn namespace into a hashable representation for memoization. + try: + namespace_copy = namespace.copy() + if namespace_copy.get("os") is os: + namespace_copy["os"] = _selector_placeholder_os + if namespace_copy.get("environ") is os.environ: + namespace_copy["environ"] = _selector_placeholder_os_environ + if "pin_run_as_build" in namespace_copy: + # This raises TypeError if pin_run_as_build is not a dict of dicts. + try: + namespace_copy["pin_run_as_build"] = tuple( + (key, tuple((k, v) for k, v in value.items())) + for key, value in namespace_copy["pin_run_as_build"].items() + ) + except (AttributeError, TypeError, ValueError): + # AttributeError: no .items method + # TypeError: .items() not iterable of iterables + # ValueError: .items() not iterable of only (k, v) tuples + raise TypeError + for k, v in namespace_copy.items(): + # Convert list/sets/tuple to tuples (of tuples if it contains + # list/set elements). Copy any other type verbatim and rather fall + # back to the non-memoized version to avoid wrong/lossy conversions. + if isinstance(v, (list, set, tuple)): + namespace_copy[k] = tuple( + tuple(e) if isinstance(e, (list, set)) else e + for e in v + ) + namespace_tuple = tuple(namespace_copy.items()) + # Raise TypeError if anything in namespace_tuple is not hashable. + hash(namespace_tuple) + except TypeError: + return _select_lines(data, namespace, variants_in_place) + return _select_lines_memoized(data, namespace_tuple, variants_in_place) - # skip comment lines, include a blank line as a placeholder - if line.lstrip().startswith("#"): - lines.append((None, "")) - continue - # include blank lines - if not line: - lines.append((None, "")) - continue +@lru_cache(maxsize=200) +def _select_lines_memoized(data, namespace_tuple, variants_in_place): + # Convert namespace_tuple to dict and undo the os/environ/pin_run_as_build + # replacements done in select_lines. + namespace = dict(namespace_tuple) + if namespace.get("os") is _selector_placeholder_os: + namespace["os"] = os + if namespace.get("environ") is _selector_placeholder_os_environ: + namespace["environ"] = os.environ + if "pin_run_as_build" in namespace: + namespace["pin_run_as_build"] = { + key: dict(value) + for key, value in namespace["pin_run_as_build"] + } + return _select_lines(data, namespace, variants_in_place) + + +def _select_lines(data, namespace, variants_in_place): lines = [] + + for i, line in enumerate(data.splitlines()): + line = line.rstrip() - # user may have quoted entire line to make YAML happy trailing_quote = "" if line and line[-1] in ("'", '"'): trailing_quote = line[-1] - match = sel_pat.match(line) - if match and (selector := match.group(3)): - # found a selector - lines.append((selector, (match.group(1) + trailing_quote).rstrip())) + if line.lstrip().startswith("#"): + # Don't bother with comment only lines + continue + m = sel_pat.match(line) + if m: + cond = m.group(3) + try: + if eval_selector(cond, namespace, variants_in_place): + lines.append(m.group(1) + trailing_quote) + except Exception as e: + sys.exit( + """\ +Error: Invalid selector in meta.yaml line %d: +offending line: +%s +exception: +%s +""" + % (i + 1, line, str(e)) + ) else: - # no selector found - lines.append((None, line)) - return tuple(lines) - - -def select_lines(text: str, namespace: dict[str, Any], variants_in_place: bool) -> str: - lines = [] - selector_cache: dict[str, bool] = {} - for i, (selector, line) in enumerate(_split_line_selector(text)): - if not selector: - # no selector? include line as is lines.append(line) - else: - # include lines with a selector that evaluates to True - try: - if selector_cache[selector]: - lines.append(line) - except KeyError: - # KeyError: cache miss - try: - value = bool(eval_selector(selector, namespace, variants_in_place)) - selector_cache[selector] = value - if value: - lines.append(line) - except Exception as e: - sys.exit( - f"Error: Invalid selector in meta.yaml line {i + 1}:\n" - f"offending line:\n" - f"{line}\n" - f"exception:\n" - f"{e.__class__.__name__}: {e}\n" - ) return "\n".join(lines) + "\n"