From 9caa1c6fa504878cb3f2f857f6e27b5874c2dfac Mon Sep 17 00:00:00 2001 From: Ken Odegard Date: Wed, 13 Mar 2024 09:15:45 -0500 Subject: [PATCH 01/11] Indent test_select_lines & mark as benchmark --- tests/test_metadata.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_metadata.py b/tests/test_metadata.py index 0f6da9b089..bb5e7aaa59 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -54,6 +54,7 @@ def test_uses_vcs_in_metadata(testing_workdir, testing_metadata): assert not testing_metadata.uses_vcs_in_build +@pytest.mark.benchmark def test_select_lines(): lines = "\n".join( ( From 0522d17cca9e9d1f94fb4bdb4114a7e810056c9e Mon Sep 17 00:00:00 2001 From: Ken Odegard Date: Thu, 14 Mar 2024 15:17:34 -0500 Subject: [PATCH 02/11] Add test_select_lines_battery --- tests/test_metadata.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_metadata.py b/tests/test_metadata.py index bb5e7aaa59..0f6da9b089 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -54,7 +54,6 @@ def test_uses_vcs_in_metadata(testing_workdir, testing_metadata): assert not testing_metadata.uses_vcs_in_build -@pytest.mark.benchmark def test_select_lines(): lines = "\n".join( ( From cd963f3018aec6ebf8dc4bc2ec0ad45cb3088c66 Mon Sep 17 00:00:00 2001 From: Ken Odegard Date: Wed, 13 Mar 2024 09:17:03 -0500 Subject: [PATCH 03/11] Typing get_recipe_text --- conda_build/metadata.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/conda_build/metadata.py b/conda_build/metadata.py index e3b814d8a7..12c927214e 100644 --- a/conda_build/metadata.py +++ b/conda_build/metadata.py @@ -2086,8 +2086,11 @@ def uses_vcs_in_build(self) -> Literal["git", "svn", "mercurial"] | None: return None def get_recipe_text( - self, extract_pattern=None, force_top_level=False, apply_selectors=True - ): + self, + extract_pattern: str | None = None, + force_top_level: bool = False, + apply_selectors: bool = True, + ) -> str: meta_path = self.meta_path if meta_path: recipe_text = read_meta_file(meta_path) From c89995d684d32c9a5fa7a7dfb80989278451690a Mon Sep 17 00:00:00 2001 From: Ken Odegard Date: Wed, 13 Mar 2024 09:16:22 -0500 Subject: [PATCH 04/11] Remove duplicate extract_package_and_build_text call --- conda_build/metadata.py | 1 - 1 file changed, 1 deletion(-) diff --git a/conda_build/metadata.py b/conda_build/metadata.py index 12c927214e..0f3f15c9d6 100644 --- a/conda_build/metadata.py +++ b/conda_build/metadata.py @@ -1664,7 +1664,6 @@ def build_id(self): raise RuntimeError( f"Couldn't extract raw recipe text for {self.name()} output" ) - raw_recipe_text = self.extract_package_and_build_text() raw_manual_build_string = re.search(r"\s*string:", raw_recipe_text) # user setting their own build string. Don't modify it. if manual_build_string and not ( From b57721a7d8bf839c722a055ef52f6a3c3429732f Mon Sep 17 00:00:00 2001 From: Ken Odegard Date: Wed, 13 Mar 2024 09:15:45 -0500 Subject: [PATCH 05/11] Refactor select_lines Reworks select_lines into a new cached helper function (_split_line_selector) that returns the parsed lines and selectors eliminating repeat parsing of the same file. --- conda_build/metadata.py | 76 +++++++++++++++++++++++++++-------------- tests/test_metadata.py | 2 ++ 2 files changed, 53 insertions(+), 25 deletions(-) diff --git a/conda_build/metadata.py b/conda_build/metadata.py index 0f3f15c9d6..211023817e 100644 --- a/conda_build/metadata.py +++ b/conda_build/metadata.py @@ -35,7 +35,7 @@ ) if TYPE_CHECKING: - from typing import Literal + from typing import Any, Literal try: import yaml @@ -267,38 +267,64 @@ def eval_selector(selector_string, namespace, variants_in_place): return eval_selector(next_string, namespace, variants_in_place) -def select_lines(data, namespace, variants_in_place): - lines = [] - - for i, line in enumerate(data.splitlines()): +@lru_cache(maxsize=None) +def _split_line_selector(text: str) -> tuple[tuple[str | None, str], ...]: + lines: list[tuple[str | None, str]] = [] + for line in text.splitlines(): line = line.rstrip() + # skip comment lines, include a blank line as a placeholder + if line.lstrip().startswith("#"): + lines.append((None, "")) + continue + + # include blank lines + if not line: + lines.append((None, "")) + continue + + # user may have quoted entire line to make YAML happy trailing_quote = "" if line and line[-1] in ("'", '"'): trailing_quote = line[-1] - if line.lstrip().startswith("#"): - # Don't bother with comment only lines - continue - m = sel_pat.match(line) - if m: - cond = m.group(3) - try: - if eval_selector(cond, namespace, variants_in_place): - lines.append(m.group(1) + trailing_quote) - except Exception as e: - sys.exit( - """\ -Error: Invalid selector in meta.yaml line %d: -offending line: -%s -exception: -%s -""" - % (i + 1, line, str(e)) - ) + match = sel_pat.match(line) + if match and (selector := match.group(3)): + # found a selector + lines.append((selector, (match.group(1) + trailing_quote).rstrip())) else: + # no selector found + lines.append((None, line)) + return tuple(lines) + + +def select_lines(text: str, namespace: dict[str, Any], variants_in_place: bool) -> str: + lines = [] + selector_cache: dict[str, bool] = {} + for i, (selector, line) in enumerate(_split_line_selector(text)): + if not selector: + # no selector? include line as is lines.append(line) + else: + # include lines with a selector that evaluates to True + try: + if selector_cache[selector]: + lines.append(line) + except KeyError: + # KeyError: cache miss + try: + value = bool(eval_selector(selector, namespace, variants_in_place)) + selector_cache[selector] = value + if value: + lines.append(line) + except Exception as e: + sys.exit( + f"Error: Invalid selector in meta.yaml line {i + 1}:\n" + f"offending line:\n" + f"{line}\n" + f"exception:\n" + f"{e.__class__.__name__}: {e}\n" + ) return "\n".join(lines) + "\n" diff --git a/tests/test_metadata.py b/tests/test_metadata.py index 0f6da9b089..1cee42432a 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -87,6 +87,7 @@ def test_select_lines(): " ' test '", ' " test "', "", + "", "test", " 'quoted'", ' "quoted"', @@ -108,6 +109,7 @@ def test_select_lines(): " ' test '", ' " test "', "", + "", "test {{ JINJA_VAR[:2] }}", "", # trailing newline ) From adef976ec3e3d690869639e11535f97678818647 Mon Sep 17 00:00:00 2001 From: Ken Odegard Date: Thu, 14 Mar 2024 15:38:55 -0500 Subject: [PATCH 06/11] Comments --- tests/test_metadata.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/test_metadata.py b/tests/test_metadata.py index 1cee42432a..1b9fc34258 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -57,14 +57,14 @@ def test_uses_vcs_in_metadata(testing_workdir, testing_metadata): def test_select_lines(): lines = "\n".join( ( - "", + "", # preserve leading newline "test", "test [abc] no", "test [abc] # no", " ' test ' ", ' " test " ', - "", - "# comment line", + "", # preserve newline + "# comment line", # preserve comment line (but not the comment) "test [abc]", " 'quoted # [abc] '", ' "quoted # [abc] yes "', @@ -74,20 +74,20 @@ def test_select_lines(): "test {{ JINJA_VAR[:2] }} # stuff yes [abc]", "test {{ JINJA_VAR[:2] }} # [abc] stuff yes", '{{ environ["test"] }} # [abc]', - "", # trailing newline + "", # preserve trailing newline ) ) assert select_lines(lines, {"abc": True}, variants_in_place=True) == "\n".join( ( - "", + "", # preserve leading newline "test", "test [abc] no", "test [abc] # no", " ' test '", ' " test "', - "", - "", + "", # preserve newline + "", # preserve comment line (but not the comment) "test", " 'quoted'", ' "quoted"', @@ -97,21 +97,21 @@ def test_select_lines(): "test {{ JINJA_VAR[:2] }}", "test {{ JINJA_VAR[:2] }}", '{{ environ["test"] }}', - "", # trailing newline + "", # preserve trailing newline ) ) assert select_lines(lines, {"abc": False}, variants_in_place=True) == "\n".join( ( - "", + "", # preserve leading newline "test", "test [abc] no", "test [abc] # no", " ' test '", ' " test "', - "", - "", + "", # preserve newline + "", # preserve comment line (but not the comment) "test {{ JINJA_VAR[:2] }}", - "", # trailing newline + "", # preserve trailing newline ) ) From 7c8972ac919c6df523c0893f4a4767e1bd777871 Mon Sep 17 00:00:00 2001 From: Ken Odegard Date: Mon, 18 Mar 2024 09:50:04 -0500 Subject: [PATCH 07/11] Add news --- news/5237-select_lines-caching | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 news/5237-select_lines-caching diff --git a/news/5237-select_lines-caching b/news/5237-select_lines-caching new file mode 100644 index 0000000000..434a832350 --- /dev/null +++ b/news/5237-select_lines-caching @@ -0,0 +1,19 @@ +### Enhancements + +* Add `conda_build.metadata._split_line_selector` to cache line-selector parsed text. (#5237) + +### Bug fixes + +* + +### Deprecations + +* + +### Docs + +* + +### Other + +* From 345eb63711e3939daff0c66e17be0a199372a523 Mon Sep 17 00:00:00 2001 From: Ken Odegard Date: Fri, 12 Apr 2024 12:57:47 -0500 Subject: [PATCH 08/11] Revert "Remove duplicate extract_package_and_build_text call" This reverts commit c89995d684d32c9a5fa7a7dfb80989278451690a. --- conda_build/metadata.py | 1 + 1 file changed, 1 insertion(+) diff --git a/conda_build/metadata.py b/conda_build/metadata.py index 45875d687f..d5497d7778 100644 --- a/conda_build/metadata.py +++ b/conda_build/metadata.py @@ -1691,6 +1691,7 @@ def build_id(self): raise RuntimeError( f"Couldn't extract raw recipe text for {self.name()} output" ) + raw_recipe_text = self.extract_package_and_build_text() raw_manual_build_string = re.search(r"\s*string:", raw_recipe_text) # user setting their own build string. Don't modify it. if manual_build_string and not ( From 81dd0877d62e82f9532b6db0f008397726ee76e5 Mon Sep 17 00:00:00 2001 From: Marcel Bargull Date: Tue, 27 Feb 2024 08:18:05 +0100 Subject: [PATCH 09/11] Search static strings before costlier regex search Signed-off-by: Marcel Bargull --- conda_build/metadata.py | 9 +++++---- conda_build/variants.py | 4 ++++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/conda_build/metadata.py b/conda_build/metadata.py index d5497d7778..e97ab21290 100644 --- a/conda_build/metadata.py +++ b/conda_build/metadata.py @@ -289,10 +289,11 @@ def _split_line_selector(text: str) -> tuple[tuple[str | None, str], ...]: if line and line[-1] in ("'", '"'): trailing_quote = line[-1] - match = sel_pat.match(line) - if match and (selector := match.group(3)): - # found a selector - lines.append((selector, (match.group(1) + trailing_quote).rstrip())) + # Checking for "[" and "]" before regex matching every line is a bit faster. + if "[" in line and "]" in line and (match := sel_pat.match(line)): + if selector := match.group(3): + # found a selector + lines.append((selector, (match.group(1) + trailing_quote).rstrip())) else: # no selector found lines.append((None, line)) diff --git a/conda_build/variants.py b/conda_build/variants.py index c5bbe9a41e..46862fdbe4 100644 --- a/conda_build/variants.py +++ b/conda_build/variants.py @@ -763,6 +763,8 @@ def find_used_variables_in_shell_script(variant, file_path): text = f.read() used_variables = set() for v in variant: + if v not in text: + continue variant_regex = r"(^[^$]*?\$\{?\s*%s\s*[\s|\}])" % v if re.search(variant_regex, text, flags=re.MULTILINE | re.DOTALL): used_variables.add(v) @@ -774,6 +776,8 @@ def find_used_variables_in_batch_script(variant, file_path): text = f.read() used_variables = set() for v in variant: + if v not in text: + continue variant_regex = r"\%" + v + r"\%" if re.search(variant_regex, text, flags=re.MULTILINE | re.DOTALL): used_variables.add(v) From 92595c402b07aead847e24e7f1471fb1fc0b1952 Mon Sep 17 00:00:00 2001 From: Ken Odegard Date: Fri, 12 Apr 2024 13:16:20 -0500 Subject: [PATCH 10/11] Combine if clauses --- conda_build/metadata.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/conda_build/metadata.py b/conda_build/metadata.py index e97ab21290..4172808e3c 100644 --- a/conda_build/metadata.py +++ b/conda_build/metadata.py @@ -290,10 +290,13 @@ def _split_line_selector(text: str) -> tuple[tuple[str | None, str], ...]: trailing_quote = line[-1] # Checking for "[" and "]" before regex matching every line is a bit faster. - if "[" in line and "]" in line and (match := sel_pat.match(line)): - if selector := match.group(3): - # found a selector - lines.append((selector, (match.group(1) + trailing_quote).rstrip())) + if ( + ("[" in line and "]" in line) + and (match := sel_pat.match(line)) + and (selector := match.group(3)) + ): + # found a selector + lines.append((selector, (match.group(1) + trailing_quote).rstrip())) else: # no selector found lines.append((None, line)) From 4035765248327171a1d75f860a2e6f0d21174d31 Mon Sep 17 00:00:00 2001 From: Ken Odegard Date: Wed, 17 Apr 2024 20:45:00 -0500 Subject: [PATCH 11/11] Remove unrelated changes --- conda_build/variants.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/conda_build/variants.py b/conda_build/variants.py index dd42371e3f..be59e36603 100644 --- a/conda_build/variants.py +++ b/conda_build/variants.py @@ -771,8 +771,6 @@ def find_used_variables_in_shell_script(variant, file_path): text = f.read() used_variables = set() for v in variant: - if v not in text: - continue variant_regex = rf"(^[^$]*?\$\{{?\s*{re.escape(v)}\s*[\s|\}}])" if re.search(variant_regex, text, flags=re.MULTILINE | re.DOTALL): used_variables.add(v) @@ -784,8 +782,6 @@ def find_used_variables_in_batch_script(variant, file_path): text = f.read() used_variables = set() for v in variant: - if v not in text: - continue variant_regex = rf"\%{re.escape(v)}\%" if re.search(variant_regex, text, flags=re.MULTILINE | re.DOTALL): used_variables.add(v)