diff --git a/conda_build/config.py b/conda_build/config.py
index 5d4ba590d3..99c6853def 100644
--- a/conda_build/config.py
+++ b/conda_build/config.py
@@ -8,6 +8,7 @@
 import copy
 import math
 import os
+import pickle
 import re
 import shutil
 import time
@@ -822,9 +823,16 @@ def clean_pkgs(self):
 
     def copy(self):
         new = copy.copy(self)
-        new.variant = copy.deepcopy(self.variant)
+        # Use pickle.loads(pickle.dumps(...)) as a faster deepcopy alternative, if possible.
+        try:
+            new.variant = pickle.loads(pickle.dumps(self.variant, -1))
+        except Exception:
+            new.variant = copy.deepcopy(self.variant)
         if hasattr(self, "variants"):
-            new.variants = copy.deepcopy(self.variants)
+            try:
+                new.variants = pickle.loads(pickle.dumps(self.variants, -1))
+            except Exception:
+                new.variants = copy.deepcopy(self.variants)
         return new
 
     # context management - automatic cleanup if self.dirty or self.keep_old_work is not True
diff --git a/conda_build/metadata.py b/conda_build/metadata.py
index 633b6de8fc..a5bcd938a0 100644
--- a/conda_build/metadata.py
+++ b/conda_build/metadata.py
@@ -109,6 +109,31 @@ def remove_constructor(cls, tag):
 # used to avoid recomputing/rescanning recipe contents for used variables
 used_vars_cache = {}
 
+# Placeholder singletons for os/os.environ used to memoize select_lines().
+_selector_placeholder_os = object()
+_selector_placeholder_os_environ = object()
+
+
+@lru_cache(maxsize=200)
+def re_findall(pattern, text, flags=0):
+    if isinstance(pattern, re.Pattern):
+        return pattern.findall(text)
+    return re.findall(pattern, text, flags)
+
+
+@lru_cache(maxsize=200)
+def re_match(pattern, text, flags=0):
+    if isinstance(pattern, re.Pattern):
+        return pattern.match(text)
+    return re.match(pattern, text, flags)
+
+
+@lru_cache(maxsize=200)
+def re_search(pattern, text, flags=0):
+    if isinstance(pattern, re.Pattern):
+        return pattern.search(text)
+    return re.search(pattern, text, flags)
+
 
 def get_selectors(config: Config) -> dict[str, bool]:
     """Aggregates selectors for use in recipe templating.
@@ -241,7 +266,7 @@ def ns_cfg(config: Config) -> dict[str, bool]:
 # "NameError: name 'var' is not defined", where var is the variable that is not defined. This gets
 # returned
 def parseNameNotFound(error):
-    m = re.search("'(.+?)'", str(error))
+    m = re_search("'(.+?)'", str(error))
     if len(m.groups()) == 1:
         return m.group(1)
     else:
@@ -268,6 +293,58 @@ def eval_selector(selector_string, namespace, variants_in_place):
 
 
 def select_lines(data, namespace, variants_in_place):
+    # Try to turn namespace into a hashable representation for memoization.
+    try:
+        namespace_copy = namespace.copy()
+        if namespace_copy.get("os") is os:
+            namespace_copy["os"] = _selector_placeholder_os
+        if namespace_copy.get("environ") is os.environ:
+            namespace_copy["environ"] = _selector_placeholder_os_environ
+        if "pin_run_as_build" in namespace_copy:
+            # This raises TypeError if pin_run_as_build is not a dict of dicts.
+            try:
+                namespace_copy["pin_run_as_build"] = tuple(
+                    (key, tuple((k, v) for k, v in value.items()))
+                    for key, value in namespace_copy["pin_run_as_build"].items()
+                )
+            except (AttributeError, TypeError, ValueError):
+                # AttributeError: no .items method
+                # TypeError: .items() not iterable of iterables
+                # ValueError: .items() not iterable of only (k, v) tuples
+                raise TypeError
+        for k, v in namespace_copy.items():
+            # Convert list/sets/tuple to tuples (of tuples if it contains
+            # list/set elements). Copy any other type verbatim and rather fall
+            # back to the non-memoized version to avoid wrong/lossy conversions.
+            if isinstance(v, (list, set, tuple)):
+                namespace_copy[k] = tuple(
+                    tuple(e) if isinstance(e, (list, set)) else e for e in v
+                )
+        namespace_tuple = tuple(namespace_copy.items())
+        # Raise TypeError if anything in namespace_tuple is not hashable.
+        hash(namespace_tuple)
+    except TypeError:
+        return _select_lines(data, namespace, variants_in_place)
+    return _select_lines_memoized(data, namespace_tuple, variants_in_place)
+
+
+@lru_cache(maxsize=200)
+def _select_lines_memoized(data, namespace_tuple, variants_in_place):
+    # Convert namespace_tuple to dict and undo the os/environ/pin_run_as_build
+    # replacements done in select_lines.
+    namespace = dict(namespace_tuple)
+    if namespace.get("os") is _selector_placeholder_os:
+        namespace["os"] = os
+    if namespace.get("environ") is _selector_placeholder_os_environ:
+        namespace["environ"] = os.environ
+    if "pin_run_as_build" in namespace:
+        namespace["pin_run_as_build"] = {
+            key: dict(value) for key, value in namespace["pin_run_as_build"]
+        }
+    return _select_lines(data, namespace, variants_in_place)
+
+
+def _select_lines(data, namespace, variants_in_place):
     lines = []
 
     for i, line in enumerate(data.splitlines()):
@@ -280,8 +357,7 @@ def select_lines(data, namespace, variants_in_place):
         if line.lstrip().startswith("#"):
             # Don't bother with comment only lines
             continue
-        m = sel_pat.match(line)
-        if m:
+        if "[" in line and "]" in line and (m := re_match(sel_pat, line)):
             cond = m.group(3)
             try:
                 if eval_selector(cond, namespace, variants_in_place):
@@ -888,8 +964,8 @@ def get_output_dicts_from_metadata(metadata, outputs=None):
         outputs.append(OrderedDict(name=metadata.name()))
     for out in outputs:
         if (
-            "package:" in metadata.get_recipe_text()
-            and out.get("name") == metadata.name()
+            out.get("name") == metadata.name()
+            and "package:" in metadata.get_recipe_text()
         ):
             combine_top_level_metadata_with_output(metadata, out)
     return outputs
@@ -1014,9 +1090,10 @@ def get_updated_output_dict_from_reparsed_metadata(original_dict, new_outputs):
     return output_d
 
 
+@lru_cache(maxsize=200)
def _filter_recipe_text(text, extract_pattern=None):
     if extract_pattern:
-        match = re.search(extract_pattern, text, flags=re.MULTILINE | re.DOTALL)
+        match = re_search(extract_pattern, text, flags=re.MULTILINE | re.DOTALL)
         text = (
             "\n".join({string for string in match.groups() if string}) if match else ""
         )
@@ -1622,7 +1699,7 @@ def get_hash_contents(self):
         dependencies = [
             req
             for req in dependencies
-            if not exclude_pattern.match(req) or " " in self.config.variant[req]
+            if not re_match(exclude_pattern, req) or " " in self.config.variant[req]
         ]
 
         # retrieve values - this dictionary is what makes up the hash.
@@ -1666,12 +1743,11 @@ def build_id(self):
             raise RuntimeError(
                 f"Couldn't extract raw recipe text for {self.name()} output"
             )
-        raw_recipe_text = self.extract_package_and_build_text()
-        raw_manual_build_string = re.search(r"\s*string:", raw_recipe_text)
+        raw_manual_build_string = re_search(r"\s*string:", raw_recipe_text)
         # user setting their own build string. Don't modify it.
         if manual_build_string and not (
             raw_manual_build_string
-            and re.findall(r"h\{\{\s*PKG_HASH\s*\}\}", raw_manual_build_string.string)
+            and re_findall(r"h\{\{\s*PKG_HASH\s*\}\}", raw_manual_build_string.string)
         ):
             check_bad_chrs(manual_build_string, "build/string")
             out = manual_build_string
@@ -1680,7 +1756,7 @@ def build_id(self):
             out = build_string_from_metadata(self)
         if self.config.filename_hashing and self.final:
             hash_ = self.hash_dependencies()
-            if not re.findall("h[0-9a-f]{%s}" % self.config.hash_length, out):
+            if not re_findall("h[0-9a-f]{%s}" % self.config.hash_length, out):
                 ret = out.rsplit("_", 1)
                 try:
                     int(ret[0])
@@ -2038,7 +2114,7 @@ def uses_jinja(self):
             return False
         with open(self.meta_path, "rb") as f:
             meta_text = UnicodeDammit(f.read()).unicode_markup
-        matches = re.findall(r"{{.*}}", meta_text)
+        matches = re_findall(r"{{.*}}", meta_text)
         return len(matches) > 0
 
     @property
@@ -2053,7 +2129,7 @@ def uses_vcs_in_meta(self) -> Literal["git", "svn", "mercurial"] | None:
         with open(self.meta_path, "rb") as f:
             meta_text = UnicodeDammit(f.read()).unicode_markup
         for _vcs in vcs_types:
-            matches = re.findall(rf"{_vcs.upper()}_[^\.\s\'\"]+", meta_text)
+            matches = re_findall(rf"{_vcs.upper()}_[^\.\s\'\"]+", meta_text)
             if len(matches) > 0 and _vcs != self.get_value("package/name"):
                 if _vcs == "hg":
                     _vcs = "mercurial"
@@ -2076,7 +2152,7 @@ def uses_vcs_in_build(self) -> Literal["git", "svn", "mercurial"] | None:
                 # 1. the vcs command, optionally with an exe extension
                 # 2. a subcommand - for example, "clone"
                 # 3. a target url or other argument
-                matches = re.findall(
+                matches = re_findall(
                     rf"{vcs}(?:\.exe)?(?:\s+\w+\s+[\w\/\.:@]+)",
                     build_script,
                     flags=re.IGNORECASE,
@@ -2146,7 +2222,7 @@ def extract_single_output_text(
         # first, need to figure out which index in our list of outputs the name matches.
         # We have to do this on rendered data, because templates can be used in output names
         recipe_text = self.extract_outputs_text(apply_selectors=apply_selectors)
-        output_matches = output_re.findall(recipe_text)
+        output_matches = re_findall(output_re, recipe_text)
         outputs = self.meta.get("outputs") or (
             self.parent_outputs if hasattr(self, "parent_outputs") else None
         )
@@ -2187,15 +2263,15 @@ def numpy_xx(self) -> bool:
         """This is legacy syntax that we need to support for a while. numpy x.x means
         "pin run as build" for numpy. It was special-cased to only numpy."""
         text = self.extract_requirements_text()
-        return bool(numpy_xx_re.search(text))
+        return bool(re_search(numpy_xx_re, text))
 
     @property
     def uses_numpy_pin_compatible_without_xx(self) -> tuple[bool, bool]:
         text = self.extract_requirements_text()
-        compatible_search = numpy_compatible_re.search(text)
+        compatible_search = re_search(numpy_compatible_re, text)
         max_pin_search = None
         if compatible_search:
-            max_pin_search = numpy_compatible_x_re.search(text)
+            max_pin_search = re_search(numpy_compatible_x_re, text)
         # compatible_search matches simply use of pin_compatible('numpy')
         # max_pin_search quantifies the actual number of x's in the max_pin field. The max_pin
         # field can be absent, which is equivalent to a single 'x'
@@ -2212,7 +2288,8 @@ def uses_subpackage(self):
             if "name" in out:
                 name_re = re.compile(r"^{}(\s|\Z|$)".format(out["name"]))
                 in_reqs = any(
-                    name_re.match(req) for req in self.get_depends_top_and_out("run")
+                    re_match(name_re, req)
+                    for req in self.get_depends_top_and_out("run")
                 )
                 if in_reqs:
                     break
@@ -2220,13 +2297,13 @@ def uses_subpackage(self):
         if not in_reqs and self.meta_path:
             data = self.extract_requirements_text(force_top_level=True)
             if data:
-                subpackage_pin = re.search(r"{{\s*pin_subpackage\(.*\)\s*}}", data)
+                subpackage_pin = re_search(r"{{\s*pin_subpackage\(.*\)\s*}}", data)
         return in_reqs or bool(subpackage_pin)
 
     @property
     def uses_new_style_compiler_activation(self):
         text = self.extract_requirements_text()
-        return bool(re.search(r"\{\{\s*compiler\(.*\)\s*\}\}", text))
+        return bool(re_search(r"\{\{\s*compiler\(.*\)\s*\}\}", text))
 
     def validate_features(self):
         if any(
@@ -2287,7 +2364,7 @@ def variant_in_source(self):
                 # We use this variant in the top-level recipe.
                 # constrain the stored variants to only this version in the output
                 # variant mapping
-                if re.search(
+                if re_search(
                     r"\s*\{\{\s*%s\s*(?:.*?)?\}\}" % key, self.extract_source_text()
                 ):
                     return True
@@ -2371,15 +2448,19 @@ def get_output_metadata(self, output):
             )
             if build_reqs:
                 build_reqs = [
-                    req for req in build_reqs if not subpackage_pattern.match(req)
+                    req
+                    for req in build_reqs
+                    if not re_match(subpackage_pattern, req)
                 ]
             if host_reqs:
                 host_reqs = [
-                    req for req in host_reqs if not subpackage_pattern.match(req)
+                    req
+                    for req in host_reqs
+                    if not re_match(subpackage_pattern, req)
                 ]
             if run_reqs:
                 run_reqs = [
-                    req for req in run_reqs if not subpackage_pattern.match(req)
+                    req for req in run_reqs if not re_match(subpackage_pattern, req)
                 ]
 
             requirements = {}
@@ -2642,12 +2723,13 @@ def get_loop_vars(self):
         return variants.get_vars(_variants, loop_only=True)
 
     def get_used_loop_vars(self, force_top_level=False, force_global=False):
+        loop_vars = set(self.get_loop_vars())
         return {
             var
            for var in self.get_used_vars(
                 force_top_level=force_top_level, force_global=force_global
             )
-            if var in self.get_loop_vars()
+            if var in loop_vars
         }
 
     def get_rendered_recipe_text(
@@ -2813,7 +2895,7 @@ def _get_used_vars_meta_yaml_helper(
         reqs_re = re.compile(
             r"requirements:.+?(?=^\w|\Z|^\s+-\s(?=name|type))", flags=re.M | re.S
         )
-        reqs_text = reqs_re.search(recipe_text)
+        reqs_text = re_search(reqs_re, recipe_text)
         reqs_text = reqs_text.group() if reqs_text else ""
 
         return reqs_text, recipe_text
diff --git a/conda_build/variants.py b/conda_build/variants.py
index 2ece5f4bd6..1627dcc5df 100644
--- a/conda_build/variants.py
+++ b/conda_build/variants.py
@@ -707,16 +707,14 @@ def get_package_variants(recipedir_or_metadata, config=None, variants=None):
 def get_vars(variants, loop_only=False):
     """For purposes of naming/identifying, provide a way of identifying which variables
     contribute to the matrix dimensionality"""
+    first_variant, *other_variants = variants
     special_keys = {"pin_run_as_build", "zip_keys", "ignore_version"}
-    special_keys.update(set(ensure_list(variants[0].get("extend_keys"))))
+    special_keys.update(set(ensure_list(first_variant.get("extend_keys"))))
     loop_vars = [
         k
-        for k in variants[0]
+        for k, v in first_variant.items()
         if k not in special_keys
-        and (
-            not loop_only
-            or any(variant[k] != variants[0][k] for variant in variants[1:])
-        )
+        and (not loop_only or any(variant[k] != v for variant in other_variants))
     ]
     return loop_vars
 
@@ -773,7 +771,9 @@ def find_used_variables_in_shell_script(variant, file_path):
         text = f.read()
     used_variables = set()
     for v in variant:
-        variant_regex = r"(^[^$]*?\$\{?\s*%s\s*[\s|\}])" % v
+        if v not in text:
+            continue
+        variant_regex = rf"(^[^$]*?\$\{{?\s*{re.escape(v)}\s*[\s|\}}])"
         if re.search(variant_regex, text, flags=re.MULTILINE | re.DOTALL):
             used_variables.add(v)
     return used_variables
@@ -784,7 +784,9 @@ def find_used_variables_in_batch_script(variant, file_path):
         text = f.read()
     used_variables = set()
     for v in variant:
-        variant_regex = r"\%" + v + r"\%"
+        if v not in text:
+            continue
+        variant_regex = rf"\%{re.escape(v)}\%"
         if re.search(variant_regex, text, flags=re.MULTILINE | re.DOTALL):
             used_variables.add(v)
     return used_variables
diff --git a/news/5225-render-misc-speedups b/news/5225-render-misc-speedups
new file mode 100644
index 0000000000..c6f84eab00
--- /dev/null
+++ b/news/5225-render-misc-speedups
@@ -0,0 +1,19 @@
+### Enhancements
+
+* Miscellaneous recipe rendering speed ups. (#5224 via #5225)
+
+### Bug fixes
+
+*
+
+### Deprecations
+
+*
+
+### Docs
+
+*
+
+### Other
+
+*