From 4a01ec9b925d7f27a2143a00b632cde6d929708f Mon Sep 17 00:00:00 2001 From: Richard Levasseur Date: Tue, 29 Oct 2024 20:37:35 -0700 Subject: [PATCH 1/8] wip: make sys.executable work with script bootstrap --- CHANGELOG.md | 14 ++ python/private/BUILD.bazel | 8 + python/private/py_executable_bazel.bzl | 132 +++++++++++- python/private/py_runtime_info.bzl | 37 +++- python/private/py_runtime_rule.bzl | 12 ++ python/private/site_init_template.py | 199 ++++++++++++++++++ python/private/stage1_bootstrap_template.sh | 48 ++++- python/private/stage2_bootstrap_template.py | 108 +++------- python/private/zip_main_template.py | 33 ++- tests/bootstrap_impls/BUILD.bazel | 11 +- tests/bootstrap_impls/call_sys_exe.py | 51 +++++ .../sys_executable_inherits_sys_path_test.sh | 47 +++++ tests/bootstrap_impls/sys_path_order_test.py | 14 +- tests/support/sh_py_run_test.bzl | 17 +- 14 files changed, 642 insertions(+), 89 deletions(-) create mode 100644 python/private/site_init_template.py create mode 100644 tests/bootstrap_impls/call_sys_exe.py create mode 100755 tests/bootstrap_impls/sys_executable_inherits_sys_path_test.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index f067b4f69..c352def86 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -72,6 +72,8 @@ Unreleased changes template. Other changes: * (python_repository) Start honoring the `strip_prefix` field for `zstd` archives. * (pypi) {bzl:obj}`pip_parse.extra_hub_aliases` now works in WORKSPACE files. +* (binaries/tests) For {obj}`--bootstrap_impl=script`, a binary-specific (but + otherwise empty) virtual env is used to customize `sys.path` initialization. {#v0-0-0-fixed} ### Fixed @@ -81,6 +83,9 @@ Other changes: Fixes ([2337](https://github.com/bazelbuild/rules_python/issues/2337)). * (uv): Correct the sha256sum for the `uv` binary for aarch64-apple-darwin. Fixes ([2411](https://github.com/bazelbuild/rules_python/issues/2411)). +* (binaries/tests) ({obj}`--bootstrap_impl=scipt`) Using `sys.executable` will + use the same `sys.path` setup as the calling binary. + ([2169](https://github.com/bazelbuild/rules_python/issues/2169)). {#v0-0-0-added} ### Added @@ -119,7 +124,13 @@ Other changes: {#v0-40-added} ### Added +<<<<<<< HEAD * Nothing added. +======= +* (providers) Added {obj}`py_runtime_info.site_init_template` and + {obj}`PyRuntimeInfo.site_init_template` for specifying the template to use to + initialize the interpreter via venv startup hooks. +>>>>>>> a057e85e (wip: make sys.executable work with script bootstrap) {#v0-40-removed} ### Removed @@ -163,6 +174,9 @@ Other changes: * (precompiling) Skip precompiling (instead of erroring) if the legacy `@bazel_tools//tools/python:autodetecting_toolchain` is being used ([#2364](https://github.com/bazelbuild/rules_python/issues/2364)). +* (bzlmod) Generate `config_setting` values for all available toolchains instead + of only the registered toolchains, which restores the previous behaviour that + `bzlmod` users would have observed. {#v0-39-0-added} ### Added diff --git a/python/private/BUILD.bazel b/python/private/BUILD.bazel index 39af217bf..9772089e9 100644 --- a/python/private/BUILD.bazel +++ b/python/private/BUILD.bazel @@ -702,6 +702,14 @@ filegroup( visibility = ["//visibility:public"], ) +filegroup( + name = "site_init_template", + srcs = ["site_init_template.py"], + # Not actually public. Only public because it's an implicit dependency of + # py_runtime. + visibility = ["//visibility:public"], +) + # NOTE: Windows builds don't use this bootstrap. Instead, a native Windows # program locates some Python exe and runs `python.exe foo.zip` which # runs the __main__.py in the zip file. diff --git a/python/private/py_executable_bazel.bzl b/python/private/py_executable_bazel.bzl index 6f9c0947a..b5f519efb 100644 --- a/python/private/py_executable_bazel.bzl +++ b/python/private/py_executable_bazel.bzl @@ -81,6 +81,9 @@ the `srcs` of Python targets as required. "_py_toolchain_type": attr.label( default = TARGET_TOOLCHAIN_TYPE, ), + "_python_version_flag": attr.label( + default = "//python/config_settings:python_version", + ), "_windows_launcher_maker": attr.label( default = "@bazel_tools//tools/launcher:launcher_maker", cfg = "exec", @@ -177,6 +180,8 @@ def _create_executable( else: base_executable_name = executable.basename + venv = None + # The check for stage2_bootstrap_template is to support legacy # BuiltinPyRuntimeInfo providers, which is likely to come from # @bazel_tools//tools/python:autodetecting_toolchain, the toolchain used @@ -184,6 +189,13 @@ def _create_executable( if (BootstrapImplFlag.get_value(ctx) == BootstrapImplFlag.SCRIPT and runtime_details.effective_runtime and hasattr(runtime_details.effective_runtime, "stage2_bootstrap_template")): + venv = _create_venv( + ctx, + output_prefix = base_executable_name, + imports = imports, + runtime_details = runtime_details, + ) + stage2_bootstrap = _create_stage2_bootstrap( ctx, output_prefix = base_executable_name, @@ -192,11 +204,12 @@ def _create_executable( imports = imports, runtime_details = runtime_details, ) - extra_runfiles = ctx.runfiles([stage2_bootstrap]) + extra_runfiles = ctx.runfiles([stage2_bootstrap] + venv.files_without_interpreter) zip_main = _create_zip_main( ctx, stage2_bootstrap = stage2_bootstrap, runtime_details = runtime_details, + venv = venv, ) else: stage2_bootstrap = None @@ -272,6 +285,7 @@ def _create_executable( zip_file = zip_file, stage2_bootstrap = stage2_bootstrap, runtime_details = runtime_details, + venv = venv, ) elif bootstrap_output: _create_stage1_bootstrap( @@ -282,6 +296,7 @@ def _create_executable( is_for_zip = False, imports = imports, main_py = main_py, + venv = venv, ) else: # Otherwise, this should be the Windows case of launcher + zip. @@ -296,13 +311,20 @@ def _create_executable( build_zip_enabled = build_zip_enabled, )) + # The interpreter is added this late in the process so that it isn't + # added to the files that zipping processes. + if venv: + extra_runfiles = extra_runfiles.merge(ctx.runfiles([venv.interpreter])) return create_executable_result_struct( extra_files_to_build = depset(extra_files_to_build), output_groups = {"python_zip_file": depset([zip_file])}, extra_runfiles = extra_runfiles, ) -def _create_zip_main(ctx, *, stage2_bootstrap, runtime_details): +def _create_zip_main(ctx, *, stage2_bootstrap, runtime_details, venv): + python_binary = _runfiles_root_path(ctx, venv.interpreter.short_path) + python_binary_actual = _runfiles_root_path(ctx, venv.interpreter_actual_path) + # The location of this file doesn't really matter. It's added to # the zip file as the top-level __main__.py file and not included # elsewhere. @@ -311,7 +333,8 @@ def _create_zip_main(ctx, *, stage2_bootstrap, runtime_details): template = runtime_details.effective_runtime.zip_main_template, output = output, substitutions = { - "%python_binary%": runtime_details.executable_interpreter_path, + "%python_binary%": python_binary, + "%python_binary_actual%": python_binary_actual, "%stage2_bootstrap%": "{}/{}".format( ctx.workspace_name, stage2_bootstrap.short_path, @@ -321,6 +344,75 @@ def _create_zip_main(ctx, *, stage2_bootstrap, runtime_details): ) return output +# Create a venv the executable can use. +# For venv details and the venv startup process, see: +# * https://docs.python.org/3/library/venv.html +# * https://snarky.ca/how-virtual-environments-work/ +# * https://github.com/python/cpython/blob/main/Modules/getpath.py +# * https://github.com/python/cpython/blob/main/Lib/site.py +def _create_venv(ctx, output_prefix, imports, runtime_details): + venv = "_{}.venv".format(output_prefix.lstrip("_")) + + # The pyvenv.cfg file must be present to trigger the venv site hooks. + # Because it's paths are expected to be absolute paths, we can't reliably + # put much in it. See https://github.com/python/cpython/issues/83650 + pyvenv_cfg = ctx.actions.declare_file("{}/pyvenv.cfg".format(venv)) + ctx.actions.write(pyvenv_cfg, "") + + runtime = runtime_details.effective_runtime + if runtime.interpreter: + py_exe_basename = paths.basename(runtime.interpreter.short_path) + interpreter = ctx.actions.declare_file("{}/bin/{}".format(venv, py_exe_basename)) + ctx.actions.symlink(output = interpreter, target_file = runtime.interpreter) + interpreter_actual_path = runtime.interpreter.short_path + else: + py_exe_basename = paths.basename(runtime.interpreter_path) + interpreter = ctx.actions.declare_symlink("{}/bin/{}".format(venv, py_exe_basename)) + ctx.actions.symlink(output = interpreter, target_path = runtime.interpreter_path) + interpreter_actual_path = runtime.interpreter_path + + if runtime.interpreter_version_info: + version = "{}.{}".format( + runtime.interpreter_version_info.major, + runtime.interpreter_version_info.minor, + ) + else: + version_flag = ctx.attr._python_version_flag[config_common.FeatureFlagInfo].value + version_flag_parts = version_flag.split(".")[0:2] + version = "{}.{}".format(*version_flag_parts) + + # See site.py logic: free-threaded builds append "t" to the venv lib dir name + if "t" in runtime.abi_flags: + version += "t" + + site_packages = "{}/lib/python{}/site-packages".format(venv, version) + pth = ctx.actions.declare_file("{}/bazel.pth".format(site_packages)) + ctx.actions.write(pth, "import _bazel_site_init\n") + + site_init = ctx.actions.declare_file("{}/_bazel_site_init.py".format(site_packages)) + computed_subs = ctx.actions.template_dict() + computed_subs.add_joined("%imports%", imports, join_with = ":", map_each = _map_each_identity) + ctx.actions.expand_template( + template = runtime.site_init_template, + output = site_init, + substitutions = { + "%import_all%": "True" if ctx.fragments.bazel_py.python_import_all_repositories else "False", + "%site_init_runfiles_path%": "{}/{}".format(ctx.workspace_name, site_init.short_path), + "%workspace_name%": ctx.workspace_name, + }, + computed_substitutions = computed_subs, + ) + + return struct( + interpreter = interpreter, + # Runfiles-relative path or absolute path + interpreter_actual_path = interpreter_actual_path, + files_without_interpreter = [pyvenv_cfg, pth, site_init], + ) + +def _map_each_identity(v): + return v + def _create_stage2_bootstrap( ctx, *, @@ -363,6 +455,13 @@ def _create_stage2_bootstrap( ) return output +def _runfiles_root_path(ctx, path): + # The ../ comes from short_path for files in other repos. + if path.startswith("../"): + return path[3:] + else: + return "{}/{}".format(ctx.workspace_name, path) + def _create_stage1_bootstrap( ctx, *, @@ -371,12 +470,24 @@ def _create_stage1_bootstrap( stage2_bootstrap = None, imports = None, is_for_zip, - runtime_details): + runtime_details, + venv = None): runtime = runtime_details.effective_runtime + if venv: + python_binary_path = _runfiles_root_path(ctx, venv.interpreter.short_path) + else: + python_binary_path = runtime_details.executable_interpreter_path + + if is_for_zip and venv: + python_binary_actual = _runfiles_root_path(ctx, venv.interpreter_actual_path) + else: + python_binary_actual = "" + subs = { "%is_zipfile%": "1" if is_for_zip else "0", - "%python_binary%": runtime_details.executable_interpreter_path, + "%python_binary%": python_binary_path, + "%python_binary_actual%": python_binary_actual, "%target%": str(ctx.label), "%workspace_name%": ctx.workspace_name, } @@ -447,6 +558,7 @@ def _create_windows_exe_launcher( ) def _create_zip_file(ctx, *, output, original_nonzip_executable, zip_main, runfiles): + """Create a Python zipapp (zip with __main__.py entry point).""" workspace_name = ctx.workspace_name legacy_external_runfiles = _py_builtins.get_legacy_external_runfiles(ctx) @@ -524,7 +636,14 @@ def _get_zip_runfiles_path(path, workspace_name, legacy_external_runfiles): zip_runfiles_path = paths.normalize("{}/{}".format(workspace_name, path)) return "{}/{}".format(_ZIP_RUNFILES_DIRECTORY_NAME, zip_runfiles_path) -def _create_executable_zip_file(ctx, *, output, zip_file, stage2_bootstrap, runtime_details): +def _create_executable_zip_file( + ctx, + *, + output, + zip_file, + stage2_bootstrap, + runtime_details, + venv): prelude = ctx.actions.declare_file( "{}_zip_prelude.sh".format(output.basename), sibling = output, @@ -536,6 +655,7 @@ def _create_executable_zip_file(ctx, *, output, zip_file, stage2_bootstrap, runt stage2_bootstrap = stage2_bootstrap, runtime_details = runtime_details, is_for_zip = True, + venv = venv, ) else: ctx.actions.write(prelude, "#!/usr/bin/env python3\n") diff --git a/python/private/py_runtime_info.bzl b/python/private/py_runtime_info.bzl index ff95c4a44..038b4ef72 100644 --- a/python/private/py_runtime_info.bzl +++ b/python/private/py_runtime_info.bzl @@ -68,7 +68,8 @@ def _PyRuntimeInfo_init( interpreter_version_info = None, stage2_bootstrap_template = None, zip_main_template = None, - abi_flags = ""): + abi_flags = "", + site_init_template = None): if (interpreter_path and interpreter) or (not interpreter_path and not interpreter): fail("exactly one of interpreter or interpreter_path must be specified") @@ -117,6 +118,7 @@ def _PyRuntimeInfo_init( "interpreter_version_info": interpreter_version_info_struct_from_dict(interpreter_version_info), "pyc_tag": pyc_tag, "python_version": python_version, + "site_init_template": site_init_template, "stage2_bootstrap_template": stage2_bootstrap_template, "stub_shebang": stub_shebang, "zip_main_template": zip_main_template, @@ -126,6 +128,11 @@ PyRuntimeInfo, _unused_raw_py_runtime_info_ctor = define_bazel_6_provider( doc = """Contains information about a Python runtime, as returned by the `py_runtime` rule. +:::{warning} +This is an **unstable public** API. It may change more frequently and has weaker +compatibility guarantees. +::: + A Python runtime describes either a *platform runtime* or an *in-build runtime*. A platform runtime accesses a system-installed interpreter at a known path, whereas an in-build runtime points to a `File` that acts as the interpreter. In @@ -139,6 +146,9 @@ the same conventions as the standard CPython interpreter. :type: str The runtime's ABI flags, i.e. `sys.abiflags`. + +:::{versionadded} 0.39.0 +::: """, "bootstrap_template": """ :type: File @@ -160,7 +170,8 @@ is expected to behave and the substutitions performed. `%target%`, `%workspace_name`, `%coverage_tool%`, `%import_all%`, `%imports%`, `%main%`, `%shebang%` * `--bootstrap_impl=script` substititions: `%is_zipfile%`, `%python_binary%`, - `%target%`, `%workspace_name`, `%shebang%, `%stage2_bootstrap%` + `%python_binary_actual%`, `%target%`, `%workspace_name`, + `%shebang%`, `%stage2_bootstrap%` Substitution definitions: @@ -172,6 +183,19 @@ Substitution definitions: * An absolute path to a system interpreter (e.g. begins with `/`). * A runfiles-relative path to an interpreter (e.g. `somerepo/bin/python3`) * A program to search for on PATH, i.e. a word without spaces, e.g. `python3`. + + When `--bootstrap_impl=script` is used, this is always a runfiles-relative + path to a venv-based interpreter executable. + +* `%python_binary_actual%`: The path to the interpreter that + `%python_binary%` invokes. There are three types of paths: + * An absolute path to a system interpreter (e.g. begins with `/`). + * A runfiles-relative path to an interpreter (e.g. `somerepo/bin/python3`) + * A program to search for on PATH, i.e. a word without spaces, e.g. `python3`. + + Only set for zip builds with `--bootstrap_impl=script`; other builds will use + an empty string. + * `%workspace_name%`: The name of the workspace the target belongs to. * `%is_zipfile%`: The string `1` if this template is prepended to a zipfile to create a self-executable zip file. The string `0` otherwise. @@ -250,6 +274,15 @@ correctly. Indicates whether this runtime uses Python major version 2 or 3. Valid values are (only) `"PY2"` and `"PY3"`. +""", + "site_init_template": """ +:type: File + +The template to use for the binary-specific site-init hook run by the +interpreter at startup. + +:::{versionadded} VERSION_NEXT_FEATURE +::: """, "stage2_bootstrap_template": """ :type: File diff --git a/python/private/py_runtime_rule.bzl b/python/private/py_runtime_rule.bzl index 746cd19dc..bccf05c2d 100644 --- a/python/private/py_runtime_rule.bzl +++ b/python/private/py_runtime_rule.bzl @@ -129,6 +129,7 @@ def _py_runtime_impl(ctx): stage2_bootstrap_template = ctx.file.stage2_bootstrap_template, zip_main_template = ctx.file.zip_main_template, abi_flags = abi_flags, + site_init_template = ctx.file.site_init_template, )) if not IS_BAZEL_7_OR_HIGHER: @@ -316,6 +317,17 @@ However, in the future this attribute will be mandatory and have no default value. """, ), + "site_init_template": attr.label( + allow_single_file = True, + default = "//python/private:site_init_template", + doc = """ +The template to use for the binary-specific site-init hook run by the +interpreter at startup. + +:::{versionadded} VERSION_NEXT_FEATURE +::: +""", + ), "stage2_bootstrap_template": attr.label( default = "//python/private:stage2_bootstrap_template", allow_single_file = True, diff --git a/python/private/site_init_template.py b/python/private/site_init_template.py new file mode 100644 index 000000000..29e405a1a --- /dev/null +++ b/python/private/site_init_template.py @@ -0,0 +1,199 @@ +# Copyright 2024 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""site initialization logic for Bazel-built py_binary targets.""" +import os +import os.path +import sys + +# Colon-delimited string of runfiles-relative import paths to add +_IMPORTS_STR = "%imports%" +# Though the import all value is the correct literal, we quote it +# so this file is parsable by tools. +_IMPORT_ALL = True if "%import_all%" == "True" else False +_WORKSPACE_NAME = "%workspace_name%" +# runfiles-relative path to this file +_SELF_RUNFILES_RELATIVE_PATH = "%site_init_runfiles_path%" +# Runfiles-relative path to the coverage tool entry point, if any. +_COVERAGE_TOOL = "%coverage_tool%" + +# True if coverage was requested and successfully setup +COVERAGE_SETUP = False + + +def _is_verbose(): + return bool(os.environ.get("RULES_PYTHON_BOOTSTRAP_VERBOSE")) + + +def _print_verbose_coverage(*args): + if os.environ.get("VERBOSE_COVERAGE") or _is_verbose(): + _print_verbose(*args) + + +def _print_verbose(*args, mapping=None, values=None): + if not _is_verbose(): + return + + print("bazel_site_init:", *args, file=sys.stderr, flush=True) + + +_print_verbose("imports_str:", _IMPORTS_STR) +_print_verbose("import_all:", _IMPORT_ALL) +_print_verbose("workspace_name:", _WORKSPACE_NAME) +_print_verbose("self_runfiles_path:", _SELF_RUNFILES_RELATIVE_PATH) +_print_verbose("coverage_tool:", _COVERAGE_TOOL) + + +def _find_runfiles_root(): + # Give preference to the environment variables + runfiles_dir = os.environ.get("RUNFILES_DIR", None) + if not runfiles_dir: + runfiles_manifest_file = os.environ.get("RUNFILES_MANIFEST_FILE", "") + if runfiles_manifest_file.endswith( + ".runfiles_manifest" + ) or runfiles_manifest_file.endswith(".runfiles/MANIFEST"): + runfiles_dir = runfiles_manifest_file[:-9] + + # Be defensive: the runfiles dir should contain ourselves. If it doesn't, + # then it must not be our runfiles directory. + if runfiles_dir and os.path.exists( + os.path.join(runfiles_dir, _SELF_RUNFILES_RELATIVE_PATH) + ): + return runfiles_dir + + num_dirs_to_runfiles_root = _SELF_RUNFILES_RELATIVE_PATH.count("/") + 1 + runfiles_root = os.path.dirname(__file__) + for _ in range(num_dirs_to_runfiles_root): + runfiles_root = os.path.dirname(runfiles_root) + return runfiles_root + + +_RUNFILES_ROOT = _find_runfiles_root() + +_print_verbose("runfiles_root:", _RUNFILES_ROOT) + + +def _is_windows(): + return os.name == "nt" + + +def _get_windows_path_with_unc_prefix(path): + path = path.strip() + # No need to add prefix for non-Windows platforms. + if not _is_windows() or sys.version_info[0] < 3: + return path + + # Starting in Windows 10, version 1607(OS build 14393), MAX_PATH limitations have been + # removed from common Win32 file and directory functions. + # Related doc: https://docs.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation?tabs=cmd#enable-long-paths-in-windows-10-version-1607-and-later + import platform + + if platform.win32_ver()[1] >= "10.0.14393": + return path + + # import sysconfig only now to maintain python 2.6 compatibility + import sysconfig + + if sysconfig.get_platform() == "mingw": + return path + + # Lets start the unicode fun + unicode_prefix = "\\\\?\\" + if path.startswith(unicode_prefix): + return path + + # os.path.abspath returns a normalized absolute path + return unicode_prefix + os.path.abspath(path) + + +def _search_path(name): + """Finds a file in a given search path.""" + search_path = os.getenv("PATH", os.defpath).split(os.pathsep) + for directory in search_path: + if directory: + path = os.path.join(directory, name) + if os.path.isfile(path) and os.access(path, os.X_OK): + return path + return None + + +def _setup_sys_path(): + global COVERAGE_SETUP + seen = set(sys.path) + python_path_entries = [] + + def _maybe_add_path(path): + if path in seen: + return + path = _get_windows_path_with_unc_prefix(path) + if _is_windows(): + path = path.replace("/", os.sep) + + _print_verbose("append sys.path:", path) + sys.path.append(path) + seen.add(path) + + for rel_path in _IMPORTS_STR.split(":"): + abs_path = os.path.join(_RUNFILES_ROOT, rel_path) + _maybe_add_path(abs_path) + + if _IMPORT_ALL: + repo_dirs = sorted( + os.path.join(_RUNFILES_ROOT, d) for d in os.listdir(_RUNFILES_ROOT) + ) + for d in repo_dirs: + if os.path.isdir(d): + _maybe_add_path(d) + else: + _maybe_add_path(os.path.join(_RUNFILES_ROOT, _WORKSPACE_NAME)) + + # COVERAGE_DIR is set if coverage is enabled and instrumentation is configured + # for something, though it could be another program executing this one or + # one executed by this one (e.g. an extension module). + # NOTE: Coverage is added last to allow user dependencies to override it. + if os.environ.get("COVERAGE_DIR"): + cov_tool = _COVERAGE_TOOL + if cov_tool: + _print_verbose_coverage(f"Using toolchain coverage_tool {cov_tool}") + elif cov_tool := os.environ.get("PYTHON_COVERAGE"): + _print_verbose_coverage(f"PYTHON_COVERAGE: {cov_tool}") + + if cov_tool: + if os.path.isabs(cov_tool): + pass + elif os.sep in os.path.normpath(cov_tool): + cov_tool = os.path.join(_RUNFILES_ROOT, cov_tool) + else: + cov_tool = _search_path(cov_tool) + if cov_tool: + # The coverage entry point is `/coverage/coverage_main.py`, so + # we need to do twice the dirname so that `import coverage` works + coverage_dir = os.path.dirname(os.path.dirname(cov_tool)) + + # coverage library expects sys.path[0] to contain the library, and replaces + # it with the directory of the program it starts. Our actual sys.path[0] is + # the runfiles directory, which must not be replaced. + # CoverageScript.do_execute() undoes this sys.path[0] setting. + _maybe_add_path(coverage_dir) + COVERAGE_SETUP = True + else: + _print_verbose_coverage( + "Coverage was enabled, but python coverage tool was not configured." + + "To enable coverage, consult the docs at " + + "https://rules-python.readthedocs.io/en/latest/coverage.html" + ) + + return None + + +_setup_sys_path() diff --git a/python/private/stage1_bootstrap_template.sh b/python/private/stage1_bootstrap_template.sh index e7e418caf..afa1ee84b 100644 --- a/python/private/stage1_bootstrap_template.sh +++ b/python/private/stage1_bootstrap_template.sh @@ -9,8 +9,12 @@ fi # runfiles-relative path STAGE2_BOOTSTRAP="%stage2_bootstrap%" -# runfiles-relative path, absolute path, or single word +# runfiles-relative path PYTHON_BINARY='%python_binary%' +# The path that PYTHON_BINARY should symlink to. +# runfiles-relative path, absolute path, or single word. +# Only applicable for zip files. +PYTHON_BINARY_ACTUAL="%python_binary_actual%" # 0 or 1 IS_ZIPFILE="%is_zipfile%" @@ -96,6 +100,48 @@ function find_python_interpreter() { } python_exe=$(find_python_interpreter $RUNFILES_DIR $PYTHON_BINARY) + +# Zip files have to re-create the venv bin/python3 symlink because they +# don't contain it already. +if [[ "$IS_ZIPFILE" == "1" ]]; then + # It should always be under runfiles, but double check this. We don't + # want to accidentally create symlinks elsewhere. + if [[ "$python_exe" != $RUNFILES_DIR/* ]]; then + echo >&2 "ERROR: Program's venv binary not under runfiles: $python_exe" + exit 1 + fi + if [[ "$PYTHON_BINARY_ACTUAL" == /* ]]; then + # An absolute path, i.e. platform runtime, e.g. /usr/bin/python3 + symlink_to=$PYTHON_BINARY_ACTUAL + elif [[ "$PYTHON_BINARY_ACTUAL" == */* ]]; then + # A runfiles-relative path + symlink_to=$RUNFILES_DIR/$PYTHON_BINARY_ACTUAL + else + # A plain word, e.g. "python3". Symlink to where PATH leads + symlink_to=$(which $PYTHON_BINARY_ACTUAL) + # Guard against trying to symlink to an empty value + if [[ $? -ne 0 ]]; then + echo >&2 "ERROR: Python to use found on PATH: $PYTHON_BINARY_ACTUAL" + exit 1 + fi + fi + # The bin/ directory may not exist if it is empty. + mkdir -p "$(dirname $python_exe)" + ln -s "$symlink_to" "$python_exe" +fi + +# At this point, we should have a valid reference to the interpreter. +# Check that so we can give an nicer failure if things went wrong. +if [[ ! -x "$python_exe" ]]; then + if [[ ! -e "$python_exe" ]]; then + echo >&2 "ERROR: Python interpreter not found: $python_exe" + exit 1 + elif [[ ! -x "$python_exe" ]]; then + echo >&2 "ERROR: Python interpreter not executable: $python_exe" + exit 1 + fi +fi + stage2_bootstrap="$RUNFILES_DIR/$STAGE2_BOOTSTRAP" declare -a interpreter_env diff --git a/python/private/stage2_bootstrap_template.py b/python/private/stage2_bootstrap_template.py index f66c28bd5..b505d3649 100644 --- a/python/private/stage2_bootstrap_template.py +++ b/python/private/stage2_bootstrap_template.py @@ -24,12 +24,6 @@ # Runfiles-relative path to the main Python source file. MAIN = "%main%" -# Colon-delimited string of runfiles-relative import paths to add -IMPORTS_STR = "%imports%" -WORKSPACE_NAME = "%workspace_name%" -# Though the import all value is the correct literal, we quote it -# so this file is parsable by tools. -IMPORT_ALL = True if "%import_all%" == "True" else False # Runfiles-relative path to the coverage tool entry point, if any. COVERAGE_TOOL = "%coverage_tool%" @@ -120,17 +114,17 @@ def is_verbose_coverage(): return os.environ.get("VERBOSE_COVERAGE") or is_verbose() -def find_coverage_entry_point(module_space): - cov_tool = COVERAGE_TOOL - if cov_tool: - print_verbose_coverage("Using toolchain coverage_tool %r" % cov_tool) - else: - cov_tool = os.environ.get("PYTHON_COVERAGE") - if cov_tool: - print_verbose_coverage("PYTHON_COVERAGE: %r" % cov_tool) - if cov_tool: - return find_binary(module_space, cov_tool) - return None +##def find_coverage_entry_point(module_space): +## cov_tool = COVERAGE_TOOL +## if cov_tool: +## print_verbose_coverage("Using toolchain coverage_tool %r" % cov_tool) +## else: +## cov_tool = os.environ.get("PYTHON_COVERAGE") +## if cov_tool: +## print_verbose_coverage("PYTHON_COVERAGE: %r" % cov_tool) +## if cov_tool: +## return find_binary(module_space, cov_tool) +## return None def find_binary(module_space, bin_name): @@ -154,9 +148,9 @@ def find_binary(module_space, bin_name): return search_path(bin_name) -def create_python_path_entries(python_imports, module_space): - parts = python_imports.split(":") - return [module_space] + ["%s/%s" % (module_space, path) for path in parts] +##def create_python_path_entries(python_imports, module_space): +## parts = python_imports.split(":") +## return [module_space] + ["%s/%s" % (module_space, path) for path in parts] def find_runfiles_root(main_rel_path): @@ -204,12 +198,12 @@ def find_runfiles_root(main_rel_path): # Returns repository roots to add to the import path. -def get_repositories_imports(module_space, import_all): - if import_all: - repo_dirs = [os.path.join(module_space, d) for d in os.listdir(module_space)] - repo_dirs.sort() - return [d for d in repo_dirs if os.path.isdir(d)] - return [os.path.join(module_space, WORKSPACE_NAME)] +##def get_repositories_imports(module_space, import_all): +## if import_all: +## repo_dirs = [os.path.join(module_space, d) for d in os.listdir(module_space)] +## repo_dirs.sort() +## return [d for d in repo_dirs if os.path.isdir(d)] +## return [os.path.join(module_space, WORKSPACE_NAME)] def runfiles_envvar(module_space): @@ -430,31 +424,30 @@ def main(): # # To replicate this behavior, we add main's directory within the runfiles # when safe path isn't enabled. + python_path_entries = [] + prepend_path_entries = [] if not getattr(sys.flags, "safe_path", False): prepend_path_entries = [ os.path.join(module_space, os.path.dirname(main_rel_path)) ] else: prepend_path_entries = [] - python_path_entries = create_python_path_entries(IMPORTS_STR, module_space) - python_path_entries += get_repositories_imports(module_space, IMPORT_ALL) + ##python_path_entries = create_python_path_entries(IMPORTS_STR, module_space) + ##python_path_entries += get_repositories_imports(module_space, IMPORT_ALL) python_path_entries = [ get_windows_path_with_unc_prefix(d) for d in python_path_entries ] # Remove duplicates to avoid overly long PYTHONPATH (#10977). Preserve order, # keep first occurrence only. - python_path_entries = deduplicate(python_path_entries) + python_path_entries = list(deduplicate(python_path_entries)) - if is_windows(): - python_path_entries = [p.replace("/", os.sep) for p in python_path_entries] - else: - # deduplicate returns a generator, but we need a list after this. - python_path_entries = list(python_path_entries) + ##if is_windows(): + ## python_path_entries = [p.replace("/", os.sep) for p in python_path_entries] + ##else: + ## # deduplicate returns a generator, but we need a list after this. + ## python_path_entries = list(python_path_entries) - # We're emulating PYTHONPATH being set, so we insert at the start - # This isn't a great idea (it can shadow the stdlib), but is the historical - # behavior. runfiles_envkey, runfiles_envvalue = runfiles_envvar(module_space) if runfiles_envkey: os.environ[runfiles_envkey] = runfiles_envvalue @@ -468,43 +461,6 @@ def main(): "Cannot exec() %r: file not readable." % main_filename ) - # COVERAGE_DIR is set if coverage is enabled and instrumentation is configured - # for something, though it could be another program executing this one or - # one executed by this one (e.g. an extension module). - if os.environ.get("COVERAGE_DIR"): - cov_tool = find_coverage_entry_point(module_space) - if cov_tool is None: - print_verbose_coverage( - "Coverage was enabled, but python coverage tool was not configured." - + "To enable coverage, consult the docs at " - + "https://rules-python.readthedocs.io/en/latest/coverage.html" - ) - else: - # Inhibit infinite recursion: - if "PYTHON_COVERAGE" in os.environ: - del os.environ["PYTHON_COVERAGE"] - - if not os.path.exists(cov_tool): - raise EnvironmentError( - "Python coverage tool %r not found. " - "Try running with VERBOSE_COVERAGE=1 to collect more information." - % cov_tool - ) - - # coverage library expects sys.path[0] to contain the library, and replaces - # it with the directory of the program it starts. Our actual sys.path[0] is - # the runfiles directory, which must not be replaced. - # CoverageScript.do_execute() undoes this sys.path[0] setting. - # - # Update sys.path such that python finds the coverage package. The coverage - # entry point is coverage.coverage_main, so we need to do twice the dirname. - coverage_dir = os.path.dirname(os.path.dirname(cov_tool)) - print_verbose("coverage: adding to sys.path:", coverage_dir) - python_path_entries.append(coverage_dir) - python_path_entries = deduplicate(python_path_entries) - else: - cov_tool = None - sys.stdout.flush() # Add the user imports after the stdlib, but before the runtime's @@ -529,7 +485,9 @@ def main(): # user import accidentally triggering the site-packages logic above. sys.path[0:0] = prepend_path_entries - with _maybe_collect_coverage(enable=cov_tool is not None): + import _bazel_site_init + + with _maybe_collect_coverage(enable=_bazel_site_init.COVERAGE_SETUP): # The first arg is this bootstrap, so drop that for the re-invocation. _run_py(main_filename, args=sys.argv[1:]) sys.exit(0) diff --git a/python/private/zip_main_template.py b/python/private/zip_main_template.py index 2d3aea7b7..b4c9d279a 100644 --- a/python/private/zip_main_template.py +++ b/python/private/zip_main_template.py @@ -23,8 +23,12 @@ import tempfile import zipfile +# runfiles-relative path _STAGE2_BOOTSTRAP = "%stage2_bootstrap%" +# runfiles-relative path _PYTHON_BINARY = "%python_binary%" +# runfiles-relative path, absolute path, or single word +_PYTHON_BINARY_ACTUAL = "%python_binary_actual%" _WORKSPACE_NAME = "%workspace_name%" @@ -257,10 +261,37 @@ def main(): "Cannot exec() %r: file not readable." % main_filename ) - program = python_program = find_python_binary(module_space) + python_program = find_python_binary(module_space) if python_program is None: raise AssertionError("Could not find python binary: " + _PYTHON_BINARY) + # The python interpreter should always be under runfiles, but double check. + # We don't want to accidentally create symlinks elsewhere. + if not python_program.startswith(module_space): + raise AssertionError( + "Program's venv binary not under runfiles: {python_program}" + ) + + if os.path.isabs(_PYTHON_BINARY_ACTUAL): + symlink_to = _PYTHON_BINARY_ACTUAL + elif "/" in _PYTHON_BINARY_ACTUAL: + symlink_to = os.path.join(module_space, _PYTHON_BINARY_ACTUAL) + else: + symlink_to = search_path(_PYTHON_BINARY_ACTUAL) + if not symlink_to: + raise AssertionError( + f"Python interpreter to use not found on PATH: {_PYTHON_BINARY_ACTUAL}" + ) + + # The bin/ directory may not exist if it is empty. + os.makedirs(os.path.dirname(python_program), exist_ok=True) + try: + os.symlink(_PYTHON_BINARY_ACTUAL, python_program) + except OSError as e: + raise Exception( + f"Unable to create venv python interpreter symlink: {python_program} -> {PYTHON_BINARY_ACTUAL}" + ) from e + # Some older Python versions on macOS (namely Python 3.7) may unintentionally # leave this environment variable set after starting the interpreter, which # causes problems with Python subprocesses correctly locating sys.executable, diff --git a/tests/bootstrap_impls/BUILD.bazel b/tests/bootstrap_impls/BUILD.bazel index cd5771533..158682189 100644 --- a/tests/bootstrap_impls/BUILD.bazel +++ b/tests/bootstrap_impls/BUILD.bazel @@ -57,7 +57,7 @@ py_reconfig_test( srcs = ["sys_path_order_test.py"], bootstrap_impl = "script", env = {"BOOTSTRAP": "script"}, - imports = ["./site-packages"], + imports = ["./USER_IMPORT/site-packages"], main = "sys_path_order_test.py", target_compatible_with = _SUPPORTS_BOOTSTRAP_SCRIPT, ) @@ -78,3 +78,12 @@ sh_py_run_test( sh_src = "inherit_pythonsafepath_env_test.sh", target_compatible_with = _SUPPORTS_BOOTSTRAP_SCRIPT, ) + +sh_py_run_test( + name = "sys_executable_inherits_sys_path", + bootstrap_impl = "script", + imports = ["./MARKER"], + py_src = "call_sys_exe.py", + sh_src = "sys_executable_inherits_sys_path_test.sh", + target_compatible_with = _SUPPORTS_BOOTSTRAP_SCRIPT, +) diff --git a/tests/bootstrap_impls/call_sys_exe.py b/tests/bootstrap_impls/call_sys_exe.py new file mode 100644 index 000000000..0c6157048 --- /dev/null +++ b/tests/bootstrap_impls/call_sys_exe.py @@ -0,0 +1,51 @@ +# Copyright 2024 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import subprocess +import sys + +print("outer sys.path:") +for i, x in enumerate(sys.path): + print(i, x) +print() + +outer_paths = set(sys.path) +output = subprocess.check_output( + [ + sys.executable, + "-c", + "import sys; [print(v) for v in sys.path]", + ], + text=True, +) +inner_lines = [v for v in output.splitlines() if v.strip()] +print("inner sys.path:") +for i, v in enumerate(inner_lines): + print(i, v) +print() + +inner_paths = set(inner_lines) +common = sorted(outer_paths.intersection(inner_paths)) +extra_outer = sorted(outer_paths - inner_paths) +extra_inner = sorted(inner_paths - outer_paths) + +for v in common: + print("common:", v) +print() +for v in extra_outer: + print("extra_outer:", v) +print() +for v in extra_inner: + print("extra_inner:", v) diff --git a/tests/bootstrap_impls/sys_executable_inherits_sys_path_test.sh b/tests/bootstrap_impls/sys_executable_inherits_sys_path_test.sh new file mode 100755 index 000000000..ca4d7aa0a --- /dev/null +++ b/tests/bootstrap_impls/sys_executable_inherits_sys_path_test.sh @@ -0,0 +1,47 @@ +# Copyright 2024 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# --- begin runfiles.bash initialization v3 --- +# Copy-pasted from the Bazel Bash runfiles library v3. +set -uo pipefail; set +e; f=bazel_tools/tools/bash/runfiles/runfiles.bash +source "${RUNFILES_DIR:-/dev/null}/$f" 2>/dev/null || \ + source "$(grep -sm1 "^$f " "${RUNFILES_MANIFEST_FILE:-/dev/null}" | cut -f2- -d' ')" 2>/dev/null || \ + source "$0.runfiles/$f" 2>/dev/null || \ + source "$(grep -sm1 "^$f " "$0.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \ + source "$(grep -sm1 "^$f " "$0.exe.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \ + { echo>&2 "ERROR: cannot find $f"; exit 1; }; f=; set -e +# --- end runfiles.bash initialization v3 --- +set +e + +bin=$(rlocation $BIN_RLOCATION) +if [[ -z "$bin" ]]; then + echo "Unable to locate test binary: $BIN_RLOCATION" + exit 1 +fi + +actual=$($bin) +function assert_pattern () { + expected_pattern=$1 + if ! (echo "$actual" | grep "$expected_pattern" ) >/dev/null; then + echo "Test case failed" + echo "expected output to match: $expected_pattern" + echo "but got: " + echo "$actual" + exit 1 + fi +} + +assert_pattern "common.*/MARKER" + +exit 0 diff --git a/tests/bootstrap_impls/sys_path_order_test.py b/tests/bootstrap_impls/sys_path_order_test.py index 2e3346415..ae8c163db 100644 --- a/tests/bootstrap_impls/sys_path_order_test.py +++ b/tests/bootstrap_impls/sys_path_order_test.py @@ -37,7 +37,7 @@ def test_sys_path_order(self): # counts as a user directory, not stdlib directory. if value == sys.prefix: category = "user" - elif value.startswith(sys.prefix): + elif value.startswith(sys.base_prefix): # The runtime's site-package directory might be called # dist-packages when using Debian's system python. if os.path.basename(value).endswith("-packages"): @@ -67,19 +67,29 @@ def test_sys_path_order(self): self.fail( "Failed to find position for one of:\n" + f"{last_stdlib=} {first_user=} {first_runtime_site=}\n" + + f"for sys.prefix={sys.prefix}\n" + + f"for sys.exec_prefix={sys.exec_prefix}\n" + + f"for sys.base_prefix={sys.base_prefix}\n" + f"for sys.path:\n{sys_path_str}" ) if os.environ["BOOTSTRAP"] == "script": self.assertTrue( last_stdlib < first_user < first_runtime_site, - f"Expected {last_stdlib=} < {first_user=} < {first_runtime_site=}\n" + "Expected overall order to be (stdlib, user imports, runtime site) " + + f"with {last_stdlib=} < {first_user=} < {first_runtime_site=}\n" + + f"for sys.prefix={sys.prefix}\n" + + f"for sys.exec_prefix={sys.exec_prefix}\n" + + f"for sys.base_prefix={sys.base_prefix}\n" + f"for sys.path:\n{sys_path_str}", ) else: self.assertTrue( first_user < last_stdlib < first_runtime_site, f"Expected {first_user=} < {last_stdlib=} < {first_runtime_site=}\n" + + f"for sys.prefix={sys.prefix}\n" + + f"for sys.exec_prefix={sys.exec_prefix}\n" + + f"for sys.base_prefix={sys.base_prefix}\n" + f"for sys.path:\n{sys_path_str}", ) diff --git a/tests/support/sh_py_run_test.bzl b/tests/support/sh_py_run_test.bzl index c191b3974..7fb7016ee 100644 --- a/tests/support/sh_py_run_test.bzl +++ b/tests/support/sh_py_run_test.bzl @@ -136,7 +136,7 @@ def py_reconfig_test(*, name, **kwargs): reconfig_kwargs["env"] = kwargs.get("env") reconfig_kwargs["target_compatible_with"] = kwargs.get("target_compatible_with") - inner_name = "_{}_inner" + name + inner_name = "_{}_inner".format(name) _py_reconfig_test( name = name, target = inner_name, @@ -149,6 +149,14 @@ def py_reconfig_test(*, name, **kwargs): ) def sh_py_run_test(*, name, sh_src, py_src, **kwargs): + """Run a py_binary within a sh_test. + + Args: + name: name of the sh_test and base name of inner targets. + sh_src: .sh file to run as a test + py_src: .py file for the py_binary + **kwargs: additional kwargs passed onto py_binary and/or sh_test + """ bin_name = "_{}_bin".format(name) sh_test( name = name, @@ -162,6 +170,12 @@ def sh_py_run_test(*, name, sh_src, py_src, **kwargs): }, ) + py_binary_kwargs = { + key: kwargs.pop(key) + for key in ("imports", "deps") + if key in kwargs + } + _py_reconfig_binary( name = bin_name, tags = ["manual"], @@ -174,6 +188,7 @@ def sh_py_run_test(*, name, sh_src, py_src, **kwargs): srcs = [py_src], main = py_src, tags = ["manual"], + **py_binary_kwargs ) def _current_build_settings_impl(ctx): From 8a5fce059397ac5a6a28090160f27c29423cde5b Mon Sep 17 00:00:00 2001 From: Richard Levasseur Date: Thu, 14 Nov 2024 15:26:11 -0800 Subject: [PATCH 2/8] print info about python exe used --- python/private/stage1_bootstrap_template.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/python/private/stage1_bootstrap_template.sh b/python/private/stage1_bootstrap_template.sh index afa1ee84b..dc5d1b0b0 100644 --- a/python/private/stage1_bootstrap_template.sh +++ b/python/private/stage1_bootstrap_template.sh @@ -128,6 +128,11 @@ if [[ "$IS_ZIPFILE" == "1" ]]; then # The bin/ directory may not exist if it is empty. mkdir -p "$(dirname $python_exe)" ln -s "$symlink_to" "$python_exe" +else + set -x + stat "$python_exe" + ls -l $(dirname $python_exe) + set +x fi # At this point, we should have a valid reference to the interpreter. From 028f9d1eb67fa49a034c407b35e4e4c3b1364b66 Mon Sep 17 00:00:00 2001 From: Richard Levasseur Date: Thu, 14 Nov 2024 15:54:33 -0800 Subject: [PATCH 3/8] always use declare_symlink --- .bazelrc | 4 +- CHANGELOG.md | 12 +-- python/private/py_executable_bazel.bzl | 9 +- python/private/py_runtime_info.bzl | 4 +- python/private/py_runtime_rule.bzl | 2 +- python/private/stage1_bootstrap_template.sh | 9 +- python/private/stage2_bootstrap_template.py | 95 +------------------- tests/bootstrap_impls/sys_path_order_test.py | 2 +- 8 files changed, 22 insertions(+), 115 deletions(-) diff --git a/.bazelrc b/.bazelrc index 66a644e28..c44124d96 100644 --- a/.bazelrc +++ b/.bazelrc @@ -4,8 +4,8 @@ # (Note, we cannot use `common --deleted_packages` because the bazel version command doesn't support it) # To update these lines, execute # `bazel run @rules_bazel_integration_test//tools:update_deleted_packages` -build --deleted_packages=examples/build_file_generation,examples/build_file_generation/random_number_generator,examples/bzlmod,examples/bzlmod/entry_points,examples/bzlmod/entry_points/tests,examples/bzlmod/libs/my_lib,examples/bzlmod/other_module,examples/bzlmod/other_module/other_module/pkg,examples/bzlmod/patches,examples/bzlmod/py_proto_library,examples/bzlmod/py_proto_library/example.com/another_proto,examples/bzlmod/py_proto_library/example.com/proto,examples/bzlmod/runfiles,examples/bzlmod/tests,examples/bzlmod/tests/other_module,examples/bzlmod/whl_mods,examples/bzlmod_build_file_generation,examples/bzlmod_build_file_generation/other_module/other_module/pkg,examples/bzlmod_build_file_generation/runfiles,examples/multi_python_versions/libs/my_lib,examples/multi_python_versions/requirements,examples/multi_python_versions/tests,examples/pip_parse,examples/pip_parse_vendored,examples/pip_repository_annotations,examples/py_proto_library,examples/py_proto_library/example.com/another_proto,examples/py_proto_library/example.com/proto,gazelle,gazelle/manifest,gazelle/manifest/generate,gazelle/manifest/hasher,gazelle/manifest/test,gazelle/modules_mapping,gazelle/python,gazelle/python/private,gazelle/pythonconfig,tests/integration/compile_pip_requirements,tests/integration/compile_pip_requirements_test_from_external_repo,tests/integration/custom_commands,tests/integration/ignore_root_user_error,tests/integration/ignore_root_user_error/submodule,tests/integration/local_toolchains,tests/integration/pip_parse,tests/integration/pip_parse/empty,tests/integration/py_cc_toolchain_registered -query --deleted_packages=examples/build_file_generation,examples/build_file_generation/random_number_generator,examples/bzlmod,examples/bzlmod/entry_points,examples/bzlmod/entry_points/tests,examples/bzlmod/libs/my_lib,examples/bzlmod/other_module,examples/bzlmod/other_module/other_module/pkg,examples/bzlmod/patches,examples/bzlmod/py_proto_library,examples/bzlmod/py_proto_library/example.com/another_proto,examples/bzlmod/py_proto_library/example.com/proto,examples/bzlmod/runfiles,examples/bzlmod/tests,examples/bzlmod/tests/other_module,examples/bzlmod/whl_mods,examples/bzlmod_build_file_generation,examples/bzlmod_build_file_generation/other_module/other_module/pkg,examples/bzlmod_build_file_generation/runfiles,examples/multi_python_versions/libs/my_lib,examples/multi_python_versions/requirements,examples/multi_python_versions/tests,examples/pip_parse,examples/pip_parse_vendored,examples/pip_repository_annotations,examples/py_proto_library,examples/py_proto_library/example.com/another_proto,examples/py_proto_library/example.com/proto,gazelle,gazelle/manifest,gazelle/manifest/generate,gazelle/manifest/hasher,gazelle/manifest/test,gazelle/modules_mapping,gazelle/python,gazelle/python/private,gazelle/pythonconfig,tests/integration/compile_pip_requirements,tests/integration/compile_pip_requirements_test_from_external_repo,tests/integration/custom_commands,tests/integration/ignore_root_user_error,tests/integration/ignore_root_user_error/submodule,tests/integration/local_toolchains,tests/integration/pip_parse,tests/integration/pip_parse/empty,tests/integration/py_cc_toolchain_registered +build --deleted_packages=examples/build_file_generation,examples/build_file_generation/random_number_generator,examples/bzlmod,examples/bzlmod_build_file_generation,examples/bzlmod_build_file_generation/other_module/other_module/pkg,examples/bzlmod_build_file_generation/runfiles,examples/bzlmod/entry_points,examples/bzlmod/entry_points/tests,examples/bzlmod/libs/my_lib,examples/bzlmod/other_module,examples/bzlmod/other_module/other_module/pkg,examples/bzlmod/patches,examples/bzlmod/py_proto_library,examples/bzlmod/py_proto_library/example.com/another_proto,examples/bzlmod/py_proto_library/example.com/proto,examples/bzlmod/runfiles,examples/bzlmod/tests,examples/bzlmod/tests/other_module,examples/bzlmod/whl_mods,examples/multi_python_versions/libs/my_lib,examples/multi_python_versions/requirements,examples/multi_python_versions/tests,examples/pip_parse,examples/pip_parse_vendored,examples/pip_repository_annotations,examples/py_proto_library,examples/py_proto_library/example.com/another_proto,examples/py_proto_library/example.com/proto,gazelle,gazelle/manifest,gazelle/manifest/generate,gazelle/manifest/hasher,gazelle/manifest/test,gazelle/modules_mapping,gazelle/python,gazelle/pythonconfig,gazelle/python/private,tests/integration/compile_pip_requirements,tests/integration/compile_pip_requirements_test_from_external_repo,tests/integration/custom_commands,tests/integration/ignore_root_user_error,tests/integration/ignore_root_user_error/submodule,tests/integration/local_toolchains,tests/integration/pip_parse,tests/integration/pip_parse/empty,tests/integration/py_cc_toolchain_registered +query --deleted_packages=examples/build_file_generation,examples/build_file_generation/random_number_generator,examples/bzlmod,examples/bzlmod_build_file_generation,examples/bzlmod_build_file_generation/other_module/other_module/pkg,examples/bzlmod_build_file_generation/runfiles,examples/bzlmod/entry_points,examples/bzlmod/entry_points/tests,examples/bzlmod/libs/my_lib,examples/bzlmod/other_module,examples/bzlmod/other_module/other_module/pkg,examples/bzlmod/patches,examples/bzlmod/py_proto_library,examples/bzlmod/py_proto_library/example.com/another_proto,examples/bzlmod/py_proto_library/example.com/proto,examples/bzlmod/runfiles,examples/bzlmod/tests,examples/bzlmod/tests/other_module,examples/bzlmod/whl_mods,examples/multi_python_versions/libs/my_lib,examples/multi_python_versions/requirements,examples/multi_python_versions/tests,examples/pip_parse,examples/pip_parse_vendored,examples/pip_repository_annotations,examples/py_proto_library,examples/py_proto_library/example.com/another_proto,examples/py_proto_library/example.com/proto,gazelle,gazelle/manifest,gazelle/manifest/generate,gazelle/manifest/hasher,gazelle/manifest/test,gazelle/modules_mapping,gazelle/python,gazelle/pythonconfig,gazelle/python/private,tests/integration/compile_pip_requirements,tests/integration/compile_pip_requirements_test_from_external_repo,tests/integration/custom_commands,tests/integration/ignore_root_user_error,tests/integration/ignore_root_user_error/submodule,tests/integration/local_toolchains,tests/integration/pip_parse,tests/integration/pip_parse/empty,tests/integration/py_cc_toolchain_registered test --test_output=errors diff --git a/CHANGELOG.md b/CHANGELOG.md index c352def86..edca70fee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -100,6 +100,9 @@ Other changes: for the latest toolchain versions for each minor Python version. You can control the toolchain selection by using the {bzl:obj}`//python/config_settings:py_linux_libc` build flag. +* (providers) Added {obj}`py_runtime_info.site_init_template` and + {obj}`PyRuntimeInfo.site_init_template` for specifying the template to use to + initialize the interpreter via venv startup hooks. {#v0-0-0-removed} ### Removed @@ -124,13 +127,7 @@ Other changes: {#v0-40-added} ### Added -<<<<<<< HEAD * Nothing added. -======= -* (providers) Added {obj}`py_runtime_info.site_init_template` and - {obj}`PyRuntimeInfo.site_init_template` for specifying the template to use to - initialize the interpreter via venv startup hooks. ->>>>>>> a057e85e (wip: make sys.executable work with script bootstrap) {#v0-40-removed} ### Removed @@ -174,9 +171,6 @@ Other changes: * (precompiling) Skip precompiling (instead of erroring) if the legacy `@bazel_tools//tools/python:autodetecting_toolchain` is being used ([#2364](https://github.com/bazelbuild/rules_python/issues/2364)). -* (bzlmod) Generate `config_setting` values for all available toolchains instead - of only the registered toolchains, which restores the previous behaviour that - `bzlmod` users would have observed. {#v0-39-0-added} ### Added diff --git a/python/private/py_executable_bazel.bzl b/python/private/py_executable_bazel.bzl index b5f519efb..ac58b9b15 100644 --- a/python/private/py_executable_bazel.bzl +++ b/python/private/py_executable_bazel.bzl @@ -362,9 +362,14 @@ def _create_venv(ctx, output_prefix, imports, runtime_details): runtime = runtime_details.effective_runtime if runtime.interpreter: py_exe_basename = paths.basename(runtime.interpreter.short_path) - interpreter = ctx.actions.declare_file("{}/bin/{}".format(venv, py_exe_basename)) - ctx.actions.symlink(output = interpreter, target_file = runtime.interpreter) + + # A real symlink is required to support RBE. RBE may materialize what + # ctx.actions.symlink() points to. + interpreter = ctx.actions.declare_symlink("{}/bin/{}".format(venv, py_exe_basename)) interpreter_actual_path = runtime.interpreter.short_path + parent = "/".join([".."] * (interpreter_actual_path.count("/") + 1)) + rel_path = parent + "/" + interpreter_actual_path + ctx.actions.symlink(output = interpreter, target_path = rel_path) else: py_exe_basename = paths.basename(runtime.interpreter_path) interpreter = ctx.actions.declare_symlink("{}/bin/{}".format(venv, py_exe_basename)) diff --git a/python/private/py_runtime_info.bzl b/python/private/py_runtime_info.bzl index 038b4ef72..34be0db69 100644 --- a/python/private/py_runtime_info.bzl +++ b/python/private/py_runtime_info.bzl @@ -147,7 +147,7 @@ the same conventions as the standard CPython interpreter. The runtime's ABI flags, i.e. `sys.abiflags`. -:::{versionadded} 0.39.0 +:::{versionadded} 0.41.0 ::: """, "bootstrap_template": """ @@ -281,7 +281,7 @@ are (only) `"PY2"` and `"PY3"`. The template to use for the binary-specific site-init hook run by the interpreter at startup. -:::{versionadded} VERSION_NEXT_FEATURE +:::{versionadded} 0.41.0 ::: """, "stage2_bootstrap_template": """ diff --git a/python/private/py_runtime_rule.bzl b/python/private/py_runtime_rule.bzl index bccf05c2d..5ce8161cf 100644 --- a/python/private/py_runtime_rule.bzl +++ b/python/private/py_runtime_rule.bzl @@ -324,7 +324,7 @@ value. The template to use for the binary-specific site-init hook run by the interpreter at startup. -:::{versionadded} VERSION_NEXT_FEATURE +:::{versionadded} 0.41.0 ::: """, ), diff --git a/python/private/stage1_bootstrap_template.sh b/python/private/stage1_bootstrap_template.sh index dc5d1b0b0..1a79476e3 100644 --- a/python/private/stage1_bootstrap_template.sh +++ b/python/private/stage1_bootstrap_template.sh @@ -129,10 +129,11 @@ if [[ "$IS_ZIPFILE" == "1" ]]; then mkdir -p "$(dirname $python_exe)" ln -s "$symlink_to" "$python_exe" else - set -x - stat "$python_exe" - ls -l $(dirname $python_exe) - set +x + : + #set -x + #stat "$python_exe" + #ls -l $(dirname $python_exe) + #set +x fi # At this point, we should have a valid reference to the interpreter. diff --git a/python/private/stage2_bootstrap_template.py b/python/private/stage2_bootstrap_template.py index b505d3649..30f0b7749 100644 --- a/python/private/stage2_bootstrap_template.py +++ b/python/private/stage2_bootstrap_template.py @@ -16,7 +16,6 @@ import os import re import runpy -import subprocess import uuid # ===== Template substitutions start ===== @@ -24,8 +23,6 @@ # Runfiles-relative path to the main Python source file. MAIN = "%main%" -# Runfiles-relative path to the coverage tool entry point, if any. -COVERAGE_TOOL = "%coverage_tool%" # ===== Template substitutions end ===== @@ -114,45 +111,6 @@ def is_verbose_coverage(): return os.environ.get("VERBOSE_COVERAGE") or is_verbose() -##def find_coverage_entry_point(module_space): -## cov_tool = COVERAGE_TOOL -## if cov_tool: -## print_verbose_coverage("Using toolchain coverage_tool %r" % cov_tool) -## else: -## cov_tool = os.environ.get("PYTHON_COVERAGE") -## if cov_tool: -## print_verbose_coverage("PYTHON_COVERAGE: %r" % cov_tool) -## if cov_tool: -## return find_binary(module_space, cov_tool) -## return None - - -def find_binary(module_space, bin_name): - """Finds the real binary if it's not a normal absolute path.""" - if not bin_name: - return None - if bin_name.startswith("//"): - # Case 1: Path is a label. Not supported yet. - raise AssertionError( - "Bazel does not support execution of Python interpreters via labels yet" - ) - elif os.path.isabs(bin_name): - # Case 2: Absolute path. - return bin_name - # Use normpath() to convert slashes to os.sep on Windows. - elif os.sep in os.path.normpath(bin_name): - # Case 3: Path is relative to the repo root. - return os.path.join(module_space, bin_name) - else: - # Case 4: Path has to be looked up in the search path. - return search_path(bin_name) - - -##def create_python_path_entries(python_imports, module_space): -## parts = python_imports.split(":") -## return [module_space] + ["%s/%s" % (module_space, path) for path in parts] - - def find_runfiles_root(main_rel_path): """Finds the runfiles tree.""" # When the calling process used the runfiles manifest to resolve the @@ -197,15 +155,6 @@ def find_runfiles_root(main_rel_path): raise AssertionError("Cannot find .runfiles directory for %s" % sys.argv[0]) -# Returns repository roots to add to the import path. -##def get_repositories_imports(module_space, import_all): -## if import_all: -## repo_dirs = [os.path.join(module_space, d) for d in os.listdir(module_space)] -## repo_dirs.sort() -## return [d for d in repo_dirs if os.path.isdir(d)] -## return [os.path.join(module_space, WORKSPACE_NAME)] - - def runfiles_envvar(module_space): """Finds the runfiles manifest or the runfiles directory. @@ -245,15 +194,6 @@ def runfiles_envvar(module_space): return (None, None) -def deduplicate(items): - """Efficiently filter out duplicates, keeping the first element only.""" - seen = set() - for it in items: - if it not in seen: - seen.add(it) - yield it - - def instrumented_file_paths(): """Yields tuples of realpath of each instrumented file with the relative path.""" manifest_filename = os.environ.get("COVERAGE_MANIFEST") @@ -424,29 +364,12 @@ def main(): # # To replicate this behavior, we add main's directory within the runfiles # when safe path isn't enabled. - python_path_entries = [] - prepend_path_entries = [] if not getattr(sys.flags, "safe_path", False): prepend_path_entries = [ os.path.join(module_space, os.path.dirname(main_rel_path)) ] else: prepend_path_entries = [] - ##python_path_entries = create_python_path_entries(IMPORTS_STR, module_space) - ##python_path_entries += get_repositories_imports(module_space, IMPORT_ALL) - python_path_entries = [ - get_windows_path_with_unc_prefix(d) for d in python_path_entries - ] - - # Remove duplicates to avoid overly long PYTHONPATH (#10977). Preserve order, - # keep first occurrence only. - python_path_entries = list(deduplicate(python_path_entries)) - - ##if is_windows(): - ## python_path_entries = [p.replace("/", os.sep) for p in python_path_entries] - ##else: - ## # deduplicate returns a generator, but we need a list after this. - ## python_path_entries = list(python_path_entries) runfiles_envkey, runfiles_envvalue = runfiles_envvar(module_space) if runfiles_envkey: @@ -463,28 +386,12 @@ def main(): sys.stdout.flush() - # Add the user imports after the stdlib, but before the runtime's - # site-packages directory. This gives the stdlib precedence, while allowing - # users to override non-stdlib packages that may have been bundled with - # the runtime (usually pip). - # NOTE: There isn't a good way to identify the stdlib paths, so we just - # expect site-packages comes after it, per - # https://docs.python.org/3/library/sys_path_init.html#sys-path-init - for i, path in enumerate(sys.path): - # dist-packages is a debian convention, see - # https://wiki.debian.org/Python#Deviations_from_upstream - if os.path.basename(path) in ("site-packages", "dist-packages"): - sys.path[i:i] = python_path_entries - break - else: - # Otherwise, no site-packages directory was found, which is odd but ok. - sys.path.extend(python_path_entries) - # NOTE: The sys.path must be modified before coverage is imported/activated # NOTE: Perform this after the user imports are appended. This avoids a # user import accidentally triggering the site-packages logic above. sys.path[0:0] = prepend_path_entries + # todo: also check COVERAGE_DIR env var import _bazel_site_init with _maybe_collect_coverage(enable=_bazel_site_init.COVERAGE_SETUP): diff --git a/tests/bootstrap_impls/sys_path_order_test.py b/tests/bootstrap_impls/sys_path_order_test.py index ae8c163db..97c62a6be 100644 --- a/tests/bootstrap_impls/sys_path_order_test.py +++ b/tests/bootstrap_impls/sys_path_order_test.py @@ -35,7 +35,7 @@ def test_sys_path_order(self): for i, value in enumerate(sys.path): # The runtime's root repo may be added to sys.path, but it # counts as a user directory, not stdlib directory. - if value == sys.prefix: + if value in (sys.prefix, sys.base_prefix): category = "user" elif value.startswith(sys.base_prefix): # The runtime's site-package directory might be called From 11b1ec4c4660c7604221844332386b44acd6a134 Mon Sep 17 00:00:00 2001 From: Richard Levasseur Date: Tue, 19 Nov 2024 13:05:10 -0800 Subject: [PATCH 4/8] clarify comment, remove commented out debug code --- python/private/py_executable_bazel.bzl | 6 ++++-- python/private/stage1_bootstrap_template.sh | 6 ------ 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/python/private/py_executable_bazel.bzl b/python/private/py_executable_bazel.bzl index ac58b9b15..a3e2ab121 100644 --- a/python/private/py_executable_bazel.bzl +++ b/python/private/py_executable_bazel.bzl @@ -363,8 +363,10 @@ def _create_venv(ctx, output_prefix, imports, runtime_details): if runtime.interpreter: py_exe_basename = paths.basename(runtime.interpreter.short_path) - # A real symlink is required to support RBE. RBE may materialize what - # ctx.actions.symlink() points to. + # Even though ctx.actions.symlink() is used, using + # declare_symlink() is required to ensure that the resulting file + # in runfiles is always a symlink. An RBE implementation, for example, + # may choose to write what symlink() points to instead. interpreter = ctx.actions.declare_symlink("{}/bin/{}".format(venv, py_exe_basename)) interpreter_actual_path = runtime.interpreter.short_path parent = "/".join([".."] * (interpreter_actual_path.count("/") + 1)) diff --git a/python/private/stage1_bootstrap_template.sh b/python/private/stage1_bootstrap_template.sh index 1a79476e3..afa1ee84b 100644 --- a/python/private/stage1_bootstrap_template.sh +++ b/python/private/stage1_bootstrap_template.sh @@ -128,12 +128,6 @@ if [[ "$IS_ZIPFILE" == "1" ]]; then # The bin/ directory may not exist if it is empty. mkdir -p "$(dirname $python_exe)" ln -s "$symlink_to" "$python_exe" -else - : - #set -x - #stat "$python_exe" - #ls -l $(dirname $python_exe) - #set +x fi # At this point, we should have a valid reference to the interpreter. From ae24ef662fda30eb80ea9e7667f527f37e9a5396 Mon Sep 17 00:00:00 2001 From: Richard Levasseur Date: Tue, 19 Nov 2024 13:18:06 -0800 Subject: [PATCH 5/8] Update python/private/site_init_template.py Co-authored-by: Ignas Anikevicius <240938+aignas@users.noreply.github.com> --- python/private/site_init_template.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/private/site_init_template.py b/python/private/site_init_template.py index 29e405a1a..3f5db9632 100644 --- a/python/private/site_init_template.py +++ b/python/private/site_init_template.py @@ -20,7 +20,7 @@ _IMPORTS_STR = "%imports%" # Though the import all value is the correct literal, we quote it # so this file is parsable by tools. -_IMPORT_ALL = True if "%import_all%" == "True" else False +_IMPORT_ALL = "%import_all%" == "True" _WORKSPACE_NAME = "%workspace_name%" # runfiles-relative path to this file _SELF_RUNFILES_RELATIVE_PATH = "%site_init_runfiles_path%" From a25df944bb9da9869f5d11cb0534afdfbe1d55fe Mon Sep 17 00:00:00 2001 From: Richard Levasseur Date: Tue, 19 Nov 2024 13:28:03 -0800 Subject: [PATCH 6/8] Update python/private/py_executable_bazel.bzl Co-authored-by: Ignas Anikevicius <240938+aignas@users.noreply.github.com> --- python/private/py_executable_bazel.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/private/py_executable_bazel.bzl b/python/private/py_executable_bazel.bzl index a3e2ab121..60c3815c9 100644 --- a/python/private/py_executable_bazel.bzl +++ b/python/private/py_executable_bazel.bzl @@ -312,7 +312,7 @@ def _create_executable( )) # The interpreter is added this late in the process so that it isn't - # added to the files that zipping processes. + # added to the zipped files. if venv: extra_runfiles = extra_runfiles.merge(ctx.runfiles([venv.interpreter])) return create_executable_result_struct( From cd33fc79b0f488ee82dac6f03346a7d794b915c4 Mon Sep 17 00:00:00 2001 From: Richard Levasseur Date: Wed, 20 Nov 2024 19:25:23 -0800 Subject: [PATCH 7/8] assign coverage_setup instead of mutable global; check coverage_dir env var --- python/private/site_init_template.py | 11 ++++------- python/private/stage2_bootstrap_template.py | 9 ++++++--- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/python/private/site_init_template.py b/python/private/site_init_template.py index 3f5db9632..7a32210bf 100644 --- a/python/private/site_init_template.py +++ b/python/private/site_init_template.py @@ -27,9 +27,6 @@ # Runfiles-relative path to the coverage tool entry point, if any. _COVERAGE_TOOL = "%coverage_tool%" -# True if coverage was requested and successfully setup -COVERAGE_SETUP = False - def _is_verbose(): return bool(os.environ.get("RULES_PYTHON_BOOTSTRAP_VERBOSE")) @@ -128,7 +125,6 @@ def _search_path(name): def _setup_sys_path(): - global COVERAGE_SETUP seen = set(sys.path) python_path_entries = [] @@ -161,6 +157,7 @@ def _maybe_add_path(path): # for something, though it could be another program executing this one or # one executed by this one (e.g. an extension module). # NOTE: Coverage is added last to allow user dependencies to override it. + coverage_setup = False if os.environ.get("COVERAGE_DIR"): cov_tool = _COVERAGE_TOOL if cov_tool: @@ -185,7 +182,7 @@ def _maybe_add_path(path): # the runfiles directory, which must not be replaced. # CoverageScript.do_execute() undoes this sys.path[0] setting. _maybe_add_path(coverage_dir) - COVERAGE_SETUP = True + coverage_setup = True else: _print_verbose_coverage( "Coverage was enabled, but python coverage tool was not configured." @@ -193,7 +190,7 @@ def _maybe_add_path(path): + "https://rules-python.readthedocs.io/en/latest/coverage.html" ) - return None + return coverage_setup -_setup_sys_path() +COVERAGE_SETUP = _setup_sys_path() diff --git a/python/private/stage2_bootstrap_template.py b/python/private/stage2_bootstrap_template.py index 30f0b7749..dfa608331 100644 --- a/python/private/stage2_bootstrap_template.py +++ b/python/private/stage2_bootstrap_template.py @@ -391,10 +391,13 @@ def main(): # user import accidentally triggering the site-packages logic above. sys.path[0:0] = prepend_path_entries - # todo: also check COVERAGE_DIR env var - import _bazel_site_init + if os.environ.get("COVERAGE_DIR"): + import _bazel_site_init + coverage_enabled = _bazel_site_init.COVERAGE_SETUP + else: + coverage_enabled = False - with _maybe_collect_coverage(enable=_bazel_site_init.COVERAGE_SETUP): + with _maybe_collect_coverage(enable=coverage_enabled): # The first arg is this bootstrap, so drop that for the re-invocation. _run_py(main_filename, args=sys.argv[1:]) sys.exit(0) From c589335e90ec2c199631a5f5a19384e658dc1944 Mon Sep 17 00:00:00 2001 From: Richard Levasseur Date: Wed, 20 Nov 2024 19:38:34 -0800 Subject: [PATCH 8/8] remove outdated comment --- python/private/stage2_bootstrap_template.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/python/private/stage2_bootstrap_template.py b/python/private/stage2_bootstrap_template.py index dfa608331..d2c749779 100644 --- a/python/private/stage2_bootstrap_template.py +++ b/python/private/stage2_bootstrap_template.py @@ -386,9 +386,6 @@ def main(): sys.stdout.flush() - # NOTE: The sys.path must be modified before coverage is imported/activated - # NOTE: Perform this after the user imports are appended. This avoids a - # user import accidentally triggering the site-packages logic above. sys.path[0:0] = prepend_path_entries if os.environ.get("COVERAGE_DIR"):