Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bazel: pull in some dependencies to the internal repo #17382

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions MODULE.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,31 @@ bazel_dep(name = "rules_rust", version = "0.49.1")

bazel_dep(name = "buildifier_prebuilt", version = "6.4.0", dev_dependency = True)

rust = use_extension("@rules_rust//rust:extensions.bzl", "rust")

rust_host_tools = use_extension("@rules_rust//rust:extensions.bzl", "rust_host_tools")

rust.toolchain(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this definition really relevant for the external repo? Keeping the versions in sync internally and externally is not-so-great, and toolchains need to be defined in the root module.

In particular, we will never build (release) universal binaries from the public repo, so the extra target triples also are, as far as I can see, unnecessary, if we can avoid triggering the lipo transition.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think sync would be necessary, but in order to have a lipo rule in this repository one needs the toolchain definition to pull in the additional triples.

Anyway, I'll drop this PR for another approach

edition = "2021",
# We need those extra target triples so that we can build universal binaries on macos
extra_target_triples = [
"x86_64-apple-darwin",
"aarch64-apple-darwin",
],
# Keep this version in sync with the one below for the host toolchain
versions = ["1.74.0"],
)

# Don't download a second toolchain as host toolchain, make sure this is the same version as above
rust_host_tools.host_tools(
edition = "2021",
version = "1.74.0",
)

use_repo(rust, "rust_toolchains")

register_toolchains("@rust_toolchains//:all")

# crate_py but shortened due to Windows file path considerations
cp = use_extension(
"@rules_rust//crate_universe:extension.bzl",
Expand Down
19 changes: 19 additions & 0 deletions misc/bazel/glibc_symbols_check.bzl
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
load("@rules_python//python:defs.bzl", "py_test")

def glibc_symbols_check(name, binary):
    """
    Checks that the supplied binary doesn't use symbols that are not available in older glibc versions.
    """

    # Note this accesses system binaries that are not declared anywhere,
    # thus breaking build hermeticity
    py_test(
        name = name,
        srcs = ["@codeql//misc/bazel/internal:check_glibc_symbols.py"],
        main = "@codeql//misc/bazel/internal:check_glibc_symbols.py",
        data = [binary],
        args = ["$(location :%s)" % binary],
        target_compatible_with = [
            "@platforms//os:linux",
            "@codeql//misc/bazel/platforms:bundled",
        ],
        size = "medium",
        tags = ["glibc-symbols-check"],
    )
2 changes: 1 addition & 1 deletion misc/bazel/internal/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1 +1 @@
exports_files(["install.py"])
# Export every helper script in this package (install.py, check_glibc_symbols.py, ...)
# so other packages can reference them as srcs/main.
exports_files(glob(["*.py"]))
172 changes: 172 additions & 0 deletions misc/bazel/internal/check_glibc_symbols.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
#!/usr/bin/env python

import re
import subprocess
import sys
import os
import argparse

# Maximum allowed version per symbol category: a symbol such as GLIBC_2.18 is
# rejected when its version exceeds the limit listed for its category here.
# Some of these limits are conservative and the required versions could be
# comfortably increased, especially if they're no newer than the versions that
# Java depends on.
default_limits = {
    'GCC': '3.0',

    # Default limit for versions of GLIBC symbols used by target program.
    # GLIBC_2.17 was released on 2012-12-25.
    # https://sourceware.org/glibc/wiki/Glibc%20Timeline
    'GLIBC': '2.17',

    # Default limit for versions of GLIBCXX (and GLIBCPP) symbols used
    # by target program. GLIBCXX_3.4 implies at least libstdc++.6.0,
    # and was adopted by GCC 3.4, released on 2004-04-18.
    # https://gcc.gnu.org/onlinedocs/libstdc++/manual/abi.html
    'GLIBCXX': '3.4',
    'GLIBCPP': '3.4',
    'CXXABI': '1.3',
}

# List of libraries which must not be dynamically loaded.
# On linux, the llvm libc++ libraries are statically linked and should not appear
# in the output of `ldd`. Names here are compared against the basename of each
# linked library with everything after the first '.' stripped (so "libstdc++.so.6"
# matches "libstdc++").
no_dynlink_libs = [
    "libstdc++",
    "libc++",
    "libc++abi",
    "libunwind",
]

def get_glibc_version():
    '''Return the host's glibc version, taken from the first line of `ldd --version`.

    NOTE(review): the version is returned as a float, so e.g. "2.10" parses as
    2.1 and would compare less than "2.9" — fine for display, suspect for
    version comparisons; confirm how callers use this.'''
    version = subprocess.check_output(['ldd', '--version']).decode("utf-8").split('\n')[0]
    return float(version.split(' ')[-1])

def isTrue(var):
    '''Return True if environment variable `var` is set to a truthy value
    ("true", "yes" or "1", case-insensitively); False if unset or anything else.'''
    return os.environ.get(var, '').lower() in ('true', 'yes', '1')

def memoize(f):
    '''Decorator caching the result of a single-argument function per argument.'''
    cache = {}

    def wrapper(arg):
        # Compute at most once per distinct (hashable) argument.
        if arg not in cache:
            cache[arg] = f(arg)
        return cache[arg]

    return wrapper

def normalise_ver(ver):
    '''Convert a dot-delimited numerical version string into a list of integers.

    This makes version comparisons numeric rather than lexicographic
    (so e.g. 2.10 correctly compares greater than 2.9).'''
    return list(map(int, ver.split('.')))

def too_new(cat, ver, limits):
    '''Compare version string `ver` with the limit configured for category `cat`.

    Return True if `ver` exceeds the limit, or if `limits` has no entry for
    `cat` at all (unknown categories are always "too new").'''
    if cat not in limits:
        return True
    return normalise_ver(ver) > normalise_ver(limits[cat])

@memoize
def get_libs(prog):
    '''Return the list of absolute paths of libraries `prog` dynamically links,
    as reported by `ldd`. Returns [] for binaries that are not dynamically linked.

    Memoized: `ldd` is only invoked once per program.'''
    # Each resolved library line looks like: "libfoo.so.1 => /path/libfoo.so.1 (0x...)".
    re_lib = re.compile(r'^.* (/.*) \(0x[0-9a-f]+\)$')
    try:
        # Bug fix: the original ran `ldd` twice — once inside the try block with
        # its output discarded, and again (without stderr capture) when building
        # the result, so a failure of the second call escaped the handler.
        output = subprocess.check_output(['ldd', prog], stderr=subprocess.STDOUT).decode('utf-8')
    except subprocess.CalledProcessError:
        # ldd has a non-zero exit code if the binary is not dynamically linked.
        return []
    return [m.group(1) for m in (re_lib.search(line) for line in output.split('\n')) if m]

def gather_min_symbol_versions(prog, limits):
    '''Inspect the given executable 'prog' using `ldd` to discover which libraries it is linked
    against. For each library, use `readelf` to discover the symbols therein, and for each
    symbol with a GLIBC, GLIBCXX or GLIBCPP version record the latest version of each symbol
    found that does not exceed the given limits, or the earliest available version if no
    version is found within the limits.

    Return a dict mapping symbol names to (category, version) tuples such as
    ('GLIBC', '2.5'), indicating the best available version of each symbol.'''
    libs = get_libs(prog)
    # Best (within-limit, else earliest) version seen so far, per symbol.
    sym_ver = dict()
    re_sym = re.compile(r'(\w+)@+(.+)_([0-9.]+)')
    for lib in libs:
        for (sym, cat, ver) in re.findall(re_sym, subprocess.check_output(['readelf', '-Ws', lib]).decode('utf-8')):
            if sym in sym_ver:
                (cat2, ver2) = sym_ver[sym]
                if cat != cat2:
                    # Bug fix: this was `str(sym, cat, ver, cat2, ver2)`, which
                    # raises TypeError (str() takes at most 3 arguments) instead
                    # of producing the intended diagnostic.
                    raise Exception('Mismatched categories for symbol: %r' % ((sym, cat, ver, cat2, ver2),))
                # Prefer a smaller version if the current one is over the limit,
                # or a larger version that still stays within the limit.
                if (normalise_ver(ver) < normalise_ver(ver2) and too_new(cat2, ver2, limits)) or \
                   (normalise_ver(ver) > normalise_ver(ver2) and not too_new(cat, ver, limits)):
                    sym_ver[sym] = (cat, ver)
            else:
                sym_ver[sym] = (cat, ver)
    return sym_ver

def gather_linked_symbols(prog):
    '''Inspect the given executable 'prog' using `nm` to discover which undefined
    symbols it links, and return a list of (symbol, category, version) tuples for
    each symbol carrying a GLIBC, GLIBCXX, or GLIBCPP version.'''
    pattern = re.compile(r'U (\w+)@+(.+)_([0-9.]+)')
    listing = subprocess.check_output(['nm', '-u', prog]).decode('utf-8')
    return pattern.findall(listing)

def verify_dynlinked_libraries(prog):
    '''Return the dynamically linked libraries of `prog` that appear in
    `no_dynlink_libs`, i.e. libraries that should have been statically linked.'''
    return [
        lib
        for lib in get_libs(prog)
        # "libstdc++.so.6" -> "libstdc++" for comparison against the deny-list.
        if os.path.basename(lib).split(".")[0] in no_dynlink_libs
    ]

def main():
    """Check the program named on the command line for glibc-compatibility
    problems (too-new symbol versions, forbidden dynamic libraries) and exit
    with status 1 if any are found, 0 otherwise."""
    if isTrue('CODEQL_SKIP_COMPATIBILITY') and not isTrue('CI'):
        # Respect CODEQL_SKIP_COMPATIBILITY which tells us to skip this check, unless we are on CI
        sys.exit(0)

    # Verify parameters
    parser = argparse.ArgumentParser()
    parser.add_argument("program")
    # create outfile - this is needed for the bazel aspect integration
    # (FileType('w') creates the file as a side effect of parsing; it is never written to)
    parser.add_argument("output", nargs="?", type=argparse.FileType('w'))
    prog = parser.parse_args().program

    # Gather versions of symbols actually linked
    prog_symbols = gather_linked_symbols(prog)
    # Check whether any symbols exceed the maximum version restrictions
    bad_syms = [ (sym, cat, ver) for sym, cat, ver in prog_symbols if too_new(cat, ver, default_limits) ]
    if bad_syms != []:
        # Scan for minimum versions of symbols available in linked libraries
        available_symbols = gather_min_symbol_versions(prog, default_limits)
        for sym, cat, ver in bad_syms:
            print(sym + ' is too new or from an unknown category: it requires ' + cat + '_' + ver
                  + ', but we are limited to ' + str(default_limits))
            if sym in available_symbols:
                (cat, ver) = available_symbols[sym]
                if not too_new(cat, ver, default_limits):
                    # An acceptable older version exists: suggest the wrapper that
                    # forces linking against it.
                    print('\tconsider adding: SET_GLIBC_VERSION(%s_%s,%s) { ... } to glibc_compatibility.cpp, ' % (cat, ver, sym))
                    print('\tand add \'-Wl,--wrap=%s\' when linking. ' % (sym))
                else:
                    print('\tThe earliest available symbol has version %s_%s' % (cat, ver))

    bad_libs = verify_dynlinked_libraries(prog)
    if bad_libs != []:
        print("Binary dynamically links against:")
        for bad_lib in bad_libs:
            print("\t%s" % (bad_lib))
        print("These libraries should be statically linked on linux")

    if bad_syms != [] or bad_libs != []:
        sys.exit(1)
    sys.exit(0)

if __name__ == '__main__':
    main()
44 changes: 44 additions & 0 deletions misc/bazel/lipo.bzl
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
load("@bazel_skylib//lib:paths.bzl", "paths")
load("//misc/bazel:transitions.bzl", "forward_binary_from_transition", "get_transition_attrs", "universal_binary_transition")

def _universal_binary_impl(ctx):
    # Two cases: either we're on macOS, and we need to lipo the two binaries that the
    # transition generated together, or we're on another platform, where we just copy
    # along the binary, and forward the DefaultInfo data.
    binaries = [dep[DefaultInfo].files_to_run.executable for dep in ctx.attr.dep]
    if len(binaries) == 0:
        fail("No executable inputs found")

    # We're using a split transition on the `dep` attribute on macOS. If we are on macOS,
    # ctx.split_attr has two entries (if we need to retrieve the per-architecture binaries),
    # and ctx.attr.dep is a list with two elements - one for each platform.
    # While not using a split transition, ctx.attr.dep is a list with one element, as we
    # just have a single platform. We use this to distinguish whether we should lipo the
    # binaries together, or just forward the binary.
    if len(binaries) == 1:
        return forward_binary_from_transition(ctx)

    # Bug fix: declare the output only on the lipo path. The original declared it
    # before branching, so on the forwarding path the declared file had no
    # generating action, which is an analysis error in Bazel.
    (_, extension) = paths.split_extension(binaries[0].basename)
    new_executable = ctx.actions.declare_file(ctx.label.name + extension)

    ctx.actions.run_shell(
        inputs = binaries,
        outputs = [new_executable],
        command = "lipo -create %s -output %s" % (" ".join([binary.path for binary in binaries]), new_executable.path),
    )
    files = depset(direct = [new_executable])
    runfiles = ctx.runfiles([new_executable]).merge_all([dep[DefaultInfo].default_runfiles for dep in ctx.attr.dep])

    return [
        DefaultInfo(
            files = files,
            runfiles = runfiles,
            executable = new_executable,
        ),
    ]

# Public rule: builds `dep` once per target architecture via a split transition on
# macOS and merges the results with `lipo`; on other platforms it simply forwards
# the single binary.
universal_binary = rule(
    implementation = _universal_binary_impl,
    attrs = get_transition_attrs(universal_binary_transition),
    doc = """On macOS: Create a universal (fat) binary from the input rule, by applying two transitions and lipoing the result together.
No-op on other platforms, just forward the binary.""",
)
66 changes: 66 additions & 0 deletions misc/bazel/platforms/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package(default_visibility = ["//visibility:public"])
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't enjoy that we now have the platform definitions externally, but the toolchain defs internally (and the toolchains themselves belong to and need to stay internally AFAIUI).
Is there any way we can put this file back to the internal repo?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't mind too much, as this is defining constraints, not the actual toolchains. But in any case, this will go


# Whether a build targets the "bundled" toolchain (shipped with releases) or the
# host's own toolchain.
constraint_setting(name = "host_or_bundled")

constraint_value(
    name = "bundled",
    constraint_setting = ":host_or_bundled",
)

constraint_value(
    name = "host",
    constraint_setting = ":host_or_bundled",
)

# The platform auto-detected by Bazel for the current host.
alias(
    name = "detected",
    actual = "@local_config_platform//:host",
)

# Detected host platform plus the :bundled constraint; the per-CPU variants
# below additionally pin the target CPU.
platform(
    name = "bundled_toolchain",
    constraint_values = [":bundled"],
    parents = [":detected"],
)

platform(
    name = "bundled_toolchain_arm64",
    constraint_values = ["@platforms//cpu:arm64"],
    parents = [":bundled_toolchain"],
)

platform(
    name = "bundled_toolchain_x86_64",
    constraint_values = ["@platforms//cpu:x86_64"],
    parents = [":bundled_toolchain"],
)

platform(
    name = "bundled_toolchain_x86_32",
    constraint_values = ["@platforms//cpu:x86_32"],
    parents = [":bundled_toolchain"],
)

# Detected host platform plus the :host constraint; per-CPU variants as above.
platform(
    name = "host_toolchain",
    constraint_values = [":host"],
    parents = [":detected"],
)

platform(
    name = "host_toolchain_arm64",
    constraint_values = ["@platforms//cpu:arm64"],
    parents = [":host_toolchain"],
)

platform(
    name = "host_toolchain_x86_64",
    constraint_values = ["@platforms//cpu:x86_64"],
    parents = [":host_toolchain"],
)

platform(
    name = "host_toolchain_x86_32",
    constraint_values = ["@platforms//cpu:x86_32"],
    parents = [":host_toolchain"],
)
4 changes: 2 additions & 2 deletions misc/bazel/rust.bzl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
load("@rules_rust//rust:defs.bzl", "rust_binary")
load("@semmle_code//buildutils-internal:glibc_symbols_check.bzl", "glibc_symbols_check")
load("@semmle_code//buildutils-internal:lipo.bzl", "universal_binary")
load("//misc/bazel:glibc_symbols_check.bzl", "glibc_symbols_check")
load("//misc/bazel:lipo.bzl", "universal_binary")

def codeql_rust_binary(
name,
Expand Down
Loading
Loading