-
Notifications
You must be signed in to change notification settings - Fork 1.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
Bazel: pull in some dependencies to the internal repo #17382
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
load("@rules_python//python:defs.bzl", "py_test") | ||
|
||
def glibc_symbols_check(name, binary):
    """
    Checks that the supplied binary doesn't use symbols that are not available in older glibc versions.

    Instantiates a `py_test` named `name` that runs `check_glibc_symbols.py`
    against `binary`. The test only runs on Linux with the bundled toolchain
    constraint, and is tagged `glibc-symbols-check` so it can be filtered.
    """
    # Note this accesses system binaries that are not declared anywhere,
    # thus breaking build hermeticity.
    # NOTE(review): rules_python now ships a hermetic Python interpreter, so
    # part of this caveat may be obsolete — verify before relying on it.
    py_test(
        name = name,
        srcs = ["@codeql//misc/bazel/internal:check_glibc_symbols.py"],
        main = "@codeql//misc/bazel/internal:check_glibc_symbols.py",
        data = [binary],
        # Expand the runfiles path of the binary under test.
        args = ["$(location :%s)" % binary],
        target_compatible_with = ["@platforms//os:linux", "@codeql//misc/bazel/platforms:bundled"],
        size = "medium",
        tags = ["glibc-symbols-check"],
    )
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
# Export every Python helper script in this package (install.py,
# check_glibc_symbols.py, ...) so that other packages may reference them.
exports_files(glob(["*.py"]))
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,172 @@ | ||
#!/usr/bin/env python | ||
|
||
import argparse
import functools
import os
import re
import subprocess
import sys
|
||
# Some of these limits are conservative and the required versions could be
# comfortably increased, especially if they're no newer than the versions that
# Java depends on.
default_limits = {
    'GCC': '3.0',

    # Default limit for versions of GLIBC symbols used by target program.
    # GLIBC_2.17 was released on 2012-12-25.
    # https://sourceware.org/glibc/wiki/Glibc%20Timeline
    'GLIBC': '2.17',

    # Default limit for versions of GLIBCXX (and GLIBCPP) symbols used
    # by target program. GLIBCXX_3.4 implies at least libstdc++.6.0,
    # and was adopted by GCC 3.4, released on 18/4/2004.
    # https://gcc.gnu.org/onlinedocs/libstdc++/manual/abi.html
    'GLIBCXX': '3.4',
    'GLIBCPP': '3.4',
    'CXXABI': '1.3',
}

# List of libraries which must not be dynamically loaded.
# On linux, the llvm libc++ libraries are statically linked and should not
# appear in the output of `ldd`.
no_dynlink_libs = [
    "libstdc++",
    "libc++",
    "libc++abi",
    "libunwind",
]
|
||
def get_glibc_version():
    '''Return the version of the system glibc, parsed from `ldd --version`.

    NOTE(review): the version is returned as a float, so e.g. 2.4 compares
    greater than 2.17 — fine for display, but beware ordered comparisons.
    '''
    first_line = subprocess.check_output(['ldd', '--version']).decode('utf-8').split('\n')[0]
    # The version number is the last space-separated token of the first line.
    return float(first_line.split(' ')[-1])
|
||
def isTrue(var):
    '''Return True when the environment variable `var` is set to a truthy
    value ("true", "yes" or "1", case-insensitively); False otherwise.'''
    return os.environ.get(var, '').lower() in ('true', 'yes', '1')
|
||
def memoize(f):
    '''Memoize decorator for single-argument functions.

    Caches results in an unbounded dict keyed by the (hashable) argument.
    Fix: the wrapper now uses functools.wraps so the decorated function keeps
    its original __name__ and docstring (the previous version did not).
    '''
    memo = {}

    @functools.wraps(f)
    def helper(x):
        if x not in memo:
            memo[x] = f(x)
        return memo[x]

    return helper
|
||
def normalise_ver(ver):
    '''Convert a dot delimited numerical version string into list of integers.

    This conversion facilitates comparisons between version numbers.'''
    return list(map(int, ver.split('.')))
|
||
def too_new(cat, ver, limits):
    '''Compare version string ver with the limit for cat.
    Return True if ver is greater than the limit, or if there is no limit for cat.'''
    limit = limits.get(cat)
    if limit is None:
        # An unknown category has no acceptable version at all.
        return True
    return normalise_ver(ver) > normalise_ver(limit)
|
||
@memoize
def get_libs(prog):
    '''Return the list of shared-library paths `prog` is dynamically linked
    against, as reported by `ldd`.

    Returns an empty list for binaries that are not dynamically linked
    (where ldd exits with a non-zero status).
    '''
    # Match lines like "  libfoo.so => /lib/x86_64/libfoo.so (0x00007f...)";
    # group 1 captures the absolute library path.
    re_lib = re.compile(r'^.* (/.*) \(0x[0-9a-f]+\)$')
    try:
        # Fix: run ldd exactly once and reuse its output (previously ldd was
        # invoked twice: once for the error check, discarding the output, and
        # again to actually parse it). Also dropped the no-op `except: raise`.
        output = subprocess.check_output(['ldd', prog], stderr=subprocess.STDOUT).decode('utf-8')
    except subprocess.CalledProcessError:
        # ldd will have a non zero exitcode if the binary is not dynamically linked.
        return []

    return [m.group(1) for m in (re_lib.search(l) for l in output.split('\n')) if m]
|
||
def gather_min_symbol_versions(prog, limits):
    '''Inspect the given executable 'prog' using `ldd` to discover which libraries it is linked
    against. For each library, use `readelf` to discover the symbols therein, and for each
    symbol with a GLIBC, GLIBCXX or GLIBCPP version record the latest version of each symbol
    found that does not exceed the given limits, or the earliest available version if no
    version is found within the limits.

    Return a dict mapping symbol names to (category, version) tuples such as
    ("GLIBC", "2.5"). The dict thus indicates the best available versions of
    each symbol.'''
    libs = get_libs(prog)
    # Find earliest versions of all symbols.
    sym_ver = dict()
    # Matches e.g. "memcpy@@GLIBC_2.14" -> ("memcpy", "GLIBC", "2.14").
    re_sym = re.compile(r'(\w+)@+(.+)_([0-9.]+)')
    for lib in libs:
        for (sym, cat, ver) in re_sym.findall(subprocess.check_output(['readelf', '-Ws', lib]).decode('utf-8')):
            if sym in sym_ver:
                (cat2, ver2) = sym_ver[sym]
                if cat != cat2:
                    # Fix: str() was called with five arguments, which raises
                    # TypeError; format the values as a tuple instead.
                    raise Exception('Mismatched categories for symbol: ' + str((sym, cat, ver, cat2, ver2)))
                # Prefer a later version while it stays within the limits, or
                # any in-limit version over an out-of-limit one.
                if (normalise_ver(ver) < normalise_ver(ver2) and too_new(cat2, ver2, limits)) or \
                   (normalise_ver(ver) > normalise_ver(ver2) and not too_new(cat, ver, limits)):
                    sym_ver[sym] = (cat, ver)
            else:
                sym_ver[sym] = (cat, ver)
    return sym_ver
|
||
def gather_linked_symbols(prog):
    '''Inspect the given executable 'prog' using `nm` to discover which symbols it links,
    and for each symbol with a GLIBC, GLIBCXX, or GLIBCPP version return a list of
    (symbol, category, version) tuples.'''
    # "U" marks undefined (i.e. dynamically resolved) symbols in nm output.
    undef_re = re.compile(r'U (\w+)@+(.+)_([0-9.]+)')
    nm_output = subprocess.check_output(['nm', '-u', prog]).decode('utf-8')
    return undef_re.findall(nm_output)
|
||
def verify_dynlinked_libraries(prog):
    '''Return the intersection set between dynamically linked libraries
    that should not be dynamically loaded. See `no_dynlink_libs`.'''
    # Compare on the base name before the first dot, e.g.
    # "/usr/lib/libc++.so.1" -> "libc++".
    return [
        lib
        for lib in get_libs(prog)
        if os.path.basename(lib).split(".")[0] in no_dynlink_libs
    ]
|
||
def main():
    '''Check the program named on the command line for overly-new glibc family
    symbols and for forbidden dynamically linked libraries; exit 1 on failure.'''
    # Respect CODEQL_SKIP_COMPATIBILITY which tells us to skip this check, unless we are on CI
    if isTrue('CODEQL_SKIP_COMPATIBILITY') and not isTrue('CI'):
        sys.exit(0)

    # Verify parameters.
    parser = argparse.ArgumentParser()
    parser.add_argument("program")
    # create outfile - this is needed for the bazel aspect integration
    # (FileType('w') creates the file as a side effect of parsing).
    parser.add_argument("output", nargs="?", type=argparse.FileType('w'))
    prog = parser.parse_args().program

    # Gather versions of symbols actually linked, then keep those exceeding
    # the maximum version restrictions.
    prog_symbols = gather_linked_symbols(prog)
    bad_syms = [entry for entry in prog_symbols if too_new(entry[1], entry[2], default_limits)]
    if bad_syms:
        # Scan for minimum versions of symbols available in linked libraries.
        available_symbols = gather_min_symbol_versions(prog, default_limits)
        for sym, cat, ver in bad_syms:
            print(sym + ' is too new or from an unknown category: it requires ' + cat + '_' + ver
                  + ', but we are limited to ' + str(default_limits))
            if sym in available_symbols:
                (cat, ver) = available_symbols[sym]
                if not too_new(cat, ver, default_limits):
                    # An acceptable older version exists; suggest the wrap recipe.
                    print('\tconsider adding: SET_GLIBC_VERSION(%s_%s,%s) { ... } to glibc_compatibility.cpp, ' % (cat, ver, sym))
                    print('\tand add \'-Wl,--wrap=%s\' when linking. ' % (sym))
                else:
                    print('\tThe earliest available symbol has version %s_%s' % (cat, ver))

    bad_libs = verify_dynlinked_libraries(prog)
    if bad_libs:
        print("Binary dynamically links against:")
        for bad_lib in bad_libs:
            print("\t%s" % (bad_lib))
        print("These libraries should be statically linked on linux")

    sys.exit(1 if bad_syms or bad_libs else 0)
|
||
# Script entry point.
if __name__ == '__main__':
    main()
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
load("@bazel_skylib//lib:paths.bzl", "paths") | ||
load("//misc/bazel:transitions.bzl", "forward_binary_from_transition", "get_transition_attrs", "universal_binary_transition") | ||
|
||
def _universal_binary_impl(ctx):
    # Two cases: Either we're on macos, and we need to lipo the two binaries that the transition generated
    # together, or we're on another platform, where we just copy along the binary, and forward the DefaultInfo data
    binaries = [dep[DefaultInfo].files_to_run.executable for dep in ctx.attr.dep]
    if len(binaries) == 0:
        fail("No executable inputs found")

    # Keep the original file extension (if any) on the declared output.
    (_, extension) = paths.split_extension(binaries[0].basename)
    new_executable = ctx.actions.declare_file(ctx.label.name + extension)

    # We're using a split transition on the `dep` attribute on macos. If we are on macos, that has the function that
    # a) ctx.split_attr has two entries (if we need to retrieve the per-architecture binaries), and that
    # ctx.attr.dep is a list with two elements - one for each platform.
    # While not using a split transition, ctx.attr.dep is a list with one element, as we just have a single platform.
    # We use this to distinguish whether we should lipo the binaries together, or just forward the binary.
    if len(binaries) == 1:
        return forward_binary_from_transition(ctx)
    else:
        # NOTE(review): `lipo` is taken from the ambient macOS system here
        # (not a declared tool) — confirm this is acceptable for hermeticity.
        ctx.actions.run_shell(
            inputs = binaries,
            outputs = [new_executable],
            command = "lipo -create %s -output %s" % (" ".join([binary.path for binary in binaries]), new_executable.path),
        )
    files = depset(direct = [new_executable])
    runfiles = ctx.runfiles([new_executable]).merge_all([dep[DefaultInfo].default_runfiles for dep in ctx.attr.dep])

    providers = [
        DefaultInfo(
            files = files,
            runfiles = runfiles,
            executable = new_executable,
        ),
    ]
    return providers

universal_binary = rule(
    implementation = _universal_binary_impl,
    attrs = get_transition_attrs(universal_binary_transition),
    doc = """On macOS: Create a universal (fat) binary from the input rule, by applying two transitions and lipoing the result together.
No-op on other platforms, just forward the binary.""",
)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
package(default_visibility = ["//visibility:public"])

# Constraint distinguishing builds that target the host toolchain from builds
# that target the bundled (release) toolchain.
constraint_setting(name = "host_or_bundled")

constraint_value(
    name = "bundled",
    constraint_setting = ":host_or_bundled",
)

constraint_value(
    name = "host",
    constraint_setting = ":host_or_bundled",
)

# The auto-detected host platform, used as the parent of all platforms below.
alias(
    name = "detected",
    actual = "@local_config_platform//:host",
)

platform(
    name = "bundled_toolchain",
    constraint_values = [":bundled"],
    parents = [":detected"],
)

# Per-CPU variants of the bundled-toolchain platform (needed e.g. as lipo
# transition targets).
platform(
    name = "bundled_toolchain_arm64",
    constraint_values = ["@platforms//cpu:arm64"],
    parents = [":bundled_toolchain"],
)

platform(
    name = "bundled_toolchain_x86_64",
    constraint_values = ["@platforms//cpu:x86_64"],
    parents = [":bundled_toolchain"],
)

platform(
    name = "bundled_toolchain_x86_32",
    constraint_values = ["@platforms//cpu:x86_32"],
    parents = [":bundled_toolchain"],
)

platform(
    name = "host_toolchain",
    constraint_values = [":host"],
    parents = [":detected"],
)

# Per-CPU variants of the host-toolchain platform.
platform(
    name = "host_toolchain_arm64",
    constraint_values = ["@platforms//cpu:arm64"],
    parents = [":host_toolchain"],
)

platform(
    name = "host_toolchain_x86_64",
    constraint_values = ["@platforms//cpu:x86_64"],
    parents = [":host_toolchain"],
)

platform(
    name = "host_toolchain_x86_32",
    constraint_values = ["@platforms//cpu:x86_32"],
    parents = [":host_toolchain"],
)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this definition really relevant for the external repo? Keeping the versions in sync internally and externally is not-so-great, and toolchains need to be defined in the root module.
In particular, we will never build (release) universal binaries from the public repo, so the extra target triples also are, as far as I can see, unnecessary, if we can avoid triggering the lipo transition.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think sync would be necessary, but in order to have a lipo rule in this repository one needs the toolchain definition to pull in the additional triples.
Anyway, I'll drop this PR for another approach