From fb7e1d92b12a40fa7d6d6d9384827c2c7bc39146 Mon Sep 17 00:00:00 2001 From: Aaron Siddhartha Mondal Date: Tue, 9 May 2023 23:12:16 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=9A=80=20[WIP]=20Add=20SYCL=20toolchains?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Doesn't work yet. Just for reference. --- MODULE.bazel | 2 + bazel-wrapper/default.nix | 12 ++ examples/sycl_cpu_example/BUILD.bazel | 18 ++ examples/sycl_cpu_example/main.cpp | 39 +++++ ll/BUILD.bazel | 32 ++++ ll/args.bzl | 54 +++++- ll/attributes.bzl | 32 +++- ll/environment.bzl | 4 +- ll/init.bzl | 10 ++ ll/inputs.bzl | 31 +++- ll/ll.bzl | 11 ++ ll/toolchain.bzl | 5 + ll/tools.bzl | 9 +- ll/transitions.bzl | 3 + llvm-project-overlay/openmp/BUILD.bazel | 9 +- patches/libcxx_combined_init_patch.diff | 32 ++++ patches/rules_ll_overlay_patch.diff | 13 +- third-party-overlays/hipamd.BUILD.bazel | 8 + third-party-overlays/opensycl.BUILD.bazel | 203 ++++++++++++++++++++++ 19 files changed, 501 insertions(+), 26 deletions(-) create mode 100644 examples/sycl_cpu_example/BUILD.bazel create mode 100644 examples/sycl_cpu_example/main.cpp create mode 100644 patches/libcxx_combined_init_patch.diff create mode 100644 third-party-overlays/opensycl.BUILD.bazel diff --git a/MODULE.bazel b/MODULE.bazel index 82c8bbd5..7f86a646 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -40,6 +40,7 @@ llvm_project_overlay.configure( "@rules_ll//patches:rules_ll_overlay_patch.diff", "@rules_ll//patches:clang_new_offload_driver.diff", "@rules_ll//patches:llvm_use_zlib-ng.diff", + "@rules_ll//patches:libcxx_combined_init_patch.diff", ], ) @@ -62,6 +63,7 @@ use_repo( "comgr", "hip", "hipamd", + "opensycl", "rocclr", "rocm-device-libs", "rocm-opencl-runtime", diff --git a/bazel-wrapper/default.nix b/bazel-wrapper/default.nix index df41dbb4..56287431 100644 --- a/bazel-wrapper/default.nix +++ b/bazel-wrapper/default.nix @@ -60,6 +60,16 @@ LL_AMD_RPATHS=${(pkgs.lib.concatStringsSep ":" [ "-rpath=${pkgs.libglvnd}/lib" "-rpath=${pkgs.xorg.libX11}/lib" ])} + +LL_SYCL_INCLUDES=-isystem${(pkgs.boost182.override { + enableStatic = true; + enableShared = false; +}).dev}/include + +LL_SYCL_LIBRARIES=-L${(pkgs.boost182.override { + enableStatic = true; + enableShared = false; +})}/lib ${pkgs.lib.strings.optionalString unfree '' # Flags for CUDA dependencies. @@ -103,6 +113,8 @@ if [[ --action_env=LL_AMD_INCLUDES=$LL_AMD_INCLUDES \ --action_env=LL_AMD_LIBRARIES=$LL_AMD_LIBRARIES \ --action_env=LL_AMD_RPATHS=$LL_AMD_RPATHS \ + --action_env=LL_SYCL_INCLUDES=$LL_SYCL_INCLUDES \ + --action_env=LL_SYCL_LIBRARIES=$LL_SYCL_LIBRARIES \ --action_env=LL_CUDA_TOOLKIT=$LL_CUDA_TOOLKIT \ --action_env=LL_CUDA_RUNTIME=$LL_CUDA_RUNTIME \ --action_env=LL_CUDA_DRIVER=$LL_CUDA_DRIVER \ diff --git a/examples/sycl_cpu_example/BUILD.bazel b/examples/sycl_cpu_example/BUILD.bazel new file mode 100644 index 00000000..36ab4c52 --- /dev/null +++ b/examples/sycl_cpu_example/BUILD.bazel @@ -0,0 +1,18 @@ +load("@rules_ll//ll:defs.bzl", "ll_binary") + +ll_binary( + name = "amdgpu", + srcs = ["main.cpp"], + compilation_mode = "sycl_amdgpu", + compile_flags = [ + "-std=c++20", + "--offload-arch=gfx1100", + ], +) + +ll_binary( + name = "cpu", + srcs = ["main.cpp"], + compilation_mode = "sycl_cpu", + compile_flags = ["-std=c++20"], +) diff --git a/examples/sycl_cpu_example/main.cpp b/examples/sycl_cpu_example/main.cpp new file mode 100644 index 00000000..72e9a19a --- /dev/null +++ b/examples/sycl_cpu_example/main.cpp @@ -0,0 +1,39 @@ +#include +#include + +class vector_addition; + +int main(int, char **) { + sycl::float4 a = {1.0, 2.0, 3.0, 4.0}; + sycl::float4 b = {4.0, 3.0, 1.0, 1.0}; + sycl::float4 c = {0.0, 0.0, 0.0, 0.0}; + + sycl::queue queue(sycl::default_selector{}); + std::cout << "Running on " + << queue.get_device().get_info() << "\n"; + + { // start of scope, ensures data copied back to host + sycl::buffer a_sycl(&a, sycl::range<1>{1}); + sycl::buffer b_sycl(&b, sycl::range<1>{1}); + sycl::buffer c_sycl(&c, sycl::range<1>{1}); + + queue.submit([&](sycl::handler &cgh) { + auto a_acc = a_sycl.get_access(cgh); + auto b_acc = b_sycl.get_access(cgh); + auto c_acc = c_sycl.get_access(cgh); + + cgh.single_task( + [=]() { c_acc[0] = a_acc[0] + b_acc[0]; }); + }); + } // end of scope, ensures data copied back to host + + std::cout << " A { " << a.x() << ", " << a.y() << ", " << a.z() << ", " + << a.w() << " }\n" + << "+ B { " << b.x() << ", " << b.y() << ", " << b.z() << ", " + << b.w() << " }\n" + << "------------------\n" + << "= C { " << c.x() << ", " << c.y() << ", " << c.z() << ", " + << c.w() << " }" << std::endl; + + return 0; +} diff --git a/ll/BUILD.bazel b/ll/BUILD.bazel index a6d2426b..11773cda 100644 --- a/ll/BUILD.bazel +++ b/ll/BUILD.bazel @@ -134,10 +134,15 @@ ll_toolchain( "@hip//:headers", "@hipamd//:headers", ], + ":sycl_amdgpu": [ + "@hip//:headers", + "@hipamd//:headers", + ], "//conditions:default": [], }), hip_runtime = select({ ":hip_amdgpu": "@hipamd//:libamdhip64", + ":sycl_amdgpu": "@hipamd//:libamdhip64", "//conditions:default": None, }), leak_sanitizer = select({ @@ -148,6 +153,7 @@ ll_toolchain( }), libomp = select({ "omp_cpu": "@llvm-project//openmp:libomp", + "sycl_cpu": "@llvm-project//openmp:libomp", "//conditions:default": None, }), memory_sanitizer = select({ @@ -159,6 +165,7 @@ ll_toolchain( }), omp_header = select({ "omp_cpu": "@llvm-project//openmp:omp_header", + "sycl_cpu": "@llvm-project//openmp:omp_header", "//conditions:default": None, }), profile = select({ @@ -167,6 +174,31 @@ ll_toolchain( }), rocm_device_libs = select({ ":hip_amdgpu": "@rocm-device-libs//:rocm-device-libs", + ":sycl_amdgpu": "@rocm-device-libs//:rocm-device-libs", + "//conditions:default": None, + }), + sycl_headers = select({ + ":sycl_amdgpu": "@opensycl//:sycl_headers", + ":sycl_cpu": "@opensycl//:sycl_headers", + "//conditions:default": None, + }), + sycl_hip_backend = select({ + ":sycl_amdgpu": "@opensycl//:librt-backend-hip", + "//conditions:default": None, + }), + sycl_omp_backend = select({ + ":sycl_cpu": "@opensycl//:librt-backend-omp", + ":sycl_amdgpu": "@opensycl//:librt-backend-omp", + "//conditions:default": None, + }), + sycl_plugin = select({ + ":sycl_amdgpu": "@opensycl//:opensycl-clang", + ":sycl_cpu": "@opensycl//:opensycl-clang", + "//conditions:default": None, + }), + sycl_runtime = select({ + ":sycl_amdgpu": "@opensycl//:libhipSYCL-rt", + ":sycl_cpu": "@opensycl//:libhipSYCL-rt", "//conditions:default": None, }), thread_sanitizer = select({ diff --git a/ll/args.bzl b/ll/args.bzl index d05c48f3..c61e09eb 100644 --- a/ll/args.bzl +++ b/ll/args.bzl @@ -181,6 +181,7 @@ def compile_object_args( "cuda_nvptx", "hip_amdgpu", "hip_nvptx", + "sycl_amdgpu", ] and ctx.var["COMPILATION_MODE"] != "dbg": args.add_all(["-Xarch_device", "-O3"]) @@ -207,7 +208,7 @@ def compile_object_args( args.add("-fPIC") # Maybe enable OpenMP. - if ctx.attr.compilation_mode == "omp_cpu": + if ctx.attr.compilation_mode in ["omp_cpu", "sycl_cpu"]: args.add("-fopenmp") # TODO: This is obviously not the way lol. @@ -221,6 +222,7 @@ def compile_object_args( "cuda_nvptx", "hip_amdgpu", "hip_nvptx", + "sycl_amdgpu", ]: args.add("--offload-new-driver") @@ -235,7 +237,7 @@ def compile_object_args( ctx.configuration.default_shell_env["LL_CUDA_TOOLKIT"], format = "--cuda-path=%s", ) - if ctx.attr.compilation_mode in ["hip_nvptx", "hip_amdgpu"]: + if ctx.attr.compilation_mode in ["hip_nvptx", "hip_amdgpu", "sycl_amdgpu"]: args.add_all( [ Label("@hip").workspace_root, @@ -244,9 +246,22 @@ def compile_object_args( format_each = "-I%s/include", ) + if ctx.attr.compilation_mode in ["sycl_amdgpu", "sycl_cpu"]: + args.add_all( + [ + Label("@opensycl").workspace_root, + paths.join( + ctx.var["GENDIR"], # For hipSYCL/common/config.hpp + Label("@opensycl").workspace_root, + "opensycl", + ), + ], + format_each = "-I%s/include", + ) + clang_resource_dir = paths.join(llvm_bindir_path(ctx), "clang/staging") - if ctx.attr.compilation_mode == "hip_amdgpu": + if ctx.attr.compilation_mode in ["hip_amdgpu", "sycl_amdgpu"]: args.add("-xhip") args.add(toolchain.hip_runtime.path, format = "--rocm-path=%s") args.add(clang_resource_dir, format = "-isystem%s") @@ -300,7 +315,7 @@ def compile_object_args( # become system includes. format_each = "-isystem%s", ) - for flags in ["LL_CFLAGS", "LL_AMD_INCLUDES"]: + for flags in ["LL_CFLAGS", "LL_AMD_INCLUDES", "LL_SYCL_INCLUDES"]: if ctx.configuration.default_shell_env.get(flags) != "": args.add_all( ctx.configuration.default_shell_env[flags].split(":"), @@ -353,6 +368,16 @@ def compile_object_args( # re-enable abi-tagging. args.add("-D_LIBCPP_NO_ABI_TAG") + if ctx.attr.compilation_mode == "sycl_amdgpu": + args.add( + "-D__HIPSYCL_ENABLE_HIP_TARGET__", + "-D__HIPSYCL_CLANG__", + ) + if ctx.attr.compilation_mode == "sycl_cpu": + args.add( + "-D__HIPSYCL_ENABLE_OMPHOST_TARGET__", + ) + # Additional compile flags. args.add_all(ctx.attr.compile_flags) @@ -419,7 +444,12 @@ def link_executable_args(ctx, in_files, out_file, mode): if ctx.var["COMPILATION_MODE"] == "dbg": args.add("--verbose") - for flags in ["LL_LDFLAGS", "LL_AMD_LIBRARIES", "LL_AMD_RPATHS"]: + for flags in [ + "LL_LDFLAGS", + "LL_AMD_LIBRARIES", + "LL_AMD_RPATHS", + "LL_SYCL_LIBRARIES", + ]: if flags in ctx.configuration.default_shell_env.keys(): args.add_all( ctx.configuration.default_shell_env[flags].split(":"), @@ -507,7 +537,7 @@ def link_executable_args(ctx, in_files, out_file, mode): args.add("-lcuda") args.add("-lcudart_static") args.add("-lcupti_static") - if ctx.attr.compilation_mode == "hip_amdgpu": + if ctx.attr.compilation_mode in ["hip_amdgpu", "sycl_amdgpu"]: args.add(toolchain.hip_runtime.dirname, format = "-L%s") args.add(toolchain.hip_runtime.basename, format = "-l:%s") hip_runtime_rpath = paths.join( @@ -519,6 +549,18 @@ def link_executable_args(ctx, in_files, out_file, mode): hip_runtime_rpath, format = "-rpath=$ORIGIN/%s", ) + if ctx.attr.compilation_mode in ["sycl_amdgpu", "sycl_cpu"]: + args.add(toolchain.sycl_runtime.dirname, format = "-L%s") + args.add(toolchain.sycl_runtime.basename, format = "-l:%s") + sycl_runtime_rpath = paths.join( + "{}.runfiles".format(ctx.label.name), + ctx.workspace_name, + paths.dirname(toolchain.sycl_runtime.short_path), + ) + args.add( + sycl_runtime_rpath, + format = "-rpath=$ORIGIN/%s", + ) # Additional system libraries. args.add("-lm") # Math. diff --git a/ll/attributes.bzl b/ll/attributes.bzl index 51b03847..c95b4ed7 100644 --- a/ll/attributes.bzl +++ b/ll/attributes.bzl @@ -35,12 +35,14 @@ DEFAULT_ATTRS = { default = "cpp", # TODO: hip_amd, sycl_amd values = [ + "bootstrap", "cpp", - "omp_cpu", "cuda_nvptx", "hip_amdgpu", "hip_nvptx", - "bootstrap", + "omp_cpu", + "sycl_cpu", + "sycl_amdgpu", ], ), "compile_flags": attr.string_list( @@ -496,9 +498,33 @@ LL_TOOLCHAIN_ATTRS = { ), "rocm_device_libs": attr.label( doc = "The ROCm-Device-Libs.", - # default "@rocm-device-libs//:rocm-device-libs", + # default = "@rocm-device-libs//:rocm-device-libs", cfg = transition_to_cpp, ), + "sycl_headers": attr.label( + doc = "The SYCL headers.", + # default = "@opensycl//:sycl_headers" + ), + "sycl_hip_backend": attr.label( + doc = "The AMDGPU backend SYCL.", + # default = "@opensycl//:librt-backend-hip", + allow_single_file = True, + ), + "sycl_omp_backend": attr.label( + doc = "The AMDGPU backend OpenMP/CPU.", + # default = "@opensycl//:librt-backend-omp", + allow_single_file = True, + ), + "sycl_plugin": attr.label( + doc = "The compiler plugin for OpenSYCL.", + # default = "@opensycl//:opensycl-clang", + allow_single_file = True, + ), + "sycl_runtime": attr.label( + doc = "The SYCL runtime.", + # default = "@opensycl//:libhipSYCL-rt", + allow_single_file = True, + ), "symbolizer": attr.label( doc = "The `llvm-symbolizer`.", cfg = transition_to_bootstrap, diff --git a/ll/environment.bzl b/ll/environment.bzl index 2d6dbe59..bcf5c49d 100644 --- a/ll/environment.bzl +++ b/ll/environment.bzl @@ -20,7 +20,7 @@ def compile_object_environment(ctx): config = ctx.attr.toolchain_configuration[BuildSettingInfo].value toolchain = ctx.toolchains["//ll:toolchain_type"] - if config in ["cpp", "omp_cpu"]: + if config in ["cpp", "omp_cpu", "sycl_cpu"]: return { "LINK": toolchain.bitcode_linker.path, "LLD": toolchain.linker.path, @@ -30,7 +30,7 @@ def compile_object_environment(ctx): toolchain.linker_executable.dirname, ]), } - elif config in ["cuda_nvptx", "hip_amdgpu", "hip_nvptx"]: + elif config in ["cuda_nvptx", "hip_amdgpu", "hip_nvptx", "sycl_amdgpu"]: return { "CLANG_OFFLOAD_BUNDLER": toolchain.offload_bundler.path, "LINK": toolchain.bitcode_linker.path, diff --git a/ll/init.bzl b/ll/init.bzl index 47dae10b..dff3f3e7 100644 --- a/ll/init.bzl +++ b/ll/init.bzl @@ -118,6 +118,16 @@ def _initialize_rules_ll_impl(_): patch_args = ["-p1"], ) + http_archive( + name = "opensycl", + build_file = "@rules_ll//third-party-overlays:opensycl.BUILD.bazel", + sha256 = "a01f5633ca62664b163f504495855fd009d340eb38bbf05f69bc71bd9a208afc", + strip_prefix = "OpenSYCL-12fdcaedfa990ab58ddf8bce304fa8cf917e6182", + urls = [ + "https://github.com/OpenSYCL/OpenSYCL/archive/12fdcaedfa990ab58ddf8bce304fa8cf917e6182.zip", + ], + ) + http_archive( name = "rocclr", build_file = "@rules_ll//third-party-overlays:rocclr.BUILD.bazel", diff --git a/ll/inputs.bzl b/ll/inputs.bzl index 45ce394c..28cf934d 100644 --- a/ll/inputs.bzl +++ b/ll/inputs.bzl @@ -85,6 +85,24 @@ def compile_object_inputs( toolchain.rocm_device_libs + [toolchain.hip_runtime] ) + elif config == "sycl_amdgpu": + direct += ( + toolchain.hip_libraries + + toolchain.rocm_device_libs + + toolchain.sycl_headers + + [ + toolchain.hip_runtime, + # toolchain.sycl_runtime, + # toolchain.sycl_hip_backend, + ] + ) + elif config == "sycl_cpu": + direct += toolchain.sycl_headers + toolchain.omp_header + ( + [ + toolchain.sycl_runtime, + toolchain.sycl_omp_backend, + ] + ) else: fail("Cannot compile with this toolchain config: {}.".format(config)) @@ -137,17 +155,20 @@ def link_executable_inputs(ctx, in_files): if config == "cpp": pass - elif config == "omp_cpu": + elif config in ["omp_cpu", "sycl_cpu"]: direct += toolchain.libomp elif config == "cuda_nvptx": pass - elif config in ["hip_nvptx", "hip_amdgpu"]: - if config == "hip_amdgpu": + elif config in ["hip_nvptx", "hip_amdgpu", "sycl_amdgpu"]: + if config in ["hip_amdgpu", "sycl_amdgpu"]: direct.append(toolchain.hip_runtime) direct += toolchain.hip_libraries else: fail("Cannot link with this toolchain.") + if config in ["sycl_amdgpu", "sycl_cpu"]: + direct.append(toolchain.sycl_runtime) + return depset(direct) def link_shared_object_inputs(ctx, in_files): @@ -187,9 +208,11 @@ def link_shared_object_inputs(ctx, in_files): if config == "cpp": pass + elif config == "omp_cpu": + direct += toolchain.libomp elif config == "cuda_nvptx": pass - elif config in ["hip_nvptx", "hip_amdgpu"]: + elif config in ["hip_nvptx", "hip_amdgpu", "sycl_amdgpu"]: direct += ( toolchain.hip_libraries ) diff --git a/ll/ll.bzl b/ll/ll.bzl index c1af0a34..eaf13c31 100644 --- a/ll/ll.bzl +++ b/ll/ll.bzl @@ -206,6 +206,17 @@ def _ll_binary_impl(ctx): toolchain = ctx.toolchains["//ll:toolchain_type"] runfiles = ctx.runfiles(files = [toolchain.hip_runtime]) + if ctx.attr.compilation_mode == "sycl_amdgpu": + toolchain = ctx.toolchains["//ll:toolchain_type"] + runfiles = ctx.runfiles( + files = [ + toolchain.hip_runtime, + toolchain.sycl_runtime, + toolchain.sycl_omp_backend, + toolchain.sycl_hip_backend, + ], + ) + return [ DefaultInfo( files = depset([out_file]), diff --git a/ll/toolchain.bzl b/ll/toolchain.bzl index e6d39eeb..b7ccb747 100644 --- a/ll/toolchain.bzl +++ b/ll/toolchain.bzl @@ -56,6 +56,11 @@ def _ll_toolchain_impl(ctx): clang_tidy_runner = ctx.executable.clang_tidy_runner, rocm_device_libs = ctx.files.rocm_device_libs, symbolizer = ctx.executable.symbolizer, + sycl_headers = ctx.files.sycl_headers, + sycl_hip_backend = ctx.file.sycl_hip_backend, + sycl_omp_backend = ctx.file.sycl_omp_backend, + sycl_plugin = ctx.file.sycl_plugin, + sycl_runtime = ctx.file.sycl_runtime, machine_code_tool = ctx.executable.machine_code_tool, hip_libraries = ctx.files.hip_libraries, hip_runtime = ctx.file.hip_runtime, diff --git a/ll/tools.bzl b/ll/tools.bzl index 0a5e3656..569025c2 100644 --- a/ll/tools.bzl +++ b/ll/tools.bzl @@ -5,6 +5,10 @@ Tools used by actions. load("@bazel_skylib//rules:common_settings.bzl", "BuildSettingInfo") +# TODO: Would it be a bad idea to add *all* tools to *every* action? +# The ll_toolchain target already decides whether the toolchain symbols +# are actually populated, so this wouldn't lead to unnecessary builds. + def compile_object_tools(ctx): """Tools for use in compile actions. @@ -36,12 +40,15 @@ def compile_object_tools(ctx): if config in ["cpp", "omp_cpu"]: return tools - if config in ["cuda_nvptx", "hip_nvptx", "hip_amdgpu"]: + if config in ["cuda_nvptx", "hip_nvptx", "hip_amdgpu", "sycl_amdgpu"]: return tools + [ toolchain.offload_bundler, toolchain.offload_packager, ] + if config in ["sycl_cpu", "sycl_amdgpu"]: + return tools + [toolchain.sycl_plugin] + fail("Unregognized toolchain toolchain configuration.") def linking_tools(ctx): diff --git a/ll/transitions.bzl b/ll/transitions.bzl index 645d7f76..97fddb48 100644 --- a/ll/transitions.bzl +++ b/ll/transitions.bzl @@ -10,6 +10,9 @@ COMPILATION_MODES = [ "cuda_nvptx", "hip_amdgpu", "hip_nvptx", + "sycl_cpu", + "sycl_amdgpu", + # "sycl_nvptx" ] def _ll_transition_impl( diff --git a/llvm-project-overlay/openmp/BUILD.bazel b/llvm-project-overlay/openmp/BUILD.bazel index 9a45fba9..c76c3a05 100644 --- a/llvm-project-overlay/openmp/BUILD.bazel +++ b/llvm-project-overlay/openmp/BUILD.bazel @@ -55,8 +55,8 @@ expand_template( substitutions = { "@LIBOMP_VERSION_MAJOR@": "5", "@LIBOMP_VERSION_MINOR@": "0", - "@LIBOMP_VERSION_BUILD@": "0", - "@LIBOMP_BUILD_DATE@": "Masked", + "@LIBOMP_VERSION_BUILD@": "20140926", + "@LIBOMP_BUILD_DATE@": "No_Timestamp", }, template = "runtime/src/include/omp.h.var", visibility = ["//visibility:public"], @@ -77,7 +77,8 @@ expand_template( "#cmakedefine01 DEBUG_BUILD": "#define DEBUG_BUILD 0", "#cmakedefine01 RELWITHDEBINFO_BUILD": "#define RELWITHDEBINFO_BUILD 0", "#cmakedefine01 LIBOMP_USE_ITT_NOTIFY": "#define LIBOMP_USE_ITT_NOTIFY 1", - "#cmakedefine01 LIBOMP_USE_VERSION_SYMBOLS": "#define LIBOMP_USE_VERSION_SYMBOLS 1", + # Enabling version symbols breaks explicit linking of this library. Bug? + "#cmakedefine01 LIBOMP_USE_VERSION_SYMBOLS": "#define LIBOMP_USE_VERSION_SYMBOLS 0", "#cmakedefine01 LIBOMP_HAVE_WEAK_ATTRIBUTE": "#define LIBOMP_HAVE_WEAK_ATTRIBUTE 1", "#cmakedefine01 LIBOMP_HAVE_PSAPI": "#define LIBOMP_HAVE_PSAPI 0", "#cmakedefine01 LIBOMP_STATS": "#define LIBOMP_STATS 0", @@ -92,7 +93,7 @@ expand_template( "#cmakedefine01 LIBOMP_OMPT_OPTIONAL": "#define LIBOMP_OMPT_OPTIONAL 1", "#cmakedefine01 LIBOMP_USE_ADAPTIVE_LOCKS": "#define LIBOMP_USE_ADAPTIVE_LOCKS 1", "#cmakedefine01 LIBOMP_USE_INTERNODE_ALIGNMENT": "#define LIBOMP_USE_INTERNODE_ALIGNMENT 0", - "#cmakedefine01 LIBOMP_ENABLE_ASSERTIONS": "#define LIBOMP_ENABLE_ASSERTIONS 0", + "#cmakedefine01 LIBOMP_ENABLE_ASSERTIONS": "#define LIBOMP_ENABLE_ASSERTIONS 1", "#cmakedefine01 LIBOMP_USE_HIER_SCHED": "#define LIBOMP_USE_HIER_SCHED 0", "#cmakedefine01 STUBS_LIBRARY": "#define STUBS_LIBRARY 0", "#cmakedefine01 LIBOMP_USE_HWLOC": "#define LIBOMP_USE_HWLOC 0", diff --git a/patches/libcxx_combined_init_patch.diff b/patches/libcxx_combined_init_patch.diff new file mode 100644 index 00000000..81393dd1 --- /dev/null +++ b/patches/libcxx_combined_init_patch.diff @@ -0,0 +1,32 @@ +diff --git a/libcxx/include/__pstl/internal/unseq_backend_simd.h b/libcxx/include/__pstl/internal/unseq_backend_simd.h +index c68a5b99806f..4602eb44a87a 100644 +--- a/libcxx/include/__pstl/internal/unseq_backend_simd.h ++++ b/libcxx/include/__pstl/internal/unseq_backend_simd.h +@@ -513,11 +513,11 @@ __simd_scan(_InputIterator __first, _Size __n, _OutputIterator __result, _UnaryO + _CombinerType __combined_init{__init, &__binary_op}; + + _PSTL_PRAGMA_DECLARE_REDUCTION(__bin_op, _CombinerType) +- _PSTL_PRAGMA_SIMD_SCAN(__bin_op : __init_) ++ _PSTL_PRAGMA_SIMD_SCAN(__bin_op : __combined_init) + for (_Size __i = 0; __i < __n; ++__i) + { + __result[__i] = __combined_init.__value_; +- _PSTL_PRAGMA_SIMD_EXCLUSIVE_SCAN(__init_) ++ _PSTL_PRAGMA_SIMD_EXCLUSIVE_SCAN(__combined_init) + __combined_init.__value_ = __binary_op(__combined_init.__value_, __unary_op(__first[__i])); + } + return std::make_pair(__result + __n, __combined_init.__value_); +@@ -553,11 +553,11 @@ __simd_scan(_InputIterator __first, _Size __n, _OutputIterator __result, _UnaryO + _CombinerType __combined_init{__init, &__binary_op}; + + _PSTL_PRAGMA_DECLARE_REDUCTION(__bin_op, _CombinerType) +- _PSTL_PRAGMA_SIMD_SCAN(__bin_op : __init_) ++ _PSTL_PRAGMA_SIMD_SCAN(__bin_op : __combined_init) + for (_Size __i = 0; __i < __n; ++__i) + { + __combined_init.__value_ = __binary_op(__combined_init.__value_, __unary_op(__first[__i])); +- _PSTL_PRAGMA_SIMD_INCLUSIVE_SCAN(__init_) ++ _PSTL_PRAGMA_SIMD_INCLUSIVE_SCAN(__combined_init) + __result[__i] = __combined_init.__value_; + } + return std::make_pair(__result + __n, __combined_init.__value_); diff --git a/patches/rules_ll_overlay_patch.diff b/patches/rules_ll_overlay_patch.diff index a186108d..262511d4 100644 --- a/patches/rules_ll_overlay_patch.diff +++ b/patches/rules_ll_overlay_patch.diff @@ -1621,10 +1621,10 @@ index f8448fb0e726..7540cd9f8495 100644 +) diff --git a/utils/bazel/llvm-project-overlay/openmp/BUILD.bazel b/utils/bazel/llvm-project-overlay/openmp/BUILD.bazel new file mode 100644 -index 000000000000..9a45fba9fdc0 +index 000000000000..c76c3a05aa16 --- /dev/null +++ b/utils/bazel/llvm-project-overlay/openmp/BUILD.bazel -@@ -0,0 +1,157 @@ +@@ -0,0 +1,158 @@ +load("@bazel_skylib//rules:expand_template.bzl", "expand_template") +load("@rules_ll//ll:defs.bzl", "ll_library") + @@ -1682,8 +1682,8 @@ index 000000000000..9a45fba9fdc0 + substitutions = { + "@LIBOMP_VERSION_MAJOR@": "5", + "@LIBOMP_VERSION_MINOR@": "0", -+ "@LIBOMP_VERSION_BUILD@": "0", -+ "@LIBOMP_BUILD_DATE@": "Masked", ++ "@LIBOMP_VERSION_BUILD@": "20140926", ++ "@LIBOMP_BUILD_DATE@": "No_Timestamp", + }, + template = "runtime/src/include/omp.h.var", + visibility = ["//visibility:public"], @@ -1704,7 +1704,8 @@ index 000000000000..9a45fba9fdc0 + "#cmakedefine01 DEBUG_BUILD": "#define DEBUG_BUILD 0", + "#cmakedefine01 RELWITHDEBINFO_BUILD": "#define RELWITHDEBINFO_BUILD 0", + "#cmakedefine01 LIBOMP_USE_ITT_NOTIFY": "#define LIBOMP_USE_ITT_NOTIFY 1", -+ "#cmakedefine01 LIBOMP_USE_VERSION_SYMBOLS": "#define LIBOMP_USE_VERSION_SYMBOLS 1", ++ # Enabling version symbols breaks explicit linking of this library. Bug? ++ "#cmakedefine01 LIBOMP_USE_VERSION_SYMBOLS": "#define LIBOMP_USE_VERSION_SYMBOLS 0", + "#cmakedefine01 LIBOMP_HAVE_WEAK_ATTRIBUTE": "#define LIBOMP_HAVE_WEAK_ATTRIBUTE 1", + "#cmakedefine01 LIBOMP_HAVE_PSAPI": "#define LIBOMP_HAVE_PSAPI 0", + "#cmakedefine01 LIBOMP_STATS": "#define LIBOMP_STATS 0", @@ -1719,7 +1720,7 @@ index 000000000000..9a45fba9fdc0 + "#cmakedefine01 LIBOMP_OMPT_OPTIONAL": "#define LIBOMP_OMPT_OPTIONAL 1", + "#cmakedefine01 LIBOMP_USE_ADAPTIVE_LOCKS": "#define LIBOMP_USE_ADAPTIVE_LOCKS 1", + "#cmakedefine01 LIBOMP_USE_INTERNODE_ALIGNMENT": "#define LIBOMP_USE_INTERNODE_ALIGNMENT 0", -+ "#cmakedefine01 LIBOMP_ENABLE_ASSERTIONS": "#define LIBOMP_ENABLE_ASSERTIONS 0", ++ "#cmakedefine01 LIBOMP_ENABLE_ASSERTIONS": "#define LIBOMP_ENABLE_ASSERTIONS 1", + "#cmakedefine01 LIBOMP_USE_HIER_SCHED": "#define LIBOMP_USE_HIER_SCHED 0", + "#cmakedefine01 STUBS_LIBRARY": "#define STUBS_LIBRARY 0", + "#cmakedefine01 LIBOMP_USE_HWLOC": "#define LIBOMP_USE_HWLOC 0", diff --git a/third-party-overlays/hipamd.BUILD.bazel b/third-party-overlays/hipamd.BUILD.bazel index 2b445a41..78d492a6 100644 --- a/third-party-overlays/hipamd.BUILD.bazel +++ b/third-party-overlays/hipamd.BUILD.bazel @@ -21,6 +21,14 @@ filegroup( visibility = ["//visibility:public"], ) +# Target for OpenSYCL. +ll_library( + name = "hipamd_headers", + exposed_hdrs = [":headers"], + exposed_angled_includes = ["include"], + visibility = ["//visibility:public"], +) + ll_library( name = "libamdhip64", includes = ["src"], diff --git a/third-party-overlays/opensycl.BUILD.bazel b/third-party-overlays/opensycl.BUILD.bazel new file mode 100644 index 00000000..1ee556c9 --- /dev/null +++ b/third-party-overlays/opensycl.BUILD.bazel @@ -0,0 +1,203 @@ +"Build file for OpenSYCL." + +load("@bazel_skylib//rules:expand_template.bzl", "expand_template") +load("@rules_ll//ll:defs.bzl", "ll_library") + +HIPSYCL_DEFINES = [ + "WITH_ROCM_BACKEND", + + # "WITH_ACCELERATED_CPU", + # "BUILD_CLANG_PLUGIN", +] + +ROCM_CXX_FLAGS = [ + "-U__FLOAT128__", + "-U__SIZEOF_FLOAT128__", + "-fhip-new-launch-api", + "-mllvm", + "-amdgpu-early-inline-all=true", + "-mllvm", + "-amdgpu-function-calls=false", + "-D__HIP_ROCclr__", +] + +OMP_CXX_FLAGS = ["-D_ENABLE_EXTENDED_ALIGNED_STORAGE"] + +expand_template( + name = "hipsycl_config", + out = "opensycl/include/hipSYCL/common/config.hpp", + substitutions = { + "@HIPSYCL_VERSION_MAJOR@": "0", + "@HIPSYCL_VERSION_MINOR@": "9", + "@HIPSYCL_VERSION_PATCH@": "4", + "@CMAKE_INSTALL_PREFIX@": "", + "@HIPSYCL_RT_LIBRARY_OUTPUT_NAME@": "libhipSYCL-rt", + "@HIPSYCL_COMMON_LIBRARY_OUTPUT_NAME@": "libhipSYCL-rt", + "@CXX_FILESYSTEM_HEADER@": "filesystem", + "@CXX_FILESYSTEM_NAMESPACE@": "std::filesystem", + }, + template = "include/hipSYCL/common/config.hpp.in", +) + +# Target for the toolchains. +filegroup( + name = "sycl_headers", + srcs = glob(["include/**"]) + [":hipsycl_config"], + visibility = ["//visibility:public"], +) + +ll_library( + name = "libhipSYCL-rt", + srcs = [ + "src/runtime/application.cpp", + "src/runtime/runtime.cpp", + "src/runtime/error.cpp", + "src/runtime/backend.cpp", + "src/runtime/backend_loader.cpp", + "src/runtime/hints.cpp", + "src/runtime/device_id.cpp", + "src/runtime/operations.cpp", + "src/runtime/data.cpp", + "src/runtime/inorder_executor.cpp", + "src/runtime/kernel_cache.cpp", + "src/runtime/multi_queue_executor.cpp", + "src/runtime/dag.cpp", + "src/runtime/dag_node.cpp", + "src/runtime/dag_builder.cpp", + "src/runtime/dag_direct_scheduler.cpp", + "src/runtime/dag_unbound_scheduler.cpp", + "src/runtime/dag_manager.cpp", + "src/runtime/dag_submitted_ops.cpp", + "src/runtime/settings.cpp", + "src/runtime/generic/async_worker.cpp", + "src/runtime/hw_model/memcpy.cpp", + "src/runtime/serialization/serialization.cpp", + ], + hdrs = glob([ + "include/hipSYCL/**/*.hpp", + ]) + [ + ":hipsycl_config", + ], + exposed_hdrs = [":hipsycl_config"], + compile_flags = [ + "-std=c++17", + "-Wall", + ], + defines = ["_ENABLE_EXTENDED_ALIGNED_STORAGE"], + includes = [ + "$(GENERATED)/opensycl/include", + "include", + ], + emit = ["shared_object"], + shared_object_link_flags = ["--no-undefined"], + visibility = ["//visibility:public"], +) + +ll_library( + name = "librt-backend-hip", + srcs = [ + "src/runtime/hip/hip_event.cpp", + "src/runtime/hip/hip_event_pool.cpp", + "src/runtime/hip/hip_queue.cpp", + "src/runtime/hip/hip_instrumentation.cpp", + "src/runtime/hip/hip_allocator.cpp", + "src/runtime/hip/hip_device_manager.cpp", + "src/runtime/hip/hip_hardware_manager.cpp", + "src/runtime/hip/hip_backend.cpp", + "src/runtime/hip/hip_code_object.cpp", + ], + hdrs = glob([ + "include/hipSYCL/**/*.hpp", + ]), + includes = [ + "include", + ], + defines = [ + "HIPSYCL_RT_HIP_TARGET_ROCM=1", + "HIPSYCL_RT_HIP_SUPPORTS_UNIFIED_MEMORY=1", + ], + deps = [ + "@hip//:hip_headers", + "@hipamd//:hipamd_headers", + "@hipamd//:libamdhip64", + ":libhipSYCL-rt", + ], + emit = ["shared_object"], + shared_object_link_flags = ["--no-undefined", "--rpath=$ORIGIN/.."], + visibility = ["//visibility:public"], +) + +ll_library( + name = "librt-backend-omp", + srcs = [ + "src/runtime/omp/omp_allocator.cpp", + "src/runtime/omp/omp_backend.cpp", + "src/runtime/omp/omp_event.cpp", + "src/runtime/omp/omp_hardware_manager.cpp", + "src/runtime/omp/omp_queue.cpp", + ], + compile_flags = ["-std=c++17", "-Wall"], + hdrs = glob([ + "include/hipSYCL/**/*.hpp", + ]), + includes = [ + "include", + ], + defines = ["_ENABLE_EXTENDED_ALIGNED_STORAGE"], + compilation_mode = "omp_cpu", + deps = [ + "@llvm-project//openmp:libomp", + ":libhipSYCL-rt", + ], + emit = ["shared_object"], + shared_object_link_flags = ["--no-undefined"], + visibility = ["//visibility:public"], +) + +ll_library( + name = "opensycl-clang", + srcs = [ + "src/compiler/cbs/LoopSplitterInlining.cpp", + "src/compiler/cbs/SplitterAnnotationAnalysis.cpp", + "src/compiler/cbs/IRUtils.cpp", + "src/compiler/cbs/KernelFlattening.cpp", + "src/compiler/cbs/LoopsParallelMarker.cpp", + "src/compiler/cbs/PHIsToAllocas.cpp", + "src/compiler/cbs/RemoveBarrierCalls.cpp", + "src/compiler/cbs/CanonicalizeBarriers.cpp", + "src/compiler/cbs/SimplifyKernel.cpp", + "src/compiler/cbs/LoopSimplify.cpp", + "src/compiler/cbs/PipelineBuilder.cpp", + "src/compiler/cbs/SubCfgFormation.cpp", + "src/compiler/cbs/UniformityAnalysis.cpp", + "src/compiler/cbs/VectorShape.cpp", + "src/compiler/cbs/VectorizationInfo.cpp", + "src/compiler/cbs/AllocaSSA.cpp", + "src/compiler/cbs/VectorShapeTransformer.cpp", + "src/compiler/cbs/Region.cpp", + "src/compiler/cbs/SyncDependenceAnalysis.cpp", + + # Plugin. + "src/compiler/OpenSYCLClangPlugin.cpp", + "src/compiler/GlobalsPruningPass.cpp", + ], + compile_flags = ["-std=c++17"], + hdrs = glob([ + "include/hipSYCL/**/*.hpp", + ]) + [ + ":hipsycl_config", + ], + angled_includes = ["include"], + includes = [ + "$(GENERATED)/opensycl/include", + ], + depends_on_llvm = 1, + defines = [ + "HIPSYCL_WITH_ACCELERATED_CPU", + "HIPSYCL_COMPILER_COMPONENT", + "__HIPSYCL_USE_ACCELERATED_CPU__", + ], + emit = ["shared_object"], + shared_object_link_flags = ["--no-undefined"], + visibility = ["//visibility:public"], +)