Skip to content

Commit

Permalink
[Torch] Updated recipe (#9785)
Browse files Browse the repository at this point in the history
* Updated Torch recipe

* Changed to provide lazy artifacts.
* Replaced CUDA_full sources with CUDA module dependencies (and similar for CUDA 11.3).
* Updated cuda-part of script accordingly.
* Added xcrun executable for macOS which can handle `--show-sdk-path`.
* Using LLVM v13 (on macOS, v17 has issues).
* Adding cuda="none" tag to non-CUDA platforms with matching CUDA platforms.

* Added comment to fancy_toys.jl `should_build_platform`

`should_build_platform` returns true for, e.g., "x86_64-linux-gnu-cxx11" even when ARGS = ["x86_64-linux-gnu-cxx11-cuda+10.2"], i.e. the un-augmented platform matches the augmented argument.

* Added NVTX RuntimeDependency

* Using CUDA.is_supported for tagging CUDA-supported non-CUDA platforms with cuda="none"
  • Loading branch information
stemann authored Dec 8, 2024
1 parent 3db1836 commit 7deefac
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 47 deletions.
126 changes: 79 additions & 47 deletions T/Torch/build_tarballs.jl
Original file line number Diff line number Diff line change
@@ -1,23 +1,33 @@
# Note that this script can accept some limited command-line arguments, run
# `julia build_tarballs.jl --help` to see a usage message.
using BinaryBuilder, Pkg
using BinaryBuilderBase: get_addable_spec

include(joinpath(@__DIR__, "..", "..", "fancy_toys.jl"))
include(joinpath(@__DIR__, "..", "..", "platforms", "cuda.jl"))

name = "Torch"
version = v"1.10.2"

# Collection of sources required to complete build
sources = [
# pytorch pinned to a fixed commit — presumably the v1.10.2 release commit; TODO confirm against upstream tags.
GitSource("https://github.com/pytorch/pytorch.git", "71f889c7d265b9636b93ede9d651c0a9c4bee191"),
# Standalone micromamba binary; the build script uses it to install magma-cuda packages from the pytorch channel.
FileSource("https://micromamba.snakepit.net/api/micromamba/linux-64/0.21.1", "c907423887b43bec4e8b24f17471262c8087b7095683f41dcef4a4e24e9a3bbd"; filename = "micromamba.tar.bz2"),
# Full CUDA toolkits unpacked into per-version target directories.
# NOTE(review): the commit message says CUDA_full *sources* were replaced by CUDA module
# dependencies — verify these two ArchiveSources are really still needed in the final recipe.
ArchiveSource("https://github.com/JuliaBinaryWrappers/CUDA_full_jll.jl/releases/download/CUDA_full-v10.2.89%2B5/CUDA_full.v10.2.89.x86_64-linux-gnu.tar.gz", "60e6f614db3b66d955b7e6aa02406765e874ff475c69e2b4a04eb95ba65e4f3b"; unpack_target = "CUDA_full.v10.2"),
ArchiveSource("https://github.com/JuliaBinaryWrappers/CUDA_full_jll.jl/releases/download/CUDA_full-v11.3.1%2B1/CUDA_full.v11.3.1.x86_64-linux-gnu.tar.gz", "9ae00d36d39b04e8e99ace63641254c93a931dcf4ac24c8eddcdfd4625ab57d6"; unpack_target = "CUDA_full.v11.3"),
# Patches and helper files shipped next to this script.
DirectorySource("./bundled"),
]

# Bash recipe for building across all platforms
script = raw"""
cat > /opt/bin/$bb_full_target/xcrun << EOF
#!/bin/bash
if [[ "\${@}" == *"--show-sdk-path"* ]]; then
echo /opt/$target/$target/sys-root
elif [[ "\${@}" == *"--show-sdk-version"* ]]; then
grep -A1 '<key>Version</key>' /opt/$target/$target/sys-root/SDKSettings.plist \
| tail -n1 \
| sed -E -e 's/\s*<string>([^<]+)<\/string>\s*/\1/'
else
exec "\${@}"
fi
EOF
chmod +x /opt/bin/$bb_full_target/xcrun
cd $WORKSPACE/srcdir
mkdir micromamba
Expand Down Expand Up @@ -101,31 +111,19 @@ if [[ $target == x86_64-apple-darwin* # Fails to compile: /workspace/srcdir/pyto
cmake_extra_args+="-DUSE_MKLDNN=OFF "
fi
if [[ $bb_full_target == *cuda* ]]; then
cuda_version=`echo $bb_full_target | sed -E -e 's/.*cuda\+([0-9]+\.[0-9]+).*/\1/'`
cuda_version_major=`echo $cuda_version | cut -d . -f 1`
cuda_version_minor=`echo $cuda_version | cut -d . -f 2`
cuda_full_path="$WORKSPACE/srcdir/CUDA_full.v$cuda_version/cuda"
export PATH=$PATH:$cuda_full_path/bin
export CUDACXX=$cuda_full_path/bin/nvcc
export CUDAHOSTCXX=$CXX
cuda_version=${bb_full_target##*-cuda+}
if [[ $bb_full_target == *cuda* ]] && [[ $cuda_version != none ]]; then
export CUDA_PATH="$prefix/cuda"
mkdir $WORKSPACE/tmpdir
export TMPDIR=$WORKSPACE/tmpdir
cmake_extra_args+="\
-DUSE_CUDA=ON \
-DUSE_CUDNN=ON \
-DUSE_MAGMA=ON \
-DCUDA_TOOLKIT_ROOT_DIR=$cuda_full_path \
-DCUDA_CUDART_LIBRARY=$cuda_full_path/lib64/libcudart.$dlext \
-DCMAKE_CUDA_FLAGS='-cudart shared' \
-DCUDA_cublas_LIBRARY=$cuda_full_path/lib64/libcublas.$dlext \
-DCUDA_cufft_LIBRARY=$cuda_full_path/lib64/libcufft.$dlext \
-DCUDA_curand_LIBRARY=$cuda_full_path/lib64/libcurand.$dlext \
-DCUDA_cusolver_LIBRARY=$cuda_full_path/lib64/libcusolver.$dlext \
-DCUDA_cusparse_LIBRARY=$cuda_full_path/lib64/libcusparse.$dlext \
-DCUDA_TOOLKIT_INCLUDE=$includedir;$cuda_full_path/include \
-DCUDA_TOOLKIT_ROOT_DIR=$CUDA_PATH \
-DCUB_INCLUDE_DIR=$WORKSPACE/srcdir/pytorch/third_party/cub "
include_paths+=":$cuda_full_path/include"
cuda_version_major=`echo $cuda_version | cut -d . -f 1`
cuda_version_minor=`echo $cuda_version | cut -d . -f 2`
micromamba install -y magma-cuda${cuda_version_major}${cuda_version_minor} -c pytorch
git submodule update --init \
third_party/cub \
Expand Down Expand Up @@ -181,19 +179,16 @@ configure() {
$cmake_extra_args \
..
}
if [[ $bb_full_target != *cuda* ]]; then
configure
if [[ $bb_full_target == *cuda* ]] && [[ $cuda_version != none ]]; then
configure || configure
else
configure
configure || configure
fi
cmake --build . -- -j $nproc
make install
install_license ../LICENSE
"""

# These are the platforms we will build for by default, unless further
# platforms are passed in on the command line
platforms = supported_platforms()
filter!(p -> !(Sys.islinux(p) && libc(p) == "musl"), platforms) # musl fails due to conflicting declaration of C function ‘void __assert_fail(const char*, const char*, int, const char*) - between /opt/x86_64-linux-musl/x86_64-linux-musl/include/c++/8.1.0/cassert:44 and /opt/x86_64-linux-musl/x86_64-linux-musl/sys-root/usr/include/assert.h
filter!(!Sys.iswindows, platforms) # ONNX does not support cross-compiling for w64-mingw32 on linux
Expand All @@ -202,6 +197,19 @@ filter!(p -> arch(p) != "armv7l", platforms) # armv7l is not supported by XNNPAC
filter!(p -> arch(p) != "powerpc64le", platforms) # PowerPC64LE is not supported by XNNPACK
filter!(!Sys.isfreebsd, platforms) # Build fails: Clang v12 crashes compiling aten/src/ATen/native/cpu/MaxUnpoolKernel.cpp.

# Assemble the CUDA build platforms and tag the matching non-CUDA platforms.
let cuda_platforms = CUDA.supported_platforms(min_version = v"10.2", max_version = v"11")
    # The CMake toolchain breaks on aarch64, so drop those CUDA targets.
    filter!(p -> arch(p) != "aarch64", cuda_platforms)
    push!(cuda_platforms, Platform("x86_64", "Linux"; cuda = "11.3"))

    # Tag non-CUDA platforms that have a matching CUDA platform with cuda = "none",
    # so augmented platform selection can tell them apart from the CUDA builds.
    # This must run before the append! below, or the CUDA tags would be clobbered.
    foreach(platforms) do p
        if CUDA.is_supported(p) && arch(p) != "aarch64"
            p["cuda"] = "none"
        end
    end

    append!(platforms, cuda_platforms)
end

accelerate_platforms = [
Platform("aarch64", "macos"),
Platform("x86_64", "macos"),
Expand All @@ -217,48 +225,72 @@ mkl_platforms = Platform[

openblas_platforms = filter(p -> p union(mkl_platforms, accelerate_platforms), platforms)

cuda_platforms = [
Platform("x86_64", "Linux"; cuda = "10.2"),
Platform("x86_64", "Linux"; cuda = "11.3"),
]
for p in cuda_platforms
push!(platforms, p)
end

platforms = expand_cxxstring_abis(platforms)
accelerate_platforms = expand_cxxstring_abis(accelerate_platforms)
mkl_platforms = expand_cxxstring_abis(mkl_platforms)
openblas_platforms = expand_cxxstring_abis(openblas_platforms)
cuda_platforms = expand_cxxstring_abis(cuda_platforms)

# The products that we will ensure are always built
products = [
# ATen operator declarations emitted by the build — presumably consumed by downstream
# wrapper generators; TODO confirm which consumer relies on this path.
FileProduct("share/ATen/Declarations.yaml", :declarations_yaml),
# Main Torch library (either naming variant is accepted).
LibraryProduct(["libtorch", "torch"], :libtorch),
# CPU backend library.
LibraryProduct(["libtorch_cpu", "torch_cpu"], :libtorch_cpu),
]

# Dependencies that must be installed before this package can be built
dependencies = [
Dependency(PackageSpec(name="CompilerSupportLibraries_jll", uuid="e66e0078-7015-5450-92f7-15fbd957f2ae")),
Dependency("CPUInfo_jll"; compat = "0.0.20201217"),
# NOTE(review): `cuda_platforms` must be in scope here — confirm it is not only
# defined inside the `let` block above in the final file.
Dependency("CUDNN_jll", v"8.2.4"; compat = "8", platforms = cuda_platforms),
# Restricted to 64-bit platforms.
Dependency("Gloo_jll"; compat = "0.0.20210521", platforms = filter(p -> nbits(p) == 64, platforms)),
Dependency("LAPACK_jll"; platforms = openblas_platforms),
# Dependency("MKL_jll"; platforms = mkl_platforms), # MKL is avoided for all platforms
# BuildDependency("MKL_Headers_jll"; platforms = mkl_platforms), # MKL is avoided for all platforms

# libtorch, libtorch_cuda, and libtorch_global_deps all link with `libnvToolsExt`
# maleadt: `libnvToolsExt is not shipped by CUDA anymore, so the best solution is definitely static linking. CUDA 10.2 shipped it, later it became a header-only library which we do compile into a dynamic one for use with NVTX.jl, but there's no guarantees that the library we build has the same symbols as the "old" libnvToolsExt shipped by CUDA 10.2
RuntimeDependency("NVTX_jll"), # TODO: Replace RuntimeDependency with static linking.

Dependency("OpenBLAS32_jll"; platforms = openblas_platforms),
Dependency("PThreadPool_jll"; compat = "0.0.20210414"),
Dependency("SLEEF_jll", v"3.5.2"; compat = "3"),
# Dependency("TensorRT_jll"; platforms = cuda_platforms), # Building with TensorRT is not supported: https://github.com/pytorch/pytorch/issues/60228
Dependency("XNNPACK_jll"; compat = "0.0.20210622"),
Dependency(PackageSpec("protoc_jll", Base.UUID("c7845625-083e-5bbe-8504-b32d602b7110")); compat="~3.13.0"),
# Host protoc must match the target protoc version above.
HostBuildDependency(PackageSpec("protoc_jll", Base.UUID("c7845625-083e-5bbe-8504-b32d602b7110"), v"3.13.0")),
RuntimeDependency("CUDA_Runtime_jll"),
]

# Build the tarballs, and possibly a `build.jl` as well.
build_tarballs(ARGS, name, version, sources, script, platforms, products, dependencies;
preferred_gcc_version = v"8",
julia_compat = "1.6",
augment_platform_block = CUDA.augment)
# Assemble one build description per selected platform, attaching the
# CUDA-version-specific dependencies where needed.
builds = []
for platform in platforms
    should_build_platform(platform) || continue
    cuda_tag = get(platform, "cuda", "none")
    extra_deps = BinaryBuilder.AbstractDependency[]
    if cuda_tag != "none"
        if cuda_tag == "11.3"
            extra_deps = BinaryBuilder.AbstractDependency[
                BuildDependency(PackageSpec("CUDA_full_jll", v"11.3.1")),
                # v"0.7.0" is used to get support for cuda = "11.3"; a plain Dependency
                # (rather than RuntimeDependency) is used to be sure to pass audit.
                Dependency("CUDA_Runtime_jll", v"0.7.0"),
            ]
        else
            extra_deps = CUDA.required_dependencies(platform, static_sdk = true)
        end
        # CUDNN v"8.2.4+0" is pinned to get support for cuda = "11.3".
        push!(extra_deps, Dependency(get_addable_spec("CUDNN_jll", v"8.2.4+0"); compat = "8"))
    end
    push!(builds, (; platforms = [platform], dependencies = [dependencies; extra_deps]))
end

# Don't allow `build_tarballs` to override platform selection based on ARGS;
# we handle that ourselves above via `should_build_platform`.
non_platform_ARGS = filter(startswith("--"), ARGS)

# `--register` should only be passed to the latest `build_tarballs` invocation.
non_reg_ARGS = filter(!=("--register"), non_platform_ARGS)

# Run one build_tarballs invocation per build description.
for (idx, current) in enumerate(builds)
    # Only the final invocation may see `--register` (kept in non_platform_ARGS).
    argv = idx == lastindex(builds) ? non_platform_ARGS : non_reg_ARGS
    build_tarballs(argv, name, version, sources, script,
                   current.platforms, products, current.dependencies;
                   preferred_gcc_version = v"8",
                   preferred_llvm_version = v"13",
                   julia_compat = "1.6",
                   augment_platform_block = CUDA.augment,
                   lazy_artifacts = true)
end
3 changes: 3 additions & 0 deletions fancy_toys.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ Return whether the tarballs for the given `platform` should be built.
This is useful when the builder has different platform-dependent elements
(sources, script, products, etc...) that make it hard to have a single
`build_tarballs` call.
Note that `platform` must be augmented (e.g. with respect to CUDA) whenever the
platforms supplied in `ARGS` are augmented, or the comparison will not match.
"""
function should_build_platform(platform)
# If you need inspiration for how to use this function, look at the builder
Expand Down

0 comments on commit 7deefac

Please sign in to comment.