Skip to content

Commit

Permalink
[Torch] Updated recipe (#9785)
Browse files Browse the repository at this point in the history
* Updated Torch recipe

* Changed to provide lazy artifacts.
* Replaced CUDA_full sources with CUDA module dependencies (and similar for CUDA 11.3).
* Updated cuda-part of script accordingly.
* Added xcrun executable for macOS which can handle `--show-sdk-path`.
* Using LLVM v13 (on macOS, v17 has issues).
* Adding cuda="none" tag to non-CUDA platforms with matching CUDA platforms.

* Added comment to fancy_toys.jl `should_build_platform`

`should_build_platform` returns true for, e.g., "x86_64-linux-gnu-cxx11" even when ARGS = ["x86_64-linux-gnu-cxx11-cuda+10.2"], i.e. the un-augmented platform matches the augmented argument.

* Added NVTX RuntimeDependency

* Using CUDA.is_supported for tagging CUDA-supported non-CUDA platforms with cuda="none"
  • Loading branch information
stemann authored Dec 8, 2024
1 parent 3db1836 commit 7deefac
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 47 deletions.
126 changes: 79 additions & 47 deletions T/Torch/build_tarballs.jl
Original file line number Diff line number Diff line change
@@ -1,23 +1,33 @@
# Note that this script can accept some limited command-line arguments, run
# `julia build_tarballs.jl --help` to see a usage message.
using BinaryBuilder, Pkg
using BinaryBuilderBase: get_addable_spec

include(joinpath(@__DIR__, "..", "..", "fancy_toys.jl"))
include(joinpath(@__DIR__, "..", "..", "platforms", "cuda.jl"))

name = "Torch"
version = v"1.10.2"

# Collection of sources required to complete build
sources = [
# pytorch pinned to a fixed commit — presumably the v1.10.2 release commit; TODO confirm against upstream tags.
GitSource("https://github.com/pytorch/pytorch.git", "71f889c7d265b9636b93ede9d651c0a9c4bee191"),
# Standalone micromamba binary; the build script uses it to install magma-cuda packages from the pytorch channel.
FileSource("https://micromamba.snakepit.net/api/micromamba/linux-64/0.21.1", "c907423887b43bec4e8b24f17471262c8087b7095683f41dcef4a4e24e9a3bbd"; filename = "micromamba.tar.bz2"),
# Full CUDA toolkits unpacked into per-version target directories.
# NOTE(review): the commit message says CUDA_full *sources* were replaced by CUDA module
# dependencies — verify these two ArchiveSources are really still needed in the final recipe.
ArchiveSource("https://github.com/JuliaBinaryWrappers/CUDA_full_jll.jl/releases/download/CUDA_full-v10.2.89%2B5/CUDA_full.v10.2.89.x86_64-linux-gnu.tar.gz", "60e6f614db3b66d955b7e6aa02406765e874ff475c69e2b4a04eb95ba65e4f3b"; unpack_target = "CUDA_full.v10.2"),
ArchiveSource("https://github.com/JuliaBinaryWrappers/CUDA_full_jll.jl/releases/download/CUDA_full-v11.3.1%2B1/CUDA_full.v11.3.1.x86_64-linux-gnu.tar.gz", "9ae00d36d39b04e8e99ace63641254c93a931dcf4ac24c8eddcdfd4625ab57d6"; unpack_target = "CUDA_full.v11.3"),
# Patches and helper files shipped next to this script.
DirectorySource("./bundled"),
]

# Bash recipe for building across all platforms
script = raw"""
cat > /opt/bin/$bb_full_target/xcrun << EOF
#!/bin/bash
if [[ "\${@}" == *"--show-sdk-path"* ]]; then
echo /opt/$target/$target/sys-root
elif [[ "\${@}" == *"--show-sdk-version"* ]]; then
grep -A1 '<key>Version</key>' /opt/$target/$target/sys-root/SDKSettings.plist \
| tail -n1 \
| sed -E -e 's/\s*<string>([^<]+)<\/string>\s*/\1/'
else
exec "\${@}"
fi
EOF
chmod +x /opt/bin/$bb_full_target/xcrun
cd $WORKSPACE/srcdir
mkdir micromamba
Expand Down Expand Up @@ -101,31 +111,19 @@ if [[ $target == x86_64-apple-darwin* # Fails to compile: /workspace/srcdir/pyto
cmake_extra_args+="-DUSE_MKLDNN=OFF "
fi
if [[ $bb_full_target == *cuda* ]]; then
cuda_version=`echo $bb_full_target | sed -E -e 's/.*cuda\+([0-9]+\.[0-9]+).*/\1/'`
cuda_version_major=`echo $cuda_version | cut -d . -f 1`
cuda_version_minor=`echo $cuda_version | cut -d . -f 2`
cuda_full_path="$WORKSPACE/srcdir/CUDA_full.v$cuda_version/cuda"
export PATH=$PATH:$cuda_full_path/bin
export CUDACXX=$cuda_full_path/bin/nvcc
export CUDAHOSTCXX=$CXX
cuda_version=${bb_full_target##*-cuda+}
if [[ $bb_full_target == *cuda* ]] && [[ $cuda_version != none ]]; then
export CUDA_PATH="$prefix/cuda"
mkdir $WORKSPACE/tmpdir
export TMPDIR=$WORKSPACE/tmpdir
cmake_extra_args+="\
-DUSE_CUDA=ON \
-DUSE_CUDNN=ON \
-DUSE_MAGMA=ON \
-DCUDA_TOOLKIT_ROOT_DIR=$cuda_full_path \
-DCUDA_CUDART_LIBRARY=$cuda_full_path/lib64/libcudart.$dlext \
-DCMAKE_CUDA_FLAGS='-cudart shared' \
-DCUDA_cublas_LIBRARY=$cuda_full_path/lib64/libcublas.$dlext \
-DCUDA_cufft_LIBRARY=$cuda_full_path/lib64/libcufft.$dlext \
-DCUDA_curand_LIBRARY=$cuda_full_path/lib64/libcurand.$dlext \
-DCUDA_cusolver_LIBRARY=$cuda_full_path/lib64/libcusolver.$dlext \
-DCUDA_cusparse_LIBRARY=$cuda_full_path/lib64/libcusparse.$dlext \
-DCUDA_TOOLKIT_INCLUDE=$includedir;$cuda_full_path/include \
-DCUDA_TOOLKIT_ROOT_DIR=$CUDA_PATH \
-DCUB_INCLUDE_DIR=$WORKSPACE/srcdir/pytorch/third_party/cub "
include_paths+=":$cuda_full_path/include"
cuda_version_major=`echo $cuda_version | cut -d . -f 1`
cuda_version_minor=`echo $cuda_version | cut -d . -f 2`
micromamba install -y magma-cuda${cuda_version_major}${cuda_version_minor} -c pytorch
git submodule update --init \
third_party/cub \
Expand Down Expand Up @@ -181,19 +179,16 @@ configure() {
$cmake_extra_args \
..
}
if [[ $bb_full_target != *cuda* ]]; then
configure
if [[ $bb_full_target == *cuda* ]] && [[ $cuda_version != none ]]; then
configure || configure
else
configure
configure || configure
fi
cmake --build . -- -j $nproc
make install
install_license ../LICENSE
"""

# These are the platforms we will build for by default, unless further
# platforms are passed in on the command line
platforms = supported_platforms()
filter!(p -> !(Sys.islinux(p) && libc(p) == "musl"), platforms) # musl fails due to conflicting declaration of C function ‘void __assert_fail(const char*, const char*, int, const char*) - between /opt/x86_64-linux-musl/x86_64-linux-musl/include/c++/8.1.0/cassert:44 and /opt/x86_64-linux-musl/x86_64-linux-musl/sys-root/usr/include/assert.h
filter!(!Sys.iswindows, platforms) # ONNX does not support cross-compiling for w64-mingw32 on linux
Expand All @@ -202,6 +197,19 @@ filter!(p -> arch(p) != "armv7l", platforms) # armv7l is not supported by XNNPAC
filter!(p -> arch(p) != "powerpc64le", platforms) # PowerPC64LE is not supported by XNNPACK
filter!(!Sys.isfreebsd, platforms) # Build fails: Clang v12 crashes compiling aten/src/ATen/native/cpu/MaxUnpoolKernel.cpp.

# Assemble the CUDA build platforms and tag the matching non-CUDA platforms.
let cuda_platforms = CUDA.supported_platforms(min_version = v"10.2", max_version = v"11")
    # The CMake toolchain breaks on aarch64, so drop those CUDA targets.
    filter!(p -> arch(p) != "aarch64", cuda_platforms)
    push!(cuda_platforms, Platform("x86_64", "Linux"; cuda = "11.3"))

    # Tag non-CUDA platforms that have a matching CUDA platform with cuda = "none",
    # so augmented platform selection can tell them apart from the CUDA builds.
    # This must run before the append! below, or the CUDA tags would be clobbered.
    foreach(platforms) do p
        if CUDA.is_supported(p) && arch(p) != "aarch64"
            p["cuda"] = "none"
        end
    end

    append!(platforms, cuda_platforms)
end

accelerate_platforms = [
Platform("aarch64", "macos"),
Platform("x86_64", "macos"),
Expand All @@ -217,48 +225,72 @@ mkl_platforms = Platform[

openblas_platforms = filter(p -> p union(mkl_platforms, accelerate_platforms), platforms)

cuda_platforms = [
Platform("x86_64", "Linux"; cuda = "10.2"),
Platform("x86_64", "Linux"; cuda = "11.3"),
]
for p in cuda_platforms
push!(platforms, p)
end

platforms = expand_cxxstring_abis(platforms)
accelerate_platforms = expand_cxxstring_abis(accelerate_platforms)
mkl_platforms = expand_cxxstring_abis(mkl_platforms)
openblas_platforms = expand_cxxstring_abis(openblas_platforms)
cuda_platforms = expand_cxxstring_abis(cuda_platforms)

# The products that we will ensure are always built
products = [
# ATen operator declarations emitted by the build — presumably consumed by downstream
# wrapper generators; TODO confirm which consumer relies on this path.
FileProduct("share/ATen/Declarations.yaml", :declarations_yaml),
# Main Torch library (either naming variant is accepted).
LibraryProduct(["libtorch", "torch"], :libtorch),
# CPU backend library.
LibraryProduct(["libtorch_cpu", "torch_cpu"], :libtorch_cpu),
]

# Dependencies that must be installed before this package can be built
dependencies = [
Dependency(PackageSpec(name="CompilerSupportLibraries_jll", uuid="e66e0078-7015-5450-92f7-15fbd957f2ae")),
Dependency("CPUInfo_jll"; compat = "0.0.20201217"),
# NOTE(review): `cuda_platforms` must be in scope here — confirm it is not only
# defined inside the `let` block above in the final file.
Dependency("CUDNN_jll", v"8.2.4"; compat = "8", platforms = cuda_platforms),
# Restricted to 64-bit platforms.
Dependency("Gloo_jll"; compat = "0.0.20210521", platforms = filter(p -> nbits(p) == 64, platforms)),
Dependency("LAPACK_jll"; platforms = openblas_platforms),
# Dependency("MKL_jll"; platforms = mkl_platforms), # MKL is avoided for all platforms
# BuildDependency("MKL_Headers_jll"; platforms = mkl_platforms), # MKL is avoided for all platforms

# libtorch, libtorch_cuda, and libtorch_global_deps all link with `libnvToolsExt`
# maleadt: `libnvToolsExt is not shipped by CUDA anymore, so the best solution is definitely static linking. CUDA 10.2 shipped it, later it became a header-only library which we do compile into a dynamic one for use with NVTX.jl, but there's no guarantees that the library we build has the same symbols as the "old" libnvToolsExt shipped by CUDA 10.2
RuntimeDependency("NVTX_jll"), # TODO: Replace RuntimeDependency with static linking.

Dependency("OpenBLAS32_jll"; platforms = openblas_platforms),
Dependency("PThreadPool_jll"; compat = "0.0.20210414"),
Dependency("SLEEF_jll", v"3.5.2"; compat = "3"),
# Dependency("TensorRT_jll"; platforms = cuda_platforms), # Building with TensorRT is not supported: https://github.com/pytorch/pytorch/issues/60228
Dependency("XNNPACK_jll"; compat = "0.0.20210622"),
Dependency(PackageSpec("protoc_jll", Base.UUID("c7845625-083e-5bbe-8504-b32d602b7110")); compat="~3.13.0"),
# Host protoc must match the target protoc version above.
HostBuildDependency(PackageSpec("protoc_jll", Base.UUID("c7845625-083e-5bbe-8504-b32d602b7110"), v"3.13.0")),
RuntimeDependency("CUDA_Runtime_jll"),
]

# Build the tarballs, and possibly a `build.jl` as well.
build_tarballs(ARGS, name, version, sources, script, platforms, products, dependencies;
preferred_gcc_version = v"8",
julia_compat = "1.6",
augment_platform_block = CUDA.augment)
# Assemble one build description per selected platform, attaching the
# CUDA-version-specific dependencies where needed.
builds = []
for platform in platforms
    should_build_platform(platform) || continue
    cuda_tag = get(platform, "cuda", "none")
    extra_deps = BinaryBuilder.AbstractDependency[]
    if cuda_tag != "none"
        if cuda_tag == "11.3"
            extra_deps = BinaryBuilder.AbstractDependency[
                BuildDependency(PackageSpec("CUDA_full_jll", v"11.3.1")),
                # v"0.7.0" is used to get support for cuda = "11.3"; a plain Dependency
                # (rather than RuntimeDependency) is used to be sure to pass audit.
                Dependency("CUDA_Runtime_jll", v"0.7.0"),
            ]
        else
            extra_deps = CUDA.required_dependencies(platform, static_sdk = true)
        end
        # CUDNN v"8.2.4+0" is pinned to get support for cuda = "11.3".
        push!(extra_deps, Dependency(get_addable_spec("CUDNN_jll", v"8.2.4+0"); compat = "8"))
    end
    push!(builds, (; platforms = [platform], dependencies = [dependencies; extra_deps]))
end

# Don't allow `build_tarballs` to override platform selection based on ARGS;
# we handle that ourselves above via `should_build_platform`.
non_platform_ARGS = filter(startswith("--"), ARGS)

# `--register` should only be passed to the latest `build_tarballs` invocation.
non_reg_ARGS = filter(!=("--register"), non_platform_ARGS)

# Run one build_tarballs invocation per build description.
for (idx, current) in enumerate(builds)
    # Only the final invocation may see `--register` (kept in non_platform_ARGS).
    argv = idx == lastindex(builds) ? non_platform_ARGS : non_reg_ARGS
    build_tarballs(argv, name, version, sources, script,
                   current.platforms, products, current.dependencies;
                   preferred_gcc_version = v"8",
                   preferred_llvm_version = v"13",
                   julia_compat = "1.6",
                   augment_platform_block = CUDA.augment,
                   lazy_artifacts = true)
end
3 changes: 3 additions & 0 deletions fancy_toys.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ Return whether the tarballs for the given `platform` should be built.
This is useful when the builder has different platform-dependent elements
(sources, script, products, etc...) that make it hard to have a single
`build_tarballs` call.
Note that `platform` must be augmented (e.g. with respect to CUDA) whenever the
platforms supplied in `ARGS` are augmented, or the comparison will not match.
"""
function should_build_platform(platform)
# If you need inspiration for how to use this function, look at the builder
Expand Down

0 comments on commit 7deefac

Please sign in to comment.