Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

{devel}[foss/2024a] Triton v3.1.0 w/ CUDA 12.6.0 #22064

Open
wants to merge 15 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
##
# This file is an EasyBuild reciPY as per https://github.com/easybuilders/easybuild
#
# Copyright:: Copyright 2013-2015 Dmitri Gribenko, Ward Poelmans
# Authors:: Dmitri Gribenko <[email protected]>
# Authors:: Ward Poelmans <[email protected]>
# License:: GPLv2 or later, MIT, three-clause BSD.
# $Id$
##

# thembl: according to cmake/llvm-hash.txt, Triton 3.1.0 (commit 5fe38ffd73c2ac6ed6323b554205186696631c6f)
# requires LLVM commit 10dc3a8e916d73291269e5e2b82dd22681489aa1

name = 'Clang'
# The sources are pinned to a single LLVM commit; its first seven characters are appended
# to the version string.
local_commit = '10dc3a8e916d73291269e5e2b82dd22681489aa1'
local_short_commit = local_commit[:7]
version = '18.1.6_' + local_short_commit
versionsuffix = '-CUDA-%(cudaver)s'

homepage = 'https://clang.llvm.org/'
description = """C, C++, Objective-C compiler, based on LLVM. Does not
include C++ standard library -- use libstdc++ from GCC."""

# libstdc++ is also needed by Clang at runtime; the toolchain already covers
# that dependency, so it is not listed again below.
toolchain = {'name': 'GCCcore', 'version': '13.3.0'}

# The archive is downloaded under the commit hash and stored under a name that carries the version.
source_urls = ['https://github.com/llvm/llvm-project/archive/']
sources = [
    {
        'download_filename': local_commit + '.tar.gz',
        'filename': 'llvm-project-' + version + '.tar.gz',
    },
]
checksums = ['6ee5e0f9a49d41b5f48ebc4613ce3371f686bf70fcece9f849aba3c37bdeb3e8']

builddependencies = [
    ('CMake', '3.29.3'),
    ('Perl', '5.38.2'),
    # Python is build-only here: enabling the Python bindings would turn it into
    # a runtime dependency and add SWIG as a further build dependency.
    ('Python', '3.12.3'),
]
dependencies = [
    # Clang is a compiler, so binutils is needed at runtime as well.
    ('binutils', '2.42'),
    ('hwloc', '2.10.0'),
    ('libxml2', '2.12.7'),
    ('ncurses', '6.5'),
    ('GMP', '6.3.0'),
    ('Z3', '4.13.0'),
    ('CUDA', '12.6.0', '', SYSTEM),
]

# The clang easyblock requires an update. Message motivating this option:
#   GCC_INSTALL_PREFIX is deprecated and will be removed. Use configuration
#   files (https://clang.llvm.org/docs/UsersManual.html#configuration-files) to
#   specify the default --gcc-install-dir= or --gcc-triple=. --gcc-toolchain=
#   is discouraged. See https://github.com/llvm/llvm-project/pull/77537 for
#   detail.
configopts = '-DUSE_DEPRECATED_GCC_INSTALL_PREFIX=1 '

# Keep RTTI disabled: if True, Flang does not currently support building with LLVM exceptions enabled.
enable_rtti = False

assertions = True
python_bindings = False
skip_all_tests = True

llvm_runtimes = ['libunwind', 'libcxx', 'libcxxabi']
llvm_projects = ['polly', 'lld', 'lldb', 'clang-tools-extra', 'flang']

moduleclass = 'compiler'
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# Update 3.1.0: Thomas Hoffmann, EMBL Heidelberg, [email protected], 2024/12
# TODO: run unittests
easyblock = 'PythonBundle'

name = 'Triton'
version = '3.1.0'
versionsuffix = '-CUDA-%(cudaver)s'
# Neither PyPI nor GitHub provides a 3.1 release/tag. Commit 5fe38ffd is the 3.1 version bump
# on the release_3.1.x branch:
local_commit = '5fe38ffd73c2ac6ed6323b554205186696631c6f'
# That Triton commit needs one specific LLVM commit
# (cmake/llvm-hash.txt names 10dc3a8e916d73291269e5e2b82dd22681489aa1, dated 2024/05/23):
local_clang_commit = '10dc3a8e916d73291269e5e2b82dd22681489aa1'

homepage = 'https://triton-lang.org/'

description = """Triton is a language and compiler for parallel programming. It aims to provide a
Python-based programming environment for productively writing custom DNN compute
kernels capable of running at maximal throughput on modern GPU hardware."""

toolchain = {'name': 'foss', 'version': '2024a'}

github_account = 'openai'

builddependencies = [
    # Clang build whose version string ends in the short form of the pinned LLVM commit.
    ('Clang', '18.1.6_' + local_clang_commit[:7], versionsuffix),
    ('CMake', '3.29.3'),
    ('Ninja', '1.12.1'),
    ('pybind11', '2.12.0'),
    ('poetry', '1.8.3'),
    ('nlohmann_json', '3.11.3'),
    ('googletest', '1.15.2'),
]

dependencies = [
    ('CUDA', '12.6.0', '', SYSTEM),
    ('Python', '3.12.3'),
    ('Z3', '4.13.0'),
]

use_pip = True

# Sub-directory of the Triton sources that holds the Python package.
_tr_start_dir = 'python'
ThomasHoffmann77 marked this conversation as resolved.
Show resolved Hide resolved

# Environment variables exported in front of the install command of the Triton extension.
# They point the (patched) setup.py at pybind11, nlohmann_json and Clang/LLVM from the
# EasyBuild modules, and TRITON_HOME moves the ".triton" cache directory into the build dir.
_tr_build_env = [
    'PYBIND11_SYSPATH=$EBROOTPYBIND11',
    'JSON_SYSPATH=$EBROOTNLOHMANN_JSON',
    'LLVM_INCLUDE_DIRS=$EBROOTCLANG/include',
    'LLVM_LIBRARY_DIR=$EBROOTCLANG/lib',
    'LLVM_SYSPATH=$EBROOTCLANG',
    'TRITON_BUILD_WITH_CLANG_LLD=1',
    'TRITON_HOME=%(builddir)s',
]
# Every entry becomes "export VAR=value && ", so the resulting string can be put directly
# in front of the install command.
_tr_preinstallopts = ''.join('export %s && ' % _tr_var for _tr_var in _tr_build_env)
_tr_installopts = "-v "

exts_list = [
    (name, version, {
        # use the local settings defined for this extension rather than repeating their literal values
        'installopts': _tr_installopts,
        'patches': [
            # disable AMD GPU, disable all downloads, use pybind11 and json from EB:
            'Triton-3.1.0_5fe38ff_eb_env_python_build.patch'
        ],
        # drop the libdevice bitcode file from the installed nvidia backend;
        # TRITON_LIBDEVICE_PATH (see modluafooter) refers to the file under CUDA_HOME instead
        'postinstallcmds': [
            'rm -rf %(installdir)s/lib/python%(pyshortver)s/site-packages/triton/backends/nvidia/lib/libdevice.10.bc'
        ],
        'preinstallopts': _tr_preinstallopts,
        'source_urls': ['https://github.com/triton-lang/triton/archive/'],
        # downloaded by full commit hash, stored as v<version>-<short commit>.tar.gz
        'sources': [{
            'filename': 'v%%(version)s-%s.tar.gz' % local_commit[:7],
            'download_filename': '%s.tar.gz' % local_commit,
        }],
        'start_dir': _tr_start_dir,
        'checksums': [
            {'v3.1.0-5fe38ff.tar.gz': '933babc32b69872efbce05fe8be61129fecf52c724fadea42d8c7b2d10e16ad9'},
            {'Triton-3.1.0_5fe38ff_eb_env_python_build.patch':
             '6b46064b4892c7df340b6afd7ffb4abb2ea4486df9406626cd9b2c92a748705d'},
        ],
    }),
    ('filelock', '3.15.1', {
        'checksums': ['58a2549afdf9e02e10720eaa4d4470f56386d7a6f72edd7d0596337af8ed7ad8'],
    }),
]

sanity_pip_check = True

# Lua module footer: set the ptxas and libdevice locations from CUDA_HOME when the module is loaded.
modluafooter = 'setenv("TRITON_PTXAS_PATH", pathJoin(os.getenv("CUDA_HOME"), "bin", "ptxas"));'
modluafooter += 'setenv("TRITON_LIBDEVICE_PATH", pathJoin(os.getenv("CUDA_HOME"), "nvvm", '
modluafooter += '"libdevice", "libdevice.10.bc")) '

moduleclass = 'devel'
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
# Thomas Hoffmann, EMBL Heidelberg, [email protected], 2024/12
# (adapted from Triton-2.1.0-use_eb_env_python_build.patch and Triton-2.1.0-disable_rocm_support.patch)
# - disable support for AMD GPUs
# - disable all downloads at build time.

diff -ru triton-5fe38ffd73c2ac6ed6323b554205186696631c6f/CMakeLists.txt triton-5fe38ffd73c2ac6ed6323b554205186696631c6f_eb_env_python_build/CMakeLists.txt
--- triton-5fe38ffd73c2ac6ed6323b554205186696631c6f/CMakeLists.txt 2024-09-10 23:44:54.000000000 +0200
+++ triton-5fe38ffd73c2ac6ed6323b554205186696631c6f_eb_env_python_build/CMakeLists.txt 2024-12-19 11:17:38.959269261 +0100
@@ -184,7 +184,7 @@
${triton_plugins}

# mlir
- MLIRAMDGPUDialect
+ #MLIRAMDGPUDialect
MLIRNVVMDialect
MLIRNVVMToLLVMIRTranslation
MLIRGPUToNVVMTransforms
@@ -208,9 +208,9 @@
# LLVM
LLVMPasses
LLVMNVPTXCodeGen
- # LLVMNVPTXAsmPrinter
- LLVMAMDGPUCodeGen
- LLVMAMDGPUAsmParser
+ #LLVMNVPTXAsmPrinter
+ #LLVMAMDGPUCodeGen
+ #LLVMAMDGPUAsmParser

)
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64" OR # Linux arm64
diff -ru triton-5fe38ffd73c2ac6ed6323b554205186696631c6f/python/setup.py triton-5fe38ffd73c2ac6ed6323b554205186696631c6f_eb_env_python_build/python/setup.py
--- triton-5fe38ffd73c2ac6ed6323b554205186696631c6f/python/setup.py 2024-09-10 23:44:54.000000000 +0200
+++ triton-5fe38ffd73c2ac6ed6323b554205186696631c6f_eb_env_python_build/python/setup.py 2024-12-19 12:36:55.358107277 +0100
@@ -130,13 +130,15 @@
version = pybind11_version_file.read().strip()
name = f"pybind11-{version}"
url = f"https://github.com/pybind/pybind11/archive/refs/tags/v{version}.tar.gz"
- return Package("pybind11", name, url, "PYBIND11_INCLUDE_DIR", "", "PYBIND11_SYSPATH")
+ #return Package("pybind11", name, url, "PYBIND11_INCLUDE_DIR", "", "PYBIND11_SYSPATH")
+ return Package("pybind11", name, '', "PYBIND11_INCLUDE_DIR", "", "PYBIND11_SYSPATH")


# json
def get_json_package_info():
url = "https://github.com/nlohmann/json/releases/download/v3.11.3/include.zip"
- return Package("json", "", url, "JSON_INCLUDE_DIR", "", "JSON_SYSPATH")
+ #return Package("json", "", url, "JSON_INCLUDE_DIR", "", "JSON_SYSPATH")
+ return Package("json", "", '', "JSON_INCLUDE_DIR", "", "JSON_SYSPATH")


# llvm
@@ -201,7 +203,9 @@


def get_triton_cache_path():
- user_home = os.getenv("HOME") or os.getenv("USERPROFILE") or os.getenv("HOMEPATH") or None
+ user_home = os.getenv("TRITON_HOME")
+ if not user_home:
+ user_home = os.getenv("HOME") or os.getenv("USERPROFILE") or os.getenv("HOMEPATH") or None
if not user_home:
raise RuntimeError("Could not find user home directory")
return os.path.join(user_home, ".triton")
@@ -222,14 +226,14 @@
shutil.rmtree(package_root_dir)
os.makedirs(package_root_dir, exist_ok=True)
print(f'downloading and extracting {p.url} ...')
- with open_url(p.url) as response:
- if p.url.endswith(".zip"):
- file_bytes = BytesIO(response.read())
- with zipfile.ZipFile(file_bytes, "r") as file:
- file.extractall(path=package_root_dir)
- else:
- with tarfile.open(fileobj=response, mode="r|*") as file:
- file.extractall(path=package_root_dir)
+ #with open_url(p.url) as response:
+ # if p.url.endswith(".zip"):
+ # file_bytes = BytesIO(response.read())
+ # with zipfile.ZipFile(file_bytes, "r") as file:
+ # file.extractall(path=package_root_dir)
+ # else:
+ # with tarfile.open(fileobj=response, mode="r|*") as file:
+ # file.extractall(path=package_root_dir)
# write version url to package_dir
with open(os.path.join(package_dir, "version.txt"), "w") as f:
f.write(p.url)
@@ -363,12 +367,14 @@
"-G", "Ninja", # Ninja is much faster than make
"-DCMAKE_MAKE_PROGRAM=" +
ninja_dir, # Pass explicit path to ninja otherwise cmake may cache a temporary path
- "-DCMAKE_EXPORT_COMPILE_COMMANDS=ON", "-DLLVM_ENABLE_WERROR=ON",
+ "-DCMAKE_EXPORT_COMPILE_COMMANDS=ON",
+ # "-DLLVM_ENABLE_WERROR=ON",
"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=" + extdir, "-DTRITON_BUILD_TUTORIALS=OFF",
"-DTRITON_BUILD_PYTHON_MODULE=ON", "-DPython3_EXECUTABLE:FILEPATH=" + sys.executable,
"-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON", "-DPYTHON_INCLUDE_DIRS=" + python_include_dir,
"-DTRITON_CODEGEN_BACKENDS=" + ';'.join([b.name for b in backends if not b.is_external]),
- "-DTRITON_PLUGIN_DIRS=" + ';'.join([b.src_dir for b in backends if b.is_external])
+ "-DTRITON_PLUGIN_DIRS=" + ';'.join([b.src_dir for b in backends if b.is_external]),
+ "-DFETCHCONTENT_FULLY_DISCONNECTED=1",
]
if lit_dir is not None:
cmake_args.append("-DLLVM_EXTERNAL_LIT=" + lit_dir)
@@ -432,54 +438,54 @@
with open(nvidia_version_path, "r") as nvidia_version_file:
NVIDIA_TOOLCHAIN_VERSION = nvidia_version_file.read().strip()

-download_and_copy(
- name="ptxas",
- src_path="bin/ptxas",
- variable="TRITON_PTXAS_PATH",
- version=NVIDIA_TOOLCHAIN_VERSION,
- url_func=lambda arch, version:
- f"https://anaconda.org/nvidia/cuda-nvcc/{version}/download/linux-{arch}/cuda-nvcc-{version}-0.tar.bz2",
-)
-download_and_copy(
- name="cuobjdump",
- src_path="bin/cuobjdump",
- variable="TRITON_CUOBJDUMP_PATH",
- version=NVIDIA_TOOLCHAIN_VERSION,
- url_func=lambda arch, version:
- f"https://anaconda.org/nvidia/cuda-cuobjdump/{version}/download/linux-{arch}/cuda-cuobjdump-{version}-0.tar.bz2",
-)
-download_and_copy(
- name="nvdisasm",
- src_path="bin/nvdisasm",
- variable="TRITON_NVDISASM_PATH",
- version=NVIDIA_TOOLCHAIN_VERSION,
- url_func=lambda arch, version:
- f"https://anaconda.org/nvidia/cuda-nvdisasm/{version}/download/linux-{arch}/cuda-nvdisasm-{version}-0.tar.bz2",
-)
-download_and_copy(
- name="cudacrt",
- src_path="include",
- variable="TRITON_CUDACRT_PATH",
- version=NVIDIA_TOOLCHAIN_VERSION,
- url_func=lambda arch, version:
- f"https://anaconda.org/nvidia/cuda-nvcc/{version}/download/linux-{arch}/cuda-nvcc-{version}-0.tar.bz2",
-)
-download_and_copy(
- name="cudart",
- src_path="include",
- variable="TRITON_CUDART_PATH",
- version=NVIDIA_TOOLCHAIN_VERSION,
- url_func=lambda arch, version:
- f"https://anaconda.org/nvidia/cuda-cudart-dev/{version}/download/linux-{arch}/cuda-cudart-dev-{version}-0.tar.bz2",
-)
-download_and_copy(
- name="cupti",
- src_path="include",
- variable="TRITON_CUPTI_PATH",
- version=NVIDIA_TOOLCHAIN_VERSION,
- url_func=lambda arch, version:
- f"https://anaconda.org/nvidia/cuda-cupti/{version}/download/linux-{arch}/cuda-cupti-{version}-0.tar.bz2",
-)
+#download_and_copy(
+# name="ptxas",
+# src_path="bin/ptxas",
+# variable="TRITON_PTXAS_PATH",
+# version=NVIDIA_TOOLCHAIN_VERSION,
+# url_func=lambda arch, version:
+# f"https://anaconda.org/nvidia/cuda-nvcc/{version}/download/linux-{arch}/cuda-nvcc-{version}-0.tar.bz2",
+#)
+#download_and_copy(
+# name="cuobjdump",
+# src_path="bin/cuobjdump",
+# variable="TRITON_CUOBJDUMP_PATH",
+# version=NVIDIA_TOOLCHAIN_VERSION,
+# url_func=lambda arch, version:
+# f"https://anaconda.org/nvidia/cuda-cuobjdump/{version}/download/linux-{arch}/cuda-cuobjdump-{version}-0.tar.bz2",
+#)
+#download_and_copy(
+# name="nvdisasm",
+# src_path="bin/nvdisasm",
+# variable="TRITON_NVDISASM_PATH",
+# version=NVIDIA_TOOLCHAIN_VERSION,
+# url_func=lambda arch, version:
+# f"https://anaconda.org/nvidia/cuda-nvdisasm/{version}/download/linux-{arch}/cuda-nvdisasm-{version}-0.tar.bz2",
+#)
+#download_and_copy(
+# name="cudacrt",
+# src_path="include",
+# variable="TRITON_CUDACRT_PATH",
+# version=NVIDIA_TOOLCHAIN_VERSION,
+# url_func=lambda arch, version:
+# f"https://anaconda.org/nvidia/cuda-nvcc/{version}/download/linux-{arch}/cuda-nvcc-{version}-0.tar.bz2",
+#)
+#download_and_copy(
+# name="cudart",
+# src_path="include",
+# variable="TRITON_CUDART_PATH",
+# version=NVIDIA_TOOLCHAIN_VERSION,
+# url_func=lambda arch, version:
+# f"https://anaconda.org/nvidia/cuda-cudart-dev/{version}/download/linux-{arch}/cuda-cudart-dev-{version}-0.tar.bz2",
+#)
+#download_and_copy(
+# name="cupti",
+# src_path="include",
+# variable="TRITON_CUPTI_PATH",
+# version=NVIDIA_TOOLCHAIN_VERSION,
+# url_func=lambda arch, version:
+# f"https://anaconda.org/nvidia/cuda-cupti/{version}/download/linux-{arch}/cuda-cupti-{version}-0.tar.bz2",
+#)

backends = [*BackendInstaller.copy(["nvidia", "amd"]), *BackendInstaller.copy_externals()]

diff -ru triton-5fe38ffd73c2ac6ed6323b554205186696631c6f/unittest/CMakeLists.txt triton-5fe38ffd73c2ac6ed6323b554205186696631c6f_eb_env_python_build/unittest/CMakeLists.txt
--- triton-5fe38ffd73c2ac6ed6323b554205186696631c6f/unittest/CMakeLists.txt 2024-09-10 23:44:54.000000000 +0200
+++ triton-5fe38ffd73c2ac6ed6323b554205186696631c6f_eb_env_python_build/unittest/CMakeLists.txt 2024-12-19 13:43:42.815629305 +0100
@@ -1,8 +1,11 @@
-include (${CMAKE_CURRENT_SOURCE_DIR}/googletest.cmake)
+#include (${CMAKE_CURRENT_SOURCE_DIR}/googletest.cmake)

include(GoogleTest)
enable_testing()

+find_package(GTest REQUIRED)
+include_directories(${GTEST_INCLUDE_DIR})
+
get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
get_property(conversion_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS)
get_property(triton_libs GLOBAL PROPERTY TRITON_LIBS)
Loading