Merge pull request #12 from openai/main
pr
1proprogrammerchant authored Sep 20, 2023
2 parents fd22f45 + ed5a530 commit 879a916
Showing 177 changed files with 7,959 additions and 5,542 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/documentation.yml
@@ -24,6 +24,8 @@ jobs:
run: |
pip3 install tabulate
pip3 install cmake
pip3 install sphinx
pip3 install myst_parser
#- name: Fetch dependent branches
# run: |
@@ -33,7 +35,7 @@ jobs:
run: |
cd docs
export PATH=$(python3 -c "import cmake; print(cmake.CMAKE_BIN_DIR)"):$PATH
python3 -m sphinx_multiversion . _build/html/
python3 -m sphinx . _build/html/main
- name: Update docs
run: |
28 changes: 24 additions & 4 deletions .github/workflows/integration-tests.yml
@@ -27,12 +27,13 @@ jobs:
run: |
if [ x"${{ github.repository }}" == x"openai/triton" ]; then
echo '::set-output name=matrix-required::[["self-hosted", "A100"], ["self-hosted", "H100"]]'
echo '::set-output name=matrix-optional::[]'
echo '::set-output name=matrix-optional::[["self-hosted", "gfx908"], ["self-hosted", "arc770"]]'
else
echo '::set-output name=matrix-required::["ubuntu-latest"]'
echo '::set-output name=matrix-optional::["ubuntu-latest"]'
fi
Integration-Tests-Nvidia:
needs: Runner-Preparation

@@ -44,8 +45,9 @@ jobs:

steps:
- name: Checkout
uses: actions/checkout@v2

uses: actions/checkout@v3
with:
submodules: 'true'
- name: Set CUDA ENV
if: ${{(matrix.runner[0] == 'self-hosted') && (matrix.runner[1] == 'V100' || matrix.runner[1] == 'A100' || matrix.runner[1] == 'H100')}}
run: |
@@ -62,12 +64,18 @@ jobs:
run: |
echo "PATH=${HOME}/.local/bin:${PATH}" >> "${GITHUB_ENV}"
- name: Check pre-commit
run: |
python3 -m pip install --upgrade pre-commit
python3 -m pre_commit run --all-files --verbose
- name: Install Triton
if: ${{ env.BACKEND == 'CUDA'}}
run: |
cd python
python3 -m pip install --upgrade pip
python3 -m pip install cmake==3.24
python3 -m pip install ninja
python3 -m pip install --no-build-isolation -vvv '.[tests]'
python3 -m pip install pytest-xdist
@@ -97,6 +105,8 @@ jobs:
python3 -m pytest runtime/
# run test_line_info.py separately with TRITON_DISABLE_LINE_INFO=0
TRITON_DISABLE_LINE_INFO=0 python3 -m pytest language/test_line_info.py
# run hopper/test_flashattention.py separately to avoid running out of GPU memory
python3 -m pytest hopper/test_flashattention.py
- name: Run python tests on CUDA with ENABLE_TMA=0 and ENABLE_MMA_V3=0
if: ${{ env.BACKEND == 'CUDA' && env.ENABLE_TMA == '0' && env.ENABLE_MMA_V3 == '0'}}
@@ -112,6 +122,14 @@ jobs:
run: |
rm -rf ~/.triton
- name: Run interpreter tests
env:
# TRITON_INTERPRET: "1"
CUDA_VISIBLE_DEVICES: ""
run: |
cd python/test/unit
python3 -m pytest -vs operators/test_flash_attention.py
- name: Run partial tests on CUDA with ENABLE_TMA=1 and ENABLE_MMA_V3=1
if: ${{ env.BACKEND == 'CUDA' && env.ENABLE_TMA == '1' && env.ENABLE_MMA_V3 == '1'}}
run: |
@@ -204,10 +222,12 @@ jobs:
- name: Install Triton on ROCM
if: ${{ env.BACKEND == 'ROCM'}}
run: |
git submodule update --init --recursive
cd python
python3 -m pip install --upgrade pip
python3 -m pip install cmake==3.24
python3 -m pip install torch==1.13.1 --index-url https://download.pytorch.org/whl/rocm5.2
export TRITON_CODEGEN_AMD_HIP_BACKEND=1
python3 -m pip install --no-build-isolation -vvv '.[tests]'
- name: Install Triton on XPU
@@ -229,7 +249,7 @@ jobs:
if: ${{ env.BACKEND == 'ROCM'}}
run: |
cd python/test/unit/language
python3 -m pytest --capture=tee-sys -rfs --verbose "test_core.py::test_empty_kernel"
python3 -m pytest --capture=tee-sys -rfs --verbose "test_core.py"
- name: Run python tests on XPU
if: ${{ env.BACKEND == 'XPU'}}
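
A note on the `Runner-Preparation` step above: it writes its job outputs with the `::set-output` workflow command, which GitHub Actions has deprecated. A minimal sketch of the current equivalent, assuming the same output names (a hypothetical replacement, not part of this commit):

```
# Deprecated form, as used in the workflow above:
#   echo '::set-output name=matrix-required::["ubuntu-latest"]'
# Current form: append a name=value pair to the file that $GITHUB_OUTPUT points at.
echo 'matrix-required=["ubuntu-latest"]' >> "$GITHUB_OUTPUT"
```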
10 changes: 10 additions & 0 deletions .gitignore
@@ -26,4 +26,14 @@ venv.bak/
cmake-build-*

# Third-party binaries
cuobjdump
nvdisasm
ptxas

# Docs
docs/_build/
docs/python-api/generated/
docs/dialects/
docs/getting-started/tutorials
!python/tutorials/*.py
!python/tutorials/*.rst
4 changes: 4 additions & 0 deletions .gitmodules
@@ -1,3 +1,7 @@
[submodule "third_party/intel_xpu_backend"]
path = third_party/intel_xpu_backend
url = http://github.com/intel/intel-xpu-backend-for-triton
[submodule "third_party/amd_hip_backend"]
path = third_party/amd_hip_backend
url = https://github.com/ROCmSoftwarePlatform/triton
branch = third_party_backend_2
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -44,7 +44,7 @@ repos:
^docs/conf.py$
)
- repo: https://github.com/pre-commit/mirrors-clang-format
rev: v14.0.6
rev: v16.0.6
hooks:
- id: clang-format
stages: [commit, push, manual]
1 change: 0 additions & 1 deletion CMakeLists.txt
@@ -212,7 +212,6 @@ if(TRITON_BUILD_PYTHON_MODULE)
TritonNvidiaGPUTransforms
TritonLLVMIR
TritonPTX
TritonHSACO
${dialect_libs}
${conversion_libs}

87 changes: 80 additions & 7 deletions README.md
@@ -4,11 +4,36 @@

[![Wheels](https://github.com/openai/triton/actions/workflows/wheels.yml/badge.svg?branch=release/2.0.x)](https://github.com/openai/triton/actions/workflows/wheels.yml)

We're hiring! If you are interested in working on Triton at OpenAI, we have roles open for [Compiler Engineers](https://openai.com/careers/software-engineer-triton-compiler) and [Kernel Engineers](https://openai.com/careers/kernel-engineer).

**`Documentation`** |
------------------- |
[![Documentation](https://github.com/openai/triton/actions/workflows/documentation.yml/badge.svg)](https://triton-lang.org/)

# Triton Developer Conference Registration Now Closed
The Triton Developer Conference will be held in hybrid mode at the Microsoft Silicon Valley Campus in Mountain View, California, on September 20th from 10am to 4pm, followed by a reception until 5:30pm.

Tentative Agenda for the conference (subject to change):

|Time |Title |Speaker|
|--------|-------|-------|
|10:00 AM|Welcome|Kevin Scott (Microsoft)|
|10:20 AM|The Triton Compiler: Past, Present and Future|Phil Tillet (OpenAI)|
|11:00 AM|**Break**||
|11:20 AM|Hopper support in Triton|Gustav Zhu (Nvidia)|
|11:40 AM|Bringing Triton to AMD GPUs|Jason Furmanek, Lixun Zhang (AMD)|
|12:00 PM|Intel XPU Backend for Triton|Eikan Wang (Intel)|
|12:20 PM|Vectorization of Triton Kernels for Qualcomm Hexagon Backend|Javed Absar (Qualcomm)|
|12:30 PM|**Lunch**||
|1:40 PM |Triton for MTIA|Roman Levenstein et al. (Meta)|
|2:00 PM |Using Triton IR for high-performance fusions in XLA|George Karpenkov (Google)|
|2:20 PM |Triton for All: Triton as a device-independent language|Ian Bearman (Microsoft)|
|2:40 PM|**Break**||
|3:00 PM|PyTorch 2.0 and TorchInductor|Jason Ansel, Horace He (Meta)|
|3:20 PM|Pallas: A JAX Kernel Language|Sharad Vikram (Google)|
|3:40 PM|Writing Grouped GEMMs in Triton|Vinod Grover (Nvidia)|
|4:00 PM|**Reception**||


# Triton

@@ -37,12 +62,64 @@ pip install -U --index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/

```
git clone https://github.com/openai/triton.git;
cd triton/python;
pip install cmake; # build-time dependency
pip install -e .
cd triton;
pip install ninja cmake; # build-time dependencies
pip install -e python
```

Or with a virtualenv:

```
git clone https://github.com/openai/triton.git;
cd triton;
python -m venv .venv --prompt triton;
source .venv/bin/activate;
pip install ninja cmake; # build-time dependencies
pip install -e python
```

# Building with a custom LLVM

Triton uses LLVM to generate code for GPUs and CPUs. Normally, the Triton build
downloads a prebuilt LLVM, but you can also build LLVM from source and use that.

LLVM does not have a stable API, so the Triton build will not work at an
arbitrary LLVM version.

1. Find the version of LLVM that Triton builds against. Check `python/setup.py`
for a line like

version = "llvm-17.0.0-c5dede880d17"

This means that the version of Triton you have builds against
[LLVM](https://github.com/llvm/llvm-project) c5dede880d17.

2. `git checkout` LLVM at this revision. Optionally, make additional
modifications to LLVM.
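
   A minimal sketch of that checkout, assuming your clone's `origin` points at
   llvm/llvm-project and using the revision from step 1:

       $ cd $HOME/llvm-project   # your clone of LLVM
       $ git fetch origin
       $ git checkout c5dede880d17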

3. [Build LLVM](https://llvm.org/docs/CMake.html). For example, you might run

$ cd $HOME/llvm-project # your clone of LLVM.
$ mkdir build
$ cd build
$ cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_ASSERTIONS=ON ../llvm -DLLVM_ENABLE_PROJECTS="mlir"
$ ninja

4. Grab a snack; this will take a while.

5. Build Triton as above, but set the following environment variables.

# Modify as appropriate to point to your LLVM build.
$ export LLVM_BUILD_DIR=$HOME/llvm-project/build

$ cd <triton install>
$ LLVM_INCLUDE_DIRS=$LLVM_BUILD_DIR/include \
LLVM_LIBRARY_DIR=$LLVM_BUILD_DIR/lib \
LLVM_SYSPATH=$LLVM_BUILD_DIR \
pip install -e python

# Changelog

@@ -56,10 +133,6 @@ Version 2.0 is out! New features include:

Community contributions are more than welcome, whether it be to fix bugs or to add new features at [github](https://github.com/openai/triton/). For more detailed instructions, please visit our [contributor's guide](CONTRIBUTING.md).

If you’re interested in joining our team and working on Triton & GPU kernels, [we’re hiring](https://openai.com/jobs/#acceleration)!




# Compatibility

18 changes: 17 additions & 1 deletion bin/CMakeLists.txt
@@ -53,7 +53,6 @@ llvm_update_compile_flags(triton-translate)
TritonNvidiaGPUTransforms
TritonLLVMIR
TritonPTX
TritonHSACO
${dialect_libs}
${conversion_libs}
# tests
@@ -80,3 +79,20 @@ llvm_update_compile_flags(triton-translate)
MLIRROCDLToLLVMIRTranslation
)
mlir_check_all_link_libraries(triton-translate)

add_llvm_executable(triton-llvm-opt
triton-llvm-opt.cpp

DEPENDS
intrinsics_gen
SUPPORT_PLUGINS
)
target_link_libraries(triton-llvm-opt PRIVATE
TritonLLVMIR

LLVMCore
LLVMSupport
LLVMOption
LLVMCodeGen
)
export_executable_symbols_for_plugins(triton-llvm-opt)
4 changes: 3 additions & 1 deletion bin/RegisterTritonDialects.h
@@ -1,4 +1,5 @@
#pragma once
#include "triton/Dialect/NVGPU/IR/Dialect.h"
#include "triton/Dialect/Triton/IR/Dialect.h"
#include "triton/Dialect/TritonGPU/IR/Dialect.h"
#include "triton/Dialect/TritonNvidiaGPU/IR/Dialect.h"
@@ -40,5 +41,6 @@ inline void registerTritonDialects(mlir::DialectRegistry &registry) {
mlir::triton::nvidia_gpu::TritonNvidiaGPUDialect,
mlir::triton::gpu::TritonGPUDialect, mlir::math::MathDialect,
mlir::arith::ArithDialect, mlir::scf::SCFDialect,
mlir::gpu::GPUDialect>();
mlir::gpu::GPUDialect, mlir::LLVM::LLVMDialect,
mlir::triton::nvgpu::NVGPUDialect>();
}