Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into ci_test_a_branch_that…
Browse files Browse the repository at this point in the history
…_fails_to_build
  • Loading branch information
xwang233 committed Aug 9, 2024
2 parents 3b5a25a + c3f8037 commit 5c14e7c
Show file tree
Hide file tree
Showing 607 changed files with 122,350 additions and 37,026 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/blossom-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ jobs:

# This job only runs for pull request comments
if: |
contains( 'xwang233,jjsjann123,', format('{0},', github.actor)) &&
github.event.comment.body == '!dontusethis'
github.event.comment.body == '!dontusethis' &&
(github.actor == 'xwang233' || github.actor == 'jjsjann123')
steps:
- name: Check if comment is issued by authorized person
run: blossom-ci
Expand Down
13 changes: 13 additions & 0 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,19 @@ env:
working_directory: .

jobs:
check-license:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Look for missing license headers
working-directory: ${{ env.working_directory }}
run: |
find . -regextype posix-extended -regex '.*\.(cpp|h|py|md|svg)' | egrep -v '^./(\.|third_party|bin|build|nvfuser/include|nvfuser/version.py|tools/linter/adapters|csrc/serde/fusion_cache_generated.h)' | xargs grep -L SPDX-FileCopyrightText | tee missing-header-files.txt
# test that file is empty
test ! -s missing-header-files.txt
clang-tidy:
runs-on: ubuntu-latest
steps:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/nvfuser-ci-trigger.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ jobs:

# This job only runs for pull request comments
if: |
contains(',xwang233,jjsjann123,chang-l,csarofeen,drzejan2,IvanYashchuk,jacobhinkle,kevinstephano,liqiangxl,mmigdal-nv,naoyam,ptrblck,rdspring1,samnordmann,zasdfgbnm,crcrpar,nWEIdia,Priya2698,wujingyue,tfogal,protonu,cowanmeg,', format(',{0},', github.actor)) &&
startsWith(github.event.comment.body, '!build')
startsWith(github.event.comment.body, '!build') &&
(github.actor == 'xwang233' || github.actor == 'jjsjann123' || github.actor == 'chang-l' || github.actor == 'csarofeen' || github.actor == 'drzejan2' || github.actor == 'IvanYashchuk' || github.actor == 'jacobhinkle' || github.actor == 'kevinstephano' || github.actor == 'liqiangxl' || github.actor == 'mmigdal-nv' || github.actor == 'naoyam' || github.actor == 'ptrblck' || github.actor == 'rdspring1' || github.actor == 'samnordmann' || github.actor == 'zasdfgbnm' || github.actor == 'crcrpar' || github.actor == 'nWEIdia' || github.actor == 'Priya2698' || github.actor == 'wujingyue' || github.actor == 'tfogal' || github.actor == 'protonu' || github.actor == 'cowanmeg' || github.actor == 'nsarka')
steps:
- name: Check if comment is issued by authorized person
run: blossom-ci
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,6 @@ foo.bin
.pre-commit-config.yaml

*_generated.*

# Mac OS internal file
.DS_Store
2 changes: 1 addition & 1 deletion .lintrunner.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ init_command = [
'python3',
'tools/linter/adapters/pip_init.py',
'--dry-run={{DRYRUN}}',
'clang-format==16.0.6',
'clang-format==18.1.8',
]
is_formatter = true

Expand Down
407 changes: 234 additions & 173 deletions CMakeLists.txt

Large diffs are not rendered by default.

36 changes: 29 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
<!--
* SPDX-FileCopyrightText: Copyright (c) 2023-present NVIDIA CORPORATION & AFFILIATES.
* All rights reserved.
* SPDX-License-Identifier: BSD-3-Clause
-->

# Fuser

A Fusion Code Generator for NVIDIA GPUs (commonly known as "nvFuser")
Expand All @@ -8,31 +14,47 @@ We publish nightly wheel packages on https://pypi.nvidia.com

built-env | cuda 11.8 | cuda 12.1
:---: | :---: | :---:
torch 2.1 | nvfuser-cu118-torch21 | nvfuser-cu121-torch21
torch 2.2 | nvfuser-cu118-torch22 | nvfuser-cu121-torch22
torch nightly wheel | nvfuser-cu118 | nvfuser-cu121

Note that nvfuser built against torch-2.1 isn't compatible with nightly pytorch wheel, so ensure you pick the right version suiting your environment.
Note that nvfuser built against torch-2.2 isn't compatible with nightly pytorch wheel, so ensure you pick the right version suiting your environment.

### nightly nvfuser pip wheel

You can instll a given nvfuser version with `pip install --pre nvfuser-cu121 --extra-index-url https://pypi.nvidia.com`
You can install a nightly nvfuser pip package built against torch nightly code base with `pip install --pre nvfuser-cu121 --extra-index-url https://pypi.nvidia.com`

As we build against nightly torch wheel and there's no compatibility promised on nightly wheels, we have explicitly marked the nightly torch wheel as an optional dependency. You can choose to install the torch wheel along with nvfuser package. e.g.
`pip install --pre "nvfuser-cu121[torch]" --extra-index-url https://pypi.nvidia.com`.
Note that this may uninstall your local pytorch installation and install the compatible nightly pytorch.

Versioned nvfuser will be published on pypi.org [WIP]
### nvfuser pip wheel against pytorch stable release

PyPI: [https://pypi.org/project/nvfuser/](https://pypi.org/search/?q=nvfuser)
nvfuser pip wheel built against stable torch releases is published on pypi.org. Pick the right cuda toolkit version to match your torch installation. e.g. `pip install nvfuser-cu121-torch22`

PyPI: [https://pypi.org/project/nvfuser/](https://pypi.org/search/?q=nvfuser)

## Developer

Docs: https://github.com/NVIDIA/Fuser/wiki

Supported compilers:
- gcc 11.4+
- clang14+

**GCC:**

We support all "supported releases" of gcc as specified in [the official site](https://gcc.gnu.org/).
As of 6/20/2024, they are:

- gcc 11.4
- gcc 12.4
- gcc 13.2
- gcc 14.1

**Clang:**

- clang 14+

Supported C++ standard:

- C++17
- C++20

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@

#include <cuda_runtime.h>

#include <benchmark/utils.h>
#include <test/utils.h>
#include <benchmarks/cpp/utils.h>
#include <tests/cpp/utils.h>

using namespace nvfuser;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@

#include <cuda_runtime.h>

#include <benchmark/utils.h>
#include <test/utils.h>
#include <benchmarks/cpp/utils.h>
#include <tests/cpp/utils.h>

using namespace nvfuser;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@

#include <cuda_runtime.h>

#include <benchmark/utils.h>
#include <test/utils.h>
#include <benchmarks/cpp/utils.h>
#include <tests/cpp/utils.h>

using namespace nvfuser;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@

#include <cuda_runtime.h>

#include <benchmark/utils.h>
#include <test/utils.h>
#include <benchmarks/cpp/utils.h>
#include <tests/cpp/utils.h>

using namespace nvfuser;

Expand Down
4 changes: 2 additions & 2 deletions benchmark/bert.cpp → benchmarks/cpp/bert.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@

#include <sstream>

#include <benchmark/utils.h>
#include <test/utils.h>
#include <benchmarks/cpp/utils.h>
#include <tests/cpp/utils.h>

using namespace nvfuser;

Expand Down
4 changes: 2 additions & 2 deletions benchmark/broadcast.cpp → benchmarks/cpp/broadcast.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@

#include <sstream>

#include <benchmark/utils.h>
#include <test/utils.h>
#include <benchmarks/cpp/utils.h>
#include <tests/cpp/utils.h>

using namespace nvfuser;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@

#include <cuda_runtime.h>

#include <benchmark/utils.h>
#include <test/utils.h>
#include <benchmarks/cpp/utils.h>
#include <tests/cpp/utils.h>

using namespace nvfuser;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@

#include <sstream>

#include <benchmark/utils.h>
#include <test/utils.h>
#include <benchmarks/cpp/utils.h>
#include <tests/cpp/utils.h>

using namespace nvfuser;

Expand Down Expand Up @@ -114,9 +114,12 @@ static void NvFuserScheduler_GeluBackwardReduction(

runBenchmarkIterations(benchmark_state, fusion_executor_cache, aten_inputs);

// inputs: gradient tensor + input tensor
// outputs: output, output_of_reduction
benchmark_state.SetBytesProcessed(
int64_t(benchmark_state.iterations()) *
(iter_size * reduction_size + iter_size) * int64_t(dataTypeSize(dtype)));
(iter_size * reduction_size * 3 + iter_size) *
int64_t(dataTypeSize(dtype)));
}

static void Baseline_GeluBackwardReduction(
Expand Down Expand Up @@ -172,7 +175,8 @@ static void Baseline_GeluBackwardReduction(

benchmark_state.SetBytesProcessed(
int64_t(benchmark_state.iterations()) *
(iter_size * reduction_size + iter_size) * int64_t(dataTypeSize(dtype)));
(iter_size * reduction_size * 3 + iter_size) *
int64_t(dataTypeSize(dtype)));
}

//------------------------------------------------------------------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@

#include <cuda_runtime.h>

#include <benchmark/utils.h>
#include <test/utils.h>
#include <benchmarks/cpp/utils.h>
#include <tests/cpp/utils.h>

using namespace nvfuser;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@

#include <cuda_runtime.h>

#include <benchmark/utils.h>
#include <test/utils.h>
#include <benchmarks/cpp/utils.h>
#include <tests/cpp/utils.h>

using namespace nvfuser;

Expand Down
4 changes: 2 additions & 2 deletions benchmark/indexselect.cpp → benchmarks/cpp/indexselect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@

#include <cuda_runtime.h>

#include <benchmark/utils.h>
#include <test/utils.h>
#include <benchmarks/cpp/utils.h>
#include <tests/cpp/utils.h>

using namespace nvfuser;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@

#include <cuda_runtime.h>

#include <benchmark/utils.h>
#include <test/utils.h>
#include <benchmarks/cpp/utils.h>
#include <tests/cpp/utils.h>

using namespace nvfuser;

Expand Down
4 changes: 2 additions & 2 deletions benchmark/layer_norm.cpp → benchmarks/cpp/layer_norm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@

#include <cuda_runtime.h>

#include <benchmark/utils.h>
#include <test/utils.h>
#include <benchmarks/cpp/utils.h>
#include <tests/cpp/utils.h>

using namespace nvfuser;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@

#include <cuda_runtime.h>

#include <benchmark/utils.h>
#include <test/utils.h>
#include <benchmarks/cpp/utils.h>
#include <tests/cpp/utils.h>

using namespace nvfuser;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@

#include <cuda_runtime.h>

#include <benchmark/utils.h>
#include <test/utils.h>
#include <benchmarks/cpp/utils.h>
#include <tests/cpp/utils.h>

using namespace nvfuser;

Expand Down Expand Up @@ -59,7 +59,7 @@ static void setupLayerNormFused(Fusion* fusion, DataType dtype) {
auto tv19 = broadcast(tv18, {false, true});

nvfuser::Val* num_features = IrBuilder::create<Val>(1.0);
num_features = mul(num_features, tv0->getLeafDomain()[0]->extent());
num_features = mul(num_features, tv0->getLogicalDomain()[0]->extent());
auto s20 = num_features;

auto s21 = reciprocal(s20);
Expand Down
4 changes: 2 additions & 2 deletions benchmark/lstm_cell.cpp → benchmarks/cpp/lstm_cell.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@

#include <cuda_runtime.h>

#include <benchmark/utils.h>
#include <test/utils.h>
#include <benchmarks/cpp/utils.h>
#include <tests/cpp/utils.h>

using namespace nvfuser;

Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
#include <ops/arith.h>

#include <benchmark/benchmark.h>
#include <benchmark/utils.h>
#include <test/utils.h>
#include <benchmarks/cpp/utils.h>
#include <tests/cpp/utils.h>

using namespace nvfuser;

Expand Down
Loading

0 comments on commit 5c14e7c

Please sign in to comment.