Skip to content

Commit

Permalink
Merge remote-tracking branch 'xinhao/merged_bert' into bert_fix2
Browse files Browse the repository at this point in the history
  • Loading branch information
xinhaoc committed Sep 26, 2024
2 parents 8488ba0 + 2e363c4 commit 1753e7e
Show file tree
Hide file tree
Showing 641 changed files with 92,433 additions and 11,708 deletions.
8 changes: 8 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,11 @@ python/flexflow/core/legion_cffi_header.py
*.pb.h
*.o
*.a

# Ignore inference assets
/inference/weights/*
/inference/tokenizer/*
/inference/prompt/*
/inference/output/*

/tests/inference/python_test_configs/*.json
3 changes: 0 additions & 3 deletions .github/PULL_REQUEST_TEMPLATE.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,3 @@ Linked Issues:
Issues closed by this PR:
- Closes #

**Before merging:**

- [ ] Did you update the [flexflow-third-party](https://github.com/flexflow/flexflow-third-party) repo, if modifying any of the Cmake files, the build configs, or the submodules?
255 changes: 255 additions & 0 deletions .github/README.md

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions .github/workflows/build-skip.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ on:
pull_request:
paths-ignore:
- "include/**"
- "inference/**"
- "cmake/**"
- "config/**"
- "deps/**"
Expand Down
71 changes: 47 additions & 24 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ on:
pull_request:
paths:
- "include/**"
- "inference/**"
- "cmake/**"
- "config/**"
- "deps/**"
Expand All @@ -15,6 +16,7 @@ on:
- "master"
paths:
- "include/**"
- "inference/**"
- "cmake/**"
- "config/**"
- "deps/**"
Expand All @@ -38,6 +40,8 @@ jobs:
matrix:
gpu_backend: ["cuda", "hip_rocm"]
fail-fast: false
env:
FF_GPU_BACKEND: ${{ matrix.gpu_backend }}
steps:
- name: Checkout Git Repository
uses: actions/checkout@v3
Expand All @@ -48,39 +52,49 @@ jobs:
run: .github/workflows/helpers/free_space_on_runner.sh

- name: Install CUDA
uses: Jimver/cuda-toolkit@v0.2.11
uses: Jimver/cuda-toolkit@v0.2.16
if: ${{ matrix.gpu_backend == 'cuda' }}
id: cuda-toolkit
with:
cuda: "11.8.0"
cuda: "12.1.1"
# Disable caching of the CUDA binaries, since it does not give us any significant performance improvement
use-github-cache: "false"
log-file-suffix: 'cmake_${{matrix.gpu_backend}}.txt'

- name: Install system dependencies
run: FF_GPU_BACKEND=${{ matrix.gpu_backend }} .github/workflows/helpers/install_dependencies.sh
run: .github/workflows/helpers/install_dependencies.sh

- name: Install conda and FlexFlow dependencies
uses: conda-incubator/setup-miniconda@v2
with:
activate-environment: flexflow
environment-file: conda/environment.yml
environment-file: conda/flexflow.yml
auto-activate-base: false

- name: Build FlexFlow
run: |
export CUDNN_DIR="$CUDA_PATH"
export CUDA_DIR="$CUDA_PATH"
export FF_HOME=$(pwd)
export FF_GPU_BACKEND=${{ matrix.gpu_backend }}
export FF_CUDA_ARCH=70
export FF_HIP_ARCH=gfx1100,gfx1036
export hip_version=5.6
export FF_BUILD_ALL_INFERENCE_EXAMPLES=ON
if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then
export FF_BUILD_ALL_EXAMPLES=ON
export FF_BUILD_UNIT_TESTS=ON
else
export FF_BUILD_ALL_EXAMPLES=OFF
export FF_BUILD_UNIT_TESTS=OFF
fi
cores_available=$(nproc --all)
n_build_cores=$(( cores_available -1 ))
if (( $n_build_cores < 1 )) ; then n_build_cores=1 ; fi
mkdir build
cd build
if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then
export FF_BUILD_ALL_EXAMPLES=ON
export FF_BUILD_UNIT_TESTS=ON
fi
../config/config.linux
make -j $n_build_cores
Expand All @@ -89,35 +103,44 @@ jobs:
export CUDNN_DIR="$CUDA_PATH"
export CUDA_DIR="$CUDA_PATH"
export FF_HOME=$(pwd)
export FF_GPU_BACKEND=${{ matrix.gpu_backend }}
export FF_CUDA_ARCH=70
cd build
export FF_HIP_ARCH=gfx1100,gfx1036
export hip_version=5.6
export FF_BUILD_ALL_INFERENCE_EXAMPLES=ON
if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then
export FF_BUILD_ALL_EXAMPLES=ON
export FF_BUILD_ALL_EXAMPLES=ON
export FF_BUILD_UNIT_TESTS=ON
else
export FF_BUILD_ALL_EXAMPLES=OFF
export FF_BUILD_UNIT_TESTS=OFF
fi
cd build
../config/config.linux
sudo make install
sudo ldconfig
- name: Check availability of Python flexflow.core module
if: ${{ matrix.gpu_backend == 'cuda' }}
run: |
export LD_LIBRARY_PATH="$CUDA_PATH/lib64/stubs:$LD_LIBRARY_PATH"
sudo ln -s "$CUDA_PATH/lib64/stubs/libcuda.so" "$CUDA_PATH/lib64/stubs/libcuda.so.1"
export CPU_ONLY_TEST=1
python -c "import flexflow.core; exit()"
- name: Run C++ unit tests
if: ${{ matrix.gpu_backend == 'cuda' }}
run: |
export CUDNN_DIR="$CUDA_PATH"
export CUDA_DIR="$CUDA_PATH"
export LD_LIBRARY_PATH="$CUDA_PATH/lib64/stubs:$LD_LIBRARY_PATH"
export FF_HOME=$(pwd)
sudo ln -s "$CUDA_PATH/lib64/stubs/libcuda.so" "$CUDA_PATH/lib64/stubs/libcuda.so.1"
cd build
./tests/unit/unit-test
- name: Check availability of flexflow modules in Python
run: |
if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then
export LD_LIBRARY_PATH="$CUDA_PATH/lib64/stubs:$LD_LIBRARY_PATH"
fi
# Remove build folder to check that the installed version can run independently of the build files
rm -rf build
python -c "import flexflow.core; import flexflow.serve as ff; exit()"
makefile-build:
name: Build FlexFlow with the Makefile
runs-on: ubuntu-20.04
Expand All @@ -134,11 +157,12 @@ jobs:
run: .github/workflows/helpers/free_space_on_runner.sh

- name: Install CUDA
uses: Jimver/cuda-toolkit@v0.2.11
uses: Jimver/cuda-toolkit@v0.2.16
id: cuda-toolkit
with:
cuda: "11.8.0"
cuda: "12.1.1"
use-github-cache: "false"
log-file-suffix: 'makefile_${{matrix.gpu_backend}}.txt'

- name: Install system dependencies
run: .github/workflows/helpers/install_dependencies.sh
Expand All @@ -147,7 +171,7 @@ jobs:
uses: conda-incubator/setup-miniconda@v2
with:
activate-environment: flexflow
environment-file: conda/environment.yml
environment-file: conda/flexflow.yml
auto-activate-base: false

- name: Build FlexFlow
Expand All @@ -163,5 +187,4 @@ jobs:
cd python
make -j $n_build_cores
export CPU_ONLY_TEST=1
python -c 'import flexflow.core'
2 changes: 1 addition & 1 deletion .github/workflows/clang-format-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
- check: "src"
exclude: '\.proto$'
- check: "include"
- check: "nmt"
- check: "inference"
- check: "python"
- check: "scripts"
- check: "tests"
Expand Down
33 changes: 14 additions & 19 deletions .github/workflows/docker-build-skip.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,27 +13,22 @@ concurrency:
cancel-in-progress: true

jobs:
docker-build:
name: Build and Install FlexFlow in a Docker Container
runs-on: ubuntu-20.04
docker-build-rocm:
name: Build and Install FlexFlow in a Docker Container (ROCm backend)
runs-on: ubuntu-latest
strategy:
matrix:
gpu_backend: ["cuda", "hip_rocm"]
cuda_version: ["11.1", "11.2", "11.3", "11.5", "11.6", "11.7", "11.8"]
# The CUDA version doesn't matter when building for hip_rocm, so we just pick one arbitrarily (11.8) to avoid building for hip_rocm once per number of CUDA version supported
exclude:
- gpu_backend: "hip_rocm"
cuda_version: "11.1"
- gpu_backend: "hip_rocm"
cuda_version: "11.2"
- gpu_backend: "hip_rocm"
cuda_version: "11.3"
- gpu_backend: "hip_rocm"
cuda_version: "11.5"
- gpu_backend: "hip_rocm"
cuda_version: "11.6"
- gpu_backend: "hip_rocm"
cuda_version: "11.7"
hip_version: ["5.3", "5.4", "5.5", "5.6"]
fail-fast: false
steps:
- run: 'echo "No docker-build required"'

docker-build-cuda:
name: Build and Install FlexFlow in a Docker Container (CUDA backend)
runs-on: ubuntu-latest
strategy:
matrix:
cuda_version: ["11.1", "11.6", "11.7", "11.8", "12.0", "12.1", "12.2"]
fail-fast: false
steps:
- run: 'echo "No docker-build required"'
Loading

0 comments on commit 1753e7e

Please sign in to comment.