Add a pull-request workflow that builds the kernels wheel, and let the
DS_KERNELS_MAKE_JOBS environment variable bound the parallelism of the
`make` step in builder/builder.py (unset or blank keeps plain `make -j`).

diff --git a/.github/workflows/build-kernels.yml b/.github/workflows/build-kernels.yml
new file mode 100644
index 0000000..f2d55d4
--- /dev/null
+++ b/.github/workflows/build-kernels.yml
@@ -0,0 +1,35 @@
+name: Build Kernels
+
+on:
+  pull_request:
+
+jobs:
+  build:
+
+    runs-on: [self-hosted, cpu]
+
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          submodules: 'recursive'
+
+      - name: environment
+        run: |
+          python --version
+          nvcc --version
+          python -c "import torch; print('torch:', torch.__version__, torch)"
+          python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
+
+      - name: build kernels
+        run: |
+          ts=$(date +%s)
+          DS_KERNELS_MAKE_JOBS=10 DS_KERNELS_BUILD_STRING=".dev${ts}" CUDA_ARCH_LIST="80;86;89;90" python setup.py bdist_wheel
+          fname=$(ls dist)
+          nname=$(echo $fname | sed 's/cp[0-9]\+-cp[0-9]\+/py3-none/' | sed 's/linux/manylinux1/')
+          mv "dist/$fname" "dist/$nname"
+          ls -al
+
+      - uses: actions/upload-artifact@v3
+        with:
+          name: deepspeed-kernels-whl
+          path: dist/*.whl
diff --git a/builder/builder.py b/builder/builder.py
index 23d6fbb..611aa9f 100644
--- a/builder/builder.py
+++ b/builder/builder.py
@@ -72,8 +72,14 @@ def build_extension(self, ext):
 
         abs_build_lib = os.path.join(os.path.abspath(self.build_lib), "dskernels")
 
-        subprocess.check_call(['cmake', '-B', abs_build_temp, 
+        subprocess.check_call(['cmake', '-B', abs_build_temp,
                                f'-DLIB_OUTPUT_DIR={abs_build_lib}',
                                f'-DCUDA_ARCH_LIST={cuda_arch_list}'],
                               cwd=ext.source)
-        subprocess.check_call(['make', '-j'], cwd=abs_build_temp)
+
+        # Allow user to specify degree of make parallelism. The command is
+        # built as an argv list so an unset or blank DS_KERNELS_MAKE_JOBS
+        # falls back to plain `make -j` instead of passing make an empty argument.
+        make_jobs = os.environ.get('DS_KERNELS_MAKE_JOBS', '').strip()
+        make_cmd = ['make', '-j'] + ([make_jobs] if make_jobs else [])
+        subprocess.check_call(make_cmd, cwd=abs_build_temp)
diff --git a/release/release.sh b/release/release.sh
index 748b66d..c236ee9 100644
--- a/release/release.sh
+++ b/release/release.sh
@@ -2,6 +2,9 @@ set -ex
 
 rm -rf dist
 
+# enable to reduce overall memory consumption if running on a small VM
+#export DS_KERNELS_MAKE_JOBS=10
+
 ts=$(date +%s)
 DS_KERNELS_BUILD_STRING=".dev${ts}" CUDA_ARCH_LIST="80;86" python setup.py bdist_wheel
 