From 434a56b73e7264107754963777b15f42459205b9 Mon Sep 17 00:00:00 2001 From: rusty1s Date: Thu, 4 Apr 2024 10:06:26 +0000 Subject: [PATCH 01/46] update --- .github/workflows/install.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/install.yml b/.github/workflows/install.yml index 1c9478ab0..70231ef4b 100644 --- a/.github/workflows/install.yml +++ b/.github/workflows/install.yml @@ -9,7 +9,7 @@ on: # yamllint disable-line rule:truthy jobs: import: - runs-on: ubuntu-latest + runs-on: [windows-latest] strategy: matrix: From 2d0d74772fa894321f986aa208d3bc16d3de13e2 Mon Sep 17 00:00:00 2001 From: rusty1s Date: Thu, 4 Apr 2024 10:12:11 +0000 Subject: [PATCH 02/46] update --- .github/workflows/install.yml | 4 +++- CHANGELOG.md | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/install.yml b/.github/workflows/install.yml index 70231ef4b..950739419 100644 --- a/.github/workflows/install.yml +++ b/.github/workflows/install.yml @@ -9,10 +9,11 @@ on: # yamllint disable-line rule:truthy jobs: import: - runs-on: [windows-latest] + runs-on: ${{ matrix.os }} strategy: matrix: + os: [windows-latest] cuda-version: ['cpu', 'cu121'] steps: @@ -30,6 +31,7 @@ jobs: run: | source ./.github/workflows/cuda/${{ runner.os }}-env.sh ${{ matrix.cuda-version }} pip install --verbose -e . + shell: bash - name: Test imports run: | diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c8a5e97c..c069e6c2a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ## [0.5.0] - 2023-MM-DD ### Added +- Added Windows support ([#315](https://github.com/pyg-team/pyg-lib/pull/315)) - Added macOS Apple Silicon support ([#310](https://github.com/pyg-team/pyg-lib/pull/310)) ### Changed ### Removed From ef9ef00ca5c3f159351d6c17741db5420aedfe41 Mon Sep 17 00:00:00 2001 From: rusty1s Date: Thu, 4 Apr 2024 10:23:51 +0000 Subject: [PATCH 03/46] update --- .github/workflows/building.yml | 1 + .github/workflows/cuda/Windows.sh | 10 +++++----- .github/workflows/install.yml | 3 +-- .github/workflows/nightly.yml | 1 + 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/.github/workflows/building.yml b/.github/workflows/building.yml index 65ea31698..d0398ac7f 100644 --- a/.github/workflows/building.yml +++ b/.github/workflows/building.yml @@ -121,6 +121,7 @@ jobs: python -c "import pyg_lib; print('pyg-lib:', pyg_lib.__version__)" python -c "import pyg_lib; print('CUDA:', pyg_lib.cuda_version())" cd .. 
+ shell: bash - name: Configure AWS uses: aws-actions/configure-aws-credentials@v1 diff --git a/.github/workflows/cuda/Windows.sh b/.github/workflows/cuda/Windows.sh index 0bee4bf53..8f275405d 100644 --- a/.github/workflows/cuda/Windows.sh +++ b/.github/workflows/cuda/Windows.sh @@ -1,10 +1,5 @@ #!/bin/bash -# Install NVIDIA drivers, see: -# https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102 -curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "/tmp/gpu_driver_dlls.zip" -7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32" - case ${1} in cu121) CUDA_SHORT=12.1 @@ -42,6 +37,11 @@ case ${1} in ;; esac +# Install NVIDIA drivers, see: +# https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102 +curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "/tmp/gpu_driver_dlls.zip" +7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32" + curl -k -L "${CUDA_URL}/${CUDA_FILE}" --output "${CUDA_FILE}" echo "" echo "Installing from ${CUDA_FILE}..." diff --git a/.github/workflows/install.yml b/.github/workflows/install.yml index 950739419..d0c5d4104 100644 --- a/.github/workflows/install.yml +++ b/.github/workflows/install.yml @@ -29,9 +29,8 @@ jobs: - name: Install package run: | - source ./.github/workflows/cuda/${{ runner.os }}-env.sh ${{ matrix.cuda-version }} + # source ./.github/workflows/cuda/${{ runner.os }}-env.sh ${{ matrix.cuda-version }} pip install --verbose -e . - shell: bash - name: Test imports run: | diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 1eef0b3b8..968350427 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -127,6 +127,7 @@ jobs: python -c "import pyg_lib; print('pyg-lib:', pyg_lib.__version__)" python -c "import pyg_lib; print('CUDA:', pyg_lib.cuda_version())" cd .. 
+ shell: bash - name: Configure AWS uses: aws-actions/configure-aws-credentials@v1 From 4fe3b6a0ad7244ffe144d67d18356fc8e3e914cd Mon Sep 17 00:00:00 2001 From: rusty1s Date: Thu, 4 Apr 2024 11:05:28 +0000 Subject: [PATCH 04/46] update --- .github/actions/setup/action.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/actions/setup/action.yml b/.github/actions/setup/action.yml index af42016a8..3388149b0 100644 --- a/.github/actions/setup/action.yml +++ b/.github/actions/setup/action.yml @@ -26,6 +26,9 @@ runs: sudo rm -rf /usr/share/dotnet shell: bash + - name: Set up Windows developer command prompt + uses: ilammy/msvc-dev-cmd@v1 + - name: Install CUDA ${{ inputs.cuda-version }} if: ${{ inputs.cuda-version != 'cpu' }} run: | From 0732f2232441d17f8901acdcf137ebc04a7acd19 Mon Sep 17 00:00:00 2001 From: rusty1s Date: Thu, 4 Apr 2024 19:12:23 +0000 Subject: [PATCH 05/46] update --- CMakeLists.txt | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d66365542..6e98ad35d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,16 +71,16 @@ else() target_include_directories(${PROJECT_NAME} PRIVATE ${PHMAP_DIR}) endif() -set(METIS_DIR third_party/METIS) -target_include_directories(${PROJECT_NAME} PRIVATE ${METIS_DIR}/include) -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DIDXTYPEWIDTH=64 -DREALTYPEWIDTH=32") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DIDXTYPEWIDTH=64 -DREALTYPEWIDTH=32") -set(GKLIB_PATH "${METIS_DIR}/GKlib") -include(${GKLIB_PATH}/GKlibSystem.cmake) -include_directories(${GKLIB_PATH}) -include_directories("${METIS_DIR}/include") -add_subdirectory("${METIS_DIR}/libmetis") -target_link_libraries(${PROJECT_NAME} PRIVATE metis) +# set(METIS_DIR third_party/METIS) +# target_include_directories(${PROJECT_NAME} PRIVATE ${METIS_DIR}/include) +# set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DIDXTYPEWIDTH=64 -DREALTYPEWIDTH=32") +# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DIDXTYPEWIDTH=64 -DREALTYPEWIDTH=32") +# set(GKLIB_PATH "${METIS_DIR}/GKlib") +# include(${GKLIB_PATH}/GKlibSystem.cmake) +# include_directories(${GKLIB_PATH}) +# include_directories("${METIS_DIR}/include") +# add_subdirectory("${METIS_DIR}/libmetis") +# target_link_libraries(${PROJECT_NAME} PRIVATE metis) find_package(Torch REQUIRED) target_link_libraries(${PROJECT_NAME} PRIVATE ${TORCH_LIBRARIES}) From 257635e037baca736a4cede61f7ca5e88a8099e9 Mon Sep 17 00:00:00 2001 From: rusty1s Date: Thu, 4 Apr 2024 19:32:34 +0000 Subject: [PATCH 06/46] update --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6e98ad35d..565cd2597 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,7 @@ cmake_minimum_required(VERSION 3.15) project(pyg) set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) set(PYG_VERSION 0.4.0) option(BUILD_TEST "Enable testing" OFF) From e7f5dd41de1bc069965fb50512201a890871a77b Mon Sep 17 00:00:00 2001 From: rusty1s Date: Thu, 4 Apr 2024 19:41:24 +0000 Subject: [PATCH 07/46] update --- pyg_lib/csrc/ops/cpu/matmul_kernel.cpp | 301 +++++++++++++------------ 1 file changed, 156 insertions(+), 145 deletions(-) diff --git a/pyg_lib/csrc/ops/cpu/matmul_kernel.cpp b/pyg_lib/csrc/ops/cpu/matmul_kernel.cpp index 06a020086..73f8631d3 100644 --- a/pyg_lib/csrc/ops/cpu/matmul_kernel.cpp +++ b/pyg_lib/csrc/ops/cpu/matmul_kernel.cpp @@ -206,76 +206,82 @@ void grouped_matmul_out_kernel_mkl_impl(const std::vector input, const std::vector other, std::vector out) { // matrix_params - using 
matrix_params = std::tuple; - phmap::flat_hash_map> groups; - for (size_t i = 0; i < input.size(); ++i) { - const matrix_params mp = {input[i].size(0), other[i].size(-1), - input[i].size(-1)}; - if (groups.count(mp)) { - groups[mp].push_back(i); - } else { - groups.insert({mp, {i}}); - } - } - - AT_DISPATCH_FLOATING_TYPES( - input.front().scalar_type(), "grouped_matmul_out_kernel_mkl_impl", [&] { - const auto group_count = static_cast(groups.size()); - std::vector alpha(group_count, 1); - std::vector beta(group_count, 0); - - std::vector ms(group_count); - std::vector ns(group_count); - std::vector ks(group_count); - std::vector ld_src0(group_count); - std::vector ld_src1(group_count); - std::vector ld_dst(group_count); - std::vector group_sizes(group_count); - std::vector src0; - std::vector src1; - std::vector dst; - - size_t group_idx = 0; - for (const auto& group_kv : groups) { - int m; - int n; - int k; - std::tie(m, n, k) = group_kv.first; - const auto& indices = group_kv.second; - - ms[group_idx] = m; - ns[group_idx] = n; - ks[group_idx] = k; - ld_src0[group_idx] = k; - ld_src1[group_idx] = n; - ld_dst[group_idx] = n; - group_sizes[group_idx] = indices.size(); - ++group_idx; - - for (const auto tensor_idx : indices) { - src0.push_back(input[tensor_idx].data_ptr()); - src1.push_back(other[tensor_idx].data_ptr()); - dst.push_back(out[tensor_idx].data_ptr()); - } - } - - auto src0_ptrs = const_cast(src0.data()); - auto src1_ptrs = const_cast(src1.data()); - auto dst_ptrs = dst.data(); - -#if AT_MKL_SEQUENTIAL() - // unlikely to happen - requires Torch to be built from source with - // explicit flag denoting MKL sequential version - parallel_mkl_blas_gemm_batched(ms, ns, ks, alpha, src0_ptrs, ld_src0, - src1_ptrs, ld_src1, beta, dst_ptrs, - ld_dst, group_count, group_sizes); -#else - mkl_blas_gemm_batched(ms.data(), ns.data(), ks.data(), alpha.data(), - src0_ptrs, ld_src0.data(), src1_ptrs, ld_src1.data(), - beta.data(), dst_ptrs, ld_dst.data(), group_count, - group_sizes.data()); -#endif - }); + /* using matrix_params = std::tuple; */ + /* phmap::flat_hash_map> groups; */ + /* for (size_t i = 0; i < input.size(); ++i) { */ + /* const matrix_params mp = {input[i].size(0), other[i].size(-1), */ + /* input[i].size(-1)}; */ + /* if (groups.count(mp)) { */ + /* groups[mp].push_back(i); */ + /* } else { */ + /* groups.insert({mp, {i}}); */ + /* } */ + /* } */ + + /* AT_DISPATCH_FLOATING_TYPES( */ + /* input.front().scalar_type(), "grouped_matmul_out_kernel_mkl_impl", [&] + * { */ + /* const auto group_count = static_cast(groups.size()); */ + /* std::vector alpha(group_count, 1); */ + /* std::vector beta(group_count, 0); */ + + /* std::vector ms(group_count); */ + /* std::vector ns(group_count); */ + /* std::vector ks(group_count); */ + /* std::vector ld_src0(group_count); */ + /* std::vector ld_src1(group_count); */ + /* std::vector ld_dst(group_count); */ + /* std::vector group_sizes(group_count); */ + /* std::vector src0; */ + /* std::vector src1; */ + /* std::vector dst; */ + + /* size_t group_idx = 0; */ + /* for (const auto& group_kv : groups) { */ + /* int m; */ + /* int n; */ + /* int k; */ + /* std::tie(m, n, k) = group_kv.first; */ + /* const auto& indices = group_kv.second; */ + + /* ms[group_idx] = m; */ + /* ns[group_idx] = n; */ + /* ks[group_idx] = k; */ + /* ld_src0[group_idx] = k; */ + /* ld_src1[group_idx] = n; */ + /* ld_dst[group_idx] = n; */ + /* group_sizes[group_idx] = indices.size(); */ + /* ++group_idx; */ + + /* for (const auto tensor_idx : indices) { */ + /* 
src0.push_back(input[tensor_idx].data_ptr()); */ + /* src1.push_back(other[tensor_idx].data_ptr()); */ + /* dst.push_back(out[tensor_idx].data_ptr()); */ + /* } */ + /* } */ + + /* auto src0_ptrs = const_cast(src0.data()); */ + /* auto src1_ptrs = const_cast(src1.data()); */ + /* auto dst_ptrs = dst.data(); */ + + /* #if AT_MKL_SEQUENTIAL() */ + /* // unlikely to happen - requires Torch to be built from source with + */ + /* // explicit flag denoting MKL sequential version */ + /* parallel_mkl_blas_gemm_batched(ms, ns, ks, alpha, src0_ptrs, ld_src0, + */ + /* src1_ptrs, ld_src1, beta, dst_ptrs, */ + /* ld_dst, group_count, group_sizes); */ + /* #else */ + /* mkl_blas_gemm_batched(ms.data(), ns.data(), ks.data(), alpha.data(), + */ + /* src0_ptrs, ld_src0.data(), src1_ptrs, + * ld_src1.data(), */ + /* beta.data(), dst_ptrs, ld_dst.data(), + * group_count, */ + /* group_sizes.data()); */ + /* #endif */ + /* }); */ } std::vector grouped_matmul_kernel(const at::TensorList input, @@ -328,81 +334,86 @@ void segment_matmul_out_kernel_mkl_impl(const at::Tensor& input, const at::Tensor& other, at::Tensor& out, const at::IntArrayRef& sizes) { - const int n = other.size(-1); - const int k = input.size(-1); - const int nk = n * k; - phmap::flat_hash_map> groups; - std::vector offsets = {{0, 0, 0}}; - offsets.reserve(sizes.size() + 1); - for (size_t i = 0; i < sizes.size(); ++i) { - const int m = sizes[i]; - if (groups.count(m)) { - groups[m].push_back(i); - } else { - groups.insert({m, {i}}); - } - - offset_params offset = {m * k, nk, m * n}; - offset += offsets.back(); - offsets.push_back(offset); - } - offsets.pop_back(); - - AT_DISPATCH_FLOATING_TYPES( - input.scalar_type(), "segment_matmul_out_kernel_mkl_impl", [&] { - const auto group_count = static_cast(groups.size()); - std::vector alpha(group_count, 1); - std::vector beta(group_count, 0); - std::vector ns(group_count, n); - std::vector ks(group_count, k); - std::vector ld_src0(group_count, k); - std::vector ld_src1(group_count, n); - std::vector ld_dst(group_count, n); - - std::vector ms(group_count); - std::vector group_sizes(group_count); - std::vector src0; - std::vector src1; - std::vector dst; - - const auto src0_base_ptr = input.data_ptr(); - const auto src1_base_ptr = other.data_ptr(); - const auto dst_base_ptr = out.data_ptr(); - - size_t group_idx = 0; - for (const auto& group_kv : groups) { - int m = group_kv.first; - const auto& indices = group_kv.second; - - ms[group_idx] = m; - group_sizes[group_idx] = indices.size(); - ++group_idx; - - for (const auto offset_idx : indices) { - const auto offset = offsets[offset_idx]; - src0.push_back(src0_base_ptr + offset.src0_offset); - src1.push_back(src1_base_ptr + offset.src1_offset); - dst.push_back(dst_base_ptr + offset.dst_offset); - } - } - - auto src0_ptrs = const_cast(src0.data()); - auto src1_ptrs = const_cast(src1.data()); - auto dst_ptrs = dst.data(); - -#if AT_MKL_SEQUENTIAL() - // unlikely to happen - requires Torch to be built from source with - // explicit flag denoting MKL sequential version - parallel_mkl_blas_gemm_batched(ms, ns, ks, alpha, src0_ptrs, ld_src0, - src1_ptrs, ld_src1, beta, dst_ptrs, - ld_dst, group_count, group_sizes); -#else - mkl_blas_gemm_batched(ms.data(), ns.data(), ks.data(), alpha.data(), - src0_ptrs, ld_src0.data(), src1_ptrs, ld_src1.data(), - beta.data(), dst_ptrs, ld_dst.data(), group_count, - group_sizes.data()); -#endif - }); + /* const int n = other.size(-1); */ + /* const int k = input.size(-1); */ + /* const int nk = n * k; */ + /* 
phmap::flat_hash_map> groups; */ + /* std::vector offsets = {{0, 0, 0}}; */ + /* offsets.reserve(sizes.size() + 1); */ + /* for (size_t i = 0; i < sizes.size(); ++i) { */ + /* const int m = sizes[i]; */ + /* if (groups.count(m)) { */ + /* groups[m].push_back(i); */ + /* } else { */ + /* groups.insert({m, {i}}); */ + /* } */ + + /* offset_params offset = {m * k, nk, m * n}; */ + /* offset += offsets.back(); */ + /* offsets.push_back(offset); */ + /* } */ + /* offsets.pop_back(); */ + + /* AT_DISPATCH_FLOATING_TYPES( */ + /* input.scalar_type(), "segment_matmul_out_kernel_mkl_impl", [&] { */ + /* const auto group_count = static_cast(groups.size()); */ + /* std::vector alpha(group_count, 1); */ + /* std::vector beta(group_count, 0); */ + /* std::vector ns(group_count, n); */ + /* std::vector ks(group_count, k); */ + /* std::vector ld_src0(group_count, k); */ + /* std::vector ld_src1(group_count, n); */ + /* std::vector ld_dst(group_count, n); */ + + /* std::vector ms(group_count); */ + /* std::vector group_sizes(group_count); */ + /* std::vector src0; */ + /* std::vector src1; */ + /* std::vector dst; */ + + /* const auto src0_base_ptr = input.data_ptr(); */ + /* const auto src1_base_ptr = other.data_ptr(); */ + /* const auto dst_base_ptr = out.data_ptr(); */ + + /* size_t group_idx = 0; */ + /* for (const auto& group_kv : groups) { */ + /* int m = group_kv.first; */ + /* const auto& indices = group_kv.second; */ + + /* ms[group_idx] = m; */ + /* group_sizes[group_idx] = indices.size(); */ + /* ++group_idx; */ + + /* for (const auto offset_idx : indices) { */ + /* const auto offset = offsets[offset_idx]; */ + /* src0.push_back(src0_base_ptr + offset.src0_offset); */ + /* src1.push_back(src1_base_ptr + offset.src1_offset); */ + /* dst.push_back(dst_base_ptr + offset.dst_offset); */ + /* } */ + /* } */ + + /* auto src0_ptrs = const_cast(src0.data()); */ + /* auto src1_ptrs = const_cast(src1.data()); */ + /* auto dst_ptrs = dst.data(); */ + + /* #if AT_MKL_SEQUENTIAL() */ + /* // unlikely to happen - requires Torch to be built from source with + */ + /* // explicit flag denoting MKL sequential version */ + /* parallel_mkl_blas_gemm_batched(ms, ns, ks, alpha, src0_ptrs, ld_src0, + */ + /* src1_ptrs, ld_src1, beta, dst_ptrs, */ + /* ld_dst, group_count, group_sizes); */ + /* #else */ + /* mkl_blas_gemm_batched(ms.data(), ns.data(), ks.data(), alpha.data(), + */ + /* src0_ptrs, ld_src0.data(), src1_ptrs, + * ld_src1.data(), */ + /* beta.data(), dst_ptrs, ld_dst.data(), + * group_count, */ + /* group_sizes.data()); */ + /* #endif */ + /* }); */ } at::Tensor segment_matmul_kernel(const at::Tensor& input, From fb770cbe13de11f24a37c6861412582ac211071f Mon Sep 17 00:00:00 2001 From: rusty1s Date: Thu, 4 Apr 2024 19:46:47 +0000 Subject: [PATCH 08/46] update --- pyg_lib/csrc/partition/cpu/metis_kernel.cpp | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/pyg_lib/csrc/partition/cpu/metis_kernel.cpp b/pyg_lib/csrc/partition/cpu/metis_kernel.cpp index df516224f..7430574f2 100644 --- a/pyg_lib/csrc/partition/cpu/metis_kernel.cpp +++ b/pyg_lib/csrc/partition/cpu/metis_kernel.cpp @@ -1,7 +1,7 @@ #include #include -#include +/* #include */ namespace pyg { namespace partition { @@ -31,14 +31,16 @@ at::Tensor metis_kernel(const at::Tensor& rowptr, auto part = at::empty({nvtxs}, rowptr.options()); auto part_data = part.data_ptr(); - if (recursive) { - METIS_PartGraphRecursive(&nvtxs, &ncon, xadj, adjncy, vwgt, NULL, adjwgt, - &num_partitions, NULL, NULL, NULL, &objval, - 
part_data); - } else { - METIS_PartGraphKway(&nvtxs, &ncon, xadj, adjncy, vwgt, NULL, adjwgt, - &num_partitions, NULL, NULL, NULL, &objval, part_data); - } + /* if (recursive) { */ + /* METIS_PartGraphRecursive(&nvtxs, &ncon, xadj, adjncy, vwgt, NULL, adjwgt, + */ + /* &num_partitions, NULL, NULL, NULL, &objval, */ + /* part_data); */ + /* } else { */ + /* METIS_PartGraphKway(&nvtxs, &ncon, xadj, adjncy, vwgt, NULL, adjwgt, */ + /* &num_partitions, NULL, NULL, NULL, &objval, + * part_data); */ + /* } */ return part; } From 221ccb8281d9bd8a34b28bec4fa5dccf8e49be5b Mon Sep 17 00:00:00 2001 From: rusty1s Date: Thu, 4 Apr 2024 20:04:40 +0000 Subject: [PATCH 09/46] update --- setup.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 4ea063ff2..67993157b 100644 --- a/setup.py +++ b/setup.py @@ -19,7 +19,7 @@ class CMakeExtension(Extension): def __init__(self, name, sourcedir=''): Extension.__init__(self, name, sources=[]) - self.sourcedir = os.path.abspath(sourcedir) + self.sourcedir = osp.abspath(sourcedir) class CMakeBuild(build_ext): @@ -29,6 +29,7 @@ def check_env_flag(name: str, default: str = "") -> bool: return value in ["1", "ON", "YES", "TRUE", "Y"] def get_ext_filename(self, ext_name): + print("GET EXT FILENAME") # Remove Python ABI suffix: ext_filename = super().get_ext_filename(ext_name) ext_filename_parts = ext_filename.split('.') @@ -40,7 +41,7 @@ def build_extension(self, ext): import torch - extdir = os.path.abspath(osp.dirname(self.get_ext_fullpath(ext.name))) + extdir = osp.abspath(osp.dirname(self.get_ext_fullpath(ext.name))) self.build_type = "DEBUG" if self.debug else "RELEASE" if self.debug is None: if CMakeBuild.check_env_flag("DEBUG"): @@ -79,10 +80,13 @@ def build_extension(self, ext): build_args = [] + print("1111 ---------------") subprocess.check_call(['cmake', ext.sourcedir] + cmake_args, cwd=self.build_temp) + print("2222 ---------------") subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=self.build_temp) + print("3333 ---------------") def maybe_append_with_mkl(dependencies): From 5ea50352dd149570b7ce367ac46199730b085d61 Mon Sep 17 00:00:00 2001 From: rusty1s Date: Thu, 4 Apr 2024 21:38:00 +0000 Subject: [PATCH 10/46] update --- CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 565cd2597..b1834a847 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -121,4 +121,6 @@ set_target_properties(${PROJECT_NAME} PROPERTIES # Cmake creates *.dylib by default, but python expects *.so by default if (APPLE) set_property(TARGET ${PROJECT_NAME} PROPERTY SUFFIX .so) +elseif (MSVC) + set_property(TARGET ${PROJECT_NAME} PROPERTY SUFFIX .pyd) endif() From 255960a53640d0baf44234b68f9c56d53d79cbe9 Mon Sep 17 00:00:00 2001 From: rusty1s Date: Sat, 6 Apr 2024 08:39:09 +0000 Subject: [PATCH 11/46] update --- CMakeLists.txt | 2 +- README.md | 10 +++++----- setup.py | 36 ++++++++++++++++++++---------------- 3 files changed, 26 insertions(+), 22 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b1834a847..1a70643b9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -121,6 +121,6 @@ set_target_properties(${PROJECT_NAME} PROPERTIES # Cmake creates *.dylib by default, but python expects *.so by default if (APPLE) set_property(TARGET ${PROJECT_NAME} PROPERTY SUFFIX .so) -elseif (MSVC) +elseif (MSVC AND USE_PYTHON) set_property(TARGET ${PROJECT_NAME} PROPERTY SUFFIX .pyd) endif() diff --git a/README.md b/README.md index 5834e651b..81ec7fb0d 100644 --- 
a/README.md +++ b/README.md @@ -37,31 +37,31 @@ The following combinations are supported: | PyTorch 2.2 | `cpu` | `cu102` | `cu113` | `cu116` | `cu117` | `cu118` | `cu121` | |--------------|-------|---------|---------|---------|---------|---------|---------| | **Linux** | ✅ | | | | | ✅ | ✅ | -| **Windows** | | | | | | | | +| **Windows** | ✅ | | | | | ✅ | ✅ | | **macOS** | ✅ | | | | | | | | PyTorch 2.1 | `cpu` | `cu102` | `cu113` | `cu116` | `cu117` | `cu118` | `cu121` | |--------------|-------|---------|---------|---------|---------|---------|---------| | **Linux** | ✅ | | | | | ✅ | ✅ | -| **Windows** | | | | | | | | +| **Windows** | ✅ | | | | | ✅ | ✅ | | **macOS** | ✅ | | | | | | | | PyTorch 2.0 | `cpu` | `cu102` | `cu113` | `cu116` | `cu117` | `cu118` | `cu121` | |--------------|-------|---------|---------|---------|---------|---------|---------| | **Linux** | ✅ | | | | ✅ | ✅ | | -| **Windows** | | | | | | | | +| **Windows** | ✅ | | | | ✅ | ✅ | | | **macOS** | ✅ | | | | | | | | PyTorch 1.13 | `cpu` | `cu102` | `cu113` | `cu116` | `cu117` | `cu118` | `cu121` | |--------------|-------|---------|---------|---------|---------|---------|---------| | **Linux** | ✅ | | | ✅ | ✅ | | | -| **Windows** | | | | | | | | +| **Windows** | ✅ | | | ✅ | ✅ | | | | **macOS** | ✅ | | | | | | | | PyTorch 1.12 | `cpu` | `cu102` | `cu113` | `cu116` | `cu117` | `cu118` | `cu121` | |--------------|-------|---------|---------|---------|---------|---------| --------| | **Linux** | ✅ | ✅ | ✅ | ✅ | | | | -| **Windows** | | | | | | | | +| **Windows** | ✅ | ✅ | ✅ | ✅ | | | | | **macOS** | ✅ | | | | | | | ### Form nightly diff --git a/setup.py b/setup.py index 67993157b..b8782c473 100644 --- a/setup.py +++ b/setup.py @@ -6,6 +6,7 @@ import importlib import os import os.path as osp +import re import subprocess import warnings @@ -34,6 +35,7 @@ def get_ext_filename(self, ext_name): ext_filename = super().get_ext_filename(ext_name) ext_filename_parts = ext_filename.split('.') ext_filename_parts = ext_filename_parts[:-2] + ext_filename_parts[-1:] + print('.'.join(ext_filename_parts)) return '.'.join(ext_filename_parts) def build_extension(self, ext): @@ -89,26 +91,28 @@ def build_extension(self, ext): print("3333 ---------------") -def maybe_append_with_mkl(dependencies): - if CMakeBuild.check_env_flag('USE_MKL_BLAS'): - import re +def mkl_dependencies(): + if not CMakeBuild.check_env_flag('USE_MKL_BLAS'): + return [] - import torch - torch_config = torch.__config__.show() - with_mkl_blas = 'BLAS_INFO=mkl' in torch_config - if torch.backends.mkl.is_available() and with_mkl_blas: - product_version = '2023.1.0' - pattern = r'oneAPI Math Kernel Library Version [0-9]{4}\.[0-9]+' - match = re.search(pattern, torch_config) - if match: - product_version = match.group(0).split(' ')[-1] + import torch + + dependencies = [] + torch_config = torch.__config__.show() + with_mkl_blas = 'BLAS_INFO=mkl' in torch_config + if torch.backends.mkl.is_available() and with_mkl_blas: + product_version = '2023.1.0' + pattern = r'oneAPI Math Kernel Library Version [0-9]{4}\.[0-9]+' + match = re.search(pattern, torch_config) + if match: + product_version = match.group(0).split(' ')[-1] + dependencies.append(f'mkl-include=={product_version}') + dependencies.append(f'mkl-static=={product_version}') - dependencies.append(f'mkl-include=={product_version}') - dependencies.append(f'mkl-static=={product_version}') + return dependencies -install_requires = [] -maybe_append_with_mkl(install_requires) +install_requires = [] + mkl_dependencies() triton_requires 
= [ 'triton', From bd68fb3d621228f43e4b5f00916509e92d5171a9 Mon Sep 17 00:00:00 2001 From: rusty1s Date: Sat, 6 Apr 2024 09:04:27 +0000 Subject: [PATCH 12/46] update --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1a70643b9..15973f5b7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,6 +2,7 @@ cmake_minimum_required(VERSION 3.15) project(pyg) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_SHARED_LIBRARY_PREFIX "lib") set(PYG_VERSION 0.4.0) option(BUILD_TEST "Enable testing" OFF) From fa9af6650e45c44e8a1dd06cfd0e422b152b4386 Mon Sep 17 00:00:00 2001 From: rusty1s Date: Sat, 6 Apr 2024 09:12:39 +0000 Subject: [PATCH 13/46] update --- setup.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/setup.py b/setup.py index b8782c473..d6890f50c 100644 --- a/setup.py +++ b/setup.py @@ -90,6 +90,13 @@ def build_extension(self, ext): cwd=self.build_temp) print("3333 ---------------") + print('. --------------') + print(os.listdir('.')) + print('build --------------') + print(os.listdir(osp.join('.', 'build'))) + print('build lib.win --------------') + print(os.listdir(osp.join('.', 'build', 'lib.win-amd64-3.8'))) + def mkl_dependencies(): if not CMakeBuild.check_env_flag('USE_MKL_BLAS'): From af49e9bd8230dece989f470a511e962571e7ac58 Mon Sep 17 00:00:00 2001 From: rusty1s Date: Sat, 6 Apr 2024 09:18:53 +0000 Subject: [PATCH 14/46] update --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index d6890f50c..133965570 100644 --- a/setup.py +++ b/setup.py @@ -94,8 +94,8 @@ def build_extension(self, ext): print(os.listdir('.')) print('build --------------') print(os.listdir(osp.join('.', 'build'))) - print('build lib.win --------------') - print(os.listdir(osp.join('.', 'build', 'lib.win-amd64-3.8'))) + print('build temp.win --------------') + print(os.listdir(osp.join('.', 'build', 'temp.win-amd64-3.8'))) def mkl_dependencies(): From 0a6d0a9b684537ca9cd0d164ab40c8ad0f8ef7d7 Mon Sep 17 00:00:00 2001 From: rusty1s Date: Sat, 6 Apr 2024 09:29:24 +0000 Subject: [PATCH 15/46] update --- setup.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 133965570..d4532665b 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ # Environment flags to control different options # -# USE_MKL_BLAS=1 -# enables use of MKL BLAS (requires PyTorch to be built with MKL support) +# - USE_MKL_BLAS=1: +# Enables use of MKL BLAS (requires PyTorch to be built with MKL support) import importlib import os @@ -44,6 +44,8 @@ def build_extension(self, ext): import torch extdir = osp.abspath(osp.dirname(self.get_ext_fullpath(ext.name))) + print(ext) + print(extdir) self.build_type = "DEBUG" if self.debug else "RELEASE" if self.debug is None: if CMakeBuild.check_env_flag("DEBUG"): From bfe51a87c5c459954fb010ccbf0002b975d85a9b Mon Sep 17 00:00:00 2001 From: rusty1s Date: Sat, 6 Apr 2024 09:36:32 +0000 Subject: [PATCH 16/46] update --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index d4532665b..e1040f692 100644 --- a/setup.py +++ b/setup.py @@ -65,6 +65,7 @@ def build_extension(self, ext): '-DUSE_PYTHON=ON', f'-DWITH_CUDA={"ON" if WITH_CUDA else "OFF"}', f'-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={extdir}', + f'-DCMAKE_RUNTIME_OUTPUT_DIRECTORY={extdir}', f'-DCMAKE_BUILD_TYPE={self.build_type}', f'-DCMAKE_PREFIX_PATH={torch.utils.cmake_prefix_path}', ] From 2a112b3dd49a7362331852ee1babb23adfa2fdce Mon Sep 17 00:00:00 2001 
From: rusty1s Date: Sat, 6 Apr 2024 09:45:12 +0000 Subject: [PATCH 17/46] update --- .github/workflows/install.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/install.yml b/.github/workflows/install.yml index d0c5d4104..950739419 100644 --- a/.github/workflows/install.yml +++ b/.github/workflows/install.yml @@ -29,8 +29,9 @@ jobs: - name: Install package run: | - # source ./.github/workflows/cuda/${{ runner.os }}-env.sh ${{ matrix.cuda-version }} + source ./.github/workflows/cuda/${{ runner.os }}-env.sh ${{ matrix.cuda-version }} pip install --verbose -e . + shell: bash - name: Test imports run: | From 0012f5ac7fd0e29b9f3558ed5f96f0677aee74cd Mon Sep 17 00:00:00 2001 From: rusty1s Date: Sat, 6 Apr 2024 09:55:51 +0000 Subject: [PATCH 18/46] update --- .github/workflows/install.yml | 2 +- setup.py | 14 -------------- 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/.github/workflows/install.yml b/.github/workflows/install.yml index 950739419..ee189d652 100644 --- a/.github/workflows/install.yml +++ b/.github/workflows/install.yml @@ -13,7 +13,7 @@ jobs: strategy: matrix: - os: [windows-latest] + os: [windows-2019] cuda-version: ['cpu', 'cu121'] steps: diff --git a/setup.py b/setup.py index e1040f692..cbb4ecddd 100644 --- a/setup.py +++ b/setup.py @@ -30,12 +30,10 @@ def check_env_flag(name: str, default: str = "") -> bool: return value in ["1", "ON", "YES", "TRUE", "Y"] def get_ext_filename(self, ext_name): - print("GET EXT FILENAME") # Remove Python ABI suffix: ext_filename = super().get_ext_filename(ext_name) ext_filename_parts = ext_filename.split('.') ext_filename_parts = ext_filename_parts[:-2] + ext_filename_parts[-1:] - print('.'.join(ext_filename_parts)) return '.'.join(ext_filename_parts) def build_extension(self, ext): @@ -44,8 +42,6 @@ def build_extension(self, ext): import torch extdir = osp.abspath(osp.dirname(self.get_ext_fullpath(ext.name))) - print(ext) - print(extdir) self.build_type = "DEBUG" if self.debug else "RELEASE" if self.debug is None: if CMakeBuild.check_env_flag("DEBUG"): @@ -85,20 +81,10 @@ def build_extension(self, ext): build_args = [] - print("1111 ---------------") subprocess.check_call(['cmake', ext.sourcedir] + cmake_args, cwd=self.build_temp) - print("2222 ---------------") subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=self.build_temp) - print("3333 ---------------") - - print('. 
--------------') - print(os.listdir('.')) - print('build --------------') - print(os.listdir(osp.join('.', 'build'))) - print('build temp.win --------------') - print(os.listdir(osp.join('.', 'build', 'temp.win-amd64-3.8'))) def mkl_dependencies(): From 77642ced464f3eac1863c267e57baa9a2e351f73 Mon Sep 17 00:00:00 2001 From: rusty1s Date: Sat, 6 Apr 2024 10:08:31 +0000 Subject: [PATCH 19/46] update --- .github/workflows/cuda/Windows.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cuda/Windows.sh b/.github/workflows/cuda/Windows.sh index 8f275405d..17789911e 100644 --- a/.github/workflows/cuda/Windows.sh +++ b/.github/workflows/cuda/Windows.sh @@ -39,7 +39,7 @@ esac # Install NVIDIA drivers, see: # https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102 -curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "/tmp/gpu_driver_dlls.zip" +curl -k -L "https://ossci-windows.s3.us-east-1.amazonaws.com/builder/additional_dlls.zip" --output "/tmp/gpu_driver_dlls.zip" 7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32" curl -k -L "${CUDA_URL}/${CUDA_FILE}" --output "${CUDA_FILE}" From e6ba17d90727f2944dac41cfca5538b49ad4d054 Mon Sep 17 00:00:00 2001 From: rusty1s Date: Sat, 6 Apr 2024 11:28:36 +0000 Subject: [PATCH 20/46] update --- .github/workflows/cuda/Windows.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cuda/Windows.sh b/.github/workflows/cuda/Windows.sh index 17789911e..307b77827 100644 --- a/.github/workflows/cuda/Windows.sh +++ b/.github/workflows/cuda/Windows.sh @@ -39,8 +39,8 @@ esac # Install NVIDIA drivers, see: # https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102 -curl -k -L "https://ossci-windows.s3.us-east-1.amazonaws.com/builder/additional_dlls.zip" --output "/tmp/gpu_driver_dlls.zip" -7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32" +# curl -k -L "https://ossci-windows.s3.us-east-1.amazonaws.com/builder/additional_dlls.zip" --output "/tmp/gpu_driver_dlls.zip" +# 7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32" curl -k -L "${CUDA_URL}/${CUDA_FILE}" --output "${CUDA_FILE}" echo "" From 936f4cf2396cd49e54cb78dbb57020b324eb86e8 Mon Sep 17 00:00:00 2001 From: rusty1s Date: Sat, 6 Apr 2024 12:46:22 +0000 Subject: [PATCH 21/46] update --- .github/workflows/cuda/Windows.sh | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.github/workflows/cuda/Windows.sh b/.github/workflows/cuda/Windows.sh index 307b77827..131be2b29 100644 --- a/.github/workflows/cuda/Windows.sh +++ b/.github/workflows/cuda/Windows.sh @@ -48,3 +48,14 @@ echo "Installing from ${CUDA_FILE}..." PowerShell -Command "Start-Process -FilePath \"${CUDA_FILE}\" -ArgumentList \"-s nvcc_${CUDA_SHORT} cuobjdump_${CUDA_SHORT} nvprune_${CUDA_SHORT} cupti_${CUDA_SHORT} cublas_dev_${CUDA_SHORT} cudart_${CUDA_SHORT} cufft_dev_${CUDA_SHORT} curand_dev_${CUDA_SHORT} cusolver_dev_${CUDA_SHORT} cusparse_dev_${CUDA_SHORT} thrust_${CUDA_SHORT} npp_dev_${CUDA_SHORT} nvrtc_dev_${CUDA_SHORT} nvml_dev_${CUDA_SHORT}\" -Wait -NoNewWindow" echo "Done!" rm -f "${CUDA_FILE}" + +echo Installing NvToolsExt... 
+curl -k -L https://ossci-windows.s3.us-east-1.amazonaws.com/builder/NvToolsExt.7z --output "tmp/NvToolsExt.7z" +7z x tmp/NvToolsExt.7z -o"/tmp/NvToolsExt" +mkdir "/c/Program Files/NVIDIA Corporation/NvToolsExt\bin\x64" +mkdir "/c/Program Files/NVIDIA Corporation/NvToolsExt\include" +mkdir "/c/Program Files/NVIDIA Corporation/NvToolsExt\lib\x64" +xcopy /Y "/tmp/NvToolsExt/bin/x64/*.*" "/cProgram Files/NVIDIA Corporation/NvToolsExt/bin/x64" +xcopy /Y "/tmp/NvToolsExt/include/*.*" "/cProgram Files/NVIDIA Corporation/NvToolsExt/include" +xcopy /Y "/tmp/NvToolsExt/lib/x64/*.*" "/cProgram Files/NVIDIA Corporation/NvToolsExt/lib/x64" +export NVTOOLSEXT_PATH="/c/Program Files/NVIDIA Corporation/NvToolsExt/bin/x64" From 826c85fce2ed7ae06c6aa51987005a84bbd6cd2c Mon Sep 17 00:00:00 2001 From: rusty1s Date: Sat, 6 Apr 2024 13:01:00 +0000 Subject: [PATCH 22/46] update --- .github/workflows/cuda/Windows.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/cuda/Windows.sh b/.github/workflows/cuda/Windows.sh index 131be2b29..183d45856 100644 --- a/.github/workflows/cuda/Windows.sh +++ b/.github/workflows/cuda/Windows.sh @@ -50,11 +50,11 @@ echo "Done!" rm -f "${CUDA_FILE}" echo Installing NvToolsExt... -curl -k -L https://ossci-windows.s3.us-east-1.amazonaws.com/builder/NvToolsExt.7z --output "tmp/NvToolsExt.7z" +curl -k -L https://ossci-windows.s3.us-east-1.amazonaws.com/builder/NvToolsExt.7z --output "/tmp/NvToolsExt.7z" 7z x tmp/NvToolsExt.7z -o"/tmp/NvToolsExt" -mkdir "/c/Program Files/NVIDIA Corporation/NvToolsExt\bin\x64" -mkdir "/c/Program Files/NVIDIA Corporation/NvToolsExt\include" -mkdir "/c/Program Files/NVIDIA Corporation/NvToolsExt\lib\x64" +mkdir "/c/Program Files/NVIDIA Corporation/NvToolsExt/bin/x64" +mkdir "/c/Program Files/NVIDIA Corporation/NvToolsExt/include" +mkdir "/c/Program Files/NVIDIA Corporation/NvToolsExt/lib/x64" xcopy /Y "/tmp/NvToolsExt/bin/x64/*.*" "/cProgram Files/NVIDIA Corporation/NvToolsExt/bin/x64" xcopy /Y "/tmp/NvToolsExt/include/*.*" "/cProgram Files/NVIDIA Corporation/NvToolsExt/include" xcopy /Y "/tmp/NvToolsExt/lib/x64/*.*" "/cProgram Files/NVIDIA Corporation/NvToolsExt/lib/x64" From 195a8d4e6a9d34040bd82825b1100c446090d1f2 Mon Sep 17 00:00:00 2001 From: rusty1s Date: Sat, 6 Apr 2024 13:14:01 +0000 Subject: [PATCH 23/46] update --- .github/workflows/cuda/Windows.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/cuda/Windows.sh b/.github/workflows/cuda/Windows.sh index 183d45856..822a5e004 100644 --- a/.github/workflows/cuda/Windows.sh +++ b/.github/workflows/cuda/Windows.sh @@ -52,10 +52,10 @@ rm -f "${CUDA_FILE}" echo Installing NvToolsExt... 
curl -k -L https://ossci-windows.s3.us-east-1.amazonaws.com/builder/NvToolsExt.7z --output "/tmp/NvToolsExt.7z" 7z x tmp/NvToolsExt.7z -o"/tmp/NvToolsExt" -mkdir "/c/Program Files/NVIDIA Corporation/NvToolsExt/bin/x64" -mkdir "/c/Program Files/NVIDIA Corporation/NvToolsExt/include" -mkdir "/c/Program Files/NVIDIA Corporation/NvToolsExt/lib/x64" -xcopy /Y "/tmp/NvToolsExt/bin/x64/*.*" "/cProgram Files/NVIDIA Corporation/NvToolsExt/bin/x64" -xcopy /Y "/tmp/NvToolsExt/include/*.*" "/cProgram Files/NVIDIA Corporation/NvToolsExt/include" -xcopy /Y "/tmp/NvToolsExt/lib/x64/*.*" "/cProgram Files/NVIDIA Corporation/NvToolsExt/lib/x64" +mkdir -p "/c/Program Files/NVIDIA Corporation/NvToolsExt/bin/x64" +mkdir -p "/c/Program Files/NVIDIA Corporation/NvToolsExt/include" +mkdir -p "/c/Program Files/NVIDIA Corporation/NvToolsExt/lib/x64" +cp "/tmp/NvToolsExt/bin/x64/*.*" "/cProgram Files/NVIDIA Corporation/NvToolsExt/bin/x64" +cp "/tmp/NvToolsExt/include/*.*" "/cProgram Files/NVIDIA Corporation/NvToolsExt/include" +cp "/tmp/NvToolsExt/lib/x64/*.*" "/cProgram Files/NVIDIA Corporation/NvToolsExt/lib/x64" export NVTOOLSEXT_PATH="/c/Program Files/NVIDIA Corporation/NvToolsExt/bin/x64" From e9910ca55b767f83d46e9991f43539caa02e71b1 Mon Sep 17 00:00:00 2001 From: rusty1s Date: Sat, 6 Apr 2024 13:24:59 +0000 Subject: [PATCH 24/46] update --- .github/workflows/cuda/Windows.sh | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/.github/workflows/cuda/Windows.sh b/.github/workflows/cuda/Windows.sh index 822a5e004..8d13ca35f 100644 --- a/.github/workflows/cuda/Windows.sh +++ b/.github/workflows/cuda/Windows.sh @@ -55,7 +55,19 @@ curl -k -L https://ossci-windows.s3.us-east-1.amazonaws.com/builder/NvToolsExt.7 mkdir -p "/c/Program Files/NVIDIA Corporation/NvToolsExt/bin/x64" mkdir -p "/c/Program Files/NVIDIA Corporation/NvToolsExt/include" mkdir -p "/c/Program Files/NVIDIA Corporation/NvToolsExt/lib/x64" -cp "/tmp/NvToolsExt/bin/x64/*.*" "/cProgram Files/NVIDIA Corporation/NvToolsExt/bin/x64" -cp "/tmp/NvToolsExt/include/*.*" "/cProgram Files/NVIDIA Corporation/NvToolsExt/include" -cp "/tmp/NvToolsExt/lib/x64/*.*" "/cProgram Files/NVIDIA Corporation/NvToolsExt/lib/x64" +echo "-------" +ls "/tmp/NvToolsExt" +echo "-------" +ls "/tmp/NvToolsExt/bin" +echo "-------" +ls "/tmp/NvToolsExt/bin/x64" +echo "-------" +ls "/tmp/NvToolsExt/include" +echo "-------" +ls "/tmp/NvToolsExt/lib" +echo "-------" +ls "/tmp/NvToolsExt/lib/x64" +cp "/tmp/NvToolsExt/bin/x64/*" "/cProgram Files/NVIDIA Corporation/NvToolsExt/bin/x64" +cp "/tmp/NvToolsExt/include/*" "/cProgram Files/NVIDIA Corporation/NvToolsExt/include" +cp "/tmp/NvToolsExt/lib/x64/*" "/cProgram Files/NVIDIA Corporation/NvToolsExt/lib/x64" export NVTOOLSEXT_PATH="/c/Program Files/NVIDIA Corporation/NvToolsExt/bin/x64" From 505b1d2b76f4dd56381a2406ed2ef5959f4de299 Mon Sep 17 00:00:00 2001 From: rusty1s Date: Sat, 6 Apr 2024 13:29:56 +0000 Subject: [PATCH 25/46] update --- .github/workflows/cuda/Windows.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cuda/Windows.sh b/.github/workflows/cuda/Windows.sh index 8d13ca35f..59de34013 100644 --- a/.github/workflows/cuda/Windows.sh +++ b/.github/workflows/cuda/Windows.sh @@ -51,7 +51,7 @@ rm -f "${CUDA_FILE}" echo Installing NvToolsExt... 
curl -k -L https://ossci-windows.s3.us-east-1.amazonaws.com/builder/NvToolsExt.7z --output "/tmp/NvToolsExt.7z" -7z x tmp/NvToolsExt.7z -o"/tmp/NvToolsExt" +7z x "/tmp/NvToolsExt.7z" -o"/tmp/NvToolsExt" mkdir -p "/c/Program Files/NVIDIA Corporation/NvToolsExt/bin/x64" mkdir -p "/c/Program Files/NVIDIA Corporation/NvToolsExt/include" mkdir -p "/c/Program Files/NVIDIA Corporation/NvToolsExt/lib/x64" From be7397fc349ac6fd4de8f91309fceb0a41d0989e Mon Sep 17 00:00:00 2001 From: rusty1s Date: Sat, 6 Apr 2024 13:37:28 +0000 Subject: [PATCH 26/46] update --- .github/workflows/cuda/Windows.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/cuda/Windows.sh b/.github/workflows/cuda/Windows.sh index 59de34013..caef76086 100644 --- a/.github/workflows/cuda/Windows.sh +++ b/.github/workflows/cuda/Windows.sh @@ -67,7 +67,7 @@ echo "-------" ls "/tmp/NvToolsExt/lib" echo "-------" ls "/tmp/NvToolsExt/lib/x64" -cp "/tmp/NvToolsExt/bin/x64/*" "/cProgram Files/NVIDIA Corporation/NvToolsExt/bin/x64" -cp "/tmp/NvToolsExt/include/*" "/cProgram Files/NVIDIA Corporation/NvToolsExt/include" -cp "/tmp/NvToolsExt/lib/x64/*" "/cProgram Files/NVIDIA Corporation/NvToolsExt/lib/x64" +cp -r "/tmp/NvToolsExt/bin/x64/*" "/cProgram Files/NVIDIA Corporation/NvToolsExt/bin/x64" +cp -r "/tmp/NvToolsExt/include/*" "/cProgram Files/NVIDIA Corporation/NvToolsExt/include" +cp -r "/tmp/NvToolsExt/lib/x64/*" "/cProgram Files/NVIDIA Corporation/NvToolsExt/lib/x64" export NVTOOLSEXT_PATH="/c/Program Files/NVIDIA Corporation/NvToolsExt/bin/x64" From aa5650e3deedb1667f3c9a3f71e4baea18da7867 Mon Sep 17 00:00:00 2001 From: rusty1s Date: Sat, 6 Apr 2024 15:07:32 +0000 Subject: [PATCH 27/46] update --- .github/workflows/cuda/Windows.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/cuda/Windows.sh b/.github/workflows/cuda/Windows.sh index caef76086..4529e6bb1 100644 --- a/.github/workflows/cuda/Windows.sh +++ b/.github/workflows/cuda/Windows.sh @@ -67,7 +67,7 @@ echo "-------" ls "/tmp/NvToolsExt/lib" echo "-------" ls "/tmp/NvToolsExt/lib/x64" -cp -r "/tmp/NvToolsExt/bin/x64/*" "/cProgram Files/NVIDIA Corporation/NvToolsExt/bin/x64" -cp -r "/tmp/NvToolsExt/include/*" "/cProgram Files/NVIDIA Corporation/NvToolsExt/include" -cp -r "/tmp/NvToolsExt/lib/x64/*" "/cProgram Files/NVIDIA Corporation/NvToolsExt/lib/x64" +cp -r /tmp/NvToolsExt/bin/x64/* "/cProgram Files/NVIDIA Corporation/NvToolsExt/bin/x64" +cp -r /tmp/NvToolsExt/include/* "/cProgram Files/NVIDIA Corporation/NvToolsExt/include" +cp -r /tmp/NvToolsExt/lib/x64/* "/cProgram Files/NVIDIA Corporation/NvToolsExt/lib/x64" export NVTOOLSEXT_PATH="/c/Program Files/NVIDIA Corporation/NvToolsExt/bin/x64" From e6eaa1cb8a4d11fa4ccf8abe52b3eede5cba26e2 Mon Sep 17 00:00:00 2001 From: rusty1s Date: Sat, 6 Apr 2024 15:18:27 +0000 Subject: [PATCH 28/46] update --- .github/workflows/cuda/Windows.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/cuda/Windows.sh b/.github/workflows/cuda/Windows.sh index 4529e6bb1..04ac24c2b 100644 --- a/.github/workflows/cuda/Windows.sh +++ b/.github/workflows/cuda/Windows.sh @@ -67,7 +67,7 @@ echo "-------" ls "/tmp/NvToolsExt/lib" echo "-------" ls "/tmp/NvToolsExt/lib/x64" -cp -r /tmp/NvToolsExt/bin/x64/* "/cProgram Files/NVIDIA Corporation/NvToolsExt/bin/x64" -cp -r /tmp/NvToolsExt/include/* "/cProgram Files/NVIDIA Corporation/NvToolsExt/include" -cp -r /tmp/NvToolsExt/lib/x64/* "/cProgram Files/NVIDIA Corporation/NvToolsExt/lib/x64" +cp 
-r /tmp/NvToolsExt/bin/x64/* "/c/Program Files/NVIDIA Corporation/NvToolsExt/bin/x64" +cp -r /tmp/NvToolsExt/include/* "/c/Program Files/NVIDIA Corporation/NvToolsExt/include" +cp -r /tmp/NvToolsExt/lib/x64/* "/c/Program Files/NVIDIA Corporation/NvToolsExt/lib/x64" export NVTOOLSEXT_PATH="/c/Program Files/NVIDIA Corporation/NvToolsExt/bin/x64" From 64583daaadfde105a0852a09586a728e5212db8c Mon Sep 17 00:00:00 2001 From: rusty1s Date: Sat, 6 Apr 2024 17:28:17 +0000 Subject: [PATCH 29/46] update --- .github/workflows/cuda/Windows.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/cuda/Windows.sh b/.github/workflows/cuda/Windows.sh index 04ac24c2b..805b21923 100644 --- a/.github/workflows/cuda/Windows.sh +++ b/.github/workflows/cuda/Windows.sh @@ -71,3 +71,5 @@ cp -r /tmp/NvToolsExt/bin/x64/* "/c/Program Files/NVIDIA Corporation/NvToolsExt/ cp -r /tmp/NvToolsExt/include/* "/c/Program Files/NVIDIA Corporation/NvToolsExt/include" cp -r /tmp/NvToolsExt/lib/x64/* "/c/Program Files/NVIDIA Corporation/NvToolsExt/lib/x64" export NVTOOLSEXT_PATH="/c/Program Files/NVIDIA Corporation/NvToolsExt/bin/x64" + +export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6" From b95e7d06751aa99746d54eabf4a0176a12cb1d93 Mon Sep 17 00:00:00 2001 From: rusty1s Date: Sun, 7 Apr 2024 06:53:21 +0000 Subject: [PATCH 30/46] update --- .github/workflows/cuda/Windows.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/cuda/Windows.sh b/.github/workflows/cuda/Windows.sh index 805b21923..d2a1c50e2 100644 --- a/.github/workflows/cuda/Windows.sh +++ b/.github/workflows/cuda/Windows.sh @@ -72,4 +72,5 @@ cp -r /tmp/NvToolsExt/include/* "/c/Program Files/NVIDIA Corporation/NvToolsExt/ cp -r /tmp/NvToolsExt/lib/x64/* "/c/Program Files/NVIDIA Corporation/NvToolsExt/lib/x64" export NVTOOLSEXT_PATH="/c/Program Files/NVIDIA Corporation/NvToolsExt/bin/x64" +export CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6" export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6" From 332c064356ba3e9c75ce16d0248e7289860ab57f Mon Sep 17 00:00:00 2001 From: rusty1s Date: Sun, 7 Apr 2024 07:07:31 +0000 Subject: [PATCH 31/46] update --- .github/workflows/cuda/Windows.sh | 1 - setup.py | 4 ++++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cuda/Windows.sh b/.github/workflows/cuda/Windows.sh index d2a1c50e2..805b21923 100644 --- a/.github/workflows/cuda/Windows.sh +++ b/.github/workflows/cuda/Windows.sh @@ -72,5 +72,4 @@ cp -r /tmp/NvToolsExt/include/* "/c/Program Files/NVIDIA Corporation/NvToolsExt/ cp -r /tmp/NvToolsExt/lib/x64/* "/c/Program Files/NVIDIA Corporation/NvToolsExt/lib/x64" export NVTOOLSEXT_PATH="/c/Program Files/NVIDIA Corporation/NvToolsExt/bin/x64" -export CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6" export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6" diff --git a/setup.py b/setup.py index cbb4ecddd..b4c9820c9 100644 --- a/setup.py +++ b/setup.py @@ -66,6 +66,10 @@ def build_extension(self, ext): f'-DCMAKE_PREFIX_PATH={torch.utils.cmake_prefix_path}', ] + cuda_arch_list = os.getenv('TORCH_CUDA_ARCH_LIST') + if WITH_CUDA and cuda_arch_list is not None: + cmake_args.append(f'-DCMAKE_CUDA_ARCHITECTURES={cuda_arch_list}') + if CMakeBuild.check_env_flag('USE_MKL_BLAS'): include_dir = f"{sysconfig.get_path('data')}{os.sep}include" cmake_args.append(f'-DBLAS_INCLUDE_DIR={include_dir}') From 88a8a71cf66caa8904e06c419376dea3e0568510 Mon Sep 17 00:00:00 2001 From: rusty1s Date: Sun, 7 Apr 2024 07:20:06 +0000 Subject: [PATCH 32/46] update --- setup.py 
| 3 +++ 1 file changed, 3 insertions(+) diff --git a/setup.py b/setup.py index b4c9820c9..b9a78f029 100644 --- a/setup.py +++ b/setup.py @@ -69,6 +69,9 @@ def build_extension(self, ext): cuda_arch_list = os.getenv('TORCH_CUDA_ARCH_LIST') if WITH_CUDA and cuda_arch_list is not None: cmake_args.append(f'-DCMAKE_CUDA_ARCHITECTURES={cuda_arch_list}') + else: + cuda_arch_list = "3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6" + cmake_args.append(f'-DCMAKE_CUDA_ARCHITECTURES={cuda_arch_list}') if CMakeBuild.check_env_flag('USE_MKL_BLAS'): include_dir = f"{sysconfig.get_path('data')}{os.sep}include" From 69a3eee089d4b5eb85e2d21baf124ade5a7f4b5b Mon Sep 17 00:00:00 2001 From: rusty1s Date: Sun, 7 Apr 2024 07:33:11 +0000 Subject: [PATCH 33/46] update --- .github/workflows/cuda/Windows.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cuda/Windows.sh b/.github/workflows/cuda/Windows.sh index 805b21923..7cd46c282 100644 --- a/.github/workflows/cuda/Windows.sh +++ b/.github/workflows/cuda/Windows.sh @@ -72,4 +72,4 @@ cp -r /tmp/NvToolsExt/include/* "/c/Program Files/NVIDIA Corporation/NvToolsExt/ cp -r /tmp/NvToolsExt/lib/x64/* "/c/Program Files/NVIDIA Corporation/NvToolsExt/lib/x64" export NVTOOLSEXT_PATH="/c/Program Files/NVIDIA Corporation/NvToolsExt/bin/x64" -export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6" +export TORCH_CUDA_ARCH_LIST="35;50;60;70;75;80;86" From 5452828a771ea53855726b9dac19b2f076197fb7 Mon Sep 17 00:00:00 2001 From: rusty1s Date: Sun, 7 Apr 2024 07:44:00 +0000 Subject: [PATCH 34/46] update --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b9a78f029..558e0354d 100644 --- a/setup.py +++ b/setup.py @@ -70,7 +70,7 @@ def build_extension(self, ext): if WITH_CUDA and cuda_arch_list is not None: cmake_args.append(f'-DCMAKE_CUDA_ARCHITECTURES={cuda_arch_list}') else: - cuda_arch_list = "3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6" + cuda_arch_list = "35;50;60;70;75;80;86" cmake_args.append(f'-DCMAKE_CUDA_ARCHITECTURES={cuda_arch_list}') if CMakeBuild.check_env_flag('USE_MKL_BLAS'): From bec70967dac332a07c0cae30a5fffd307910b30b Mon Sep 17 00:00:00 2001 From: rusty1s Date: Sun, 7 Apr 2024 07:53:35 +0000 Subject: [PATCH 35/46] update --- setup.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 558e0354d..5ba6fdf78 100644 --- a/setup.py +++ b/setup.py @@ -67,10 +67,13 @@ def build_extension(self, ext): ] cuda_arch_list = os.getenv('TORCH_CUDA_ARCH_LIST') + print("ARCH LIST") + print("-----------") + print(cuda_arch_list) if WITH_CUDA and cuda_arch_list is not None: cmake_args.append(f'-DCMAKE_CUDA_ARCHITECTURES={cuda_arch_list}') else: - cuda_arch_list = "35;50;60;70;75;80;86" + cuda_arch_list = "50;60;70;75;80;86" cmake_args.append(f'-DCMAKE_CUDA_ARCHITECTURES={cuda_arch_list}') if CMakeBuild.check_env_flag('USE_MKL_BLAS'): From 4016fc315e53e41d0edfd3042fac0cb105104262 Mon Sep 17 00:00:00 2001 From: rusty1s Date: Sun, 7 Apr 2024 08:59:39 +0000 Subject: [PATCH 36/46] update --- CMakeLists.txt | 2 ++ setup.py | 11 ++++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 15973f5b7..469a348fe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -42,6 +42,8 @@ endif() if(WITH_CUDA) enable_language(CUDA) add_definitions(-DWITH_CUDA) + message("CUDA FLAGS HEHEHEHEHEHE") + message("${CMAKE_CUDA_FLAGS}") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr") if (NOT "$ENV{EXTERNAL_CUTLASS_INCLUDE_DIR}" 
STREQUAL "") diff --git a/setup.py b/setup.py index 5ba6fdf78..a5b8b5f6f 100644 --- a/setup.py +++ b/setup.py @@ -70,11 +70,12 @@ def build_extension(self, ext): print("ARCH LIST") print("-----------") print(cuda_arch_list) - if WITH_CUDA and cuda_arch_list is not None: - cmake_args.append(f'-DCMAKE_CUDA_ARCHITECTURES={cuda_arch_list}') - else: - cuda_arch_list = "50;60;70;75;80;86" - cmake_args.append(f'-DCMAKE_CUDA_ARCHITECTURES={cuda_arch_list}') + cmake_args.append('-DCUDA_ARCH_PTX=5.0+PTX') + # if WITH_CUDA and cuda_arch_list is not None: + # cmake_args.append(f'-DCMAKE_CUDA_ARCHITECTURES={cuda_arch_list}') + # else: + # cuda_arch_list = "50;60;70;75;80;86" + # cmake_args.append(f'-DCMAKE_CUDA_ARCHITECTURES={cuda_arch_list}') if CMakeBuild.check_env_flag('USE_MKL_BLAS'): include_dir = f"{sysconfig.get_path('data')}{os.sep}include" From c3009bfae054e36dbc7d2fdb1c54c260c8fcc72f Mon Sep 17 00:00:00 2001 From: rusty1s Date: Sun, 7 Apr 2024 09:38:28 +0000 Subject: [PATCH 37/46] update --- .github/workflows/cuda/Windows.sh | 3 ++- setup.py | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cuda/Windows.sh b/.github/workflows/cuda/Windows.sh index 7cd46c282..c670755a7 100644 --- a/.github/workflows/cuda/Windows.sh +++ b/.github/workflows/cuda/Windows.sh @@ -72,4 +72,5 @@ cp -r /tmp/NvToolsExt/include/* "/c/Program Files/NVIDIA Corporation/NvToolsExt/ cp -r /tmp/NvToolsExt/lib/x64/* "/c/Program Files/NVIDIA Corporation/NvToolsExt/lib/x64" export NVTOOLSEXT_PATH="/c/Program Files/NVIDIA Corporation/NvToolsExt/bin/x64" -export TORCH_CUDA_ARCH_LIST="35;50;60;70;75;80;86" +export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6" +export TORCH_CUDA_ARCH_LIST="35;50+PTX;6.0;7.0;7.5;8.0;8.6" diff --git a/setup.py b/setup.py index a5b8b5f6f..5b4cc0747 100644 --- a/setup.py +++ b/setup.py @@ -66,6 +66,8 @@ def build_extension(self, ext): f'-DCMAKE_PREFIX_PATH={torch.utils.cmake_prefix_path}', ] + os.environ['TORCH_CUDA_ARCH_LIST'] = '8.0 8.6 9.0' + cuda_arch_list = os.getenv('TORCH_CUDA_ARCH_LIST') print("ARCH LIST") print("-----------") From e4cee393353170563f7fe85e61c3a6206332730b Mon Sep 17 00:00:00 2001 From: rusty1s Date: Mon, 8 Apr 2024 05:51:51 +0000 Subject: [PATCH 38/46] update --- .github/workflows/cuda/Windows.sh | 30 +++++++----------------------- .github/workflows/install.yml | 2 ++ CMakeLists.txt | 2 -- setup.py | 12 ++++++------ 4 files changed, 15 insertions(+), 31 deletions(-) diff --git a/.github/workflows/cuda/Windows.sh b/.github/workflows/cuda/Windows.sh index c670755a7..582541da0 100644 --- a/.github/workflows/cuda/Windows.sh +++ b/.github/workflows/cuda/Windows.sh @@ -37,11 +37,6 @@ case ${1} in ;; esac -# Install NVIDIA drivers, see: -# https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102 -# curl -k -L "https://ossci-windows.s3.us-east-1.amazonaws.com/builder/additional_dlls.zip" --output "/tmp/gpu_driver_dlls.zip" -# 7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32" - curl -k -L "${CUDA_URL}/${CUDA_FILE}" --output "${CUDA_FILE}" echo "" echo "Installing from ${CUDA_FILE}..." @@ -49,28 +44,17 @@ PowerShell -Command "Start-Process -FilePath \"${CUDA_FILE}\" -ArgumentList \"-s echo "Done!" rm -f "${CUDA_FILE}" +# echo Installing NVIDIA drivers... 
+# https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102 +# curl -k -L "https://ossci-windows.s3.us-east-1.amazonaws.com/builder/additional_dlls.zip" --output "/tmp/gpu_driver_dlls.zip" +# 7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32" + echo Installing NvToolsExt... -curl -k -L https://ossci-windows.s3.us-east-1.amazonaws.com/builder/NvToolsExt.7z --output "/tmp/NvToolsExt.7z" -7z x "/tmp/NvToolsExt.7z" -o"/tmp/NvToolsExt" +curl -k -L https://ossci-windows.s3.us-east-1.amazonaws.com/builder/NvToolsExt.7z --output /tmp/NvToolsExt.7z +7z x /tmp/NvToolsExt.7z -o"/tmp/NvToolsExt" mkdir -p "/c/Program Files/NVIDIA Corporation/NvToolsExt/bin/x64" mkdir -p "/c/Program Files/NVIDIA Corporation/NvToolsExt/include" mkdir -p "/c/Program Files/NVIDIA Corporation/NvToolsExt/lib/x64" -echo "-------" -ls "/tmp/NvToolsExt" -echo "-------" -ls "/tmp/NvToolsExt/bin" -echo "-------" -ls "/tmp/NvToolsExt/bin/x64" -echo "-------" -ls "/tmp/NvToolsExt/include" -echo "-------" -ls "/tmp/NvToolsExt/lib" -echo "-------" -ls "/tmp/NvToolsExt/lib/x64" cp -r /tmp/NvToolsExt/bin/x64/* "/c/Program Files/NVIDIA Corporation/NvToolsExt/bin/x64" cp -r /tmp/NvToolsExt/include/* "/c/Program Files/NVIDIA Corporation/NvToolsExt/include" cp -r /tmp/NvToolsExt/lib/x64/* "/c/Program Files/NVIDIA Corporation/NvToolsExt/lib/x64" -export NVTOOLSEXT_PATH="/c/Program Files/NVIDIA Corporation/NvToolsExt/bin/x64" - -export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6" -export TORCH_CUDA_ARCH_LIST="35;50+PTX;6.0;7.0;7.5;8.0;8.6" diff --git a/.github/workflows/install.yml b/.github/workflows/install.yml index ee189d652..6ce265025 100644 --- a/.github/workflows/install.yml +++ b/.github/workflows/install.yml @@ -32,6 +32,8 @@ jobs: source ./.github/workflows/cuda/${{ runner.os }}-env.sh ${{ matrix.cuda-version }} pip install --verbose -e . 
shell: bash + env: + TORCH_CUDA_ARCH_LIST: "3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6" - name: Test imports run: | diff --git a/CMakeLists.txt b/CMakeLists.txt index 469a348fe..15973f5b7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -42,8 +42,6 @@ endif() if(WITH_CUDA) enable_language(CUDA) add_definitions(-DWITH_CUDA) - message("CUDA FLAGS HEHEHEHEHEHE") - message("${CMAKE_CUDA_FLAGS}") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr") if (NOT "$ENV{EXTERNAL_CUTLASS_INCLUDE_DIR}" STREQUAL "") diff --git a/setup.py b/setup.py index 5b4cc0747..7020c9b14 100644 --- a/setup.py +++ b/setup.py @@ -66,13 +66,13 @@ def build_extension(self, ext): f'-DCMAKE_PREFIX_PATH={torch.utils.cmake_prefix_path}', ] - os.environ['TORCH_CUDA_ARCH_LIST'] = '8.0 8.6 9.0' + # os.environ['TORCH_CUDA_ARCH_LIST'] = '8.0 8.6 9.0' - cuda_arch_list = os.getenv('TORCH_CUDA_ARCH_LIST') - print("ARCH LIST") - print("-----------") - print(cuda_arch_list) - cmake_args.append('-DCUDA_ARCH_PTX=5.0+PTX') + # cuda_arch_list = os.getenv('TORCH_CUDA_ARCH_LIST') + # print("ARCH LIST") + # print("-----------") + # print(cuda_arch_list) + # cmake_args.append('-DCUDA_ARCH_PTX=5.0+PTX') # if WITH_CUDA and cuda_arch_list is not None: # cmake_args.append(f'-DCMAKE_CUDA_ARCHITECTURES={cuda_arch_list}') # else: From 9db6ea5a19d19f79976d3c12768db13b8e1a990d Mon Sep 17 00:00:00 2001 From: rusty1s Date: Mon, 8 Apr 2024 06:02:42 +0000 Subject: [PATCH 39/46] update --- .github/workflows/building.yml | 2 + .github/workflows/install.yml | 2 +- .github/workflows/nightly.yml | 2 + pyg_lib/csrc/ops/cpu/matmul_kernel.cpp | 307 ++++++++++++------------- setup.py | 13 -- 5 files changed, 152 insertions(+), 174 deletions(-) diff --git a/.github/workflows/building.yml b/.github/workflows/building.yml index d0398ac7f..1c19e9f21 100644 --- a/.github/workflows/building.yml +++ b/.github/workflows/building.yml @@ -112,6 +112,8 @@ jobs: source ./.github/workflows/cuda/${{ runner.os }}-env.sh ${{ matrix.cuda-version }} python setup.py bdist_wheel --dist-dir=dist shell: bash + env: + TORCH_CUDA_ARCH_LIST: "5.0+PTX;6.0;7.0;7.5;8.0;8.6" - name: Test wheel run: | diff --git a/.github/workflows/install.yml b/.github/workflows/install.yml index 6ce265025..5200ad263 100644 --- a/.github/workflows/install.yml +++ b/.github/workflows/install.yml @@ -33,7 +33,7 @@ jobs: pip install --verbose -e . 
shell: bash env: - TORCH_CUDA_ARCH_LIST: "3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6" + TORCH_CUDA_ARCH_LIST: "5.0+PTX;6.0;7.0;7.5;8.0;8.6" - name: Test imports run: | diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 968350427..8aeeba1b1 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -118,6 +118,8 @@ jobs: source ./.github/workflows/cuda/${{ runner.os }}-env.sh ${{ matrix.cuda-version }} python setup.py bdist_wheel --dist-dir=dist shell: bash + env: + TORCH_CUDA_ARCH_LIST: "5.0+PTX;6.0;7.0;7.5;8.0;8.6" - name: Test wheel run: | diff --git a/pyg_lib/csrc/ops/cpu/matmul_kernel.cpp b/pyg_lib/csrc/ops/cpu/matmul_kernel.cpp index 73f8631d3..d75f7a2b1 100644 --- a/pyg_lib/csrc/ops/cpu/matmul_kernel.cpp +++ b/pyg_lib/csrc/ops/cpu/matmul_kernel.cpp @@ -86,8 +86,7 @@ void mkl_blas_gemm_batched(const int* m_array, const int* ldc_array, const int group_count, const int* group_size) { - TORCH_INTERNAL_ASSERT(false, - "mkl_blas_gemm_batched: MKL BLAS is not supported"); + TORCH_INTERNAL_ASSERT(false, "MKL BLAS is not supported"); } void mkl_blas_gemm_batched(const int* m_array, @@ -103,8 +102,7 @@ void mkl_blas_gemm_batched(const int* m_array, const int* ldc_array, const int group_count, const int* group_size) { - TORCH_INTERNAL_ASSERT(false, - "mkl_blas_gemm_batched: MKL BLAS is not supported"); + TORCH_INTERNAL_ASSERT(false, "MKL BLAS is not supported"); } #endif @@ -206,82 +204,76 @@ void grouped_matmul_out_kernel_mkl_impl(const std::vector input, const std::vector other, std::vector out) { // matrix_params - /* using matrix_params = std::tuple; */ - /* phmap::flat_hash_map> groups; */ - /* for (size_t i = 0; i < input.size(); ++i) { */ - /* const matrix_params mp = {input[i].size(0), other[i].size(-1), */ - /* input[i].size(-1)}; */ - /* if (groups.count(mp)) { */ - /* groups[mp].push_back(i); */ - /* } else { */ - /* groups.insert({mp, {i}}); */ - /* } */ - /* } */ - - /* AT_DISPATCH_FLOATING_TYPES( */ - /* input.front().scalar_type(), "grouped_matmul_out_kernel_mkl_impl", [&] - * { */ - /* const auto group_count = static_cast(groups.size()); */ - /* std::vector alpha(group_count, 1); */ - /* std::vector beta(group_count, 0); */ - - /* std::vector ms(group_count); */ - /* std::vector ns(group_count); */ - /* std::vector ks(group_count); */ - /* std::vector ld_src0(group_count); */ - /* std::vector ld_src1(group_count); */ - /* std::vector ld_dst(group_count); */ - /* std::vector group_sizes(group_count); */ - /* std::vector src0; */ - /* std::vector src1; */ - /* std::vector dst; */ - - /* size_t group_idx = 0; */ - /* for (const auto& group_kv : groups) { */ - /* int m; */ - /* int n; */ - /* int k; */ - /* std::tie(m, n, k) = group_kv.first; */ - /* const auto& indices = group_kv.second; */ - - /* ms[group_idx] = m; */ - /* ns[group_idx] = n; */ - /* ks[group_idx] = k; */ - /* ld_src0[group_idx] = k; */ - /* ld_src1[group_idx] = n; */ - /* ld_dst[group_idx] = n; */ - /* group_sizes[group_idx] = indices.size(); */ - /* ++group_idx; */ - - /* for (const auto tensor_idx : indices) { */ - /* src0.push_back(input[tensor_idx].data_ptr()); */ - /* src1.push_back(other[tensor_idx].data_ptr()); */ - /* dst.push_back(out[tensor_idx].data_ptr()); */ - /* } */ - /* } */ - - /* auto src0_ptrs = const_cast(src0.data()); */ - /* auto src1_ptrs = const_cast(src1.data()); */ - /* auto dst_ptrs = dst.data(); */ - - /* #if AT_MKL_SEQUENTIAL() */ - /* // unlikely to happen - requires Torch to be built from source with - */ - /* // explicit flag denoting MKL 
sequential version */ - /* parallel_mkl_blas_gemm_batched(ms, ns, ks, alpha, src0_ptrs, ld_src0, - */ - /* src1_ptrs, ld_src1, beta, dst_ptrs, */ - /* ld_dst, group_count, group_sizes); */ - /* #else */ - /* mkl_blas_gemm_batched(ms.data(), ns.data(), ks.data(), alpha.data(), - */ - /* src0_ptrs, ld_src0.data(), src1_ptrs, - * ld_src1.data(), */ - /* beta.data(), dst_ptrs, ld_dst.data(), - * group_count, */ - /* group_sizes.data()); */ - /* #endif */ - /* }); */ + using matrix_params = std::tuple; + phmap::flat_hash_map> groups; + for (size_t i = 0; i < input.size(); ++i) { + const matrix_params mp = {input[i].size(0), other[i].size(-1), + input[i].size(-1)}; + if (groups.count(mp)) { + groups[mp].push_back(i); + } else { + groups.insert({mp, {i}}); + } + } + + AT_DISPATCH_FLOATING_TYPES( + input.front().scalar_type(), "grouped_matmul_out_kernel_mkl_impl", [&] { + const auto group_count = static_cast(groups.size()); + std::vector alpha(group_count, 1); + std::vector beta(group_count, 0); + + std::vector ms(group_count); + std::vector ns(group_count); + std::vector ks(group_count); + std::vector ld_src0(group_count); + std::vector ld_src1(group_count); + std::vector ld_dst(group_count); + std::vector group_sizes(group_count); + std::vector src0; + std::vector src1; + std::vector dst; + + size_t group_idx = 0; + for (const auto& group_kv : groups) { + int m; + int n; + int k; + std::tie(m, n, k) = group_kv.first; + const auto& indices = group_kv.second; + + ms[group_idx] = m; + ns[group_idx] = n; + ks[group_idx] = k; + ld_src0[group_idx] = k; + ld_src1[group_idx] = n; + ld_dst[group_idx] = n; + group_sizes[group_idx] = indices.size(); + ++group_idx; + + for (const auto tensor_idx : indices) { + src0.push_back(input[tensor_idx].data_ptr()); + src1.push_back(other[tensor_idx].data_ptr()); + dst.push_back(out[tensor_idx].data_ptr()); + } + } + + auto src0_ptrs = const_cast(src0.data()); + auto src1_ptrs = const_cast(src1.data()); + auto dst_ptrs = dst.data(); + +#if AT_MKL_SEQUENTIAL() + // unlikely to happen - requires Torch to be built from source with + // explicit flag denoting MKL sequential version + parallel_mkl_blas_gemm_batched(ms, ns, ks, alpha, src0_ptrs, ld_src0, + src1_ptrs, ld_src1, beta, dst_ptrs, + ld_dst, group_count, group_sizes); +#else + mkl_blas_gemm_batched(ms.data(), ns.data(), ks.data(), alpha.data(), + src0_ptrs, ld_src0.data(), src1_ptrs, ld_src1.data(), + beta.data(), dst_ptrs, ld_dst.data(), group_count, + group_sizes.data()); +#endif + }); } std::vector grouped_matmul_kernel(const at::TensorList input, @@ -334,86 +326,81 @@ void segment_matmul_out_kernel_mkl_impl(const at::Tensor& input, const at::Tensor& other, at::Tensor& out, const at::IntArrayRef& sizes) { - /* const int n = other.size(-1); */ - /* const int k = input.size(-1); */ - /* const int nk = n * k; */ - /* phmap::flat_hash_map> groups; */ - /* std::vector offsets = {{0, 0, 0}}; */ - /* offsets.reserve(sizes.size() + 1); */ - /* for (size_t i = 0; i < sizes.size(); ++i) { */ - /* const int m = sizes[i]; */ - /* if (groups.count(m)) { */ - /* groups[m].push_back(i); */ - /* } else { */ - /* groups.insert({m, {i}}); */ - /* } */ - - /* offset_params offset = {m * k, nk, m * n}; */ - /* offset += offsets.back(); */ - /* offsets.push_back(offset); */ - /* } */ - /* offsets.pop_back(); */ - - /* AT_DISPATCH_FLOATING_TYPES( */ - /* input.scalar_type(), "segment_matmul_out_kernel_mkl_impl", [&] { */ - /* const auto group_count = static_cast(groups.size()); */ - /* std::vector alpha(group_count, 1); */ - /* 
std::vector beta(group_count, 0); */ - /* std::vector ns(group_count, n); */ - /* std::vector ks(group_count, k); */ - /* std::vector ld_src0(group_count, k); */ - /* std::vector ld_src1(group_count, n); */ - /* std::vector ld_dst(group_count, n); */ - - /* std::vector ms(group_count); */ - /* std::vector group_sizes(group_count); */ - /* std::vector src0; */ - /* std::vector src1; */ - /* std::vector dst; */ - - /* const auto src0_base_ptr = input.data_ptr(); */ - /* const auto src1_base_ptr = other.data_ptr(); */ - /* const auto dst_base_ptr = out.data_ptr(); */ - - /* size_t group_idx = 0; */ - /* for (const auto& group_kv : groups) { */ - /* int m = group_kv.first; */ - /* const auto& indices = group_kv.second; */ - - /* ms[group_idx] = m; */ - /* group_sizes[group_idx] = indices.size(); */ - /* ++group_idx; */ - - /* for (const auto offset_idx : indices) { */ - /* const auto offset = offsets[offset_idx]; */ - /* src0.push_back(src0_base_ptr + offset.src0_offset); */ - /* src1.push_back(src1_base_ptr + offset.src1_offset); */ - /* dst.push_back(dst_base_ptr + offset.dst_offset); */ - /* } */ - /* } */ - - /* auto src0_ptrs = const_cast(src0.data()); */ - /* auto src1_ptrs = const_cast(src1.data()); */ - /* auto dst_ptrs = dst.data(); */ - - /* #if AT_MKL_SEQUENTIAL() */ - /* // unlikely to happen - requires Torch to be built from source with - */ - /* // explicit flag denoting MKL sequential version */ - /* parallel_mkl_blas_gemm_batched(ms, ns, ks, alpha, src0_ptrs, ld_src0, - */ - /* src1_ptrs, ld_src1, beta, dst_ptrs, */ - /* ld_dst, group_count, group_sizes); */ - /* #else */ - /* mkl_blas_gemm_batched(ms.data(), ns.data(), ks.data(), alpha.data(), - */ - /* src0_ptrs, ld_src0.data(), src1_ptrs, - * ld_src1.data(), */ - /* beta.data(), dst_ptrs, ld_dst.data(), - * group_count, */ - /* group_sizes.data()); */ - /* #endif */ - /* }); */ + const int n = other.size(-1); + const int k = input.size(-1); + const int nk = n * k; + phmap::flat_hash_map> groups; + std::vector offsets = {{0, 0, 0}}; + offsets.reserve(sizes.size() + 1); + for (size_t i = 0; i < sizes.size(); ++i) { + const int m = sizes[i]; + if (groups.count(m)) { + groups[m].push_back(i); + } else { + groups.insert({m, {i}}); + } + + offset_params offset = {m * k, nk, m * n}; + offset += offsets.back(); + offsets.push_back(offset); + } + offsets.pop_back(); + + AT_DISPATCH_FLOATING_TYPES( + input.scalar_type(), "segment_matmul_out_kernel_mkl_impl", [&] { + const auto group_count = static_cast(groups.size()); + std::vector alpha(group_count, 1); + std::vector beta(group_count, 0); + std::vector ns(group_count, n); + std::vector ks(group_count, k); + std::vector ld_src0(group_count, k); + std::vector ld_src1(group_count, n); + std::vector ld_dst(group_count, n); + + std::vector ms(group_count); + std::vector group_sizes(group_count); + std::vector src0; + std::vector src1; + std::vector dst; + + const auto src0_base_ptr = input.data_ptr(); + const auto src1_base_ptr = other.data_ptr(); + const auto dst_base_ptr = out.data_ptr(); + + size_t group_idx = 0; + for (const auto& group_kv : groups) { + int m = group_kv.first; + const auto& indices = group_kv.second; + + ms[group_idx] = m; + group_sizes[group_idx] = indices.size(); + ++group_idx; + + for (const auto offset_idx : indices) { + const auto offset = offsets[offset_idx]; + src0.push_back(src0_base_ptr + offset.src0_offset); + src1.push_back(src1_base_ptr + offset.src1_offset); + dst.push_back(dst_base_ptr + offset.dst_offset); + } + } + + auto src0_ptrs = 
const_cast(src0.data()); + auto src1_ptrs = const_cast(src1.data()); + auto dst_ptrs = dst.data(); + +#if AT_MKL_SEQUENTIAL() + // unlikely to happen - requires Torch to be built from source with + // explicit flag denoting MKL sequential version + parallel_mkl_blas_gemm_batched(ms, ns, ks, alpha, src0_ptrs, ld_src0, + src1_ptrs, ld_src1, beta, dst_ptrs, + ld_dst, group_count, group_sizes); +#else + mkl_blas_gemm_batched(ms.data(), ns.data(), ks.data(), alpha.data(), + src0_ptrs, ld_src0.data(), src1_ptrs, ld_src1.data(), + beta.data(), dst_ptrs, ld_dst.data(), group_count, + group_sizes.data()); +#endif + }); } at::Tensor segment_matmul_kernel(const at::Tensor& input, diff --git a/setup.py b/setup.py index 7020c9b14..cbb4ecddd 100644 --- a/setup.py +++ b/setup.py @@ -66,19 +66,6 @@ def build_extension(self, ext): f'-DCMAKE_PREFIX_PATH={torch.utils.cmake_prefix_path}', ] - # os.environ['TORCH_CUDA_ARCH_LIST'] = '8.0 8.6 9.0' - - # cuda_arch_list = os.getenv('TORCH_CUDA_ARCH_LIST') - # print("ARCH LIST") - # print("-----------") - # print(cuda_arch_list) - # cmake_args.append('-DCUDA_ARCH_PTX=5.0+PTX') - # if WITH_CUDA and cuda_arch_list is not None: - # cmake_args.append(f'-DCMAKE_CUDA_ARCHITECTURES={cuda_arch_list}') - # else: - # cuda_arch_list = "50;60;70;75;80;86" - # cmake_args.append(f'-DCMAKE_CUDA_ARCHITECTURES={cuda_arch_list}') - if CMakeBuild.check_env_flag('USE_MKL_BLAS'): include_dir = f"{sysconfig.get_path('data')}{os.sep}include" cmake_args.append(f'-DBLAS_INCLUDE_DIR={include_dir}') From b777858b4cc0caf6f9a76dbaa33a6b2b0d19255d Mon Sep 17 00:00:00 2001 From: rusty1s Date: Mon, 8 Apr 2024 06:43:07 +0000 Subject: [PATCH 40/46] update --- pyg_lib/csrc/ops/cpu/matmul_kernel.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pyg_lib/csrc/ops/cpu/matmul_kernel.cpp b/pyg_lib/csrc/ops/cpu/matmul_kernel.cpp index d75f7a2b1..20ee14f44 100644 --- a/pyg_lib/csrc/ops/cpu/matmul_kernel.cpp +++ b/pyg_lib/csrc/ops/cpu/matmul_kernel.cpp @@ -261,6 +261,7 @@ void grouped_matmul_out_kernel_mkl_impl(const std::vector input, auto src1_ptrs = const_cast(src1.data()); auto dst_ptrs = dst.data(); +#if WITH_MKL_BLAS() #if AT_MKL_SEQUENTIAL() // unlikely to happen - requires Torch to be built from source with // explicit flag denoting MKL sequential version @@ -272,6 +273,7 @@ void grouped_matmul_out_kernel_mkl_impl(const std::vector input, src0_ptrs, ld_src0.data(), src1_ptrs, ld_src1.data(), beta.data(), dst_ptrs, ld_dst.data(), group_count, group_sizes.data()); +#endif #endif }); } @@ -388,6 +390,7 @@ void segment_matmul_out_kernel_mkl_impl(const at::Tensor& input, auto src1_ptrs = const_cast(src1.data()); auto dst_ptrs = dst.data(); +#if WITH_MKL_BLAS() #if AT_MKL_SEQUENTIAL() // unlikely to happen - requires Torch to be built from source with // explicit flag denoting MKL sequential version @@ -399,6 +402,7 @@ void segment_matmul_out_kernel_mkl_impl(const at::Tensor& input, src0_ptrs, ld_src0.data(), src1_ptrs, ld_src1.data(), beta.data(), dst_ptrs, ld_dst.data(), group_count, group_sizes.data()); +#endif #endif }); } From c35f37baebaddb70751f3475ba9c052443d28266 Mon Sep 17 00:00:00 2001 From: rusty1s Date: Mon, 8 Apr 2024 06:52:32 +0000 Subject: [PATCH 41/46] update --- pyg_lib/csrc/ops/cpu/matmul_kernel.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pyg_lib/csrc/ops/cpu/matmul_kernel.cpp b/pyg_lib/csrc/ops/cpu/matmul_kernel.cpp index 20ee14f44..21b41d843 100644 --- a/pyg_lib/csrc/ops/cpu/matmul_kernel.cpp +++ b/pyg_lib/csrc/ops/cpu/matmul_kernel.cpp @@ 
-203,6 +203,7 @@ void grouped_matmul_out_kernel_at_impl(const std::vector input, void grouped_matmul_out_kernel_mkl_impl(const std::vector input, const std::vector other, std::vector out) { +#if WITH_MKL_BLAS() // matrix_params using matrix_params = std::tuple; phmap::flat_hash_map> groups; @@ -261,7 +262,6 @@ void grouped_matmul_out_kernel_mkl_impl(const std::vector input, auto src1_ptrs = const_cast(src1.data()); auto dst_ptrs = dst.data(); -#if WITH_MKL_BLAS() #if AT_MKL_SEQUENTIAL() // unlikely to happen - requires Torch to be built from source with // explicit flag denoting MKL sequential version @@ -273,9 +273,9 @@ void grouped_matmul_out_kernel_mkl_impl(const std::vector input, src0_ptrs, ld_src0.data(), src1_ptrs, ld_src1.data(), beta.data(), dst_ptrs, ld_dst.data(), group_count, group_sizes.data()); -#endif #endif }); +#endif } std::vector grouped_matmul_kernel(const at::TensorList input, @@ -328,6 +328,7 @@ void segment_matmul_out_kernel_mkl_impl(const at::Tensor& input, const at::Tensor& other, at::Tensor& out, const at::IntArrayRef& sizes) { +#if WITH_MKL_BLAS() const int n = other.size(-1); const int k = input.size(-1); const int nk = n * k; @@ -390,7 +391,6 @@ void segment_matmul_out_kernel_mkl_impl(const at::Tensor& input, auto src1_ptrs = const_cast(src1.data()); auto dst_ptrs = dst.data(); -#if WITH_MKL_BLAS() #if AT_MKL_SEQUENTIAL() // unlikely to happen - requires Torch to be built from source with // explicit flag denoting MKL sequential version @@ -402,9 +402,9 @@ void segment_matmul_out_kernel_mkl_impl(const at::Tensor& input, src0_ptrs, ld_src0.data(), src1_ptrs, ld_src1.data(), beta.data(), dst_ptrs, ld_dst.data(), group_count, group_sizes.data()); -#endif #endif }); +#endif } at::Tensor segment_matmul_kernel(const at::Tensor& input, From 7ffb312b288edde99b733c410c9d94c8eb2489ba Mon Sep 17 00:00:00 2001 From: rusty1s Date: Mon, 8 Apr 2024 07:55:16 +0000 Subject: [PATCH 42/46] update --- CMakeLists.txt | 20 ++++++++++---------- pyg_lib/csrc/partition/cpu/metis_kernel.cpp | 20 +++++++++----------- 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 15973f5b7..1e971f167 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -73,16 +73,16 @@ else() target_include_directories(${PROJECT_NAME} PRIVATE ${PHMAP_DIR}) endif() -# set(METIS_DIR third_party/METIS) -# target_include_directories(${PROJECT_NAME} PRIVATE ${METIS_DIR}/include) -# set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DIDXTYPEWIDTH=64 -DREALTYPEWIDTH=32") -# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DIDXTYPEWIDTH=64 -DREALTYPEWIDTH=32") -# set(GKLIB_PATH "${METIS_DIR}/GKlib") -# include(${GKLIB_PATH}/GKlibSystem.cmake) -# include_directories(${GKLIB_PATH}) -# include_directories("${METIS_DIR}/include") -# add_subdirectory("${METIS_DIR}/libmetis") -# target_link_libraries(${PROJECT_NAME} PRIVATE metis) +set(METIS_DIR third_party/METIS) +target_include_directories(${PROJECT_NAME} PRIVATE ${METIS_DIR}/include) +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DIDXTYPEWIDTH=64 -DREALTYPEWIDTH=32") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DIDXTYPEWIDTH=64 -DREALTYPEWIDTH=32") +set(GKLIB_PATH "${METIS_DIR}/GKlib") +include(${GKLIB_PATH}/GKlibSystem.cmake) +include_directories(${GKLIB_PATH}) +include_directories("${METIS_DIR}/include") +add_subdirectory("${METIS_DIR}/libmetis") +target_link_libraries(${PROJECT_NAME} PRIVATE metis) find_package(Torch REQUIRED) target_link_libraries(${PROJECT_NAME} PRIVATE ${TORCH_LIBRARIES}) diff --git a/pyg_lib/csrc/partition/cpu/metis_kernel.cpp 
b/pyg_lib/csrc/partition/cpu/metis_kernel.cpp index 7430574f2..df516224f 100644 --- a/pyg_lib/csrc/partition/cpu/metis_kernel.cpp +++ b/pyg_lib/csrc/partition/cpu/metis_kernel.cpp @@ -1,7 +1,7 @@ #include #include -/* #include */ +#include namespace pyg { namespace partition { @@ -31,16 +31,14 @@ at::Tensor metis_kernel(const at::Tensor& rowptr, auto part = at::empty({nvtxs}, rowptr.options()); auto part_data = part.data_ptr(); - /* if (recursive) { */ - /* METIS_PartGraphRecursive(&nvtxs, &ncon, xadj, adjncy, vwgt, NULL, adjwgt, - */ - /* &num_partitions, NULL, NULL, NULL, &objval, */ - /* part_data); */ - /* } else { */ - /* METIS_PartGraphKway(&nvtxs, &ncon, xadj, adjncy, vwgt, NULL, adjwgt, */ - /* &num_partitions, NULL, NULL, NULL, &objval, - * part_data); */ - /* } */ + if (recursive) { + METIS_PartGraphRecursive(&nvtxs, &ncon, xadj, adjncy, vwgt, NULL, adjwgt, + &num_partitions, NULL, NULL, NULL, &objval, + part_data); + } else { + METIS_PartGraphKway(&nvtxs, &ncon, xadj, adjncy, vwgt, NULL, adjwgt, + &num_partitions, NULL, NULL, NULL, &objval, part_data); + } return part; } From 61a5725c42b49ae475e2822453dcb1d63dd32b34 Mon Sep 17 00:00:00 2001 From: rusty1s Date: Mon, 8 Apr 2024 08:04:38 +0000 Subject: [PATCH 43/46] update --- .github/workflows/building.yml | 1 - .github/workflows/nightly.yml | 1 - CMakeLists.txt | 21 ++++++++++++++------- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/.github/workflows/building.yml b/.github/workflows/building.yml index 1c19e9f21..442ed1c0c 100644 --- a/.github/workflows/building.yml +++ b/.github/workflows/building.yml @@ -15,7 +15,6 @@ jobs: torch-version: [1.12.0, 1.13.0, 2.0.0, 2.1.0, 2.2.0] cuda-version: ['cpu', 'cu113', 'cu116', 'cu117', 'cu118', 'cu121'] exclude: - - os: windows-2019 # No windows support yet :( - torch-version: 1.12.0 python-version: '3.12' - torch-version: 1.13.0 diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 8aeeba1b1..f23c6721f 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -19,7 +19,6 @@ jobs: torch-version: [1.12.0, 1.13.0, 2.0.0, 2.1.0, 2.2.0] cuda-version: ['cpu', 'cu113', 'cu116', 'cu117', 'cu118', 'cu121'] exclude: - - os: windows-2019 # No windows support yet :( - torch-version: 1.12.0 python-version: '3.12' - torch-version: 1.13.0 diff --git a/CMakeLists.txt b/CMakeLists.txt index 1e971f167..cb54d51d6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -75,13 +75,20 @@ endif() set(METIS_DIR third_party/METIS) target_include_directories(${PROJECT_NAME} PRIVATE ${METIS_DIR}/include) -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DIDXTYPEWIDTH=64 -DREALTYPEWIDTH=32") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DIDXTYPEWIDTH=64 -DREALTYPEWIDTH=32") -set(GKLIB_PATH "${METIS_DIR}/GKlib") -include(${GKLIB_PATH}/GKlibSystem.cmake) -include_directories(${GKLIB_PATH}) -include_directories("${METIS_DIR}/include") -add_subdirectory("${METIS_DIR}/libmetis") +if (MSVC) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /DIDXTYPEWIDTH=64 /DREALTYPEWIDTH=32") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /DIDXTYPEWIDTH=64 /DREALTYPEWIDTH=32") +else() + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DIDXTYPEWIDTH=64 -DREALTYPEWIDTH=32") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DIDXTYPEWIDTH=64 -DREALTYPEWIDTH=32") +endif() +if (NOT MSVC) + set(GKLIB_PATH "${METIS_DIR}/GKlib") + include(${GKLIB_PATH}/GKlibSystem.cmake) + include_directories(${GKLIB_PATH}) + include_directories("${METIS_DIR}/include") + add_subdirectory("${METIS_DIR}/libmetis") +endif() 
target_link_libraries(${PROJECT_NAME} PRIVATE metis) find_package(Torch REQUIRED) From 4f0dd5a5cf1bb91e999f85d472ef45f17faee74a Mon Sep 17 00:00:00 2001 From: rusty1s Date: Mon, 8 Apr 2024 08:12:04 +0000 Subject: [PATCH 44/46] update --- CMakeLists.txt | 7 ++++--- pyg_lib/csrc/partition/cpu/metis_kernel.cpp | 6 ++++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cb54d51d6..187a81073 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -73,8 +73,6 @@ else() target_include_directories(${PROJECT_NAME} PRIVATE ${PHMAP_DIR}) endif() -set(METIS_DIR third_party/METIS) -target_include_directories(${PROJECT_NAME} PRIVATE ${METIS_DIR}/include) if (MSVC) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /DIDXTYPEWIDTH=64 /DREALTYPEWIDTH=32") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /DIDXTYPEWIDTH=64 /DREALTYPEWIDTH=32") @@ -82,14 +80,17 @@ else() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DIDXTYPEWIDTH=64 -DREALTYPEWIDTH=32") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DIDXTYPEWIDTH=64 -DREALTYPEWIDTH=32") endif() + if (NOT MSVC) + set(METIS_DIR third_party/METIS) + target_include_directories(${PROJECT_NAME} PRIVATE ${METIS_DIR}/include) set(GKLIB_PATH "${METIS_DIR}/GKlib") include(${GKLIB_PATH}/GKlibSystem.cmake) include_directories(${GKLIB_PATH}) include_directories("${METIS_DIR}/include") add_subdirectory("${METIS_DIR}/libmetis") + target_link_libraries(${PROJECT_NAME} PRIVATE metis) endif() -target_link_libraries(${PROJECT_NAME} PRIVATE metis) find_package(Torch REQUIRED) target_link_libraries(${PROJECT_NAME} PRIVATE ${TORCH_LIBRARIES}) diff --git a/pyg_lib/csrc/partition/cpu/metis_kernel.cpp b/pyg_lib/csrc/partition/cpu/metis_kernel.cpp index df516224f..56f6da886 100644 --- a/pyg_lib/csrc/partition/cpu/metis_kernel.cpp +++ b/pyg_lib/csrc/partition/cpu/metis_kernel.cpp @@ -1,7 +1,9 @@ #include #include +#ifndef _WIN32 #include +#endif namespace pyg { namespace partition { @@ -14,6 +16,9 @@ at::Tensor metis_kernel(const at::Tensor& rowptr, const c10::optional& node_weight, const c10::optional& edge_weight, bool recursive) { +#ifdef _WIN32 + TORCH_INTERNAL_ASSERT(false, "METIS not yet supported on Windows"); +#else int64_t nvtxs = rowptr.numel() - 1; int64_t ncon = 1; auto* xadj = rowptr.data_ptr(); @@ -41,6 +46,7 @@ at::Tensor metis_kernel(const at::Tensor& rowptr, } return part; +#endif } } // namespace From 55b636fc35d052a8311c4abbace9bb6f158eaedb Mon Sep 17 00:00:00 2001 From: rusty1s Date: Mon, 8 Apr 2024 08:42:25 +0000 Subject: [PATCH 45/46] update --- .github/workflows/install.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/install.yml b/.github/workflows/install.yml index 5200ad263..809bca3c0 100644 --- a/.github/workflows/install.yml +++ b/.github/workflows/install.yml @@ -9,11 +9,10 @@ on: # yamllint disable-line rule:truthy jobs: import: - runs-on: ${{ matrix.os }} + runs-on: ubuntu-latest strategy: matrix: - os: [windows-2019] cuda-version: ['cpu', 'cu121'] steps: From e5edd0c3f1560643ece0461ebd2c9f579aec8a9f Mon Sep 17 00:00:00 2001 From: rusty1s Date: Mon, 8 Apr 2024 11:13:44 +0000 Subject: [PATCH 46/46] update --- .github/workflows/building.yml | 3 +++ .github/workflows/nightly.yml | 3 +++ README.md | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/building.yml b/.github/workflows/building.yml index 442ed1c0c..6c7f81447 100644 --- a/.github/workflows/building.yml +++ b/.github/workflows/building.yml @@ -71,6 +71,9 @@ jobs: python-version: '3.8' - os: macos-14 python-version: 
'3.9' + - os: windows-2019 + torch-version: 2.0.0 + cuda-version: 'cu121' steps: - name: Checkout repository diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index f23c6721f..d3653b719 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -75,6 +75,9 @@ jobs: python-version: '3.8' - os: macos-14 python-version: '3.9' + - os: windows-2019 + torch-version: 2.0.0 + cuda-version: 'cu121' steps: - name: Checkout repository diff --git a/README.md b/README.md index 81ec7fb0d..e71c6b79f 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ The following combinations are supported: | PyTorch 2.0 | `cpu` | `cu102` | `cu113` | `cu116` | `cu117` | `cu118` | `cu121` | |--------------|-------|---------|---------|---------|---------|---------|---------| -| **Linux** | ✅ | | | | ✅ | ✅ | | +| **Linux** | ✅ | | | | ✅ | ✅ | ✅ | | **Windows** | ✅ | | | | ✅ | ✅ | | | **macOS** | ✅ | | | | | | |
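The MKL code path restored in pyg_lib/csrc/ops/cpu/matmul_kernel.cpp (grouped_matmul_out_kernel_mkl_impl) buckets the input/other matrix pairs by their (m, n, k) shapes, so every bucket can be dispatched through a single batched GEMM call instead of one GEMM per pair. The Python sketch below only illustrates that grouping idea under simplifying assumptions — grouped_matmul_reference and the use of torch.bmm are placeholders for exposition, not pyg-lib's API or the kernel itself:

from collections import defaultdict

import torch


def grouped_matmul_reference(inputs, others):
    # Bucket (input, other) pairs by (m, n, k) so that each bucket maps onto
    # one batched GEMM, mirroring the grouping done in the MKL kernel.
    groups = defaultdict(list)
    for i, (a, b) in enumerate(zip(inputs, others)):
        (m, k), n = a.shape, b.shape[-1]
        groups[(m, n, k)].append(i)

    out = [None] * len(inputs)
    for indices in groups.values():
        a = torch.stack([inputs[i] for i in indices])  # [B, m, k]
        b = torch.stack([others[i] for i in indices])  # [B, k, n]
        c = torch.bmm(a, b)  # one batched GEMM per shape bucket
        for j, i in enumerate(indices):
            out[i] = c[j]
    return out


# Two shape buckets -> two batched GEMMs instead of four individual GEMMs:
inputs = [torch.randn(4, 8), torch.randn(4, 8), torch.randn(6, 8), torch.randn(6, 8)]
others = [torch.randn(8, 3) for _ in range(4)]
outs = grouped_matmul_reference(inputs, others)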
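With the CMAKE_CUDA_ARCHITECTURES handling dropped from setup.py, the CUDA architecture list is now supplied per build step through the TORCH_CUDA_ARCH_LIST environment variable ("5.0+PTX;6.0;7.0;7.5;8.0;8.6" in install.yml, building.yml, and nightly.yml) and presumably picked up by PyTorch's own CMake tooling during the extension build. A minimal local sanity check, assuming the same semicolon-separated format the workflows use (the fallback value here is simply the workflows' list, not a pyg-lib default):

import os

# Print the CUDA architectures a local build would target; falls back to the
# list the CI workflows set when TORCH_CUDA_ARCH_LIST is not exported.
arch_list = os.getenv('TORCH_CUDA_ARCH_LIST', '5.0+PTX;6.0;7.0;7.5;8.0;8.6')
print([arch for arch in arch_list.replace(';', ' ').split() if arch])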