From d7b8519d8d5306e93cf47b861e45191ef0853849 Mon Sep 17 00:00:00 2001 From: Binyang Li Date: Fri, 1 Nov 2024 10:26:46 +0000 Subject: [PATCH] fix compile issue --- include/mscclpp/gpu.hpp | 7 +++++++ include/mscclpp/gpu_utils.hpp | 6 ++++-- src/registered_memory.cc | 14 ++++++++++---- src/utils.cc | 12 ++++++------ 4 files changed, 27 insertions(+), 12 deletions(-) diff --git a/include/mscclpp/gpu.hpp b/include/mscclpp/gpu.hpp index e860c7b93..059f579c0 100644 --- a/include/mscclpp/gpu.hpp +++ b/include/mscclpp/gpu.hpp @@ -103,6 +103,13 @@ constexpr auto CU_MEM_ACCESS_FLAGS_PROT_READWRITE = hipMemAccessFlagsProtReadWri #define CUDA_NVLS_SUPPORTED 0 #endif // !defined(__HIP_PLATFORM_AMD__) +// Fabric +#if !defined(__HIP_PLATFORM_AMD__) +#define CUDA_FABRIC_SUPPORTED ((CUDART_VERSION >= 12040)) +#else // !defined(__HIP_PLATFORM_AMD__) +#define CUDA_FABRIC_SUPPORTED 0 +#endif // !defined(__HIP_PLATFORM_AMD__) + // GPU sync threads #if defined(__HIP_PLATFORM_AMD__) #define __syncshm() asm volatile("s_waitcnt lgkmcnt(0) \n s_barrier"); diff --git a/include/mscclpp/gpu_utils.hpp b/include/mscclpp/gpu_utils.hpp index 80cc435bf..b226b2993 100644 --- a/include/mscclpp/gpu_utils.hpp +++ b/include/mscclpp/gpu_utils.hpp @@ -109,9 +109,11 @@ T* cudaPhysicalCalloc(size_t nbytes, size_t gran) { prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE; #if defined(__HIP_PLATFORM_AMD__) prop.requestedHandleType = CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR; -#else +#elif (CUDA_FABRIC_SUPPORTED) prop.requestedHandleTypes = (CUmemAllocationHandleType)(CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR | CU_MEM_HANDLE_TYPE_FABRIC); +#else + prop.requestedHandleTypes = CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR; #endif prop.location.id = currentDevice; @@ -260,7 +262,7 @@ static inline size_t getMulticastGranularity(size_t size, CUmulticastGranularity #if defined(__HIP_PLATFORM_AMD__) // TODO: revisit when HIP fixes this typo in the field name prop.handleTypes = CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR; 
-#elif (CUDA_NVLS_SUPPORTED) +#elif (CUDA_FABRIC_SUPPORTED) prop.handleTypes = (CUmemAllocationHandleType)(CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR | CU_MEM_HANDLE_TYPE_FABRIC); #else prop.handleTypes = CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR; diff --git a/src/registered_memory.cc b/src/registered_memory.cc index 2f15ba5a9..114557892 100644 --- a/src/registered_memory.cc +++ b/src/registered_memory.cc @@ -23,6 +23,7 @@ bool isCuMemMapAllocated(void* ptr) { return true; } +#if (CUDA_FABRIC_SUPPORTED) // Get the recommended granularity for cuMemAddressReserve size_t getRecommendedGranularity() { size_t gran = 0; @@ -34,16 +35,13 @@ size_t getRecommendedGranularity() { CUmemAllocationProp prop = {}; prop.type = CU_MEM_ALLOCATION_TYPE_PINNED; prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE; -#if defined(__HIP_PLATFORM_AMD__) - prop.requestedHandleType = CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR; -#else prop.requestedHandleTypes = (CUmemAllocationHandleType)(CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR | CU_MEM_HANDLE_TYPE_FABRIC); -#endif prop.location.id = currentDevice; MSCCLPP_CUTHROW(cuMemGetAllocationGranularity(&gran, &prop, CU_MEM_ALLOC_GRANULARITY_RECOMMENDED)); return gran; } +#endif } // namespace namespace mscclpp { @@ -67,6 +65,7 @@ RegisteredMemory::Impl::Impl(void* data, size_t size, TransportFlags transports, this->isCuMemMapAlloc = true; } if (this->isCuMemMapAlloc) { +#if (CUDA_FABRIC_SUPPORTED) if (isFabricSupported()) { CUmemGenericAllocationHandle handle; MSCCLPP_CUTHROW(cuMemRetainAllocationHandle(&handle, baseDataPtr)); @@ -76,6 +75,9 @@ RegisteredMemory::Impl::Impl(void* data, size_t size, TransportFlags transports, } else { throw Error("Fabric is not supported", ErrorCode::InvalidUsage); } +#else + throw Error("Only support cuMemMap with CUDA 12.4 or later", ErrorCode::InvalidUsage); +#endif } else { cudaIpcMemHandle_t handle; MSCCLPP_CUDATHROW(cudaIpcGetMemHandle(&handle, baseDataPtr)); @@ -218,6 +220,7 @@ RegisteredMemory::Impl::Impl(const 
std::vector<char>& serialization) { auto entry = getTransportInfo(Transport::CudaIpc); void* base; if (this->isCuMemMapAlloc) { +#if (CUDA_FABRIC_SUPPORTED) if (isFabricSupported()) { CUmemGenericAllocationHandle handle; MSCCLPP_CUTHROW(cuMemImportFromShareableHandle(&handle, entry.shareableHandle, CU_MEM_HANDLE_TYPE_FABRIC)); @@ -229,6 +232,9 @@ RegisteredMemory::Impl::Impl(const std::vector<char>& serialization) { } else { throw Error("Fabric is not supported", ErrorCode::InvalidUsage); } +#else + throw Error("Only support cuMemMap with CUDA 12.4 or later", ErrorCode::InvalidUsage); +#endif } else { MSCCLPP_CUDATHROW(cudaIpcOpenMemHandle(&base, entry.cudaIpcBaseHandle, cudaIpcMemLazyEnablePeerAccess)); this->data = static_cast<char*>(base) + entry.cudaIpcOffsetFromBase; diff --git a/src/utils.cc b/src/utils.cc index 8566ccac1..a6a007c84 100644 --- a/src/utils.cc +++ b/src/utils.cc @@ -68,9 +68,9 @@ std::string getHostName(int maxlen, const char delim) { } bool isNvlsSupported() { - static bool result = false; - static bool isChecked = false; -#if (CUDART_VERSION >= 12040) + [[maybe_unused]] static bool result = false; + [[maybe_unused]] static bool isChecked = false; +#if (CUDA_FABRIC_SUPPORTED) if (!isChecked) { int isMulticastSupported; int isFabricSupported; @@ -86,9 +86,9 @@ bool isNvlsSupported() { } bool isFabricSupported() { - static bool result = false; - static bool isChecked = false; -#if (CUDART_VERSION >= 12040) + [[maybe_unused]] static bool result = false; + [[maybe_unused]] static bool isChecked = false; +#if (CUDA_FABRIC_SUPPORTED) if (!isChecked) { int isFabricSupported; CUdevice dev;