Skip to content

Commit

Permalink
Lint
Browse files Browse the repository at this point in the history
  • Loading branch information
chhwang committed Oct 7, 2023
1 parent 76f5e28 commit 6d1e20e
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 11 deletions.
10 changes: 6 additions & 4 deletions include/mscclpp/packet.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,9 @@ union LLPacket {

#ifdef __CUDACC__
/// Read from the origin and write to the target buffer.
__forceinline__ __device__ void putPackets(void* targetPtr, uint64_t targetOffset, const void* originPtr, uint64_t originOffset,
uint64_t originBytes, uint32_t threadId, uint32_t numThreads, uint32_t flag) {
__forceinline__ __device__ void putPackets(void* targetPtr, uint64_t targetOffset, const void* originPtr,
uint64_t originOffset, uint64_t originBytes, uint32_t threadId,
uint32_t numThreads, uint32_t flag) {
// Offsets should be aligned to 8 bytes & size should be a multiple of 8 bytes
const uint32_t* originBase = (const uint32_t*)((const char*)originPtr + originOffset);
LLPacket* targetBase = (LLPacket*)((char*)targetPtr + targetOffset);
Expand All @@ -89,8 +90,9 @@ __forceinline__ __device__ void putPackets(void* targetPtr, uint64_t targetOffse
}

/// Read from the target buffer and write to the origin.
__forceinline__ __device__ void getPackets(const void* targetPtr, uint64_t targetOffset, void* originPtr, uint64_t originOffset,
uint64_t originBytes, uint32_t threadId, uint32_t numThreads, uint32_t flag) {
__forceinline__ __device__ void getPackets(const void* targetPtr, uint64_t targetOffset, void* originPtr,
uint64_t originOffset, uint64_t originBytes, uint32_t threadId,
uint32_t numThreads, uint32_t flag) {
// Offsets should be aligned to 8 bytes & size should be a multiple of 8 bytes
const LLPacket* targetBase = (const LLPacket*)((const char*)targetPtr + targetOffset);
uint2* originBase = (uint2*)((char*)originPtr + originOffset);
Expand Down
17 changes: 10 additions & 7 deletions include/mscclpp/sm_channel_device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,9 +211,10 @@ struct SmChannelDeviceHandle {
/// @param numThreads The total number of threads that run this function.
///
template <int Alignment = 16, bool CopyRemainder = true>
__forceinline__ __device__ void put(uint64_t targetOffset, uint64_t originOffset, uint64_t originBytes, uint32_t threadId,
uint32_t numThreads) {
copy<Alignment, CopyRemainder>((char*)dst_ + targetOffset, (char*)src_ + originOffset, originBytes, threadId, numThreads);
__forceinline__ __device__ void put(uint64_t targetOffset, uint64_t originOffset, uint64_t originBytes,
uint32_t threadId, uint32_t numThreads) {
copy<Alignment, CopyRemainder>((char*)dst_ + targetOffset, (char*)src_ + originOffset, originBytes, threadId,
numThreads);
}

/// Copy data from the remote memory (target) to the local memory (origin).
Expand All @@ -231,10 +232,11 @@ struct SmChannelDeviceHandle {
/// @param numThreads The total number of threads that run this function.
///
template <int Alignment = 16, bool CopyRemainder = true>
__forceinline__ __device__ void get(uint64_t targetOffset, uint64_t originOffset, uint64_t originBytes, uint32_t threadId,
uint32_t numThreads) {
__forceinline__ __device__ void get(uint64_t targetOffset, uint64_t originOffset, uint64_t originBytes,
uint32_t threadId, uint32_t numThreads) {
// Note that `dst` and `src` are swapped for `get()`.
copy<Alignment, CopyRemainder>((char*)src_ + originOffset, (char*)dst_ + targetOffset, originBytes, threadId, numThreads);
copy<Alignment, CopyRemainder>((char*)src_ + originOffset, (char*)dst_ + targetOffset, originBytes, threadId,
numThreads);
}

/// Copy data from the local memory (origin) to the remote memory (target).
Expand Down Expand Up @@ -273,7 +275,8 @@ struct SmChannelDeviceHandle {
get<Alignment, CopyRemainder>(offset, offset, bytes, threadId, numThreads);
}

/// Construct @ref LLPacket from the data in the local memory (origin) and write it to the remote packet buffer (target).
/// Construct @ref LLPacket from the data in the local memory (origin) and write it to the remote packet buffer
/// (target).
///
/// This function is intended to be collectively called by multiple threads. Each thread copies a part of packets.
///
Expand Down

0 comments on commit 6d1e20e

Please sign in to comment.