diff --git a/include/mscclpp/sm_channel_device.hpp b/include/mscclpp/sm_channel_device.hpp index f5289369..f3aa9202 100644 --- a/include/mscclpp/sm_channel_device.hpp +++ b/include/mscclpp/sm_channel_device.hpp @@ -4,9 +4,9 @@ #ifndef MSCCLPP_SM_CHANNEL_DEVICE_HPP_ #define MSCCLPP_SM_CHANNEL_DEVICE_HPP_ -#include "semaphore_device.hpp" -#include "poll.hpp" #include "packet.hpp" +#include "poll.hpp" +#include "semaphore_device.hpp" namespace mscclpp { @@ -105,19 +105,19 @@ struct Element { using T = uint4; static __forceinline__ __device__ void load(T& v, const T* p) { asm volatile("ld.volatile.global.v4.u32 {%0,%1,%2,%3}, [%4];" - : "=r"(v.w), "=r"(v.x), "=r"(v.y), "=r"(v.z) - : "l"(p) - : "memory"); + : "=r"(v.w), "=r"(v.x), "=r"(v.y), "=r"(v.z) + : "l"(p) + : "memory"); } static __forceinline__ __device__ void store(T* p, const T& v) { asm volatile("st.volatile.global.v4.u32 [%0], {%1,%2,%3,%4};" - : - : "l"(p), "r"(v.w), "r"(v.x), "r"(v.y), "r"(v.z) - : "memory"); + : + : "l"(p), "r"(v.w), "r"(v.x), "r"(v.y), "r"(v.z) + : "memory"); } }; -#endif // __CUDACC__ +#endif // __CUDACC__ /// Channel for accessing peer memory directly from SM. struct SmChannelDeviceHandle { diff --git a/python/sm_channel_py.cpp b/python/sm_channel_py.cpp index 7ad08c22..8d636b88 100644 --- a/python/sm_channel_py.cpp +++ b/python/sm_channel_py.cpp @@ -29,5 +29,4 @@ void register_sm_channel(nb::module_& m) { .def_prop_ro("raw", [](const SmChannel::DeviceHandle& self) -> nb::bytes { return nb::bytes(reinterpret_cast(&self), sizeof(self)); }); - };