diff --git a/clang/lib/DPCT/RulesAsm/AsmMigration.cpp b/clang/lib/DPCT/RulesAsm/AsmMigration.cpp index 91abe3cbfa89..38b476405f1f 100644 --- a/clang/lib/DPCT/RulesAsm/AsmMigration.cpp +++ b/clang/lib/DPCT/RulesAsm/AsmMigration.cpp @@ -1035,10 +1035,12 @@ class SYCLGen : public SYCLGenBase { BI->getKind() != InlineAsmBuiltinType::s64 && BI->getKind() != InlineAsmBuiltinType::u64 && BI->getKind() != InlineAsmBuiltinType::s16x2 && - BI->getKind() != InlineAsmBuiltinType::u16x2) + BI->getKind() != InlineAsmBuiltinType::u16x2 && + BI->getKind() != InlineAsmBuiltinType::f16x2) return false; isVec = BI->getKind() == InlineAsmBuiltinType::s16x2 || - BI->getKind() == InlineAsmBuiltinType::u16x2; + BI->getKind() == InlineAsmBuiltinType::u16x2 || + BI->getKind() == InlineAsmBuiltinType::f16x2; } else { return false; } diff --git a/clang/lib/DPCT/RulesAsm/Parser/AsmTokenKinds.def b/clang/lib/DPCT/RulesAsm/Parser/AsmTokenKinds.def index a53d3f4cd7b4..073b5eda6433 100644 --- a/clang/lib/DPCT/RulesAsm/Parser/AsmTokenKinds.def +++ b/clang/lib/DPCT/RulesAsm/Parser/AsmTokenKinds.def @@ -278,6 +278,7 @@ STATE_SPACE(reg, ".reg") STATE_SPACE(sreg, ".sreg") STATE_SPACE(const, ".const") STATE_SPACE(global, ".global") +STATE_SPACE(volatile.global, ".volatile.global") STATE_SPACE(local, ".local") STATE_SPACE(param, ".param") STATE_SPACE(shared, ".shared") diff --git a/clang/test/dpct/asm/ld.cu b/clang/test/dpct/asm/ld.cu index f6fe1e5a86cb..a3f3b1e1f1c3 100644 --- a/clang/test/dpct/asm/ld.cu +++ b/clang/test/dpct/asm/ld.cu @@ -8,7 +8,7 @@ #include /* -.ss = { .const, .global, .local, .param, .shared }; +.ss = { .const, .global, .local, .param, .shared, .volatile.global }; .type = { .b8, .b16, .b32, .b64, .b128, .u8, .u16, .u32, .u64, .s8, .s16, .s32, .s64, @@ -27,6 +27,11 @@ __global__ void ld(int *arr) { asm volatile ("ld.global.u32 %0, [%1 + 4];" : "=r"(c) : "l"(arr)); // CHECK: d = *((uint64_t *)((uintptr_t)arr + 8)); asm volatile ("ld.global.u64 %0, [%1 + 8];" : "=l"(d) : "l"(arr)); + // CHECK: a = *arr; + asm volatile ("ld.volatile.global.s32 %0, [%1];" : "=r"(a) : "l"(arr)); + // CHECK: b = *((uint32_t *)(uintptr_t)arr); + asm volatile ("ld.volatile.global.u32 %0, [%1];" : "=r"(b) : "l"(arr)); + // CHECK: c = *((uint32_t *)((uintptr_t)arr + 4)); } __device__ void shared_address_load32(uint32_t addr, uint32_t &val) { diff --git a/clang/test/dpct/asm/sub.cu b/clang/test/dpct/asm/sub.cu index 82aaf7bd8e96..21a1db91bd8a 100644 --- a/clang/test/dpct/asm/sub.cu +++ b/clang/test/dpct/asm/sub.cu @@ -7,6 +7,7 @@ // clang-format off #include #include +#include __global__ void sub() { int x = 1, y = 2; @@ -18,6 +19,7 @@ __global__ void sub() { uint64_t u64; short2 s16x2, sa{1, 2}, sb{1, 2}; ushort2 u16x2, ua{1, 2}, ub{1, 2}; + half2 f16x2, fa{1.f, 2.f}, fb{1.f, 2.f}; // CHECK: i16 = x - y; asm("sub.s16 %0, %1, %2;" : "=r"(i16) : "r"(x), "r"(y)); @@ -42,6 +44,9 @@ __global__ void sub() { // CHECK: s16x2 = sa - sb; asm("sub.s16x2 %0, %1, %2;" : "=r"(s16x2) : "r"(sa), "r"(sb)); + + // CHECK: f16x2 = fa - fb; + asm("sub.f16x2 %0, %1, %2;" : "=r"(f16x2) : "r"(fa), "r"(fb)); // CHECK: u16x2 = ua - ub; asm("sub.u16x2 %0, %1, %2;" : "=r"(u16x2) : "r"(ua), "r"(ub)); @@ -49,14 +54,23 @@ __global__ void sub() { // CHECK: s16x2 = sa - sycl::short2{1, 1}; asm("sub.s16x2 %0, %1, {1, 1};" : "=r"(s16x2) : "r"(sa)); + // CHECK: f16x2 = fa - sycl::half2{1, 1}; + asm("sub.f16x2 %0, %1, {1, 1};" : "=r"(f16x2) : "r"(fa)); + // CHECK: u16x2 = ua - sycl::ushort2{1, 1}; asm("sub.u16x2 %0, %1, {1, 1};" : "=r"(u16x2) : "r"(ua)); // CHECK: s16x2 = sycl::short2{1, 1} - sa; asm("sub.s16x2 %0, {1, 1}, %1;" : "=r"(s16x2) : "r"(sa)); + // CHECK: f16x2 = sycl::half2{1, 1} - fa; + asm("sub.f16x2 %0, {1, 1}, %1;" : "=r"(f16x2) : "r"(fa)); + // CHECK: u16x2 = sycl::ushort2{1, 1} - ua; asm("sub.u16x2 %0, {1, 1}, %1;" : "=r"(u16x2) : "r"(ua)); + + // CHECK: f16x2 = sycl::half2{1, 1} - fa; + asm("sub.f16x2 %0, {1, 1}, %1;" : "=r"(f16x2) : "r"(fa)); } // clang-format on