From 997f040f48b5c34e20ad6b0f512bb9d1ae6e6128 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Tue, 7 May 2024 08:27:27 -0700 Subject: [PATCH] Support Metal math functions (#4118) * Support Metal math functions Closes #4024 Note that the Metal documentation says Metal doesn't support the "double" type: "Metal does not support the double, long long, unsigned long long, and long double data types." According to the Metal documentation, math functions are not defined for integer types. That leaves only two types to test: half and float. As a code cleanup, __floatCast is replaced with __realCast, but I had to add a new signature that can convert from integer to float. Some of the GLSL functions are moved to hlsl.meta.slang. For those functions, there are no builtin functions for HLSL, but there are for GLSL and Metal. "nextafter(T,T)" is currently not working because it requires Metal version 3.1 and we invoke the Metal compiler with a profile version lower than 3.1. * Changes based on review comments. 
--- source/slang/core.meta.slang | 2 + source/slang/glsl.meta.slang | 201 +---- source/slang/hlsl.meta.slang | 1209 ++++++++++++++++++++++++++--- source/slang/slang-emit-metal.cpp | 43 +- tests/metal/math.slang | 513 ++++++++++++ 5 files changed, 1638 insertions(+), 330 deletions(-) create mode 100644 tests/metal/math.slang diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang index 3fc2fc570f..22822196cd 100644 --- a/source/slang/core.meta.slang +++ b/source/slang/core.meta.slang @@ -421,6 +421,8 @@ __generic __intrinsic_op(select) vector select(vector(U val); +__intrinsic_op($(kIROp_CastIntToFloat)) + T __realCast(U val); __intrinsic_op($(kIROp_IntCast)) T __intCast(U val); ${{{{ diff --git a/source/slang/glsl.meta.slang b/source/slang/glsl.meta.slang index 9715a44ce3..bacc8958ed 100644 --- a/source/slang/glsl.meta.slang +++ b/source/slang/glsl.meta.slang @@ -321,114 +321,6 @@ public vector atan(vector y, vector x) return atan2(y, x); } -__generic -[__readNone] -[ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)] -public T asinh(T x) -{ - __target_switch - { - case cpp: __intrinsic_asm "$P_asinh($0)"; - case cuda: __intrinsic_asm "$P_asinh($0)"; - case glsl: __intrinsic_asm "asinh"; - case spirv: return spirv_asm { - OpExtInst $$T result glsl450 Asinh $x - }; - default: - return log(x + sqrt(x * x + T(1))); - } -} - -__generic -[__readNone] -[ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)] -public vector asinh(vector x) -{ - __target_switch - { - case glsl: __intrinsic_asm "asinh"; - case spirv: return spirv_asm { - OpExtInst $$vector result glsl450 Asinh $x - }; - default: - VECTOR_MAP_UNARY(T, N, asinh, x); - } -} - -__generic -[__readNone] -[ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)] -public T acosh(T x) -{ - __target_switch - { - case cpp: __intrinsic_asm "$P_acosh($0)"; - case cuda: __intrinsic_asm "$P_acosh($0)"; - case glsl: __intrinsic_asm "acosh"; - case spirv: return spirv_asm { - OpExtInst $$T 
result glsl450 Acosh $x - }; - default: - return log(x + sqrt( x * x - T(1))); - } -} - -__generic -[__readNone] -[ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)] -public vector acosh(vector x) -{ - __target_switch - { - case glsl: __intrinsic_asm "acosh"; - case spirv: return spirv_asm { - OpExtInst $$vector result glsl450 Acosh $x - }; - default: - VECTOR_MAP_UNARY(T, N, acosh, x); - } -} - -__generic -[__readNone] -[ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)] -public T atanh(T x) -{ - __target_switch - { - case cpp: __intrinsic_asm "$P_atanh($0)"; - case cuda: __intrinsic_asm "$P_atanh($0)"; - case glsl: __intrinsic_asm "atanh"; - case spirv: return spirv_asm { - OpExtInst $$T result glsl450 Atanh $x - }; - default: - return T(0.5) * log((T(1) + x) / (T(1) - x)); - } -} - -__generic -[__readNone] -[ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] -public vector atanh(vector x) -{ - __target_switch - { - case glsl: __intrinsic_asm "atanh"; - case spirv: return spirv_asm { - OpExtInst $$vector result glsl450 Atanh $x - }; - default: - VECTOR_MAP_UNARY(T, N, atanh, x); - } -} - // // Section 8.2. Exponential Functions // @@ -458,66 +350,19 @@ public vector inversesqrt(vector x) __generic [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, GLSL_130)] public T roundEven(T x) { - __target_switch - { - case glsl: __intrinsic_asm "roundEven"; - case spirv: return spirv_asm { - OpExtInst $$T result glsl450 RoundEven $x - }; - default: - T nearest = round(x); - - // Check if the value is exactly halfway between two integers - if (abs(x - nearest) == T(0.5)) - { - // If halfway, choose the even number - if (mod(nearest, T(2)) != T(0)) - { - // If the nearest number is odd, - // move to the closest even number - nearest -= ((x < nearest) ? 
T(1) : T(-1)); - } - } - return nearest; - } + return rint(x); } __generic [__readNone] [ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, GLSL_130)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, GLSL_130)] public vector roundEven(vector x) { - __target_switch - { - case glsl: __intrinsic_asm "roundEven"; - case spirv: return spirv_asm { - OpExtInst $$vector result glsl450 RoundEven $x - }; - default: - VECTOR_MAP_UNARY(T, N, roundEven, x); - } -} - -__generic -[__readNone] -[ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] -public T fract(T x) -{ - return frac(x); -} - -__generic -[__readNone] -[ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] -public vector fract(vector x) -{ - return frac(x); + return rint(x); } __generic @@ -824,44 +669,6 @@ uint float2half(float f) return (s | e | m); } -__generic -[__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] -public T ldexp(T x, E exp) -{ - __target_switch - { - case hlsl: __intrinsic_asm "ldexp"; - case glsl: __intrinsic_asm "ldexp"; - case spirv: return spirv_asm { - OpExtInst $$T result glsl450 Ldexp $x $exp - }; - default: - return ldexp(x, __floatCast(exp)); - } -} - -__generic -[__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] -public vector ldexp(vector x, vector exp) -{ - __target_switch - { - case hlsl: __intrinsic_asm "ldexp"; - case glsl: __intrinsic_asm "ldexp"; - case spirv: return spirv_asm { - OpExtInst $$vector result glsl450 Ldexp $x $exp - }; - default: - vector temp; - [ForceUnroll] - for (int i = 0; i < N; ++i) - temp[i] = __floatCast(exp[i]); - return ldexp(x, temp); - } -} - [__readNone] [ForceInline] [require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)] diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index ca1fb0af35..6b3c5db59d 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -5,9 +5,6 @@ typedef uint UINT; __intrinsic_op($(kIROp_RequireGLSLExtension)) void 
__requireGLSLExtension(String extensionName); -__intrinsic_op($(kIROp_FloatCast)) -T __floatCast(U v); - [sealed] interface IBufferDataLayout { @@ -4093,12 +4090,13 @@ matrix abs(matrix x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T abs(T x) { __target_switch { case hlsl: __intrinsic_asm "abs"; + case metal: __intrinsic_asm "abs"; case glsl: __intrinsic_asm "abs"; case cuda: __intrinsic_asm "$P_abs($0)"; case cpp: __intrinsic_asm "$P_abs($0)"; @@ -4110,12 +4108,13 @@ T abs(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector abs(vector x) { __target_switch { case hlsl: __intrinsic_asm "abs"; + case metal: __intrinsic_asm "abs"; case glsl: __intrinsic_asm "abs"; case spirv: return spirv_asm { result:$$vector = OpExtInst glsl450 FAbs $x; @@ -4127,7 +4126,7 @@ vector abs(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix abs(matrix x) { __target_switch @@ -4138,11 +4137,40 @@ matrix abs(matrix x) } } +__generic +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +T fabs(T x) +{ + __target_switch + { + case metal: __intrinsic_asm "fabs"; + default: + return abs(x); + } +} + +__generic +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +vector fabs(vector x) +{ + __target_switch + { + case metal: __intrinsic_asm "fabs"; + default: + return abs(x); + } +} + + // Inverse cosine (HLSL SM 1.0) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T acos(T x) { __target_switch @@ -4151,6 +4179,7 @@ T acos(T x) case cuda: __intrinsic_asm "$P_acos($0)"; case glsl: __intrinsic_asm "acos"; case hlsl: __intrinsic_asm "acos"; + case 
metal: __intrinsic_asm "acos"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Acos $x }; @@ -4159,13 +4188,14 @@ T acos(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector acos(vector x) { __target_switch { case glsl: __intrinsic_asm "acos"; case hlsl: __intrinsic_asm "acos"; + case metal: __intrinsic_asm "acos"; case spirv: return spirv_asm { OpExtInst $$vector result glsl450 Acos $x }; @@ -4176,7 +4206,7 @@ vector acos(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix acos(matrix x) { __target_switch @@ -4187,9 +4217,51 @@ matrix acos(matrix x) } } +// Inverse hyperbolic cosine + +__generic +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv, GLSL_130)] +T acosh(T x) +{ + __target_switch + { + case cpp: __intrinsic_asm "$P_acosh($0)"; + case cuda: __intrinsic_asm "$P_acosh($0)"; + case glsl: __intrinsic_asm "acosh"; + case metal: __intrinsic_asm "acosh"; + case spirv: return spirv_asm { + OpExtInst $$T result glsl450 Acosh $x + }; + default: + return log(x + sqrt( x * x - T(1))); + } +} + +__generic +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv, GLSL_130)] +vector acosh(vector x) +{ + __target_switch + { + case glsl: __intrinsic_asm "acosh"; + case metal: __intrinsic_asm "acosh"; + case spirv: return spirv_asm { + OpExtInst $$vector result glsl450 Acosh $x + }; + default: + VECTOR_MAP_UNARY(T, N, acosh, x); + } +} + + // Test if all components are non-zero (HLSL SM 1.0) __generic [__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv)] bool all(T x) { __target_switch @@ -4198,6 +4270,8 @@ bool all(T x) __intrinsic_asm "bool($0)"; case hlsl: __intrinsic_asm "all"; + case metal: + __intrinsic_asm "all"; case spirv: let zero = __default(); if (__isInt()) @@ -4219,12 +4293,15 @@ bool all(T x) __generic [__readNone] 
+[require(cpp_cuda_glsl_hlsl_metal_spirv)] bool all(vector x) { __target_switch { case hlsl: __intrinsic_asm "all"; + case metal: + __intrinsic_asm "all"; case glsl: __intrinsic_asm "all(bvec$N0($0))"; case spirv: @@ -4261,7 +4338,7 @@ bool all(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv)] +[require(cpp_cuda_glsl_hlsl_metal_spirv)] bool all(matrix x) { __target_switch @@ -4318,6 +4395,7 @@ int3 WorkgroupSize(); __generic [__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv)] bool any(T x) { __target_switch @@ -4326,6 +4404,8 @@ bool any(T x) __intrinsic_asm "bool($0)"; case hlsl: __intrinsic_asm "any"; + case metal: + __intrinsic_asm "any"; case spirv: let zero = __default(); if (__isInt()) @@ -4346,12 +4426,15 @@ bool any(T x) __generic [__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv)] bool any(vector x) { __target_switch { case hlsl: __intrinsic_asm "any"; + case metal: + __intrinsic_asm "any"; case glsl: __intrinsic_asm "any(bvec$N0($0))"; case spirv: @@ -4541,7 +4624,7 @@ matrix asfloat(matrix x) // Inverse sine (HLSL SM 1.0) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T asin(T x) { __target_switch @@ -4550,6 +4633,7 @@ T asin(T x) case cuda: __intrinsic_asm "$P_asin($0)"; case glsl: __intrinsic_asm "asin"; case hlsl: __intrinsic_asm "asin"; + case metal: __intrinsic_asm "asin"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Asin $x }; @@ -4558,13 +4642,14 @@ T asin(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector asin(vector x) { __target_switch { case glsl: __intrinsic_asm "asin"; case hlsl: __intrinsic_asm "asin"; + case metal: __intrinsic_asm "asin"; case spirv: return spirv_asm { OpExtInst $$vector result glsl450 Asin $x }; @@ -4575,7 +4660,7 @@ vector asin(vector x) __generic [__readNone] 
-[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix asin(matrix x) { __target_switch @@ -4586,6 +4671,46 @@ matrix asin(matrix x) } } +// Inverse hyperbolic sine + +__generic +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv, GLSL_130)] +T asinh(T x) +{ + __target_switch + { + case cpp: __intrinsic_asm "$P_asinh($0)"; + case cuda: __intrinsic_asm "$P_asinh($0)"; + case glsl: __intrinsic_asm "asinh"; + case metal: __intrinsic_asm "asinh"; + case spirv: return spirv_asm { + OpExtInst $$T result glsl450 Asinh $x + }; + default: + return log(x + sqrt(x * x + T(1))); + } +} + +__generic +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv, GLSL_130)] +vector asinh(vector x) +{ + __target_switch + { + case glsl: __intrinsic_asm "asinh"; + case metal: __intrinsic_asm "asinh"; + case spirv: return spirv_asm { + OpExtInst $$vector result glsl450 Asinh $x + }; + default: + VECTOR_MAP_UNARY(T, N, asinh, x); + } +} + // Reinterpret bits as an int (HLSL SM 4.0) [__readNone] @@ -5029,7 +5154,7 @@ matrix asfloat16(matrix va // Inverse tangent (HLSL SM 1.0) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T atan(T x) { __target_switch @@ -5038,6 +5163,7 @@ T atan(T x) case cuda: __intrinsic_asm "$P_atan($0)"; case glsl: __intrinsic_asm "atan"; case hlsl: __intrinsic_asm "atan"; + case metal: __intrinsic_asm "atan"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Atan $x }; @@ -5046,13 +5172,14 @@ T atan(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector atan(vector x) { __target_switch { case glsl: __intrinsic_asm "atan"; case hlsl: __intrinsic_asm "atan"; + case metal: __intrinsic_asm "atan"; case spirv: return spirv_asm { OpExtInst $$vector result glsl450 Atan $x }; @@ -5063,7 
+5190,7 @@ vector atan(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix atan(matrix x) { __target_switch @@ -5076,7 +5203,7 @@ matrix atan(matrix x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T atan2(T y, T x) { __target_switch @@ -5085,6 +5212,7 @@ T atan2(T y, T x) case cuda: __intrinsic_asm "$P_atan2($0, $1)"; case glsl: __intrinsic_asm "atan($0,$1)"; case hlsl: __intrinsic_asm "atan2"; + case metal: __intrinsic_asm "atan2"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Atan2 $y $x }; @@ -5093,13 +5221,14 @@ T atan2(T y, T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector atan2(vector y, vector x) { __target_switch { case glsl: __intrinsic_asm "atan($0,$1)"; case hlsl: __intrinsic_asm "atan2"; + case metal: __intrinsic_asm "atan2"; case spirv: return spirv_asm { OpExtInst $$vector result glsl450 Atan2 $y $x }; @@ -5110,7 +5239,7 @@ vector atan2(vector y, vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix atan2(matrix y, matrix x) { __target_switch @@ -5121,10 +5250,50 @@ matrix atan2(matrix y, matrix x) } } +// Hyperbolic inverse tangent + +__generic +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv, GLSL_130)] +T atanh(T x) +{ + __target_switch + { + case cpp: __intrinsic_asm "$P_atanh($0)"; + case cuda: __intrinsic_asm "$P_atanh($0)"; + case glsl: __intrinsic_asm "atanh"; + case metal: __intrinsic_asm "atanh"; + case spirv: return spirv_asm { + OpExtInst $$T result glsl450 Atanh $x + }; + default: + return T(0.5) * log((T(1) + x) / (T(1) - x)); + } +} + +__generic +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv, 
sm_2_0_GLSL_140)] +vector atanh(vector x) +{ + __target_switch + { + case glsl: __intrinsic_asm "atanh"; + case metal: __intrinsic_asm "atanh"; + case spirv: return spirv_asm { + OpExtInst $$vector result glsl450 Atanh $x + }; + default: + VECTOR_MAP_UNARY(T, N, atanh, x); + } +} + // Ceiling (HLSL SM 1.0) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T ceil(T x) { __target_switch @@ -5133,6 +5302,7 @@ T ceil(T x) case cuda: __intrinsic_asm "$P_ceil($0)"; case glsl: __intrinsic_asm "ceil"; case hlsl: __intrinsic_asm "ceil"; + case metal: __intrinsic_asm "ceil"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Ceil $x }; @@ -5141,13 +5311,14 @@ T ceil(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector ceil(vector x) { __target_switch { case glsl: __intrinsic_asm "ceil"; case hlsl: __intrinsic_asm "ceil"; + case metal: __intrinsic_asm "ceil"; case spirv: return spirv_asm { OpExtInst $$vector result glsl450 Ceil $x }; @@ -5158,7 +5329,7 @@ vector ceil(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix ceil(matrix x) { __target_switch @@ -5169,6 +5340,87 @@ matrix ceil(matrix x) } } +// Copy-sign + +__generic +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv)] +vector copysign_half(vector x, vector y) +{ + let ux = reinterpret>(x); + let uy = reinterpret>(y); + vector signY = (uy & (uint16_t(1) << uint16_t(15))); + vector newX = (ux & ((uint16_t(1) << uint16_t(15)) - uint16_t(1))) + signY; + return reinterpret>(newX); +} + +__generic +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv)] +vector copysign_float(vector x, vector y) +{ + let ux = reinterpret>(x); + let uy = reinterpret>(y); + vector signY = (uy & (uint32_t(1) << uint32_t(31))); + vector newX = (ux & 
((uint32_t(1) << uint32_t(31)) - uint32_t(1))) + signY; + return reinterpret>(newX); +} + +__generic +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv)] +vector copysign_double(vector x, vector y) +{ + let ux = reinterpret>(x); + let uy = reinterpret>(y); + vector signY = (uy & (uint64_t(1) << uint64_t(63))); + vector newX = (ux & ((uint64_t(1) << uint64_t(63)) - uint64_t(1))) + signY; + return reinterpret>(newX); +} + +__generic +__intrinsic_op($(kIROp_FloatCast)) +vector __real_cast(vector val); + +__generic +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv)] +vector copysign(vector x, vector y) +{ + __target_switch + { + case metal: __intrinsic_asm "copysign"; + default: + { + // sign of -0.0 needs to be respected. + if (T is half) + return __real_cast(copysign_half( + __real_cast(x), + __real_cast(y))); + if (T is float) + return __real_cast(copysign_float( + __real_cast(x), + __real_cast(y))); + return __real_cast(copysign_double( + __real_cast(x), + __real_cast(y))); + } + } +} + +__generic +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv)] +T copysign(T x, T y) +{ + __target_switch + { + case metal: __intrinsic_asm "copysign"; + default: + return copysign(vector(x), vector(y))[0]; + } +} + // Check access status to tiled resource bool CheckAccessFullyMapped(uint status); @@ -5320,7 +5572,7 @@ void clip(matrix x) // Cosine __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T cos(T x) { __target_switch @@ -5329,6 +5581,7 @@ T cos(T x) case cuda: __intrinsic_asm "$P_cos($0)"; case glsl: __intrinsic_asm "cos"; case hlsl: __intrinsic_asm "cos"; + case metal: __intrinsic_asm "cos"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Cos $x }; @@ -5337,13 +5590,14 @@ T cos(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector cos(vector x) { __target_switch { case 
glsl: __intrinsic_asm "cos"; case hlsl: __intrinsic_asm "cos"; + case metal: __intrinsic_asm "cos"; case spirv: return spirv_asm { OpExtInst $$vector result glsl450 Cos $x }; @@ -5354,7 +5608,7 @@ vector cos(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix cos(matrix x) { __target_switch @@ -5368,7 +5622,7 @@ matrix cos(matrix x) // Hyperbolic cosine __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv)] +[require(cpp_cuda_glsl_hlsl_metal_spirv)] T cosh(T x) { __target_switch @@ -5377,6 +5631,7 @@ T cosh(T x) case cuda: __intrinsic_asm "$P_cosh($0)"; case glsl: __intrinsic_asm "cosh"; case hlsl: __intrinsic_asm "cosh"; + case metal: __intrinsic_asm "cosh"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Cosh $x }; @@ -5385,13 +5640,14 @@ T cosh(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv)] +[require(cpp_cuda_glsl_hlsl_metal_spirv)] vector cosh(vector x) { __target_switch { case glsl: __intrinsic_asm "cosh"; case hlsl: __intrinsic_asm "cosh"; + case metal: __intrinsic_asm "cosh"; case spirv: return spirv_asm { OpExtInst $$vector result glsl450 Cosh $x }; @@ -5402,7 +5658,7 @@ vector cosh(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv)] +[require(cpp_cuda_glsl_hlsl_metal_spirv)] matrix cosh(matrix x) { __target_switch @@ -5413,6 +5669,35 @@ matrix cosh(matrix x) } } +// Cosine degree + +__generic +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv)] +T cospi(T x) +{ + __target_switch + { + case metal: __intrinsic_asm "cospi"; + default: + return cos(T.getPi() * x); + } +} + +__generic +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv)] +vector cospi(vector x) +{ + __target_switch + { + case metal: __intrinsic_asm "cospi"; + default: + return cos(T.getPi() * x); + } +} + + // Population count [__readNone] [require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] @@ -5776,6 +6061,63 @@ T distance(T x, T y) } 
} +// fdim + +__generic +[__readNone] +[require(cpp_cuda_glsl_hlsl_spirv)] +T fdim(T x, T y) +{ + __target_switch + { + case metal: __intrinsic_asm "fdim"; + default: + return max(T(0), x - y); + } +} + +__generic +[__readNone] +[require(cpp_cuda_glsl_hlsl_spirv)] +vector fdim(vector x, vector y) +{ + __target_switch + { + case metal: __intrinsic_asm "fdim"; + default: + return max(T(0), x - y); + } +} + +// divide + +__generic +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv)] +T divide(T x, T y) +{ + __target_switch + { + case metal: __intrinsic_asm "divide"; + default: + return x / y; + } +} + +__generic +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv)] +vector divide(vector x, vector y) +{ + __target_switch + { + case metal: __intrinsic_asm "divide"; + default: + return x / y; + } +} + + // Vector dot product __generic @@ -6005,7 +6347,7 @@ matrix EvaluateAttributeSnapped(matrix x, int2 offset) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T exp(T x) { __target_switch @@ -6014,6 +6356,7 @@ T exp(T x) case cuda: __intrinsic_asm "$P_exp($0)"; case glsl: __intrinsic_asm "exp"; case hlsl: __intrinsic_asm "exp"; + case metal: __intrinsic_asm "exp"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Exp $x }; @@ -6022,13 +6365,14 @@ T exp(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector exp(vector x) { __target_switch { case glsl: __intrinsic_asm "exp"; case hlsl: __intrinsic_asm "exp"; + case metal: __intrinsic_asm "exp"; case spirv: return spirv_asm { OpExtInst $$vector result glsl450 Exp $x }; @@ -6039,7 +6383,7 @@ vector exp(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix exp(matrix x) { __target_switch @@ -6054,7 +6398,7 @@ matrix exp(matrix x) 
__generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T exp2(T x) { __target_switch @@ -6068,13 +6412,14 @@ T exp2(T x) } else { - float xf = __floatCast(x); + float xf = __realCast(x); return T(spirv_asm { result:$$float = OpExtInst glsl450 Exp2 $xf }); } case hlsl: __intrinsic_asm "exp2($0)"; + case metal: __intrinsic_asm "exp2"; case cpp: __intrinsic_asm "$P_exp2($0)"; case cuda: @@ -6085,7 +6430,7 @@ T exp2(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector exp2(vector x) { __target_switch @@ -6093,6 +6438,7 @@ vector exp2(vector x) case glsl: __intrinsic_asm "exp2($0)"; case hlsl: __intrinsic_asm "exp2"; + case metal: __intrinsic_asm "exp2"; case spirv: return spirv_asm { OpExtInst $$vector result glsl450 Exp2 $x }; @@ -6103,7 +6449,7 @@ vector exp2(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix exp2(matrix x) { __target_switch @@ -6114,6 +6460,36 @@ matrix exp2(matrix x) } } +// Base-10 exponent + +__generic +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +T exp10(T x) +{ + __target_switch + { + case metal: __intrinsic_asm "exp10"; + default: + const T ln10 = T(2.302585092994045901); // ln(10) + return exp(x * ln10); + } +} + +__generic +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +vector exp10(vector x) +{ + __target_switch + { + case metal: __intrinsic_asm "exp10"; + default: + const T ln10 = T(2.30258509299); // ln(10) + return exp(x * ln10); + } +} + // Convert 16-bit float stored in low bits of integer __glsl_version(420) @@ -6439,7 +6815,7 @@ vector firstbitlow(vector value) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T 
floor(T x) { __target_switch @@ -6448,6 +6824,7 @@ T floor(T x) case cuda: __intrinsic_asm "$P_floor($0)"; case glsl: __intrinsic_asm "floor"; case hlsl: __intrinsic_asm "floor"; + case metal: __intrinsic_asm "floor"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Floor $x }; @@ -6456,13 +6833,14 @@ T floor(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector floor(vector x) { __target_switch { case glsl: __intrinsic_asm "floor"; case hlsl: __intrinsic_asm "floor"; + case metal: __intrinsic_asm "floor"; case spirv: return spirv_asm { OpExtInst $$vector result glsl450 Floor $x }; @@ -6473,7 +6851,7 @@ vector floor(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix floor(matrix x) { __target_switch @@ -6487,7 +6865,7 @@ matrix floor(matrix x) // Fused multiply-add __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)] T fma(T a, T b, T c) { __target_switch @@ -6500,6 +6878,7 @@ T fma(T a, T b, T c) return mad(a, b, c); else __intrinsic_asm "fma($0, $1, $2)"; + case metal: __intrinsic_asm "fma"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Fma $a $b $c }; @@ -6510,13 +6889,14 @@ T fma(T a, T b, T c) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)] vector fma(vector a, vector b, vector c) { __target_switch { case glsl: __intrinsic_asm "fma"; case hlsl: __intrinsic_asm "fma"; + case metal: __intrinsic_asm "fma"; case spirv: return spirv_asm { OpExtInst $$vector result glsl450 Fma $a $b $c }; @@ -6527,7 +6907,7 @@ vector fma(vector a, vector b, vector c) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, 
shader5_sm_5_0)] matrix fma(matrix a, matrix b, matrix c) { __target_switch @@ -6541,19 +6921,24 @@ matrix fma(matrix a, matrix b, matrix c) // Floating point remainder of x/y __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T fmod(T x, T y) { - // In HLSL, fmod returns a remainder. + // In HLSL, `fmod` returns a remainder. // Definition of `fmod` in HLSL is, // "The floating-point remainder is calculated such that x = i * y + f, // where i is an integer, f has the same sign as x, and the absolute value // of f is less than the absolute value of y." // - // In GLSL, mod is a Modulus function. + // In GLSL, `mod` is a Modulus function. // OpenGL document defines "Modulus" as "Returns x - y * floor(x / y)". // The use of "Floor()" makes the difference. // + // In Metal, `fmod` is Modulus function. + // Metal document defines it as "Returns x - y * trunc(x/y)". + // Note that the function name is same to HLSL but it behaves differently. + // // The tricky ones are when x or y is a negative value. // // | Remainder | Modulus @@ -6588,10 +6973,13 @@ T fmod(T x, T y) { case cpp: __intrinsic_asm "$P_fmod($0, $1)"; case cuda: __intrinsic_asm "$P_fmod($0, $1)"; - case hlsl: __intrinsic_asm "fmod"; case glsl: // GLSL doesn't have a function for remainder. - __intrinsic_asm "(($0 < 0) ? -mod(-$0,abs($1)) : mod($0,abs($1)))"; + __intrinsic_asm "(($0 < 0.0) ? -mod(-$0,abs($1)) : mod($0,abs($1)))"; + case hlsl: __intrinsic_asm "fmod"; + case metal: + // Metal doesn't have a function for remainder. + __intrinsic_asm "(($0 < 0.0) ? -fmod(-$0,abs($1)) : fmod($0,abs($1)))"; case spirv: // OpFRem return "The floating-point remainder whose sign // matches the sign of Operand 1", where Operand 1 is "x". 
@@ -6604,7 +6992,8 @@ T fmod(T x, T y) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector fmod(vector x, vector y) { __target_switch @@ -6620,7 +7009,8 @@ vector fmod(vector x, vector y) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix fmod(matrix x, matrix y) { __target_switch @@ -6634,7 +7024,7 @@ matrix fmod(matrix x, matrix y) // Fractional part __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T frac(T x) { __target_switch @@ -6643,6 +7033,7 @@ T frac(T x) case cuda: __intrinsic_asm "$P_frac($0)"; case glsl: __intrinsic_asm "fract"; case hlsl: __intrinsic_asm "frac"; + case metal: __intrinsic_asm "fract"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Fract $x }; @@ -6651,13 +7042,14 @@ T frac(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector frac(vector x) { __target_switch { case glsl: __intrinsic_asm "fract"; case hlsl: __intrinsic_asm "frac"; + case metal: __intrinsic_asm "fract"; case spirv: return spirv_asm { OpExtInst $$vector result glsl450 Fract $x }; @@ -6673,10 +7065,29 @@ matrix frac(matrix x) MATRIX_MAP_UNARY(T, N, M, frac, x); } +__generic +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +T fract(T x) +{ + return frac(x); +} + +__generic +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +vector fract(vector x) +{ + return frac(x); +} + + // Split float into mantissa and exponent __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T frexp(T x, out int exp) { __target_switch @@ 
-6685,6 +7096,7 @@ T frexp(T x, out int exp) case cuda: __intrinsic_asm "$P_frexp($0, $1)"; case glsl: __intrinsic_asm "frexp"; case hlsl: __intrinsic_asm "frexp"; + case metal: __intrinsic_asm "frexp($0, *($1))"; case spirv: return spirv_asm { result:$$T = OpExtInst glsl450 Frexp $x &exp }; @@ -6693,12 +7105,14 @@ T frexp(T x, out int exp) __generic [__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector frexp(vector x, out vector exp) { __target_switch { - case hlsl: __intrinsic_asm "frexp"; case glsl: __intrinsic_asm "frexp"; + case hlsl: __intrinsic_asm "frexp"; + case metal: __intrinsic_asm "frexp($0, *($1))"; case spirv: return spirv_asm { result:$$vector = OpExtInst glsl450 Frexp $x &exp }; @@ -6709,7 +7123,7 @@ vector frexp(vector x, out vector exp) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix frexp(matrix x, out matrix exp) { __target_switch @@ -7920,7 +8334,7 @@ matrix isnan(matrix x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T ldexp(T x, T exp) { __target_switch @@ -7933,7 +8347,7 @@ T ldexp(T x, T exp) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector ldexp(vector x, vector exp) { __target_switch @@ -7946,7 +8360,7 @@ vector ldexp(vector x, vector exp) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix ldexp(matrix x, matrix exp) { __target_switch @@ -7957,6 +8371,47 @@ matrix ldexp(matrix x, matrix exp) } } +__generic +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +T ldexp(T x, E exp) +{ + __target_switch + { + case glsl: __intrinsic_asm "ldexp"; + case hlsl: __intrinsic_asm "ldexp"; + case metal: __intrinsic_asm "ldexp"; + case 
spirv: return spirv_asm { + OpExtInst $$T result glsl450 Ldexp $x $exp + }; + default: + return ldexp(x, __realCast(exp)); + } +} + +__generic +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +vector ldexp(vector x, vector exp) +{ + __target_switch + { + case glsl: __intrinsic_asm "ldexp"; + case hlsl: __intrinsic_asm "ldexp"; + case metal: __intrinsic_asm "ldexp"; + case spirv: return spirv_asm { + OpExtInst $$vector result glsl450 Ldexp $x $exp + }; + default: + vector temp; + [ForceUnroll] + for (int i = 0; i < N; ++i) + temp[i] = __realCast(exp[i]); + return ldexp(x, temp); + } +} + + // Vector length __generic [__readNone] @@ -8058,7 +8513,7 @@ float4 lit(float n_dot_l, float n_dot_h, float m) // Base-e logarithm __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T log(T x) { __target_switch @@ -8067,6 +8522,7 @@ T log(T x) case cuda: __intrinsic_asm "$P_log($0)"; case glsl: __intrinsic_asm "log"; case hlsl: __intrinsic_asm "log"; + case metal: __intrinsic_asm "log"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Log $x }; @@ -8075,13 +8531,14 @@ T log(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector log(vector x) { __target_switch { case glsl: __intrinsic_asm "log"; case hlsl: __intrinsic_asm "log"; + case metal: __intrinsic_asm "log"; case spirv: return spirv_asm { OpExtInst $$vector result glsl450 Log $x }; @@ -8092,7 +8549,7 @@ vector log(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix log(matrix x) { __target_switch @@ -8106,12 +8563,13 @@ matrix log(matrix x) // Base-10 logarithm __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T log10(T x) { 
__target_switch { case hlsl: __intrinsic_asm "log10"; + case metal: __intrinsic_asm "log10"; case glsl: __intrinsic_asm "(log( $0 ) * $S0( 0.43429448190325182765112891891661) )"; case cuda: __intrinsic_asm "$P_log10($0)"; case cpp: __intrinsic_asm "$P_log10($0)"; @@ -8128,12 +8586,13 @@ T log10(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector log10(vector x) { __target_switch { case hlsl: __intrinsic_asm "log10"; + case metal: __intrinsic_asm "log10"; case glsl: __intrinsic_asm "(log( $0 ) * $S0(0.43429448190325182765112891891661) )"; case spirv: { @@ -8150,7 +8609,7 @@ vector log10(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix log10(matrix x) { __target_switch @@ -8164,7 +8623,7 @@ matrix log10(matrix x) // Base-2 logarithm __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T log2(T x) { __target_switch @@ -8173,6 +8632,7 @@ T log2(T x) case cuda: __intrinsic_asm "$P_log2($0)"; case glsl: __intrinsic_asm "log2"; case hlsl: __intrinsic_asm "log2"; + case metal: __intrinsic_asm "log2"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Log2 $x }; @@ -8181,13 +8641,14 @@ T log2(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector log2(vector x) { __target_switch { case glsl: __intrinsic_asm "log2"; case hlsl: __intrinsic_asm "log2"; + case metal: __intrinsic_asm "log2"; case spirv: return spirv_asm { OpExtInst $$vector result glsl450 Log2 $x }; @@ -8198,7 +8659,7 @@ vector log2(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix log2(matrix x) { __target_switch @@ -8213,7 
+8674,7 @@ matrix log2(matrix x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)] T mad(T mvalue, T avalue, T bvalue) { __target_switch @@ -8222,6 +8683,7 @@ T mad(T mvalue, T avalue, T bvalue) case cuda: __intrinsic_asm "$P_fma($0, $1, $2)"; case glsl: __intrinsic_asm "fma"; case hlsl: __intrinsic_asm "mad"; + case metal: __intrinsic_asm "fma"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Fma $mvalue $avalue $bvalue }; @@ -8230,13 +8692,14 @@ T mad(T mvalue, T avalue, T bvalue) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)] vector mad(vector mvalue, vector avalue, vector bvalue) { __target_switch { case glsl: __intrinsic_asm "fma"; case hlsl: __intrinsic_asm "mad"; + case metal: __intrinsic_asm "fma"; case spirv: return spirv_asm { OpExtInst $$vector result glsl450 Fma $mvalue $avalue $bvalue }; @@ -8247,7 +8710,7 @@ vector mad(vector mvalue, vector avalue, vector bvalue) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)] matrix mad(matrix mvalue, matrix avalue, matrix bvalue) { __target_switch @@ -8385,12 +8848,13 @@ matrix max(matrix x, matrix y) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T max(T x, T y) { __target_switch { case hlsl: __intrinsic_asm "max"; + case metal: __intrinsic_asm "max"; case glsl: __intrinsic_asm "max"; case cuda: __intrinsic_asm "$P_max($0, $1)"; case cpp: __intrinsic_asm "$P_max($0, $1)"; @@ -8402,12 +8866,13 @@ T max(T x, T y) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector max(vector x, vector y) { __target_switch { case hlsl: __intrinsic_asm "max"; + case metal: __intrinsic_asm 
"max"; case glsl: __intrinsic_asm "max"; case spirv: return spirv_asm { result:$$vector = OpExtInst glsl450 FMax $x $y @@ -8419,7 +8884,7 @@ vector max(vector x, vector y) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix max(matrix x, matrix y) { __target_switch @@ -8430,6 +8895,107 @@ matrix max(matrix x, matrix y) } } +__generic +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +T max3(T x, T y, T z) +{ + __target_switch + { + case metal: __intrinsic_asm "max3"; + default: + return max(x, max(y, z)); + } +} + +__generic +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +vector max3(vector x, vector y, vector z) +{ + __target_switch + { + case metal: __intrinsic_asm "max3"; + default: + return max(x, max(y, z)); + } +} + +__generic +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +T fmax(T x, T y) +{ + __target_switch + { + case metal: __intrinsic_asm "fmax"; + default: + if (isnan(x)) return y; + return max(x, y); + } +} + +__generic +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +vector fmax(vector x, vector y) +{ + __target_switch + { + case metal: __intrinsic_asm "fmax"; + default: + VECTOR_MAP_BINARY(T, N, fmax, x, y); + } +} + +__generic +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +T fmax3(T x, T y, T z) +{ + __target_switch + { + case metal: __intrinsic_asm "fmax3"; + default: + { + bool isnanX = isnan(x); + bool isnanY = isnan(y); + bool isnanZ = isnan(z); + + if (isnanX) + { + return isnanY ? 
z : y; + } + else if (isnanY) + { + if (isnanZ) + return x; + return max(x, z); + } + else if (isnanZ) + { + return max(x, y); + } + + return max(y, max(x, z)); + } + } +} + +__generic +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +vector fmax3(vector x, vector y, vector z) +{ + __target_switch + { + case metal: __intrinsic_asm "fmax3"; + default: + VECTOR_MAP_TRINARY(T, N, fmax3, x, y, z); + } +} + + // minimum __generic __target_intrinsic(hlsl) @@ -8481,12 +9047,13 @@ matrix min(matrix x, matrix y) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T min(T x, T y) { __target_switch { case hlsl: __intrinsic_asm "min"; + case metal: __intrinsic_asm "min"; case glsl: __intrinsic_asm "min"; case cuda: __intrinsic_asm "$P_min($0, $1)"; case cpp: __intrinsic_asm "$P_min($0, $1)"; @@ -8498,12 +9065,13 @@ T min(T x, T y) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector min(vector x, vector y) { __target_switch { case hlsl: __intrinsic_asm "min"; + case metal: __intrinsic_asm "min"; case glsl: __intrinsic_asm "min"; case spirv: return spirv_asm { result:$$vector = OpExtInst glsl450 FMin $x $y @@ -8515,7 +9083,7 @@ vector min(vector x, vector y) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix min(matrix x, matrix y) { __target_switch @@ -8526,16 +9094,212 @@ matrix min(matrix x, matrix y) } } +__generic +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +T min3(T x, T y, T z) +{ + __target_switch + { + case metal: __intrinsic_asm "min3"; + default: + return min(x, min(y, z)); + } +} + +__generic +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +vector min3(vector x, vector y, vector z) +{ + __target_switch + { + case metal: 
__intrinsic_asm "min3"; + default: + return min(x, min(y, z)); + } +} + +__generic +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +T fmin(T x, T y) +{ + __target_switch + { + case metal: __intrinsic_asm "fmin"; + default: + if (isnan(x)) return y; + return min(x, y); + } +} + +__generic +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +vector fmin(vector x, vector y) +{ + __target_switch + { + case metal: __intrinsic_asm "fmin"; + default: + VECTOR_MAP_BINARY(T, N, fmin, x, y); + } +} + + +__generic +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +T fmin3(T x, T y, T z) +{ + __target_switch + { + case metal: __intrinsic_asm "fmin3"; + default: + { + bool isnanX = isnan(x); + bool isnanY = isnan(y); + bool isnanZ = isnan(z); + + if (isnan(x)) + { + return isnanY ? z : y; + } + else if (isnanY) + { + if (isnanZ) + return x; + return min(x, z); + } + else if (isnanZ) + { + return min(x, y); + } + + return min(x, min(y, z)); + } + } +} + +__generic +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +vector fmin3(vector x, vector y, vector z) +{ + __target_switch + { + case metal: __intrinsic_asm "fmin3"; + default: + VECTOR_MAP_TRINARY(T, N, fmin3, x, y, z); + } +} + + +// Median +__generic +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +T median3(T x, T y, T z) +{ + __target_switch + { + case metal: __intrinsic_asm "median3"; + default: + { + // | a | b | c | m | + // ----------+---+---+---+---+ + // x > y > z | z | y | x | y | + // x > z > y | y | z | x | z | + // y > x > z | z | y | x | x | + // y > z > x | z | y | z | z | + // z > x > y | y | z | x | x | + // z > y > x | y | z | y | y | + + T a = min(y, z); + T b = max(y, z); + T c = max(x, a); + T m = min(b, c); + return m; + } + } +} + +__generic +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +vector median3(vector x, vector y, vector z) +{ + __target_switch + 
{ + case metal: __intrinsic_asm "median3"; + default: + { + vector a = min(y, z); + vector b = max(y, z); + vector c = max(x, a); + vector m = min(b, c); + return m; + } + } +} + +__generic +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +T fmedian3(T x, T y, T z) +{ + __target_switch + { + case metal: __intrinsic_asm "fmedian3"; + default: + { + bool isnanX = isnan(x); + bool isnanY = isnan(y); + bool isnanZ = isnan(z); + + if (isnanX) + { + return isnanY ? z : y; + } + else if (isnanY || isnanZ) + { + // "the function can return either non-NaN value" + return x; + } + + return median3(x, y, z); + } + } +} + +__generic +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +vector fmedian3(vector x, vector y, vector z) +{ + __target_switch + { + case metal: __intrinsic_asm "fmedian3"; + default: + VECTOR_MAP_TRINARY(T, N, fmedian3, x, y, z); + } +} + + // split into integer and fractional parts (both with same sign) __generic [__readNone] -[require(glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T modf(T x, out T ip) { __target_switch { + case cpp: __intrinsic_asm "$P_modf($0, $1)"; + case cuda: __intrinsic_asm "$P_modf($0, $1)"; case hlsl: __intrinsic_asm "modf"; case glsl: __intrinsic_asm "modf"; + case metal: __intrinsic_asm "modf($0, *($1))"; case spirv: return spirv_asm { result:$$T = OpExtInst glsl450 Modf $x &ip }; @@ -8544,13 +9308,14 @@ T modf(T x, out T ip) __generic [__readNone] -[require(glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector modf(vector x, out vector ip) { __target_switch { case hlsl: __intrinsic_asm "modf"; case glsl: __intrinsic_asm "modf"; + case metal: __intrinsic_asm "modf($0, *($1))"; case spirv: return spirv_asm { result:$$vector = OpExtInst glsl450 Modf $x &ip }; @@ -8561,7 +9326,7 @@ vector modf(vector x, out vector ip) __generic [__readNone] -[require(glsl_hlsl_spirv, sm_2_0_GLSL_140)] 
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix modf(matrix x, out matrix ip) { __target_switch @@ -8883,6 +9648,50 @@ matrix mul(matrix left, matrix right) } } +// next-after: next representable floating-point value +// after x in the direction of y + +__generic +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)] +T nextafter(T x, T y) +{ + __target_switch + { + case metal: __intrinsic_asm "nextafter"; + default: + if (isnan(x)) return x; + if (isnan(y)) return y; + if (x == y) return y; + if (T is half) + { + T delta = __realCast(bit_cast(uint16_t(1))); + return x + ((x < y) ? delta : -delta); + } + if (T is float) + { + T delta = __realCast(bit_cast(uint32_t(1))); + return x + ((x < y) ? delta : -delta); + } + T delta = __realCast(bit_cast(uint64_t(1))); + return x + ((x < y) ? delta : -delta); + } +} + +__generic +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)] +vector nextafter(vector x, vector y) +{ + __target_switch + { + case metal: __intrinsic_asm "nextafter"; + default: + VECTOR_MAP_BINARY(T, N, nextafter, x, y); + } +} + + // noise (deprecated) [__readNone] @@ -8981,7 +9790,7 @@ T normalize(T x) // Raise to a power __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T pow(T x, T y) { __target_switch @@ -8990,6 +9799,7 @@ T pow(T x, T y) case cuda: __intrinsic_asm "$P_pow($0, $1)"; case glsl: __intrinsic_asm "pow"; case hlsl: __intrinsic_asm "pow"; + case metal: __intrinsic_asm "pow"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Pow $x $y }; @@ -8998,13 +9808,14 @@ T pow(T x, T y) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector pow(vector x, vector y) { __target_switch { case glsl: __intrinsic_asm "pow"; case hlsl: __intrinsic_asm "pow"; + case metal: __intrinsic_asm "pow"; case spirv: return 
spirv_asm { OpExtInst $$vector result glsl450 Pow $x $y }; @@ -9015,7 +9826,7 @@ vector pow(vector x, vector y) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix pow(matrix x, matrix y) { __target_switch @@ -9026,6 +9837,32 @@ matrix pow(matrix x, matrix y) } } +__generic +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +T powr(T x, T y) +{ + __target_switch + { + case metal: __intrinsic_asm "powr"; + default: + return pow(abs(x), y); + } +} + +__generic +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +vector powr(vector x, vector y) +{ + __target_switch + { + case metal: __intrinsic_asm "powr"; + default: + return pow(abs(x), y); + } +} + // Output message // TODO: add check to ensure format is const literal. @@ -9360,10 +10197,60 @@ vector reversebits(vector value) } } +// round even +__generic +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv, GLSL_130)] +T rint(T x) +{ + __target_switch + { + case glsl: __intrinsic_asm "roundEven"; + case metal: __intrinsic_asm "rint"; + case spirv: return spirv_asm { + OpExtInst $$T result glsl450 RoundEven $x + }; + default: + T nearest = round(x); + + // Check if the value is exactly halfway between two integers + if (abs(x - nearest) == T(0.5)) + { + // If halfway, choose the even number + if ((nearest / T(2)) * T(2) != nearest) + { + // If the nearest number is odd, + // move to the closest even number + nearest -= ((x < nearest) ? 
T(1) : T(-1)); + } + } + return nearest; + } +} + +__generic +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv, GLSL_130)] +vector rint(vector x) +{ + __target_switch + { + case glsl: __intrinsic_asm "roundEven"; + case metal: __intrinsic_asm "rint"; + case spirv: return spirv_asm { + OpExtInst $$vector result glsl450 RoundEven $x + }; + default: + VECTOR_MAP_UNARY(T, N, rint, x); + } +} + // Round-to-nearest __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T round(T x) { __target_switch @@ -9372,6 +10259,7 @@ T round(T x) case cuda: __intrinsic_asm "$P_round($0)"; case glsl: __intrinsic_asm "round"; case hlsl: __intrinsic_asm "round"; + case metal: __intrinsic_asm "round"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Round $x }; @@ -9380,13 +10268,14 @@ T round(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector round(vector x) { __target_switch { case glsl: __intrinsic_asm "round"; case hlsl: __intrinsic_asm "round"; + case metal: __intrinsic_asm "round"; case spirv: return spirv_asm { OpExtInst $$vector result glsl450 Round $x }; @@ -9397,7 +10286,7 @@ vector round(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix round(matrix x) { __target_switch @@ -9411,7 +10300,7 @@ matrix round(matrix x) // Reciprocal of square root __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T rsqrt(T x) { __target_switch @@ -9420,6 +10309,7 @@ T rsqrt(T x) case cuda: __intrinsic_asm "$P_rsqrt($0)"; case glsl: __intrinsic_asm "inversesqrt($0)"; case hlsl: __intrinsic_asm "rsqrt"; + case metal: __intrinsic_asm "rsqrt"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 
InverseSqrt $x }; @@ -9430,13 +10320,14 @@ T rsqrt(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector rsqrt(vector x) { __target_switch { case glsl: __intrinsic_asm "inversesqrt($0)"; case hlsl: __intrinsic_asm "rsqrt"; + case metal: __intrinsic_asm "rsqrt"; case spirv: return spirv_asm { OpExtInst $$vector result glsl450 InverseSqrt $x }; @@ -9447,7 +10338,7 @@ vector rsqrt(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix rsqrt(matrix x) { __target_switch @@ -9568,12 +10459,11 @@ matrix sign(matrix x) } } - // Sine __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T sin(T x) { __target_switch @@ -9582,6 +10472,7 @@ T sin(T x) case cuda: __intrinsic_asm "$P_sin($0)"; case glsl: __intrinsic_asm "sin"; case hlsl: __intrinsic_asm "sin"; + case metal: __intrinsic_asm "sin"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Sin $x }; @@ -9590,13 +10481,14 @@ T sin(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector sin(vector x) { __target_switch { case glsl: __intrinsic_asm "sin"; case hlsl: __intrinsic_asm "sin"; + case metal: __intrinsic_asm "sin"; case spirv: return spirv_asm { OpExtInst $$vector result glsl450 Sin $x }; @@ -9607,7 +10499,7 @@ vector sin(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix sin(matrix x) { __target_switch @@ -9621,13 +10513,40 @@ matrix sin(matrix x) // Sine and cosine __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(metal)] +T __sincos_metal(T x, out T c) +{ + __target_switch + { + case metal: 
__intrinsic_asm "sincos($0, *$1)"; + } +} + +__generic +[__readNone] +[require(metal)] +vector __sincos_metal(vector x, out vector c) +{ + __target_switch + { + case metal: __intrinsic_asm "sincos($0, *$1)"; + } +} + +__generic +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] void sincos(T x, out T s, out T c) { __target_switch { case cuda: __intrinsic_asm "$P_sincos($0, $1, $2)"; case hlsl: __intrinsic_asm "sincos"; + case metal: + //__intrinsic_asm "*($1) = sincos($0, *($2))"; + s = __sincos_metal(x, c); + return; default: s = sin(x); c = cos(x); @@ -9636,12 +10555,17 @@ void sincos(T x, out T s, out T c) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] void sincos(vector x, out vector s, out vector c) { __target_switch { case hlsl: __intrinsic_asm "sincos"; + case metal: + //__intrinsic_asm "*($1) = sincos($0, *($2))"; + s = __sincos_metal(x, c); + return; default: s = sin(x); c = cos(x); @@ -9650,7 +10574,8 @@ void sincos(vector x, out vector s, out vector c) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] void sincos(matrix x, out matrix s, out matrix c) { __target_switch @@ -9665,7 +10590,7 @@ void sincos(matrix x, out matrix s, out matrix c) // Hyperbolic Sine __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T sinh(T x) { __target_switch @@ -9674,6 +10599,7 @@ T sinh(T x) case cuda: __intrinsic_asm "$P_sinh($0)"; case glsl: __intrinsic_asm "sinh"; case hlsl: __intrinsic_asm "sinh"; + case metal: __intrinsic_asm "sinh"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Sinh $x }; @@ -9682,13 +10608,14 @@ T sinh(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] 
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector sinh(vector x) { __target_switch { case glsl: __intrinsic_asm "sinh"; case hlsl: __intrinsic_asm "sinh"; + case metal: __intrinsic_asm "sinh"; case spirv: return spirv_asm { OpExtInst $$vector result glsl450 Sinh $x }; @@ -9699,7 +10626,7 @@ vector sinh(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix sinh(matrix x) { __target_switch @@ -9710,6 +10637,35 @@ matrix sinh(matrix x) } } +// Sine degree + +__generic +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +T sinpi(T x) +{ + __target_switch + { + case metal: __intrinsic_asm "sinpi"; + default: + return sin(T.getPi() * x); + } +} + +__generic +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +vector sinpi(vector x) +{ + __target_switch + { + case metal: __intrinsic_asm "sinpi"; + default: + return sin(T.getPi() * x); + } +} + + // Smooth step (Hermite interpolation) __generic [__readNone] @@ -9762,7 +10718,7 @@ matrix smoothstep(matrix min, matrix max, matrix [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T sqrt(T x) { __target_switch @@ -9771,6 +10727,7 @@ T sqrt(T x) case cuda: __intrinsic_asm "$P_sqrt($0)"; case glsl: __intrinsic_asm "sqrt"; case hlsl: __intrinsic_asm "sqrt"; + case metal: __intrinsic_asm "sqrt"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Sqrt $x }; @@ -9779,13 +10736,14 @@ T sqrt(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector sqrt(vector x) { __target_switch { case glsl: __intrinsic_asm "sqrt"; case hlsl: __intrinsic_asm "sqrt"; + case metal: __intrinsic_asm "sqrt"; case spirv: return spirv_asm { OpExtInst $$vector result glsl450 Sqrt $x }; @@ -9796,7 +10754,7 @@ vector 
sqrt(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix sqrt(matrix x) { __target_switch @@ -9858,7 +10816,7 @@ matrix step(matrix y, matrix x) // Tangent __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T tan(T x) { __target_switch @@ -9867,6 +10825,7 @@ T tan(T x) case cuda: __intrinsic_asm "$P_tan($0)"; case glsl: __intrinsic_asm "tan"; case hlsl: __intrinsic_asm "tan"; + case metal: __intrinsic_asm "tan"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Tan $x }; @@ -9875,13 +10834,14 @@ T tan(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector tan(vector x) { __target_switch { case glsl: __intrinsic_asm "tan"; case hlsl: __intrinsic_asm "tan"; + case metal: __intrinsic_asm "tan"; case spirv: return spirv_asm { OpExtInst $$vector result glsl450 Tan $x }; @@ -9892,7 +10852,7 @@ vector tan(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix tan(matrix x) { __target_switch @@ -9906,7 +10866,7 @@ matrix tan(matrix x) // Hyperbolic tangent __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T tanh(T x) { __target_switch @@ -9915,6 +10875,7 @@ T tanh(T x) case cuda: __intrinsic_asm "$P_tanh($0)"; case glsl: __intrinsic_asm "tanh"; case hlsl: __intrinsic_asm "tanh"; + case metal: __intrinsic_asm "tanh"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Tanh $x }; @@ -9923,13 +10884,14 @@ T tanh(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector tanh(vector x) { __target_switch { case 
glsl: __intrinsic_asm "tanh"; case hlsl: __intrinsic_asm "tanh"; + case metal: __intrinsic_asm "tanh"; case spirv: return spirv_asm { OpExtInst $$vector result glsl450 Tanh $x }; @@ -9940,7 +10902,7 @@ vector tanh(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix tanh(matrix x) { __target_switch @@ -9951,6 +10913,35 @@ matrix tanh(matrix x) } } +// Tangent degree + +__generic +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +T tanpi(T x) +{ + __target_switch + { + case metal: __intrinsic_asm "tanpi"; + default: + return tan(T.getPi() * x); + } +} + +__generic +[__readNone] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] +vector tanpi(vector x) +{ + __target_switch + { + case metal: __intrinsic_asm "tanpi"; + default: + return tan(T.getPi() * x); + } +} + + // Matrix transpose __generic [__readNone] @@ -10020,7 +11011,7 @@ matrix transpose(matrix x) // Truncate to integer __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] T trunc(T x) { __target_switch @@ -10029,6 +11020,7 @@ T trunc(T x) case cuda: __intrinsic_asm "$P_trunc($0)"; case glsl: __intrinsic_asm "trunc"; case hlsl: __intrinsic_asm "trunc"; + case metal: __intrinsic_asm "trunc"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Trunc $x }; @@ -10037,13 +11029,14 @@ T trunc(T x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] +[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] vector trunc(vector x) { __target_switch { case glsl: __intrinsic_asm "trunc"; case hlsl: __intrinsic_asm "trunc"; + case metal: __intrinsic_asm "trunc"; case spirv: return spirv_asm { OpExtInst $$vector result glsl450 Trunc $x }; @@ -10054,7 +11047,7 @@ vector trunc(vector x) __generic [__readNone] -[require(cpp_cuda_glsl_hlsl_spirv, sm_2_0_GLSL_140)] 
+[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_2_0_GLSL_140)] matrix trunc(matrix x) { __target_switch diff --git a/source/slang/slang-emit-metal.cpp b/source/slang/slang-emit-metal.cpp index 2c327b6134..7da48cac12 100644 --- a/source/slang/slang-emit-metal.cpp +++ b/source/slang/slang-emit-metal.cpp @@ -298,35 +298,27 @@ bool MetalSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inO void MetalSourceEmitter::emitVectorTypeNameImpl(IRType* elementType, IRIntegerValue elementCount) { - // In some cases we *need* to use the built-in syntax sugar for vector types, - // so we will try to emit those whenever possible. - // - if( elementCount >= 1 && elementCount <= 4 ) - { - switch( elementType->getOp() ) + emitSimpleTypeImpl(elementType); + + switch (elementType->getOp()) + { + case kIROp_FloatType: + case kIROp_HalfType: + case kIROp_BoolType: + case kIROp_Int8Type: + case kIROp_UInt8Type: + case kIROp_Int16Type: + case kIROp_UInt16Type: + case kIROp_IntType: + case kIROp_UIntType: + case kIROp_Int64Type: + case kIROp_UInt64Type: + if (elementCount > 1) { - case kIROp_FloatType: - case kIROp_IntType: - case kIROp_UIntType: - // TODO: There are more types that need to be covered here - emitType(elementType); m_writer->emit(elementCount); - return; - - default: - break; } + break; } - - // As a fallback, we will use the `vector<...>` type constructor, - // although we should not expect to run into types that don't - // have a sugared form. 
- // - m_writer->emit("vector<"); - emitType(elementType); - m_writer->emit(","); - m_writer->emit(elementCount); - m_writer->emit(">"); } void MetalSourceEmitter::emitLoopControlDecorationImpl(IRLoopControlDecoration* decl) @@ -855,6 +847,7 @@ void MetalSourceEmitter::handleRequiredCapabilitiesImpl(IRInst* inst) void MetalSourceEmitter::emitFrontMatterImpl(TargetRequest*) { m_writer->emit("#include \n"); + m_writer->emit("#include \n"); m_writer->emit("using namespace metal;\n"); } diff --git a/tests/metal/math.slang b/tests/metal/math.slang new file mode 100644 index 0000000000..288d6137c3 --- /dev/null +++ b/tests/metal/math.slang @@ -0,0 +1,513 @@ +//TEST:SIMPLE(filecheck=METAL): -stage compute -entry computeMain -target metal +//TEST:SIMPLE(filecheck=GLSL): -stage compute -entry computeMain -target glsl +//TEST:SIMPLE(filecheck=GLSL_SPIRV): -stage compute -entry computeMain -target spirv -emit-spirv-via-glsl +//TEST:SIMPLE(filecheck=SPIR): -stage compute -entry computeMain -target spirv -emit-spirv-directly +//TEST:SIMPLE(filecheck=HLSL): -stage compute -entry computeMain -target hlsl +//TEST:SIMPLE(filecheck=CUDA): -stage compute -entry computeMain -target cuda +//TEST:SIMPLE(filecheck=CPP): -stage compute -entry computeMain -target cpp + +//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -output-using-type -emit-spirv-via-glsl +//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -output-using-type -emit-spirv-directly +//TEST:SIMPLE(filecheck=METALLIB): -target metallib + +//TEST_INPUT:ubuffer(data=[0 1 -1], stride=4):name=inputBuffer +RWStructuredBuffer inputBuffer; + +//TEST_INPUT: ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer outputBuffer; + +// METALLIB: define void @computeMain + +// "nextafter" is disabled because it requires Metal 3.1, and the Metal compiler is currently invoked with a lower profile version.
+#define TEST_WHEN_nextafter_WORKS 0 + +__generic +bool Test_Scalar() +{ + // METAL-LABEL: Test_Scalar + const T zero = T(inputBuffer[0]); + const T one = T(inputBuffer[1]); + + const int zeroInt = int(inputBuffer[0]); + + T outFloat1, outFloat2; + int outInt; + + bool voidResult = true; + + // METAL: sincos( + // METAL-NOT: sincos( + sincos(zero, outFloat1, outFloat2); + voidResult = voidResult && zero == outFloat1 && one == outFloat2; + + return voidResult + // METAL: acos( + // METALLIB: acos.f32 + && zero == acos(one) + + // METAL: acosh( + // METALLIB: acosh.f32 + && zero == acosh(one) + + // METAL: asin( + // METALLIB: asin.f32 + && zero == asin(zero) + + // METAL: asinh( + // METALLIB: asinh.f32 + && zero == asinh(zero) + + // METAL: atan( + // METALLIB: atan.f32 + && zero == atan(zero) + + // METAL: atan2( + // METALLIB: atan2.f32 + && zero == atan2(zero, zero) + + // METAL: atanh( + // METALLIB: atanh.f32 + && zero == atanh(zero) + + // METAL: ceil( + // METALLIB: ceil.f32 + && zero == ceil(zero) + + // METAL: copysign( + // METALLIB: bitcast float + && zero == copysign(zero, zero) + + // METAL: cos( + // METALLIB: cos.f32 + && one == cos(zero) + + // METAL: cosh( + // METALLIB: cosh.f32 + && one == cosh(zero) + + // METAL: cospi( + // METALLIB: cospi.f32 + && one == cospi(zero) + + // METAL: divide( + // METALLIB: fdiv + && zero == divide(zero, one) + + // METAL: exp( + // METALLIB: exp.f32 + && one == exp(zero) + + // METAL: exp2( + // METALLIB: exp2.f32 + && one == exp2(zero) + + // METAL: exp10( + // METALLIB: exp10.f32 + && one == exp10(zero) + + // METAL: fabs( + // METALLIB: fabs.f32 + && zero == fabs(zero) + + // METAL: abs( + && zero == abs(zero) + + // METAL: fdim( + && zero == fdim(zero, zero) + + // METAL: floor( + // METALLIB: floor.f32 + && zero == floor(zero) + + // METAL: fma( + // METALLIB: fma.f32 + && zero == fma(zero, zero, zero) + + // METAL: fmax( + // METALLIB: fmax.f32 + && zero == fmax(zero, zero) + + // METAL: max( + && zero == 
max(zero, zero) + + // METAL: fmax3( + // METALLIB: fmax3.f32 + && zero == fmax3(zero, zero, zero) + + // METAL: max3( + && zero == max3(zero, zero, zero) + + // METAL: fmedian3( + // METALLIB: fmedian3.f32 + && zero == fmedian3(zero, zero, zero) + + // METAL: median3( + && zero == median3(zero, zero, zero) + + // METAL: fmin( + // METALLIB: fmin.f32 + && zero == fmin(zero, zero) + + // METAL: min( + && zero == min(zero, zero) + + // METAL: fmin3( + // METALLIB: fmin3.f32 + && zero == fmin3(zero, zero, zero) + + // METAL: min3( + && zero == min3(zero, zero, zero) + + // METAL-COUNT-2: fmod( + // METALLIB-COUNT-2: fmod.f32 + && zero == fmod(zero, one) + + // METAL: fract( + // METALLIB: fract.f32 + && zero == fract(zero) + + // METAL: frexp( + // METALLIB: frexp_float + && zero == frexp(zero, outInt) && zeroInt == outInt + + // METAL: ldexp( + // METALLIB: ldexp.f32 + && zero == ldexp(zero, zeroInt) + + // METAL: log( + // METALLIB: log.f32 + && zero == log(one) + + // METAL: log2( + // METALLIB: log2.f32 + && zero == log2(one) + + // METAL: log10( + // METALLIB: log10.f32 + && zero == log10(one) + + // METAL: modf( + && zero == modf(zero, outFloat1) + +#if TEST_WHEN_nextafter_WORKS + // M-ETAL: nextafter( + && zero == nextafter(zero, zero) +#endif + + // METAL: pow( + // METALLIB: pow.f32 + && zero == pow(zero, one) + + // METAL: powr( + // METALLIB: powr.f32 + && zero == powr(zero, one) + + // METAL: rint( + // METALLIB: rint.f32 + && zero == rint(zero) + + // METAL: round( + // METALLIB: round.f32 + && zero == round(zero) + + // METAL: rsqrt( + // METALLIB: rsqrt.f32 + && one == rsqrt(one) + + // METAL: sin( + // METALLIB: sin.f32 + && zero == sin(zero) + + // METAL: sinh( + // METALLIB: sinh.f32 + && zero == sinh(zero) + + // METAL: sinpi( + // METALLIB: sinpi.f32 + && zero == sinpi(zero) + + // METAL: sqrt( + // METALLIB: sqrt.f32 + && zero == sqrt(zero) + + // METAL: tan( + // METALLIB: tan.f32 + && zero == tan(zero) + + // METAL: tanh( + // METALLIB: tanh.f32 
+ && zero == tanh(zero) + + // METAL: tanpi( + // METALLIB: tanpi.f32 + && zero == tanpi(zero) + + // METAL: trunc( + && zero == trunc(zero) + ; + + // METALLIB: ret +} + +__generic +bool Test_Vector() +{ + // METAL-LABEL: Test_Vector_0 + const vector zero = T(inputBuffer[0]); + const vector one = T(inputBuffer[1]); + + const vector zeroInt = int(inputBuffer[0]); + + vector outFloat1, outFloat2; + vector outInt; + + bool voidResult = true; + + // METAL: sincos( + // METAL-NOT: sincos( + sincos(zero, outFloat1, outFloat2); + voidResult = voidResult && zero == outFloat1 && one == outFloat2; + + return voidResult + // METAL: acos( + // METAL-NOT: acos( + && zero == acos(one) + + // METAL: acosh( + // METAL-NOT: acosh( + && zero == acosh(one) + + // METAL: asin( + // METAL-NOT: asin( + && zero == asin(zero) + + // METAL: asinh( + // METAL-NOT: asinh( + && zero == asinh(zero) + + // METAL: atan( + // METAL-NOT: atan( + && zero == atan(zero) + + // METAL: atan2( + // METAL-NOT: atan2( + && zero == atan2(zero, zero) + + // METAL: atanh( + // METAL-NOT: atanh( + && zero == atanh(zero) + + // METAL: ceil( + // METAL-NOT: ceil( + && zero == ceil(zero) + + // METAL: copysign( + // METAL-NOT: copysign( + && zero == copysign(zero, zero) + + // METAL: cos( + // METAL-NOT: cos( + && one == cos(zero) + + // METAL: cosh( + // METAL-NOT: cosh( + && one == cosh(zero) + + // METAL: cospi( + // METAL-NOT: cospi( + && one == cospi(zero) + + // METAL: divide( + // METAL-NOT: divide( + && zero == divide(zero, one) + + // METAL: exp( + // METAL-NOT: exp( + && one == exp(zero) + + // METAL: exp2( + // METAL-NOT: exp2( + && one == exp2(zero) + + // METAL: exp10( + // METAL-NOT: exp10( + && one == exp10(zero) + + // METAL: fabs( + // METAL-NOT: fabs( + && zero == fabs(zero) + + // METAL: abs( + // METAL-NOT: abs( + && zero == abs(zero) + + // METAL: fdim( + // METAL-NOT: fdim( + && zero == fdim(zero, zero) + + // METAL: floor( + // METAL-NOT: floor( + && zero == floor(zero) + + // METAL: fma( 
+ // METAL-NOT: fma( + && zero == fma(zero, zero, zero) + + // METAL: fmax( + // METAL-NOT: fmax( + && zero == fmax(zero, zero) + + // METAL: max( + // METAL-NOT: max( + && zero == max(zero, zero) + + // METAL: fmax3( + // METAL-NOT: fmax3( + && zero == fmax3(zero, zero, zero) + + // METAL: max3( + // METAL-NOT: max3( + && zero == max3(zero, zero, zero) + + // METAL: fmedian3( + // METAL-NOT: fmedian3( + && zero == fmedian3(zero, zero, zero) + + // METAL: median3( + // METAL-NOT: median3( + && zero == median3(zero, zero, zero) + + // METAL: fmin( + // METAL-NOT: fmin( + && zero == fmin(zero, zero) + + // METAL: min( + // METAL-NOT: min( + && zero == min(zero, zero) + + // METAL: fmin3( + // METAL-NOT: fmin3( + && zero == fmin3(zero, zero, zero) + + // METAL: min3( + // METAL-NOT: min3( + && zero == min3(zero, zero, zero) + + // METAL-COUNT-2: fmod( + // METAL-NOT: fmod( + && zero == fmod(zero, one) + + // METAL: fract( + // METAL-NOT: fract( + && zero == fract(zero) + + // METAL: frexp( + // METAL-NOT: frexp( + && zero == frexp(zero, outInt) && all(zeroInt == outInt) + + // METAL: ldexp( + // METAL-NOT: ldexp( + && zero == ldexp(zero, zeroInt) + + // METAL: log( + // METAL-NOT: log( + && zero == log(one) + + // METAL: log2( + // METAL-NOT: log2( + && zero == log2(one) + + // METAL: log10( + // METAL-NOT: log10( + && zero == log10(one) + + // METAL: modf( + // METAL-NOT: modf( + && zero == modf(zero, outFloat1) + +#if TEST_WHEN_nextafter_WORKS + // M-ETAL: nextafter( + // METAL-NOT: nextafter( + && zero == nextafter(zero, zero) +#endif + + // METAL: pow( + // METAL-NOT: pow( + && zero == pow(zero, one) + + // METAL: powr( + // METAL-NOT: powr( + && zero == powr(zero, one) + + // METAL: rint( + // METAL-NOT: rint( + && zero == rint(zero) + + // METAL: round( + // METAL-NOT: round( + && zero == round(zero) + + // METAL: rsqrt( + // METAL-NOT: rsqrt( + && one == rsqrt(one) + + // METAL: sin( + // METAL-NOT: sin( + && zero == sin(zero) + + // METAL: sinh( + // 
METAL-NOT: sinh( + && zero == sinh(zero) + + // METAL: sinpi( + // METAL-NOT: sinpi( + && zero == sinpi(zero) + + // METAL: sqrt( + // METAL-NOT: sqrt( + && zero == sqrt(zero) + + // METAL: tan( + // METAL-NOT: tan( + && zero == tan(zero) + + // METAL: tanh( + // METAL-NOT: tanh( + && zero == tanh(zero) + + // METAL: tanpi( + // METAL-NOT: tanpi( + && zero == tanpi(zero) + + // METAL: trunc( + // METAL-NOT: trunc( + && zero == trunc(zero) + ; + + // METAL-LABEL: Test_Vector_1 +} + +[numthreads(1,1,1)] +void computeMain() +{ + // GLSL: void main( + // GLSL_SPIRV: OpEntryPoint + // SPIR: OpEntryPoint + // HLSL: void computeMain( + // CUDA: void computeMain( + // CPP: void _computeMain( + + bool result = true + && Test_Scalar() + && Test_Vector() + && Test_Vector() + && Test_Vector() + && Test_Scalar() + && Test_Vector() + && Test_Vector() + && Test_Vector() + ; + + // BUF: 1 + outputBuffer[0] = int(result); +}