diff --git a/ogre2/include/gz/rendering/ogre2/Ogre2RenderEngine.hh b/ogre2/include/gz/rendering/ogre2/Ogre2RenderEngine.hh index 76c10d7ad..d1a8c4441 100644 --- a/ogre2/include/gz/rendering/ogre2/Ogre2RenderEngine.hh +++ b/ogre2/include/gz/rendering/ogre2/Ogre2RenderEngine.hh @@ -87,8 +87,6 @@ namespace gz /// \brief Destructor public: virtual ~Ogre2RenderEngine(); - public: void ManualLoad(const std::map &_params); - // Documentation Inherited. public: virtual void Destroy() override; @@ -264,9 +262,6 @@ namespace gz /// \brief True to use the current opengl context private: bool useCurrentGLContext = false; - private: std::string SDL2x11 = {}; - private: std::string parentWindowHandle = {}; - /// \brief Pointer to private data private: std::unique_ptr dataPtr; diff --git a/ogre2/src/Ogre2RenderEngine.cc b/ogre2/src/Ogre2RenderEngine.cc index 97d9be678..3983ffe4c 100644 --- a/ogre2/src/Ogre2RenderEngine.cc +++ b/ogre2/src/Ogre2RenderEngine.cc @@ -15,6 +15,7 @@ * */ +#include #ifdef _WIN32 // Ensure that Winsock2.h is included before Windows.h, which can get // pulled in by anybody (e.g., Boost). 
@@ -73,6 +74,37 @@ #include #endif + +class GZ_RENDERING_OGRE2_HIDDEN Ogre2LogListener: public Ogre::LogListener +{ + public: + ~Ogre2LogListener() override = default; + void messageLogged( + const Ogre::String &message, + Ogre::LogMessageLevel lml, + bool maskDebug, + const Ogre::String &logName, + bool &skipThisMessage ) override + { + std::stringstream msg; + + msg << "[" << logName << "] " << message << std::endl; + + switch (lml) + { + case Ogre::LogMessageLevel::LML_NORMAL: + gz::common::Console::msg() << msg.str(); + break; + case Ogre::LogMessageLevel::LML_TRIVIAL: + gz::common::Console::dbg() << msg.str(); + break; + case Ogre::LogMessageLevel::LML_CRITICAL: + gz::common::Console::err() << msg.str(); + break; + } + } +}; + class GZ_RENDERING_OGRE2_HIDDEN gz::rendering::Ogre2RenderEnginePrivate { @@ -170,9 +202,7 @@ Ogre::Window * Ogre2RenderEngine::OgreWindow() const } ////////////////////////////////////////////////// -Ogre2RenderEngine::~Ogre2RenderEngine() -{ -} +Ogre2RenderEngine::~Ogre2RenderEngine() = default; ////////////////////////////////////////////////// void Ogre2RenderEngine::Destroy() @@ -189,7 +219,7 @@ void Ogre2RenderEngine::Destroy() this->dataPtr->hlmsPbsTerraShadows.reset(); - if (this->ogreRoot) + if (this->ogreRoot != nullptr) { // Clean up any textures that may still be in flight. 
Ogre::TextureGpuManager *mgr = @@ -218,19 +248,19 @@ void Ogre2RenderEngine::Destroy() this->ogreLogManager = nullptr; #if HAVE_GLX - if (this->dummyDisplay) + if (this->dummyDisplay != nullptr) { - Display *x11Display = static_cast(this->dummyDisplay); - if (this->dummyContext) + auto *x11Display = static_cast(this->dummyDisplay); + if (this->dummyContext != nullptr) { - GLXContext x11Context = static_cast(this->dummyContext); + auto *x11Context = static_cast(this->dummyContext); glXDestroyContext(x11Display, x11Context); this->dummyContext = nullptr; } XDestroyWindow(x11Display, this->dummyWindowId); XCloseDisplay(x11Display); this->dummyDisplay = nullptr; - if (this->dataPtr->dummyFBConfigs) + if (this->dataPtr->dummyFBConfigs != nullptr) { XFree(this->dataPtr->dummyFBConfigs); this->dataPtr->dummyFBConfigs = nullptr; @@ -362,142 +392,6 @@ SceneStorePtr Ogre2RenderEngine::Scenes() const return this->scenes; } -////////////////////////////////////////////////// -void Ogre2RenderEngine::ManualLoad(const std::map &_params) -{ - this->dataPtr->vkExternalInstance.instance = nullptr; - this->dataPtr->vkExternalDevice.physicalDevice = nullptr; - this->dataPtr->vkExternalDevice.device = nullptr; - this->dataPtr->vkExternalDevice.graphicsQueue = nullptr; - this->dataPtr->vkExternalDevice.presentQueue = nullptr; - - auto it = _params.find("vulkan"); - if (it != _params.end()) - { - bool useVulkan {false}; - std::istringstream(it->second) >> useVulkan; - if (useVulkan) - { - gzdbg << "[OGRE2] Using Vulkan Backend" << std::endl; - this->dataPtr->graphicsAPI = GraphicsAPI::VULKAN; - - it = _params.find("external_instance"); - if (it != _params.end()) - { - const auto *gzExternalInstance = - reinterpret_cast( - Ogre::StringConverter::parseUnsignedLong(it->second)); - - this->dataPtr->vkExternalInstance.instance = - gzExternalInstance->instance; - // This works as long as std::vector memory is actually contiguous - this->dataPtr->vkExternalInstance.instanceLayers.appendPOD( - 
&*gzExternalInstance->instanceLayers.begin(), - &*gzExternalInstance->instanceLayers.end()); - this->dataPtr->vkExternalInstance.instanceExtensions.appendPOD( - &*gzExternalInstance->instanceExtensions.begin(), - &*gzExternalInstance->instanceExtensions.end()); - } - - it = _params.find("external_device"); - if (it != _params.end()) - { - const auto *gzExternalDevice = - reinterpret_cast( - Ogre::StringConverter::parseUnsignedLong(it->second)); - - this->dataPtr->vkExternalDevice.physicalDevice = - gzExternalDevice->physicalDevice; - this->dataPtr->vkExternalDevice.device = gzExternalDevice->device; - this->dataPtr->vkExternalDevice.graphicsQueue = - gzExternalDevice->graphicsQueue; - this->dataPtr->vkExternalDevice.presentQueue = - gzExternalDevice->presentQueue; - // This works as long as std::vector memory is actually contiguous - this->dataPtr->vkExternalDevice.deviceExtensions.appendPOD( - &*gzExternalDevice->deviceExtensions.begin(), - &*gzExternalDevice->deviceExtensions.end()); - } - } - } - - // init the resources - - try - { - this->CreateLogger(); - this->CreateRoot(); - this->CreateOverlay(); - this->LoadPlugins(); - - this->CreateRenderSystem(); - this->ogreRoot->initialise(false); - - Ogre::StringVector paramsVector; - Ogre::NameValuePairList params; - this->window = nullptr; - - params["SDL2x11"] = _params.find("SDL2x11")->second; - params["parentWindowHandle"] = _params.find("parentWindowHandle")->second; - params["FSAA"] = "0"; - params["stereoMode"] = "Frame Sequential"; - params["border"] = "none"; - - std::ostringstream stream; - stream << "OgreWindow(0)" << "_1"; - - // Needed for retina displays - params["contentScalingFactor"] = "1"; - params["gamma"] = "Yes"; - params["parentWindowHandle"] = this->winID; - - int attempts = 0; - while (this->window == nullptr && (attempts++) < 10) - { - try - { - this->window = Ogre::Root::getSingleton().createRenderWindow( - stream.str(), 640, 480, false, ¶ms); - } - catch(const std::exception &_e) - { - 
gzerr << " Unable to create the rendering window: " << _e.what() - << std::endl; - this->window = nullptr; - } - } - - if (attempts >= 10) - { - gzerr << "Unable to create the rendering window after [" << attempts - << "] attempts." << std::endl; - } - - this->RegisterHlms(); - - if (this->window) - { - this->window->_setVisible(true); - } - - this->CreateResources(); - - this->loaded = true; - this->initialized = true; - } - catch (Ogre::Exception &ex) - { - gzerr << ex.what() << std::endl; - } - catch (...) - { - gzerr << "Failed to load render-engine" << std::endl; - } - - Ogre::ResourceGroupManager::getSingleton().initialiseAllResourceGroups(false); - this->scenes = Ogre2SceneStorePtr(new Ogre2SceneStore); -} - ////////////////////////////////////////////////// bool Ogre2RenderEngine::LoadImpl( const std::map &_params) @@ -518,22 +412,12 @@ bool Ogre2RenderEngine::LoadImpl( it = _params.find("metal"); if (it != _params.end()) { - bool useMetal; + bool useMetal {false}; std::istringstream(it->second) >> useMetal; - if(useMetal) + if (useMetal) this->dataPtr->graphicsAPI = GraphicsAPI::METAL; } - it = _params.find("SDL2x11"); - if (it != _params.end()) - std::istringstream(it->second) >> this->SDL2x11; - - it = _params.find("parentWindowHandle"); - if (it != _params.end()) - { - std::istringstream(it->second) >> this->parentWindowHandle; - } - #ifdef OGRE_BUILD_RENDERSYSTEM_VULKAN this->dataPtr->vkExternalInstance.instance = nullptr; this->dataPtr->vkExternalDevice.physicalDevice = nullptr; @@ -545,7 +429,7 @@ bool Ogre2RenderEngine::LoadImpl( it = _params.find("vulkan"); if (it != _params.end()) { - bool useVulkan; + bool useVulkan {false}; std::istringstream(it->second) >> useVulkan; if(useVulkan) { @@ -635,8 +519,7 @@ void Ogre2RenderEngine::LoadAttempt() this->CreateLogger(); if (!this->useCurrentGLContext && (this->dataPtr->graphicsAPI == GraphicsAPI::OPENGL || - this->dataPtr->graphicsAPI == GraphicsAPI::VULKAN) && - !this->SDL2x11.empty()) + 
this->dataPtr->graphicsAPI == GraphicsAPI::VULKAN)) { this->CreateContext(); } @@ -662,7 +545,8 @@ void Ogre2RenderEngine::CreateLogger() // create actual log this->ogreLogManager = new Ogre::LogManager(); - this->ogreLogManager->createLog(logPath, true, false, false); + auto *log = this->ogreLogManager->createLog(logPath, true, false, false); + log->addListener(new Ogre2LogListener()); } ////////////////////////////////////////////////// @@ -843,7 +727,6 @@ void Ogre2RenderEngine::LoadPlugins() if (!common::exists(filename)) { filename = filename + "." + std::string(OGRE2_VERSION); - if (!common::exists(filename)) { if (piter->name.find("RenderSystem") != std::string::npos) @@ -1049,14 +932,32 @@ void Ogre2RenderEngine::RegisterHlms() rootHlmsFolder, "2.0", "scripts", "Compositors"); Ogre::ResourceGroupManager::getSingleton().addResourceLocation( pbsCompositorFolder, "FileSystem", "General"); + Ogre::String commonMaterialFolder = common::joinPaths( rootHlmsFolder, "2.0", "scripts", "materials", "Common"); Ogre::ResourceGroupManager::getSingleton().addResourceLocation( commonMaterialFolder, "FileSystem", "General"); + + Ogre::String commonAnyMaterialFolder = common::joinPaths( + rootHlmsFolder, "2.0", "scripts", "materials", "Common", "Any"); + Ogre::ResourceGroupManager::getSingleton().addResourceLocation( + commonAnyMaterialFolder, "FileSystem", "General"); + Ogre::String commonGLSLMaterialFolder = common::joinPaths( rootHlmsFolder, "2.0", "scripts", "materials", "Common", "GLSL"); Ogre::ResourceGroupManager::getSingleton().addResourceLocation( commonGLSLMaterialFolder, "FileSystem", "General"); + + Ogre::String commonGLESMaterialFolder = common::joinPaths( + rootHlmsFolder, "2.0", "scripts", "materials", "Common", "GLES"); + Ogre::ResourceGroupManager::getSingleton().addResourceLocation( + commonGLESMaterialFolder, "FileSystem", "General"); + + Ogre::String commonHLSLMaterialFolder = common::joinPaths( + rootHlmsFolder, "2.0", "scripts", "materials", "Common", 
"HLSL"); + Ogre::ResourceGroupManager::getSingleton().addResourceLocation( + commonHLSLMaterialFolder, "FileSystem", "General"); + Ogre::String terraMaterialFolder = common::joinPaths( rootHlmsFolder, "2.0", "scripts", "materials", "Terra"); Ogre::ResourceGroupManager::getSingleton().addResourceLocation( @@ -1284,6 +1185,8 @@ void Ogre2RenderEngine::CreateResources() std::make_pair(p + "/Hlms/Common/Any", "General")); archNames.push_back( std::make_pair(p + "/Hlms/Common/GLSL", "General")); + archNames.push_back( + std::make_pair(p + "/Hlms/Common/HLSL", "General")); archNames.push_back( std::make_pair(p + "/Hlms/Pbs/Any", "General")); @@ -1346,13 +1249,16 @@ std::string Ogre2RenderEngine::CreateRenderWindow(const std::string &_handle, const unsigned int _width, const unsigned int _height, const double _ratio, const unsigned int _antiAliasing) { - Ogre::StringVector paramsVector; + Ogre::StringVector paramsVector; Ogre::NameValuePairList params; this->window = nullptr; - if (this->dataPtr->graphicsAPI == GraphicsAPI::VULKAN && !this->SDL2x11.empty()) + if (this->dataPtr->graphicsAPI == GraphicsAPI::VULKAN) { - params["SDL2x11"] = this->SDL2x11; + if (!_handle.empty()) + { + params["SDL2x11"] = _handle; + } } else { @@ -1386,7 +1292,7 @@ std::string Ogre2RenderEngine::CreateRenderWindow(const std::string &_handle, stream << "OgreWindow(0)" << "_" << _handle; // Needed for retina displays - params["contentScalingFactor"] = (_ratio); + params["contentScalingFactor"] = std::to_string(_ratio); // Ogre 2 PBS expects gamma correction params["gamma"] = "Yes"; @@ -1435,12 +1341,12 @@ std::string Ogre2RenderEngine::CreateRenderWindow(const std::string &_handle, { gzerr << "Unable to create the rendering window after [" << attempts << "] attempts." 
<< std::endl; - return std::string(); + return {}; } this->RegisterHlms(); - if (this->window) + if (this->window != nullptr) { this->window->_setVisible(true); @@ -1448,7 +1354,6 @@ std::string Ogre2RenderEngine::CreateRenderWindow(const std::string &_handle, // this->window->reposition(0, 0); } return stream.str(); - } ////////////////////////////////////////////////// diff --git a/ogre2/src/media/2.0/scripts/materials/Common/Any/AtmosphereNprSky_ps.any b/ogre2/src/media/2.0/scripts/materials/Common/Any/AtmosphereNprSky_ps.any new file mode 100644 index 000000000..2180d2839 --- /dev/null +++ b/ogre2/src/media/2.0/scripts/materials/Common/Any/AtmosphereNprSky_ps.any @@ -0,0 +1,90 @@ +// NPR = Non-Physically-based Rendering Atmo +// +// This file is used by both Hlms and low level materials +// Which is why Hlms syntax is in comments + +//#include "SyntaxHighlightingMisc.h" + +// @property( atmosky_npr ) + +#ifdef HEADER + +float getSunDisk( const float LdotV, const float sunY, const float sunPower ) +{ + return pow( LdotV, lerp( 4.0f, 8500.0f, sunY ) * 0.25f ) * sunPower; +} + +float getMie( const float LdotV, const float sunY ) +{ + return pow( LdotV, lerp( 1.0f, 1.0f, sunY ) ); +} + +// See https://en.wikipedia.org/wiki/Rayleigh_distribution +// It's inspired, not fully based. 
+// +// The formula also gives us the nice property that for inputs +// where absorption is in range [0; 1] the output is also in range [0; 1] +float3 getSkyRayleighAbsorption( float3 vDir, const float density ) +{ + float3 absorption = -density * vDir; + absorption = exp2( absorption ) * 2.0f; + return absorption; +} + +float3 pow3( float3 v, float e ) +{ + return float3( pow( v.x, e ), pow( v.y, e ), pow( v.z, e ) ); +} + +#else + +float3 atmoCameraDir = normalize( inPs.cameraDir ); + +const float LdotV = max( dot( atmoCameraDir, p_sunDir ), 0.0f ); + +atmoCameraDir.y += p_densityDiffusion * 0.075f * ( 1.0f - atmoCameraDir.y ) * ( 1.0f - atmoCameraDir.y ); +atmoCameraDir += p_cameraDisplacement.xyz; +atmoCameraDir = normalize( atmoCameraDir ); + +atmoCameraDir.y = max( atmoCameraDir.y, p_borderLimit ); + +// It's not a mistake. We do this twice. Doing it before p_borderLimit +// allows the horizon to shift. Doing it after p_borderLimit lets +// the sky get darker as we go upwards. +atmoCameraDir += p_cameraDisplacement.xyz; +atmoCameraDir = normalize( atmoCameraDir ); + +const float LdotV360 = dot( atmoCameraDir, p_sunDir ) * 0.5f + 0.5f; + +// ptDensity gets smaller as sunHeight gets bigger +// ptDensity gets smaller as atmoCameraDir.y gets bigger +const float ptDensity = + p_densityCoeff / pow( max( atmoCameraDir.y / ( 1.0f - p_sunHeight ), 0.0035f ), + lerp( 0.10f, p_densityDiffusion, pow( atmoCameraDir.y, 0.3f ) ) ); + +const float sunDisk = getSunDisk( LdotV, p_sunHeight, p_sunPower ); + +const float antiMie = max( p_sunHeightWeight, 0.08f ); + +const float3 skyAbsorption = getSkyRayleighAbsorption( p_skyColour, ptDensity ); +// const float3 skyColourGradient = pow3( exp2( -atmoCameraDir.y / p_skyColour ), +// lerp( 15.5f, 1.5f, pow( p_sunHeightWeight, 0.5f ) ) ); +const float3 skyColourGradient = pow3( exp2( -atmoCameraDir.y / p_skyColour ), 1.5f ); + +const float mie = getMie( LdotV360, p_sunHeightWeight ); + +float3 atmoColour = float3( 0.0f, 0.0f, 
0.0f ); + +const float3 sharedTerms = skyColourGradient * skyAbsorption; + +atmoColour += antiMie * sharedTerms * p_sunAbsorption; +atmoColour += ( mie * ptDensity * p_lightDensity ) * sharedTerms * p_skyLightAbsorption; +atmoColour += mie * p_mieAbsorption; +atmoColour *= p_lightDensity; + +atmoColour *= p_finalMultiplier; +atmoColour += sunDisk * p_skyLightAbsorption; + +#endif + +// @end !atmosky_npr diff --git a/ogre2/src/media/2.0/scripts/materials/Common/Any/PccDepthCompressor_ps.any b/ogre2/src/media/2.0/scripts/materials/Common/Any/PccDepthCompressor_ps.any new file mode 100644 index 000000000..2efaf1be5 --- /dev/null +++ b/ogre2/src/media/2.0/scripts/materials/Common/Any/PccDepthCompressor_ps.any @@ -0,0 +1,47 @@ + +//#include "SyntaxHighlightingMisc.h" + +#ifdef HEADER +/** + Finds the intersection between the cube of half size probeShapeHalfSize and center at origin + and the reflDir, so that posLS + reflDirLS * retVal = intersectionPos; +*/ +INLINE float findIntersectionDistance( float3 reflDirLS, float3 posLS, float3 probeShapeHalfSize ) +{ + //Find the ray intersection with box plane + float3 invReflDirLS = float3( 1.0, 1.0, 1.0 ) / reflDirLS; + float3 intersectAtMinPlane = ( -probeShapeHalfSize - posLS ) * invReflDirLS; + float3 intersectAtMaxPlane = ( probeShapeHalfSize - posLS ) * invReflDirLS; + //Get the largest intersection values (we are not interested in negative values) + float3 largestIntersect = max( intersectAtMaxPlane.xyz, intersectAtMinPlane.xyz ); + //Get the closest of all solutions + float distance = min( min( largestIntersect.x, largestIntersect.y ), largestIntersect.z ); + return distance; +} + +#else + + float fDepth = OGRE_Sample( depthTexture, pointSampler, inPs.uv0 ).x; + + float linearDepth = p_projectionParams.y / (fDepth - p_projectionParams.x); + + float3 viewSpacePosition = inPs.cameraDir * linearDepth; + + float fDist = length( viewSpacePosition.xyz ); + float3 probeToPosDir = viewSpacePosition / fDist; + + probeToPosDir = 
mul( p_viewSpaceToProbeLocalSpace, probeToPosDir ); + + float fApproxDist = findIntersectionDistance( probeToPosDir, p_cameraPosLS, p_probeShapeHalfSize ); + + //We can't store fDist directly because we have limited precision (often 8 bits) + //Thus we store it in terms of + // fApproxDist * alpha = fDist; + //During render we'll know fApproxDist and alpha, but want to know fDist + //We also know alpha >= 0 + //For precision issues and because it's good enough, we force alpha <= 2.0 + float alpha = fDist / fApproxDist; + alpha *= 0.5; + alpha = min( alpha, 1.0 ); + +#endif diff --git a/ogre2/src/media/2.0/scripts/materials/Common/Atmosphere.material b/ogre2/src/media/2.0/scripts/materials/Common/Atmosphere.material new file mode 100644 index 000000000..61e7d92e0 --- /dev/null +++ b/ogre2/src/media/2.0/scripts/materials/Common/Atmosphere.material @@ -0,0 +1,62 @@ + +// GLSL shaders +fragment_program Ogre/Atmo/NprSky_ps_GLSL glsl +{ + source AtmosphereNprSky_ps.glsl + preprocessor_defines GL + enable_include_header true +} + +fragment_program Ogre/Atmo/NprSky_ps_VK glslvk +{ + source AtmosphereNprSky_ps.glsl + enable_include_header true +} + +// HLSL shaders +fragment_program Ogre/Atmo/NprSky_ps_HLSL hlsl +{ + source AtmosphereNprSky_ps.hlsl + entry_point main + target ps_5_0 ps_4_0 ps_4_0_level_9_1 ps_4_0_level_9_3 +} + +// Metal shaders +fragment_program Ogre/Atmo/NprSky_ps_Metal metal +{ + source AtmosphereNprSky_ps.metal + enable_include_header true + shader_reflection_pair_hint Ogre/Compositor/QuadCameraDirNoUV_vs +} + +// Unified definitions +fragment_program Ogre/Atmo/NprSky_ps unified +{ + delegate Ogre/Atmo/NprSky_ps_HLSL + delegate Ogre/Atmo/NprSky_ps_VK + delegate Ogre/Atmo/NprSky_ps_GLSL + delegate Ogre/Atmo/NprSky_ps_Metal +} + +// Material definition +material Ogre/Atmo/NprSky +{ + technique + { + pass + { + depth_check on + depth_write off + + cull_hardware none + + vertex_program_ref Ogre/Compositor/QuadCameraDirNoUV_vs + { + } + + 
fragment_program_ref Ogre/Atmo/NprSky_ps + { + } + } + } +} diff --git a/ogre2/src/media/2.0/scripts/materials/Common/DPM.material b/ogre2/src/media/2.0/scripts/materials/Common/DPM.material new file mode 100644 index 000000000..23b6c941c --- /dev/null +++ b/ogre2/src/media/2.0/scripts/materials/Common/DPM.material @@ -0,0 +1,65 @@ +//DPM stands for Dual Paraboloid Mapping. + +fragment_program Ogre/DPM/CubeToDpm_4xFP16_ps_GLSL glsl +{ + source CubeToDpm_4xFP16_ps.glsl + default_params + { + param_named cubeTexture int 0 + } +} + +fragment_program Ogre/DPM/CubeToDpm_4xFP16_ps_VK glslvk +{ + source CubeToDpm_4xFP16_ps.glsl +} + +fragment_program Ogre/DPM/CubeToDpm_4xFP16_ps_HLSL hlsl +{ + source CubeToDpm_4xFP16_ps.hlsl + entry_point main + target ps_5_0 ps_4_0 ps_4_0_level_9_1 ps_4_0_level_9_3 +} + +fragment_program Ogre/DPM/CubeToDpm_4xFP16_ps_Metal metal +{ + source CubeToDpm_4xFP16_ps.metal + shader_reflection_pair_hint Ogre/Compositor/Quad_vs +} + +fragment_program Ogre/DPM/CubeToDpm_4xFP16_ps unified +{ + delegate Ogre/DPM/CubeToDpm_4xFP16_ps_GLSL + delegate Ogre/DPM/CubeToDpm_4xFP16_ps_VK + delegate Ogre/DPM/CubeToDpm_4xFP16_ps_HLSL + delegate Ogre/DPM/CubeToDpm_4xFP16_ps_Metal +} + +// Converts a cubemap to DPM in the pixel shader. 
+material Ogre/DPM/CubeToDpm +{ + technique + { + pass + { + depth_check off + depth_func always_pass + + cull_hardware none + + vertex_program_ref Ogre/Compositor/Quad_vs + { + } + + fragment_program_ref Ogre/DPM/CubeToDpm_4xFP16_ps + { + } + + texture_unit depthTexture + { + filtering bilinear + tex_address_mode clamp + } + } + } +} diff --git a/ogre2/src/media/2.0/scripts/materials/Common/GLSL/AtmosphereNprSky_ps.glsl b/ogre2/src/media/2.0/scripts/materials/Common/GLSL/AtmosphereNprSky_ps.glsl new file mode 100644 index 000000000..820fb1e01 --- /dev/null +++ b/ogre2/src/media/2.0/scripts/materials/Common/GLSL/AtmosphereNprSky_ps.glsl @@ -0,0 +1,56 @@ +#version ogre_glsl_ver_330 + +#ifdef GL + #define const +#endif + +#define float2 vec2 +#define float3 vec3 +#define float4 vec4 +#define lerp mix + +vulkan_layout( location = 0 ) +in block +{ + vec3 cameraDir; +} inPs; + +vulkan_layout( location = 0 ) +out vec4 fragColour; + +vulkan( layout( ogre_P0 ) uniform Params { ) + uniform float4 packedParams0; + uniform float3 skyLightAbsorption; + uniform float4 sunAbsorption; + uniform float4 cameraDisplacement; + uniform float4 packedParams1; + uniform float4 packedParams2; + uniform float4 packedParams3; +vulkan( }; ) + +#define p_densityCoeff packedParams0.x +#define p_lightDensity packedParams0.y +#define p_sunHeight packedParams0.z +#define p_sunHeightWeight packedParams0.w +#define p_skyLightAbsorption skyLightAbsorption +#define p_sunAbsorption sunAbsorption.xyz +#define p_sunPower sunAbsorption.w +#define p_cameraDisplacement cameraDisplacement +#define p_mieAbsorption packedParams1.xyz +#define p_finalMultiplier packedParams1.w +#define p_sunDir packedParams2.xyz +#define p_borderLimit packedParams2.w +#define p_skyColour packedParams3.xyz +#define p_densityDiffusion packedParams3.w + +#define HEADER +#include "AtmosphereNprSky_ps.any" +#undef HEADER + +void main() +{ + #include "AtmosphereNprSky_ps.any" + + fragColour.xyz = atmoColour; + fragColour.w = 1.0f; +} 
diff --git a/ogre2/src/media/2.0/scripts/materials/Common/GLSLES/CubeToDpsm_ps.glsles b/ogre2/src/media/2.0/scripts/materials/Common/GLSLES/CubeToDpsm_ps.glsles new file mode 100644 index 000000000..034906965 --- /dev/null +++ b/ogre2/src/media/2.0/scripts/materials/Common/GLSLES/CubeToDpsm_ps.glsles @@ -0,0 +1,35 @@ +#version 300 es +precision highp float; + +uniform samplerCube depthTexture; + +in block +{ + vec2 uv0; +} inPs; + +in vec4 gl_FragCoord; +//out float gl_FragDepth; + +#if OUTPUT_TO_COLOUR + out float fragColour; +#endif + +void main() +{ + vec3 cubeDir; + + cubeDir.x = mod( inPs.uv0.x, 0.5 ) * 4.0 - 1.0; + cubeDir.y = inPs.uv0.y * 2.0 - 1.0; + cubeDir.z = 0.5 - 0.5 * (cubeDir.x * cubeDir.x + cubeDir.y * cubeDir.y); + + cubeDir.z = inPs.uv0.x < 0.5 ? cubeDir.z : -cubeDir.z; + + float depthValue = textureLod( depthTexture, cubeDir.xyz, 0 ).x; + +#if OUTPUT_TO_COLOUR + fragColour = depthValue; +#else + gl_FragDepth = depthValue; +#endif +} diff --git a/ogre2/src/media/2.0/scripts/materials/Common/GLSLES/QuadCameraDir_vs.glsles b/ogre2/src/media/2.0/scripts/materials/Common/GLSLES/QuadCameraDir_vs.glsles new file mode 100644 index 000000000..c24467e56 --- /dev/null +++ b/ogre2/src/media/2.0/scripts/materials/Common/GLSLES/QuadCameraDir_vs.glsles @@ -0,0 +1,25 @@ +#version 300 es +precision highp float; + +in vec4 vertex; +in vec3 normal; +in vec2 uv0; +uniform mat4 worldViewProj; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +out block +{ + vec2 uv0; + vec3 cameraDir; +} outVs; + +void main() +{ + gl_Position = (worldViewProj * vertex).xyzw; + outVs.uv0.xy = uv0.xy; + outVs.cameraDir.xyz = normal.xyz; +} diff --git a/ogre2/src/media/2.0/scripts/materials/Common/GLSLES/Quad_vs.glsles b/ogre2/src/media/2.0/scripts/materials/Common/GLSLES/Quad_vs.glsles new file mode 100644 index 000000000..3a4479fdd --- /dev/null +++ b/ogre2/src/media/2.0/scripts/materials/Common/GLSLES/Quad_vs.glsles @@ -0,0 +1,22 @@ +#version 300 es +precision highp float; + +in vec4 
vertex; +in vec2 uv0; +uniform mat4 worldViewProj; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +out block +{ + vec2 uv0; +} outVs; + +void main() +{ + gl_Position = worldViewProj * vertex; + outVs.uv0.xy = uv0.xy; +} diff --git a/ogre2/src/media/2.0/scripts/materials/Common/HLSL/AtmosphereNprSky_ps.hlsl b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/AtmosphereNprSky_ps.hlsl new file mode 100644 index 000000000..0972322e8 --- /dev/null +++ b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/AtmosphereNprSky_ps.hlsl @@ -0,0 +1,47 @@ + +struct PS_INPUT +{ + float3 cameraDir : TEXCOORD0; +}; + +#define p_densityCoeff packedParams0.x +#define p_lightDensity packedParams0.y +#define p_sunHeight packedParams0.z +#define p_sunHeightWeight packedParams0.w +#define p_skyLightAbsorption skyLightAbsorption +#define p_sunAbsorption sunAbsorption.xyz +#define p_sunPower sunAbsorption.w +#define p_cameraDisplacement cameraDisplacement +#define p_mieAbsorption packedParams1.xyz +#define p_finalMultiplier packedParams1.w +#define p_sunDir packedParams2.xyz +#define p_borderLimit packedParams2.w +#define p_skyColour packedParams3.xyz +#define p_densityDiffusion packedParams3.w + +#define HEADER +#include "AtmosphereNprSky_ps.any" +#undef HEADER + +float4 main +( + PS_INPUT inPs, + + uniform float4 packedParams0, + uniform float3 skyLightAbsorption, + uniform float4 sunAbsorption, + uniform float4 cameraDisplacement, + uniform float4 packedParams1, + uniform float4 packedParams2, + uniform float4 packedParams3 +) : SV_Target0 +{ + float4 fragColour; + + #include "AtmosphereNprSky_ps.any" + + fragColour.xyz = atmoColour; + fragColour.w = 1.0f; + + return fragColour; +} diff --git a/ogre2/src/media/2.0/scripts/materials/Common/HLSL/Copyback_1xFP32_ps.hlsl b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/Copyback_1xFP32_ps.hlsl new file mode 100644 index 000000000..6861022d3 --- /dev/null +++ b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/Copyback_1xFP32_ps.hlsl @@ 
-0,0 +1,8 @@ + +Texture2D myTexture : register(t0); +SamplerState mySampler : register(s0); + +float main( float2 uv : TEXCOORD0 ) : SV_Target +{ + return myTexture.Sample( mySampler, uv ).x; +} diff --git a/ogre2/src/media/2.0/scripts/materials/Common/HLSL/Copyback_4xFP32_2DArray_ps.hlsl b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/Copyback_4xFP32_2DArray_ps.hlsl new file mode 100644 index 000000000..8d9ac85d4 --- /dev/null +++ b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/Copyback_4xFP32_2DArray_ps.hlsl @@ -0,0 +1,8 @@ + +Texture2DArray myTexture : register(t0); +SamplerState mySampler : register(s0); + +float4 main( float2 uv : TEXCOORD0, uniform float sliceIdx ) : SV_Target +{ + return myTexture.Sample( mySampler, float3( uv, sliceIdx ) ).xyzw; +} diff --git a/ogre2/src/media/2.0/scripts/materials/Common/HLSL/Copyback_4xFP32_ps.hlsl b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/Copyback_4xFP32_ps.hlsl new file mode 100644 index 000000000..ab105ed23 --- /dev/null +++ b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/Copyback_4xFP32_ps.hlsl @@ -0,0 +1,8 @@ + +Texture2D myTexture : register(t0); +SamplerState mySampler : register(s0); + +float4 main( float2 uv : TEXCOORD0 ) : SV_Target +{ + return myTexture.Sample( mySampler, uv ).xyzw; +} diff --git a/ogre2/src/media/2.0/scripts/materials/Common/HLSL/CubeToDpm_4xFP16_ps.hlsl b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/CubeToDpm_4xFP16_ps.hlsl new file mode 100644 index 000000000..cf9efaade --- /dev/null +++ b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/CubeToDpm_4xFP16_ps.hlsl @@ -0,0 +1,25 @@ + +TextureCube cubeTexture : register(t0); +SamplerState samplerState : register(s0); + +struct PS_INPUT +{ + float2 uv0 : TEXCOORD0; +}; + +float4 main +( + PS_INPUT inPs, + uniform float lodLevel +) : SV_Target0 +{ + float3 cubeDir; + + cubeDir.x = fmod( inPs.uv0.x, 0.5 ) * 4.0 - 1.0; + cubeDir.y = inPs.uv0.y * 2.0 - 1.0; + cubeDir.z = 0.5 - 0.5 * (cubeDir.x * cubeDir.x + cubeDir.y * 
cubeDir.y); + + cubeDir.z = inPs.uv0.x < 0.5 ? cubeDir.z : -cubeDir.z; + + return cubeTexture.SampleLevel( samplerState, cubeDir.xyz, lodLevel ).xyzw; +} diff --git a/ogre2/src/media/2.0/scripts/materials/Common/HLSL/CubeToDpsm_ps.hlsl b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/CubeToDpsm_ps.hlsl new file mode 100644 index 000000000..5b461d1c6 --- /dev/null +++ b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/CubeToDpsm_ps.hlsl @@ -0,0 +1,30 @@ + +TextureCube depthTexture : register(t0); +SamplerState samplerState : register(s0); + +struct PS_INPUT +{ + float2 uv0 : TEXCOORD0; +}; + +float main +( + PS_INPUT inPs +#if OUTPUT_TO_COLOUR +) : SV_Target0 +#else +) : SV_Depth +#endif +{ + float3 cubeDir; + + cubeDir.x = fmod( inPs.uv0.x, 0.5 ) * 4.0 - 1.0; + cubeDir.y = inPs.uv0.y * 2.0 - 1.0; + cubeDir.z = 0.5 - 0.5 * (cubeDir.x * cubeDir.x + cubeDir.y * cubeDir.y); + + cubeDir.z = inPs.uv0.x < 0.5 ? cubeDir.z : -cubeDir.z; + + float depthValue = depthTexture.SampleLevel( samplerState, cubeDir.xyz, 0 ).x; + + return depthValue; +} diff --git a/ogre2/src/media/2.0/scripts/materials/Common/HLSL/DepthDownscaleMax_ps.hlsl b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/DepthDownscaleMax_ps.hlsl new file mode 100644 index 000000000..a8023c123 --- /dev/null +++ b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/DepthDownscaleMax_ps.hlsl @@ -0,0 +1,21 @@ +struct PS_INPUT +{ + float2 uv0 : TEXCOORD0; +}; + +Texture2D depthTexture : register(t0); + +float main +( + PS_INPUT inPs, + float4 gl_FragCoord : SV_Position +) : SV_Depth +{ + float fDepth0 = depthTexture.Load( int3( int2(gl_FragCoord.xy * 2.0), 0 ) ).x; + float fDepth1 = depthTexture.Load( int3( int2(gl_FragCoord.xy * 2.0) + int2( 0, 1 ), 0 ) ).x; + float fDepth2 = depthTexture.Load( int3( int2(gl_FragCoord.xy * 2.0) + int2( 1, 0 ), 0 ) ).x; + float fDepth3 = depthTexture.Load( int3( int2(gl_FragCoord.xy * 2.0) + int2( 1, 1 ), 0 ) ).x; + + //return depthTexture.Load( int3( int2(gl_FragCoord.xy * 2.0), 0 
) ).x; + return max( max( fDepth0, fDepth1 ), max( fDepth2, fDepth3 ) ); +} diff --git a/ogre2/src/media/2.0/scripts/materials/Common/HLSL/EsmGaussianBlurLogFilter_cs.hlsl b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/EsmGaussianBlurLogFilter_cs.hlsl new file mode 100644 index 000000000..8a4a906c2 --- /dev/null +++ b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/EsmGaussianBlurLogFilter_cs.hlsl @@ -0,0 +1,22 @@ +//Based on GPUOpen's samples SeparableFilter11 +//https://github.com/GPUOpen-LibrariesAndSDKs/SeparableFilter11 +//For better understanding, read "Efficient Compute Shader Programming" from Bill Bilodeau +//http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2012/10/Efficient%20Compute%20Shader%20Programming.pps + +//TL;DR: +// * Each thread works on 4 pixels at a time (for VLIW hardware, i.e. Radeon HD 5000 & 6000 series). +// * 256 pixels per threadgroup. Each threadgroup works on 2 rows of 128 pixels each. +// That means 32x2 threads = 64. 64 threads x 4 pixels per thread = 256 + +@piece( data_type )float@end +@piece( lds_data_type )float@end +@piece( lds_definition )groupshared float g_f3LDS[ 2 ] [ @value( samples_per_threadgroup ) ];@end + +@piece( image_sample ) + return inputImage.SampleLevel( inputSampler, f2SamplePosition, 0 ).x; +@end + +@piece( image_store ) + @foreach( 4, iPixel ) + outputImage[i2Center + @iPixel * i2Inc] = outColour[ @iPixel ];@end +@end diff --git a/ogre2/src/media/2.0/scripts/materials/Common/HLSL/GaussianBlurBase_cs.hlsl b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/GaussianBlurBase_cs.hlsl new file mode 100644 index 000000000..314ae513b --- /dev/null +++ b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/GaussianBlurBase_cs.hlsl @@ -0,0 +1,242 @@ + +//Based on GPUOpen's samples SeparableFilter11 +//https://github.com/GPUOpen-LibrariesAndSDKs/SeparableFilter11 +//For better understanding, read "Efficient Compute Shader Programming" from Bill Bilodeau 
+//http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2012/10/Efficient%20Compute%20Shader%20Programming.pps + +//TL;DR: +// * Each thread works on 4 pixels at a time (for VLIW hardware, i.e. Radeon HD 5000 & 6000 series). +// * 256 pixels per threadgroup. Each threadgroup works on 2 rows of 128 pixels each. +// That means 32x2 threads = 64. 64 threads x 4 pixels per thread = 256 + +// For this shader to work, several pieces need to be defined: +// data_type (i.e. float3) +// lds_data_type (i.e. float3, uint) +// lds_definition +// image_store +// image_sample +// decode_lds (optional, i.e. when lds_data_type != data_type) +// Define the property "downscale" if you're doing a downsample. +// Define "downscale_lq" (must also define downscale) for SLIGHTLY lower quality downscale +// The script uses the template syntax to automatically set the num. of threadgroups +// based on the bound input texture. + +SamplerState inputSampler : register(s0); +@property( texture0_texture_type == TextureTypes_Type2DArray ) + Texture2DArray inputImage : register(t0); +@else + Texture2D inputImage : register(t0); +@end + +@property( uav0_texture_type == TextureTypes_Type2DArray ) + RWTexture2DArray<@insertpiece(uav0_pf_type)> outputImage : register(u0); +@else + RWTexture2D<@insertpiece(uav0_pf_type)> outputImage : register(u0); +@end + +// 32 = 128 / 4 +@pset( threads_per_group_x, 32 ) +@pset( threads_per_group_y, 2 ) +@pset( threads_per_group_z, 1 ) + +@pmul( pixelsPerRow, threads_per_group_x, 4 ) +@pset( rowsPerThreadGroup, threads_per_group_y ) +@pset( num_thread_groups_z, 1 ) + +@set( input_width, uav0_width_with_lod ) +@set( input_height, uav0_height_with_lod ) + +@property( horizontal_pass ) + @property( downscale ) @mul( input_width, 2 ) @end + + /// Calculate num_thread_groups_ + /// num_thread_groups_x = (texture0_width + pixelsPerRow - 1) / pixelsPerRow + /// num_thread_groups_y = (texture0_height + rowsPerThreadGroup - 1) / rowsPerThreadGroup + @add( 
num_thread_groups_x, input_width, pixelsPerRow ) + @sub( num_thread_groups_x, 1 ) + @div( num_thread_groups_x, pixelsPerRow ) + + @add( num_thread_groups_y, input_height, rowsPerThreadGroup ) + @sub( num_thread_groups_y, 1 ) + @div( num_thread_groups_y, rowsPerThreadGroup ) +@end @property( !horizontal_pass ) + @property( downscale ) @mul( input_height, 2 ) @end + + /// Calculate num_thread_groups_ + /// num_thread_groups_x = (texture0_width + rowsPerThreadGroup - 1) / rowsPerThreadGroup + /// num_thread_groups_y = (texture0_height + pixelsPerRow - 1) / pixelsPerRow + @add( num_thread_groups_x, input_width, rowsPerThreadGroup ) + @sub( num_thread_groups_x, 1 ) + @div( num_thread_groups_x, rowsPerThreadGroup ) + + @add( num_thread_groups_y, input_height, pixelsPerRow ) + @sub( num_thread_groups_y, 1 ) + @div( num_thread_groups_y, pixelsPerRow ) +@end + +/// groupshared float3 g_f3LDS[ 2 ] [ @value( samples_per_threadgroup ) ]; +@insertpiece( lds_definition ) + +uniform float4 g_f4OutputSize; + +//Tightly pack the weights +uniform float4 c_weights[(@value( kernel_radius_plus1 ) + 3u) / 4u]; + +@insertpiece( lds_data_type ) sampleTex( int2 i2Position , float2 f2Offset ) +{ + float2 f2SamplePosition = float2( i2Position ) + float2( 0.5f, 0.5f ); + + f2SamplePosition *= g_f4OutputSize.zw; + ///return inputImage.SampleLevel( inputSampler, f2SamplePosition, 0 ).xyz; + @insertpiece( image_sample ) +} + +void ComputeFilterKernel( int iPixelOffset, int iLineOffset, int2 i2Center, int2 i2Inc ) +{ + @property( !downscale_lq ) + @insertpiece( data_type ) outColour[ 4 ]; + @end @property( downscale_lq ) + @insertpiece( data_type ) outColour[ 2 ]; + @end + @insertpiece( data_type ) RDI[ 4 ] ; + + @foreach( 4, iPixel ) + RDI[ @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset + @value( kernel_radius ) + @iPixel ] );@end + + @property( !downscale_lq ) + @foreach( 4, iPixel ) + outColour[ @iPixel ] = RDI[ @iPixel ] * c_weights[ @value( kernel_radius ) >> 2u 
][ @value( kernel_radius ) & 3u ];@end + @end @property( downscale_lq ) + @foreach( 2, iPixel ) + outColour[ @iPixel ] = RDI[ @iPixel * 2 ] * c_weights[ @value( kernel_radius ) >> 2u ][ @value( kernel_radius ) & 3u ];@end + @end + + @foreach( 4, iPixel ) + RDI[ @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset + @iPixel ] );@end + + iPixelOffset += 4; + + /// Deal with taps to our left. + /// for ( iIteration = 0; iIteration < radius; iIteration += 1 ) + @foreach( kernel_radius, iIteration ) + @property( !downscale_lq ) + @foreach( 4, iPixel ) + outColour[ @iPixel ] += RDI[ @iPixel ] * c_weights[ @iIteration >> 2u ][ @iIteration & 3u ];@end + @end @property( downscale_lq ) + @foreach( 2, iPixel ) + outColour[ @iPixel ] += RDI[ @iPixel * 2 ] * c_weights[ @iIteration >> 2u ][ @iIteration & 3u ];@end + @end + @foreach( 3, iPixel ) + RDI[ @iPixel ] = RDI[ @iPixel + ( 1 ) ];@end + @foreach( 1, iPixel ) + RDI[ 4 - 1 + @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset + @iIteration + @iPixel ] );@end + @end + + @foreach( 4, iPixel ) + RDI[ @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset - 4 + @value( kernel_radius ) + 1 + @iPixel ] );@end + + @padd( kernel_radius_plus1, kernel_radius, 1 ) + @pmul( kernel_radius2x_plus1, kernel_radius, 2 ) + @padd( kernel_radius2x_plus1, 1 ) + + @pmul( kernel_radius2x, kernel_radius, 2 ) + + /// Deal with taps to our right. 
+ /// for ( iIteration = radius + 1; iIteration < ( radius * 2 + 1 ); iIteration += 1 ) + @foreach( kernel_radius2x_plus1, iIteration, kernel_radius_plus1 ) + @property( !downscale_lq ) + @foreach( 4, iPixel ) + outColour[ @iPixel ] += RDI[ @iPixel ] * c_weights[ (@value( kernel_radius2x ) - @iIteration) >> 2u ][ (@value( kernel_radius2x ) - @iIteration) & 3u ];@end + @end @property( downscale_lq ) + @foreach( 2, iPixel ) + outColour[ @iPixel ] += RDI[ @iPixel * 2 ] * c_weights[ (@value( kernel_radius2x ) - @iIteration) >> 2u ][ (@value( kernel_radius2x ) - @iIteration) & 3u ];@end + @end + @foreach( 3, iPixel ) + RDI[ @iPixel ] = RDI[ @iPixel + ( 1 ) ];@end + @foreach( 1, iPixel ) + RDI[ 4 - 1 + @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset + @iIteration + @iPixel ] );@end + @end + + /* + foreach( 4, iPixel ) + outputImage[i2Center + iPixel * i2Inc] = float4( outColour[ iPixel ], 1.0 ) );end + */ + @insertpiece( image_store ) +} + +[numthreads(@value( threads_per_group_x ), @value( threads_per_group_y ), @value( threads_per_group_z ))] +void main( uint3 gl_WorkGroupID : SV_GroupID, uint3 gl_LocalInvocationID : SV_GroupThreadID ) +{ + /// samples_per_threadgroup = 128 + ( ( kernel_radius * 2 + 1 ) - 1 ) + /// samples_per_thread = ( 128 + ( ( kernel_radius * 2 + 1 ) - 1 ) ) / ( 128 / 4 ) + @padd( samples_per_threadgroup, 127, kernel_radius2x_plus1 ) + @pdiv( samples_per_thread, samples_per_threadgroup, 32 ) + +@property( horizontal_pass ) + int iSampleOffset = int( gl_LocalInvocationID.x * @value( samples_per_thread ) ); + int iLineOffset = int( gl_LocalInvocationID.y ); + + int2 i2GroupCoord = int2( ( gl_WorkGroupID.x << 7u ) - @value( kernel_radius )u, gl_WorkGroupID.y << 1u ); + int2 i2Coord = int2( i2GroupCoord.x + iSampleOffset, i2GroupCoord.y ); + + @foreach( samples_per_thread, i ) + g_f3LDS[ iLineOffset ][ iSampleOffset + @i ] = sampleTex( i2Coord + int2( @i, gl_LocalInvocationID.y ) , float2( 0.5f, 0.0f ) );@end + + if( 
gl_LocalInvocationID.x < @value( samples_per_threadgroup )u - 32u * @value( samples_per_thread )u ) + { + g_f3LDS[ iLineOffset ][ @value(samples_per_threadgroup)u - 1u - gl_LocalInvocationID.x ] = + sampleTex( i2GroupCoord + int2( @value(samples_per_threadgroup)u - 1u - gl_LocalInvocationID.x, gl_LocalInvocationID.y ), float2( 0.5f, 0.0f ) ); + } + + GroupMemoryBarrierWithGroupSync(); + + int iPixelOffset = int( gl_LocalInvocationID.x << 2u ); //gl_LocalInvocationID.x * 4u + i2Coord = int2( i2GroupCoord.x + iPixelOffset, i2GroupCoord.y ); + i2Coord.x += @value( kernel_radius ); + + if( i2Coord.x < int(g_f4OutputSize.x) ) + { + int2 i2Center = i2Coord + int2( 0, gl_LocalInvocationID.y ); + int2 i2Inc = int2 ( 1, 0 ); + + @property( downscale ) + i2Center.x = int( uint( i2Center.x ) >> 1u ); + @end + + ComputeFilterKernel( iPixelOffset, iLineOffset, i2Center, i2Inc ); + } +@end @property( !horizontal_pass ) + int iSampleOffset = int( gl_LocalInvocationID.x * @value( samples_per_thread ) ); + int iLineOffset = int( gl_LocalInvocationID.y ); + + int2 i2GroupCoord = int2( gl_WorkGroupID.x << 1u, ( gl_WorkGroupID.y << 7u ) - @value( kernel_radius )u ); + int2 i2Coord = int2( i2GroupCoord.x, i2GroupCoord.y + iSampleOffset ); + + @foreach( samples_per_thread, i ) + g_f3LDS[ iLineOffset ][ iSampleOffset + @i ] = sampleTex( i2Coord + int2( gl_LocalInvocationID.y, @i ) , float2( 0.0f, 0.5f ) );@end + + if( gl_LocalInvocationID.x < @value( samples_per_threadgroup )u - 32u * @value( samples_per_thread )u ) + { + g_f3LDS[ iLineOffset ][ @value(samples_per_threadgroup)u - 1u - gl_LocalInvocationID.x ] = + sampleTex( i2GroupCoord + int2( gl_LocalInvocationID.y, @value(samples_per_threadgroup)u - 1u - gl_LocalInvocationID.x ), float2( 0.0f, 0.5f ) ); + } + + GroupMemoryBarrierWithGroupSync(); + + int iPixelOffset = int( gl_LocalInvocationID.x << 2u ); //gl_LocalInvocationID.x * 4u + i2Coord = int2( i2GroupCoord.x, i2GroupCoord.y + iPixelOffset ); + i2Coord.y += @value( 
kernel_radius ); + + if( i2Coord.y < int(g_f4OutputSize.y) ) + { + int2 i2Center = i2Coord + int2( gl_LocalInvocationID.y, 0 ); + int2 i2Inc = int2 ( 0, 1 ); + + @property( downscale ) + i2Center.y = int( uint( i2Center.y ) >> 1u ); + @end + + ComputeFilterKernel( iPixelOffset, iLineOffset, i2Center, i2Inc ); + } +@end +} diff --git a/ogre2/src/media/2.0/scripts/materials/Common/HLSL/GaussianBlurLogFilterBase_cs.hlsl b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/GaussianBlurLogFilterBase_cs.hlsl new file mode 100644 index 000000000..3369ebe5d --- /dev/null +++ b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/GaussianBlurLogFilterBase_cs.hlsl @@ -0,0 +1,263 @@ + +//See GaussianBlurBase_cs for the original. +//This is a derived version which is used for filtering ESM (Exponential Shadow Maps). +//Normally ESM is in exponential space: exp( K * linearSpaceDepth ); +//Filtering should be done in that space. +//However because of precision reasons, we store linearSpaceDepth instead. In order to perform +//correct filtering, we use the following formula: +// exp( filteredDepth ) = w0 * exp( d0 ) + w1 * exp( d1 ) + w2 * exp( d2 ) + ... +// +//But this is not precision friendly. So we do instead: +// = w0 * exp( d0 ) + w1 * exp( d1 ) + w2 * exp( d2 ) +// = exp( d0 ) * ( w0 + w1 * exp( d1 ) / exp( d0 ) + w2 * exp( d2 ) / exp( d0 ) ) +// = exp( d0 ) * ( w0 + w1 * exp( d1 - d0 ) + w2 * exp( d2 - d0 ) ) +// = exp( d0 ) * exp( log( w0 + w1 * exp( d1 - d0 ) + w2 * exp( d2 - d0 ) ) ) +// = exp( d0 + log( w0 + w1 * exp( d1 - d0 ) + w2 * exp( d2 - d0 ) ) ) +// exp( filteredDepth ) = exp( d0 + log( w0 + w1 * exp( d1 - d0 ) + w2 * exp( d2 - d0 ) ) ) +//Almost final formula: +// filteredDepth = d0 + log( w0 + w1 * exp( d1 - d0 ) + w2 * exp( d2 - d0 ) ) +// +//The formula is actually: +// exp( K * filteredDepth ) = w0 * exp( K * d0 ) + w1 * exp( K * d1 ) + w2 * exp( K * d2 ) + ... 
+//Final formula: +// = d0 + log( w0 + w1 * exp( K * (d1 - d0) ) + w2 * exp( K * (d2 - d0) ) ) / K + +//Like in the original filter: +// * Each thread works on 4 pixels at a time (for VLIW hardware, i.e. Radeon HD 5000 & 6000 series). +// * 256 pixels per threadgroup. Each threadgroup works on 2 rows of 128 pixels each. +// That means 32x2 threads = 64. 64 threads x 4 pixels per thread = 256 + +// For this shader to work, several pieces need to be defined: +// data_type (i.e. float3) +// lds_data_type (i.e. float3, uint) +// lds_definition +// image_store +// image_sample +// decode_lds (optional, i.e. when lds_data_type != data_type) +// Define the property "downscale" if you're doing a downsample. +// Define "downscale_lq" (must also define downscale) for SLIGHTLY lower quality downscale +// The script uses the template syntax to automatically set the num. of threadgroups +// based on the bound input texture. + +SamplerState inputSampler : register(s0); +Texture2D inputImage : register(t0); +RWTexture2D<@insertpiece(uav0_pf_type)> outputImage : register(u0); + +// 32 = 128 / 4 +@pset( threads_per_group_x, 32 ) +@pset( threads_per_group_y, 2 ) +@pset( threads_per_group_z, 1 ) + +@pmul( pixelsPerRow, threads_per_group_x, 4 ) +@pset( rowsPerThreadGroup, threads_per_group_y ) +@pset( num_thread_groups_z, 1 ) + +@set( input_width, uav0_width_with_lod ) +@set( input_height, uav0_height_with_lod ) + +@property( horizontal_pass ) + @property( downscale ) @mul( input_width, 2 ) @end + + /// Calculate num_thread_groups_ + /// num_thread_groups_x = (texture0_width + pixelsPerRow - 1) / pixelsPerRow + /// num_thread_groups_y = (texture0_height + rowsPerThreadGroup - 1) / rowsPerThreadGroup + @add( num_thread_groups_x, input_width, pixelsPerRow ) + @sub( num_thread_groups_x, 1 ) + @div( num_thread_groups_x, pixelsPerRow ) + + @add( num_thread_groups_y, input_height, rowsPerThreadGroup ) + @sub( num_thread_groups_y, 1 ) + @div( num_thread_groups_y, rowsPerThreadGroup ) +@end 
@property( !horizontal_pass ) + @property( downscale ) @mul( input_height, 2 ) @end + + /// Calculate num_thread_groups_ + /// num_thread_groups_x = (texture0_width + rowsPerThreadGroup - 1) / rowsPerThreadGroup + /// num_thread_groups_y = (texture0_height + pixelsPerRow - 1) / pixelsPerRow + @add( num_thread_groups_x, input_width, rowsPerThreadGroup ) + @sub( num_thread_groups_x, 1 ) + @div( num_thread_groups_x, rowsPerThreadGroup ) + + @add( num_thread_groups_y, input_height, pixelsPerRow ) + @sub( num_thread_groups_y, 1 ) + @div( num_thread_groups_y, pixelsPerRow ) +@end + +/// groupshared float3 g_f3LDS[ 2 ] [ @value( samples_per_threadgroup ) ]; +@insertpiece( lds_definition ) + +uniform float4 g_f4OutputSize; + +//Tightly pack the weights +uniform float4 c_weights[(@value( kernel_radius_plus1 ) + 3u) / 4u]; + +@insertpiece( lds_data_type ) sampleTex( int2 i2Position , float2 f2Offset ) +{ + float2 f2SamplePosition = float2( i2Position ) + float2( 0.5f, 0.5f ); + + f2SamplePosition *= g_f4OutputSize.zw; + ///return inputImage.SampleLevel( inputSampler, f2SamplePosition, 0 ).x; + @insertpiece( image_sample ) +} + +void ComputeFilterKernel( int iPixelOffset, int iLineOffset, int2 i2Center, int2 i2Inc ) +{ + @property( !downscale_lq ) + @insertpiece( data_type ) outColour[ 4 ]; + @insertpiece( data_type ) firstSmpl[ 4 ]; + @end @property( downscale_lq ) + @insertpiece( data_type ) outColour[ 2 ]; + @insertpiece( data_type ) firstSmpl[ 2 ]; + @end + @insertpiece( data_type ) RDI[ 4 ] ; + + @foreach( 4, iPixel ) + RDI[ @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset + @value( kernel_radius ) + @iPixel ] );@end + + @property( !downscale_lq ) + @foreach( 4, iPixel ) + firstSmpl[ @iPixel ].x = RDI[ @iPixel ]; + outColour[ @iPixel ].x = c_weights[ @value( kernel_radius ) >> 2u ][ @value( kernel_radius ) & 3u ];@end + @end @property( downscale_lq ) + @foreach( 2, iPixel ) + firstSmpl[ @iPixel ].x = RDI[ @iPixel * 2 ]; + outColour[ @iPixel 
].x = c_weights[ @value( kernel_radius ) >> 2u ][ @value( kernel_radius ) & 3u ];@end + @end + + @foreach( 4, iPixel ) + RDI[ @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset + @iPixel ] );@end + + iPixelOffset += 4; + + /// Deal with taps to our left. + /// for ( iIteration = 0; iIteration < radius; iIteration += 1 ) + @foreach( kernel_radius, iIteration ) + @property( !downscale_lq ) + @foreach( 4, iPixel ) + outColour[ @iPixel ].x += exp(@value(K)*(RDI[ @iPixel ] - firstSmpl[ @iPixel ])) * c_weights[ @iIteration >> 2u ][ @iIteration & 3u ];@end + @end @property( downscale_lq ) + @foreach( 2, iPixel ) + outColour[ @iPixel ].x += exp(@value(K)*(RDI[ @iPixel * 2 ] - firstSmpl[ @iPixel ])) * c_weights[ @iIteration >> 2u ][ @iIteration & 3u ];@end + @end + @foreach( 3, iPixel ) + RDI[ @iPixel ] = RDI[ @iPixel + ( 1 ) ];@end + @foreach( 1, iPixel ) + RDI[ 4 - 1 + @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset + @iIteration + @iPixel ] );@end + @end + + @foreach( 4, iPixel ) + RDI[ @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset - 4 + @value( kernel_radius ) + 1 + @iPixel ] );@end + + @padd( kernel_radius_plus1, kernel_radius, 1 ) + @pmul( kernel_radius2x_plus1, kernel_radius, 2 ) + @padd( kernel_radius2x_plus1, 1 ) + + @pmul( kernel_radius2x, kernel_radius, 2 ) + + /// Deal with taps to our right. 
+ /// for ( iIteration = radius + 1; iIteration < ( radius * 2 + 1 ); iIteration += 1 ) + @foreach( kernel_radius2x_plus1, iIteration, kernel_radius_plus1 ) + @property( !downscale_lq ) + @foreach( 4, iPixel ) + outColour[ @iPixel ].x += exp(@value(K)*(RDI[ @iPixel ] - firstSmpl[ @iPixel ])) * c_weights[ (@value( kernel_radius2x ) - @iIteration) >> 2u ][ (@value( kernel_radius2x ) - @iIteration) & 3u ];@end + @end @property( downscale_lq ) + @foreach( 2, iPixel ) + outColour[ @iPixel ].x += exp(@value(K)*(RDI[ @iPixel * 2 ] - firstSmpl[ @iPixel ])) * c_weights[ (@value( kernel_radius2x ) - @iIteration) >> 2u ][ (@value( kernel_radius2x ) - @iIteration) & 3u ];@end + @end + @foreach( 3, iPixel ) + RDI[ @iPixel ] = RDI[ @iPixel + ( 1 ) ];@end + @foreach( 1, iPixel ) + RDI[ 4 - 1 + @iPixel ] = @insertpiece( decode_lds )( g_f3LDS[ iLineOffset ][ iPixelOffset + @iIteration + @iPixel ] );@end + @end + + @property( !downscale_lq ) + @foreach( 4, iPixel ) + outColour[ @iPixel ] = firstSmpl[ @iPixel ] + log( outColour[ @iPixel ] ) / @value(K);@end + @end @property( downscale_lq ) + @foreach( 2, iPixel ) + outColour[ @iPixel ] = firstSmpl[ @iPixel ] + log( outColour[ @iPixel ] ) / @value(K);@end + @end + + /* + foreach( 4, iPixel ) + outputImage[i2Center + iPixel * i2Inc] = float4( outColour[ iPixel ], 1.0 ) );end + */ + @insertpiece( image_store ) +} + +[numthreads(@value( threads_per_group_x ), @value( threads_per_group_y ), @value( threads_per_group_z ))] +void main( uint3 gl_WorkGroupID : SV_GroupID, uint3 gl_LocalInvocationID : SV_GroupThreadID ) +{ + /// samples_per_threadgroup = 128 + ( ( kernel_radius * 2 + 1 ) - 1 ) + /// samples_per_thread = ( 128 + ( ( kernel_radius * 2 + 1 ) - 1 ) ) / ( 128 / 4 ) + @padd( samples_per_threadgroup, 127, kernel_radius2x_plus1 ) + @pdiv( samples_per_thread, samples_per_threadgroup, 32 ) + +@property( horizontal_pass ) + int iSampleOffset = int( gl_LocalInvocationID.x * @value( samples_per_thread ) ); + int iLineOffset = int( 
gl_LocalInvocationID.y ); + + int2 i2GroupCoord = int2( ( gl_WorkGroupID.x << 7u ) - @value( kernel_radius )u, gl_WorkGroupID.y << 1u ); + int2 i2Coord = int2( i2GroupCoord.x + iSampleOffset, i2GroupCoord.y ); + + @foreach( samples_per_thread, i ) + g_f3LDS[ iLineOffset ][ iSampleOffset + @i ] = sampleTex( i2Coord + int2( @i, gl_LocalInvocationID.y ) , float2( 0.5f, 0.0f ) );@end + + if( gl_LocalInvocationID.x < @value( samples_per_threadgroup )u - 32u * @value( samples_per_thread )u ) + { + g_f3LDS[ iLineOffset ][ @value(samples_per_threadgroup)u - 1u - gl_LocalInvocationID.x ] = + sampleTex( i2GroupCoord + int2( @value(samples_per_threadgroup)u - 1u - gl_LocalInvocationID.x, gl_LocalInvocationID.y ), float2( 0.5f, 0.0f ) ); + } + + GroupMemoryBarrierWithGroupSync(); + + int iPixelOffset = int( gl_LocalInvocationID.x << 2u ); //gl_LocalInvocationID.x * 4u + i2Coord = int2( i2GroupCoord.x + iPixelOffset, i2GroupCoord.y ); + i2Coord.x += @value( kernel_radius ); + + if( i2Coord.x < int(g_f4OutputSize.x) ) + { + int2 i2Center = i2Coord + int2( 0, gl_LocalInvocationID.y ); + int2 i2Inc = int2 ( 1, 0 ); + + @property( downscale ) + i2Center.x = int( uint( i2Center.x ) >> 1u ); + @end + + ComputeFilterKernel( iPixelOffset, iLineOffset, i2Center, i2Inc ); + } +@end @property( !horizontal_pass ) + int iSampleOffset = int( gl_LocalInvocationID.x * @value( samples_per_thread ) ); + int iLineOffset = int( gl_LocalInvocationID.y ); + + int2 i2GroupCoord = int2( gl_WorkGroupID.x << 1u, ( gl_WorkGroupID.y << 7u ) - @value( kernel_radius )u ); + int2 i2Coord = int2( i2GroupCoord.x, i2GroupCoord.y + iSampleOffset ); + + @foreach( samples_per_thread, i ) + g_f3LDS[ iLineOffset ][ iSampleOffset + @i ] = sampleTex( i2Coord + int2( gl_LocalInvocationID.y, @i ) , float2( 0.0f, 0.5f ) );@end + + if( gl_LocalInvocationID.x < @value( samples_per_threadgroup )u - 32u * @value( samples_per_thread )u ) + { + g_f3LDS[ iLineOffset ][ @value(samples_per_threadgroup)u - 1u - 
gl_LocalInvocationID.x ] = + sampleTex( i2GroupCoord + int2( gl_LocalInvocationID.y, @value(samples_per_threadgroup)u - 1u - gl_LocalInvocationID.x ), float2( 0.0f, 0.5f ) ); + } + + GroupMemoryBarrierWithGroupSync(); + + int iPixelOffset = int( gl_LocalInvocationID.x << 2u ); //gl_LocalInvocationID.x * 4u + i2Coord = int2( i2GroupCoord.x, i2GroupCoord.y + iPixelOffset ); + i2Coord.y += @value( kernel_radius ); + + if( i2Coord.y < int(g_f4OutputSize.y) ) + { + int2 i2Center = i2Coord + int2( gl_LocalInvocationID.y, 0 ); + int2 i2Inc = int2 ( 0, 1 ); + + @property( downscale ) + i2Center.y = int( uint( i2Center.y ) >> 1u ); + @end + + ComputeFilterKernel( iPixelOffset, iLineOffset, i2Center, i2Inc ); + } +@end +} diff --git a/ogre2/src/media/2.0/scripts/materials/Common/HLSL/GaussianBlurLogFilter_ps.hlsl b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/GaussianBlurLogFilter_ps.hlsl new file mode 100644 index 000000000..471cbde9f --- /dev/null +++ b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/GaussianBlurLogFilter_ps.hlsl @@ -0,0 +1,43 @@ + +struct PS_INPUT +{ + float2 uv0 : TEXCOORD0; +}; + +Texture2D tex : register(t0); + +float4 main +( + PS_INPUT inPs, + uniform float4 weights[(NUM_WEIGHTS + 3u) / 4u], + float4 gl_FragCoord : SV_Position +) : SV_Target +{ + float val; + float outColour; + float firstSmpl; + + firstSmpl = tex.Load( int3( int2( gl_FragCoord.xy ) - int2( HORIZONTAL_STEP * (NUM_WEIGHTS - 1), + VERTICAL_STEP * (NUM_WEIGHTS - 1) ), 0 ) ).x; + outColour = weights[0][0]; + + int i; + for( i=NUM_WEIGHTS - 1; (--i) > 0; ) + { + val = tex.Load( int3( int2( gl_FragCoord.xy ) - int2( HORIZONTAL_STEP * i, + VERTICAL_STEP * i ), 0 ) ).x; + outColour += exp( K * (val - firstSmpl) ) * weights[(NUM_WEIGHTS-i-1u) >> 2u][(NUM_WEIGHTS-i-1u) & 3u]; + } + + val = tex.Load( int3( gl_FragCoord.xy, 0 ) ).x; + outColour += exp( K * (val - firstSmpl) ) * weights[(NUM_WEIGHTS-1u) >> 2u][(NUM_WEIGHTS-1u) & 3u]; + + for( i=0; i<NUM_WEIGHTS - 1; ++i ) + { + val = tex.Load( int3( int2( gl_FragCoord.xy ) + int2( HORIZONTAL_STEP * (i+1), + VERTICAL_STEP * (i+1) ), 0 ) ).x; + outColour += exp( K * (val - firstSmpl) ) * weights[(NUM_WEIGHTS-i-2u) >> 2u][(NUM_WEIGHTS-i-2u) & 3u]; + } + + 
return firstSmpl + log( outColour ) / K; +} diff --git a/ogre2/src/media/2.0/scripts/materials/Common/HLSL/HiddenAreaMeshVr_ps.hlsl b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/HiddenAreaMeshVr_ps.hlsl new file mode 100644 index 000000000..5459e7559 --- /dev/null +++ b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/HiddenAreaMeshVr_ps.hlsl @@ -0,0 +1,4 @@ +float4 main() : SV_Target +{ + return float4( 0, 0, 0, 0 ); +} diff --git a/ogre2/src/media/2.0/scripts/materials/Common/HLSL/HiddenAreaMeshVr_vs.hlsl b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/HiddenAreaMeshVr_vs.hlsl new file mode 100644 index 000000000..dfd4aeaca --- /dev/null +++ b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/HiddenAreaMeshVr_vs.hlsl @@ -0,0 +1,27 @@ +struct VS_INPUT +{ + float4 position : POSITION; +}; + +struct PS_INPUT +{ + float4 gl_Position : SV_Position; + uint gl_ViewportIndex : SV_ViewportArrayIndex; +}; + +PS_INPUT main +( + VS_INPUT input, + uniform float4x4 projectionMatrix, + uniform float2 rsDepthRange +) +{ + PS_INPUT outVs; + + outVs.gl_Position.xy = mul( projectionMatrix, float4( input.position.xy, 0.0f, 1.0f ) ).xy; + outVs.gl_Position.z = rsDepthRange.x; + outVs.gl_Position.w = 1.0f; + outVs.gl_ViewportIndex = int( input.position.z ); + + return outVs; +} diff --git a/ogre2/src/media/2.0/scripts/materials/Common/HLSL/MipmapsGaussianBlur_cs.hlsl b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/MipmapsGaussianBlur_cs.hlsl new file mode 100644 index 000000000..5c304ed17 --- /dev/null +++ b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/MipmapsGaussianBlur_cs.hlsl @@ -0,0 +1,37 @@ +//Based on GPUOpen's samples SeparableFilter11 +//https://github.com/GPUOpen-LibrariesAndSDKs/SeparableFilter11 +//For better understanding, read "Efficient Compute Shader Programming" from Bill Bilodeau +//http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2012/10/Efficient%20Compute%20Shader%20Programming.pps + +//TL;DR: +// * Each thread works on 4 pixels at a time 
(for VLIW hardware, i.e. Radeon HD 5000 & 6000 series). +// * 256 pixels per threadgroup. Each threadgroup works on 2 rows of 128 pixels each. +// That means 32x2 threads = 64. 64 threads x 4 pixels per thread = 256 + +@piece( data_type )float3@end +@piece( lds_data_type )float3@end +@piece( lds_definition ) + groupshared float3 g_f3LDS[ 2 ] [ @value( samples_per_threadgroup ) ]; + uniform float srcLodIdx; +@end + +@piece( image_sample ) + return inputImage.SampleLevel( inputSampler, f2SamplePosition, srcLodIdx ).xyz; +@end + +//Overwrite these so that num_thread_groups gets correctly calculated by accounting LOD. +@pset( texture0_width, width_with_lod ) +@pset( texture0_height, height_with_lod ) + +@pset( downscale_lq, 1 ) + +@piece( image_store ) + @property( downscale_lq ) + @foreach( 2, iPixel ) + outputImage[i2Center + @iPixel * i2Inc] = float4( outColour[ @iPixel ], 1.0 );@end + @end @property( !downscale_lq ) + @foreach( 2, iPixel ) + outputImage[i2Center + @iPixel * i2Inc] = float4( (outColour[ @iPixel * 2 ] + + outColour[ @iPixel * 2 + 1 ]) * 0.5, 1.0 );@end + @end +@end diff --git a/ogre2/src/media/2.0/scripts/materials/Common/HLSL/PccDepthCompressor_ps.hlsl b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/PccDepthCompressor_ps.hlsl new file mode 100644 index 000000000..c40a01f27 --- /dev/null +++ b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/PccDepthCompressor_ps.hlsl @@ -0,0 +1,39 @@ + +#define INLINE + +#define OGRE_Sample( tex, sampler, uv ) tex.Sample( sampler, uv ) + +struct PS_INPUT +{ + float2 uv0 : TEXCOORD0; + float3 cameraDir : TEXCOORD1; +}; + +Texture2D depthTexture : register(t0); +SamplerState pointSampler : register(s0); + +uniform float2 projectionParams; +uniform float3 probeShapeHalfSize; +uniform float3 cameraPosLS; + +uniform float3x3 viewSpaceToProbeLocalSpace; + +#define p_projectionParams projectionParams +#define p_probeShapeHalfSize probeShapeHalfSize +#define p_cameraPosLS cameraPosLS +#define 
p_viewSpaceToProbeLocalSpace viewSpaceToProbeLocalSpace + +#define HEADER + #include "PccDepthCompressor_ps.any" +#undef HEADER + +float4 main +( + PS_INPUT inPs +) : SV_Target0 +{ + #include "PccDepthCompressor_ps.any" + + //RGB writes should be masked off + return float4( 0, 0, 0, alpha ); +} diff --git a/ogre2/src/media/2.0/scripts/materials/Common/HLSL/QuadCameraDirNoUV_vs.hlsl b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/QuadCameraDirNoUV_vs.hlsl new file mode 100644 index 000000000..991642434 --- /dev/null +++ b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/QuadCameraDirNoUV_vs.hlsl @@ -0,0 +1,31 @@ + +struct VS_INPUT +{ + float2 vertex : POSITION; + float3 normal : NORMAL; +}; + +struct PS_INPUT +{ + float3 cameraDir : TEXCOORD0; + + float4 gl_Position : SV_Position; +}; + +PS_INPUT main +( + VS_INPUT input, + + uniform float2 rsDepthRange, + uniform matrix worldViewProj +) +{ + PS_INPUT outVs; + + outVs.gl_Position.xy= mul( worldViewProj, float4( input.vertex.xy, 0.0f, 1.0f ) ).xy; + outVs.gl_Position.z = rsDepthRange.y; + outVs.gl_Position.w = 1.0f; + outVs.cameraDir = input.normal; + + return outVs; +} diff --git a/ogre2/src/media/2.0/scripts/materials/Common/HLSL/QuadCameraDir_vs.hlsl b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/QuadCameraDir_vs.hlsl new file mode 100644 index 000000000..cbd569d50 --- /dev/null +++ b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/QuadCameraDir_vs.hlsl @@ -0,0 +1,31 @@ + +struct VS_INPUT +{ + float4 vertex : POSITION; + float3 normal : NORMAL; + float2 uv0 : TEXCOORD0; +}; + +struct PS_INPUT +{ + float2 uv0 : TEXCOORD0; + float3 cameraDir : TEXCOORD1; + + float4 gl_Position : SV_Position; +}; + +PS_INPUT main +( + VS_INPUT input, + + uniform matrix worldViewProj +) +{ + PS_INPUT outVs; + + outVs.gl_Position = mul( worldViewProj, input.vertex ).xyzw; + outVs.uv0 = input.uv0; + outVs.cameraDir = input.normal; + + return outVs; +} diff --git 
a/ogre2/src/media/2.0/scripts/materials/Common/HLSL/Quad_vs.hlsl b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/Quad_vs.hlsl new file mode 100644 index 000000000..503133b27 --- /dev/null +++ b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/Quad_vs.hlsl @@ -0,0 +1,25 @@ +struct VS_INPUT +{ + float4 vertex : POSITION; + float2 uv0 : TEXCOORD0; +}; + +struct PS_INPUT +{ + float2 uv0 : TEXCOORD0; + float4 gl_Position : SV_Position; +}; + +PS_INPUT main +( + VS_INPUT input, + uniform matrix worldViewProj +) +{ + PS_INPUT outVs; + + outVs.gl_Position = mul( worldViewProj, input.vertex ).xyzw; + outVs.uv0 = input.uv0; + + return outVs; +} diff --git a/ogre2/src/media/2.0/scripts/materials/Common/HLSL/RadialDensityMask_ps.hlsl b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/RadialDensityMask_ps.hlsl new file mode 100644 index 000000000..5f7f2eecf --- /dev/null +++ b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/RadialDensityMask_ps.hlsl @@ -0,0 +1,35 @@ + +#define p_leftEyeCenter leftEyeCenter_rightEyeCenter.xy +#define p_rightEyeCenter leftEyeCenter_rightEyeCenter.zw +#define p_rightEyeStart rightEyeStart_radius.x +#define p_radius rightEyeStart_radius.yzw +#define p_invBlockResolution invBlockResolution + +float4 main +( + float4 gl_FragCoord : SV_Position, + + uniform float4 rightEyeStart_radius, + uniform float4 leftEyeCenter_rightEyeCenter, + uniform float2 invBlockResolution +) : SV_Target +{ + float2 eyeCenter = gl_FragCoord.x >= p_rightEyeStart ? 
p_rightEyeCenter : p_leftEyeCenter; + + //We must work in blocks so the reconstruction filter can work properly + float2 toCenter = trunc(gl_FragCoord.xy * 0.125f) * p_invBlockResolution.xy - eyeCenter; + toCenter.x *= 2.0f; //Twice because of stereo (each eye is half the size of the full res) + float distToCenter = length( toCenter ); + + uint2 iFragCoordHalf = uint2( gl_FragCoord.xy * 0.5f ); + if( distToCenter < p_radius.x ) + discard; + else if( (iFragCoordHalf.x & 0x01u) == (iFragCoordHalf.y & 0x01u) && distToCenter < p_radius.y ) + discard; + else if( !((iFragCoordHalf.x & 0x01u) != 0u || (iFragCoordHalf.y & 0x01u) != 0u) && distToCenter < p_radius.z ) + discard; + else if( !((iFragCoordHalf.x & 0x03u) != 0u || (iFragCoordHalf.y & 0x03u) != 0u) ) + discard; + + return float4( 0, 0, 0, 0 ); +} diff --git a/ogre2/src/media/2.0/scripts/materials/Common/HLSL/RadialDensityMask_vs.hlsl b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/RadialDensityMask_vs.hlsl new file mode 100644 index 000000000..7dfe97236 --- /dev/null +++ b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/RadialDensityMask_vs.hlsl @@ -0,0 +1,31 @@ +struct VS_INPUT +{ + float2 vertex : POSITION; + uint vertexId : SV_VertexID; + #define gl_VertexID input.vertexId +}; + +struct PS_INPUT +{ + float4 gl_Position : SV_Position; + uint gl_ViewportIndex : SV_ViewportArrayIndex; +}; + +PS_INPUT main +( + VS_INPUT input, + + uniform float ogreBaseVertex, + uniform float2 rsDepthRange, + uniform matrix worldViewProj +) +{ + PS_INPUT outVs; + + outVs.gl_Position.xy= mul( worldViewProj, float4( input.vertex.xy, 0.0f, 1.0f ) ).xy; + outVs.gl_Position.z = rsDepthRange.x; + outVs.gl_Position.w = 1.0f; + outVs.gl_ViewportIndex = gl_VertexID >= (3 * 4) ? 
1 : 0; + + return outVs; +} diff --git a/ogre2/src/media/2.0/scripts/materials/Common/HLSL/Resolve_1xFP32_Subsample0_ps.hlsl b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/Resolve_1xFP32_Subsample0_ps.hlsl new file mode 100644 index 000000000..f228b5cfa --- /dev/null +++ b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/Resolve_1xFP32_Subsample0_ps.hlsl @@ -0,0 +1,16 @@ + +Texture2DMS myTexture : register(t0); + +struct PS_INPUT +{ + float2 uv0 : TEXCOORD0; +}; + +float main +( + PS_INPUT inPs, + float4 gl_FragCoord : SV_Position +) : SV_Target +{ + return myTexture.Load( int2( gl_FragCoord.xy ), 0 ).x; +} diff --git a/ogre2/src/media/2.0/scripts/materials/Common/HLSL/SkyCubemap_ps.hlsl b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/SkyCubemap_ps.hlsl new file mode 100644 index 000000000..61e814f09 --- /dev/null +++ b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/SkyCubemap_ps.hlsl @@ -0,0 +1,16 @@ +struct PS_INPUT +{ + float3 cameraDir : TEXCOORD0; +}; + +TextureCube skyCubemap : register(t0); +SamplerState samplerState : register(s0); + +float4 main +( + PS_INPUT inPs +) : SV_Target0 +{ + //Cubemaps are left-handed + return skyCubemap.Sample( samplerState, float3( inPs.cameraDir.xy, -inPs.cameraDir.z ) ); +} diff --git a/ogre2/src/media/2.0/scripts/materials/Common/HLSL/SkyEquirectangular_ps.hlsl b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/SkyEquirectangular_ps.hlsl new file mode 100644 index 000000000..f781e63a5 --- /dev/null +++ b/ogre2/src/media/2.0/scripts/materials/Common/HLSL/SkyEquirectangular_ps.hlsl @@ -0,0 +1,25 @@ +#define PI 3.14159265359f + +struct PS_INPUT +{ + float3 cameraDir : TEXCOORD0; +}; + +Texture2DArray skyEquirectangular : register(t0); +SamplerState samplerState : register(s0); + +float4 main +( + PS_INPUT inPs, + + uniform float sliceIdx +) : SV_Target0 +{ + float3 cameraDir = normalize( inPs.cameraDir ); + float2 longlat; + longlat.x = atan2( cameraDir.x, -cameraDir.z ) + PI; + longlat.y = acos( cameraDir.y ); + 
float2 uv = longlat / float2( 2.0f * PI, PI ); + + return skyEquirectangular.Sample( samplerState, float3( uv.xy, sliceIdx ) ); +} diff --git a/ogre2/src/media/2.0/scripts/materials/Common/HiddenAreaMeshVr.material b/ogre2/src/media/2.0/scripts/materials/Common/HiddenAreaMeshVr.material new file mode 100644 index 000000000..830e833b7 --- /dev/null +++ b/ogre2/src/media/2.0/scripts/materials/Common/HiddenAreaMeshVr.material @@ -0,0 +1,95 @@ +vertex_program Ogre/VR/HiddenAreaMeshVr_vs_GLSL glsl +{ + source HiddenAreaMeshVr_vs.glsl +} + +vertex_program Ogre/VR/HiddenAreaMeshVr_vs_VK glslvk +{ + source HiddenAreaMeshVr_vs.glsl +} + +vertex_program Ogre/VR/HiddenAreaMeshVr_vs_HLSL hlsl +{ + source HiddenAreaMeshVr_vs.hlsl + entry_point main + sets_vp_or_rt_array_index true + target vs_5_0 vs_4_0 vs_4_0_level_9_1 vs_4_0_level_9_3 +} + +vertex_program Ogre/VR/HiddenAreaMeshVr_vs_Metal metal +{ + source HiddenAreaMeshVr_vs.metal + shader_reflection_pair_hint Ogre/VR/HiddenAreaMeshVr_vs +} + +vertex_program Ogre/VR/HiddenAreaMeshVr_vs unified +{ + delegate Ogre/VR/HiddenAreaMeshVr_vs_GLSL + delegate Ogre/VR/HiddenAreaMeshVr_vs_VK + delegate Ogre/VR/HiddenAreaMeshVr_vs_HLSL + delegate Ogre/VR/HiddenAreaMeshVr_vs_Metal + + default_params + { + param_named_auto projectionMatrix projection_matrix + param_named_auto rsDepthRange rs_depth_range + } +} + +fragment_program Ogre/VR/HiddenAreaMeshVr_ps_GLSL glsl +{ + source HiddenAreaMeshVr_ps.glsl +} + +fragment_program Ogre/VR/HiddenAreaMeshVr_ps_VK glslvk +{ + source HiddenAreaMeshVr_ps.glsl +} + +fragment_program Ogre/VR/HiddenAreaMeshVr_ps_HLSL hlsl +{ + source HiddenAreaMeshVr_ps.hlsl + entry_point main + target ps_5_0 ps_4_0 ps_4_0_level_9_1 ps_4_0_level_9_3 +} + +fragment_program Ogre/VR/HiddenAreaMeshVr_ps_Metal metal +{ + source HiddenAreaMeshVr_ps.metal + shader_reflection_pair_hint Ogre/VR/HiddenAreaMeshVr_vs +} + +fragment_program Ogre/VR/HiddenAreaMeshVr_ps unified +{ + delegate Ogre/VR/HiddenAreaMeshVr_ps_GLSL + 
delegate Ogre/VR/HiddenAreaMeshVr_ps_VK + delegate Ogre/VR/HiddenAreaMeshVr_ps_HLSL + delegate Ogre/VR/HiddenAreaMeshVr_ps_Metal +} + +material Ogre/VR/HiddenAreaMeshVr +{ + technique + { + pass + { + //depth_check off + //depth_write off + + cull_hardware none + + depth_func always_pass + + //Do not write colour at all + channel_mask 0 + + vertex_program_ref Ogre/VR/HiddenAreaMeshVr_vs + { + } + + fragment_program_ref Ogre/VR/HiddenAreaMeshVr_ps + { + } + } + } +} diff --git a/ogre2/src/media/2.0/scripts/materials/Common/Metal/AtmosphereNprSky_ps.metal b/ogre2/src/media/2.0/scripts/materials/Common/Metal/AtmosphereNprSky_ps.metal new file mode 100644 index 000000000..deea9fa2d --- /dev/null +++ b/ogre2/src/media/2.0/scripts/materials/Common/Metal/AtmosphereNprSky_ps.metal @@ -0,0 +1,56 @@ +#include +using namespace metal; + +#define lerp mix + +struct PS_INPUT +{ + float3 cameraDir; +}; + +struct Params +{ + float4 packedParams0; + float3 skyLightAbsorption; + float4 sunAbsorption; + float4 cameraDisplacement; + float4 packedParams1; + float4 packedParams2; + float4 packedParams3; +}; + +#define p_densityCoeff p.packedParams0.x +#define p_lightDensity p.packedParams0.y +#define p_sunHeight p.packedParams0.z +#define p_sunHeightWeight p.packedParams0.w +#define p_skyLightAbsorption p.skyLightAbsorption +#define p_sunAbsorption p.sunAbsorption.xyz +#define p_sunPower p.sunAbsorption.w +#define p_cameraDisplacement p.cameraDisplacement +#define p_mieAbsorption p.packedParams1.xyz +#define p_finalMultiplier p.packedParams1.w +#define p_sunDir p.packedParams2.xyz +#define p_borderLimit p.packedParams2.w +#define p_skyColour p.packedParams3.xyz +#define p_densityDiffusion p.packedParams3.w + +#define HEADER +#include "AtmosphereNprSky_ps.any" +#undef HEADER + +fragment float4 main_metal +( + PS_INPUT inPs [[stage_in]], + + constant Params &p [[buffer(PARAMETER_SLOT)]] +) +{ + float4 fragColour; + + #include "AtmosphereNprSky_ps.any" + + fragColour.xyz = atmoColour; + 
fragColour.w = 1.0f; + + return fragColour; +} diff --git a/ogre2/src/media/2.0/scripts/materials/Common/Metal/MipmapsGaussianBlur_cs.metal b/ogre2/src/media/2.0/scripts/materials/Common/Metal/MipmapsGaussianBlur_cs.metal index bf23ddd2e..782b52a7b 100644 --- a/ogre2/src/media/2.0/scripts/materials/Common/Metal/MipmapsGaussianBlur_cs.metal +++ b/ogre2/src/media/2.0/scripts/materials/Common/Metal/MipmapsGaussianBlur_cs.metal @@ -15,7 +15,6 @@ @end @piece( extra_params ) float srcLodIdx; - uint dstLodIdx; @end @piece( image_sample ) @@ -32,11 +31,11 @@ @property( downscale_lq ) @foreach( 2, iPixel ) outputImage.write( float4( outColour[ @iPixel ], 1.0 ), uint2( i2Center + @iPixel * i2Inc ), - p.dstLodIdx );@end + 0u );@end @end @property( !downscale_lq ) @foreach( 2, iPixel ) outputImage.write( float4( (outColour[ @iPixel * 2 ] + outColour[ @iPixel * 2 + 1 ]) * 0.5, 1.0 ), uint2( i2Center + @iPixel * i2Inc ), - p.dstLodIdx );@end + 0u );@end @end @end diff --git a/ogre2/src/media/2.0/scripts/materials/Common/PccDepthCompressor.material b/ogre2/src/media/2.0/scripts/materials/Common/PccDepthCompressor.material new file mode 100644 index 000000000..ada1aa4c4 --- /dev/null +++ b/ogre2/src/media/2.0/scripts/materials/Common/PccDepthCompressor.material @@ -0,0 +1,74 @@ + +// GLSL shaders +fragment_program PccDepthCompressor_ps_GLSL glsl +{ + source PccDepthCompressor_ps.glsl + enable_include_header true + default_params + { + param_named depthTexture int 0 + } +} + +fragment_program PccDepthCompressor_ps_VK glslvk +{ + source PccDepthCompressor_ps.glsl + enable_include_header true +} + +// HLSL shaders +fragment_program PccDepthCompressor_ps_HLSL hlsl +{ + source PccDepthCompressor_ps.hlsl + entry_point main + target ps_5_0 ps_4_0 ps_4_0_level_9_1 ps_4_0_level_9_3 +} + +// Metal shaders +fragment_program PccDepthCompressor_ps_Metal metal +{ + source PccDepthCompressor_ps.metal + enable_include_header true + shader_reflection_pair_hint Ogre/Compositor/QuadCameraDir_vs +} + 
+// Unified definitions +fragment_program PccDepthCompressor_ps unified +{ + delegate PccDepthCompressor_ps_HLSL + delegate PccDepthCompressor_ps_GLSL + delegate PccDepthCompressor_ps_VK + delegate PccDepthCompressor_ps_Metal +} + +// Material definition +material PCC/DepthCompressor +{ + technique + { + pass + { + depth_check off + depth_write off + + cull_hardware none + + //Only write to alpha channel + channel_mask a + + vertex_program_ref Ogre/Compositor/QuadCameraDir_vs + { + } + + fragment_program_ref PccDepthCompressor_ps + { + } + + texture_unit + { + filtering none + tex_address_mode clamp + } + } + } +} diff --git a/ogre2/src/media/2.0/scripts/materials/Common/Quad.program b/ogre2/src/media/2.0/scripts/materials/Common/Quad.program index 10dc15a92..bfb0159c0 100644 --- a/ogre2/src/media/2.0/scripts/materials/Common/Quad.program +++ b/ogre2/src/media/2.0/scripts/materials/Common/Quad.program @@ -5,7 +5,7 @@ vertex_program Ogre/Compositor/Quad_vs_GLSL glsl vertex_program Ogre/Compositor/Quad_vs_VK glslvk { - source Quad_vs.glsl + source Quad_vs.glsl } vertex_program Ogre/Compositor/Quad_vs_GLSLES glsles diff --git a/ogre2/src/media/2.0/scripts/materials/Common/RadialDensityMask.material b/ogre2/src/media/2.0/scripts/materials/Common/RadialDensityMask.material new file mode 100644 index 000000000..73271da1f --- /dev/null +++ b/ogre2/src/media/2.0/scripts/materials/Common/RadialDensityMask.material @@ -0,0 +1,90 @@ +vertex_program Ogre/Compositor/RadialDensityMask_vs_GLSL glsl +{ + source RadialDensityMask_vs.glsl +} + +vertex_program Ogre/Compositor/RadialDensityMask_vs_VK glslvk +{ + source RadialDensityMask_vs.glsl +} + +vertex_program Ogre/Compositor/RadialDensityMask_vs_HLSL hlsl +{ + source RadialDensityMask_vs.hlsl + entry_point main + sets_vp_or_rt_array_index true + target vs_5_0 vs_4_0 vs_4_0_level_9_1 vs_4_0_level_9_3 +} + +vertex_program Ogre/Compositor/RadialDensityMask_vs_Metal metal +{ + source RadialDensityMask_vs.metal +} + +// Unified 
definitions +vertex_program Ogre/Compositor/RadialDensityMask_vs unified +{ + delegate Ogre/Compositor/RadialDensityMask_vs_GLSL + delegate Ogre/Compositor/RadialDensityMask_vs_VK + delegate Ogre/Compositor/RadialDensityMask_vs_HLSL + delegate Ogre/Compositor/RadialDensityMask_vs_Metal + + default_params + { + param_named_auto worldViewProj worldviewproj_matrix + param_named_auto rsDepthRange rs_depth_range + } +} + +fragment_program Ogre/VR/RadialDensityMask_ps_GLSL glsl +{ + source RadialDensityMask_ps.glsl +} + +fragment_program Ogre/VR/RadialDensityMask_ps_VK glslvk +{ + source RadialDensityMask_ps.glsl +} + +fragment_program Ogre/VR/RadialDensityMask_ps_HLSL hlsl +{ + source RadialDensityMask_ps.hlsl + entry_point main + target ps_5_0 ps_4_0 ps_4_0_level_9_1 ps_4_0_level_9_3 +} + +fragment_program Ogre/VR/RadialDensityMask_ps_Metal metal +{ + source RadialDensityMask_ps.metal + shader_reflection_pair_hint Ogre/Compositor/RadialDensityMask_vs +} + +fragment_program Ogre/VR/RadialDensityMask_ps unified +{ + delegate Ogre/VR/RadialDensityMask_ps_GLSL + delegate Ogre/VR/RadialDensityMask_ps_VK + delegate Ogre/VR/RadialDensityMask_ps_HLSL + delegate Ogre/VR/RadialDensityMask_ps_Metal +} + +material Ogre/VR/RadialDensityMask +{ + technique + { + pass + { + cull_hardware none + + //Do not write colour at all + //channel_mask 0 + + vertex_program_ref Ogre/Compositor/RadialDensityMask_vs + { + } + + fragment_program_ref Ogre/VR/RadialDensityMask_ps + { + } + } + } +} diff --git a/ogre2/src/media/2.0/scripts/materials/Common/Sky.material b/ogre2/src/media/2.0/scripts/materials/Common/Sky.material new file mode 100644 index 000000000..7e11bdc7e --- /dev/null +++ b/ogre2/src/media/2.0/scripts/materials/Common/Sky.material @@ -0,0 +1,137 @@ + +// GLSL shaders +fragment_program Ogre/Sky/Cubemap_ps_GLSL glsl +{ + source SkyCubemap_ps.glsl + default_params + { + param_named skyCubemap int 0 + } +} + +fragment_program Ogre/Sky/Cubemap_ps_VK glslvk +{ + source 
SkyCubemap_ps.glsl +} + +// HLSL shaders +fragment_program Ogre/Sky/Cubemap_ps_HLSL hlsl +{ + source SkyCubemap_ps.hlsl + entry_point main + target ps_5_0 ps_4_0 ps_4_0_level_9_1 ps_4_0_level_9_3 +} + +// Metal shaders +fragment_program Ogre/Sky/Cubemap_ps_Metal metal +{ + source SkyCubemap_ps.metal + shader_reflection_pair_hint Ogre/Compositor/QuadCameraDirNoUV_vs +} + +// Unified definitions +fragment_program Ogre/Sky/Cubemap_ps unified +{ + delegate Ogre/Sky/Cubemap_ps_HLSL + delegate Ogre/Sky/Cubemap_ps_VK + delegate Ogre/Sky/Cubemap_ps_GLSL + delegate Ogre/Sky/Cubemap_ps_Metal +} + +// Material definition +material Ogre/Sky/Cubemap +{ + technique + { + pass + { + depth_check on + depth_write off + + cull_hardware none + + vertex_program_ref Ogre/Compositor/QuadCameraDirNoUV_vs + { + } + + fragment_program_ref Ogre/Sky/Cubemap_ps + { + } + + texture_unit + { + filtering trilinear + tex_address_mode clamp + } + } + } +} + +// GLSL shaders +fragment_program Ogre/Sky/Equirectangular_ps_GLSL glsl +{ + source SkyEquirectangular_ps.glsl + default_params + { + param_named skyEquirectangular int 0 + } +} + +fragment_program Ogre/Sky/Equirectangular_ps_VK glslvk +{ + source SkyEquirectangular_ps.glsl +} + +// HLSL shaders +fragment_program Ogre/Sky/Equirectangular_ps_HLSL hlsl +{ + source SkyEquirectangular_ps.hlsl + entry_point main + target ps_5_0 ps_4_0 ps_4_0_level_9_1 ps_4_0_level_9_3 +} + +// Metal shaders +fragment_program Ogre/Sky/Equirectangular_ps_Metal metal +{ + source SkyEquirectangular_ps.metal + shader_reflection_pair_hint Ogre/Compositor/QuadCameraDirNoUV_vs +} + +// Unified definitions +fragment_program Ogre/Sky/Equirectangular_ps unified +{ + delegate Ogre/Sky/Equirectangular_ps_HLSL + delegate Ogre/Sky/Equirectangular_ps_VK + delegate Ogre/Sky/Equirectangular_ps_GLSL + delegate Ogre/Sky/Equirectangular_ps_Metal +} + +// Material definition +material Ogre/Sky/Equirectangular +{ + technique + { + pass + { + depth_check on + depth_write off + + 
cull_hardware none + + vertex_program_ref Ogre/Compositor/QuadCameraDirNoUV_vs + { + } + + fragment_program_ref Ogre/Sky/Equirectangular_ps + { + param_named sliceIdx float 0.0 + } + + texture_unit + { + filtering trilinear + tex_address_mode clamp + } + } + } +} diff --git a/ogre2/src/media/2.0/scripts/materials/Common/brtfLutDfg.dds b/ogre2/src/media/2.0/scripts/materials/Common/brtfLutDfg.dds index b91c1da0e..c9a628d0f 100644 Binary files a/ogre2/src/media/2.0/scripts/materials/Common/brtfLutDfg.dds and b/ogre2/src/media/2.0/scripts/materials/Common/brtfLutDfg.dds differ diff --git a/ogre2/src/media/Hlms/Common/Any/Cubemap_piece_all.any b/ogre2/src/media/Hlms/Common/Any/Cubemap_piece_all.any index b98265b26..3cd2def5c 100644 --- a/ogre2/src/media/Hlms/Common/Any/Cubemap_piece_all.any +++ b/ogre2/src/media/Hlms/Common/Any/Cubemap_piece_all.any @@ -29,30 +29,30 @@ struct CubemapProbe The w component contains the distance from pos to intersection walking across reflDir */ -INLINE float4 localCorrect( float3 reflDir, float3 posLS, CubemapProbe probe ) +INLINE midf4 localCorrect( midf3 reflDir, midf3 posLS, CubemapProbe probe ) { - float3 probeShapeHalfSize = probe.halfSize.xyz; - float3x3 viewSpaceToProbeLocal = buildFloat3x3( probe.row0_centerX.xyz, - probe.row1_centerY.xyz, - probe.row2_centerZ.xyz ); - float3 reflDirLS = mul( reflDir, viewSpaceToProbeLocal ); + midf3 probeShapeHalfSize = midf3_c( probe.halfSize.xyz ); + midf3x3 viewSpaceToProbeLocal = buildMidf3x3( probe.row0_centerX.xyz, + probe.row1_centerY.xyz, + probe.row2_centerZ.xyz ); + midf3 reflDirLS = mul( reflDir, viewSpaceToProbeLocal ); //Find the ray intersection with box plane - float3 invReflDirLS = float3( 1.0, 1.0, 1.0 ) / reflDirLS; - float3 intersectAtMinPlane = ( -probeShapeHalfSize - posLS ) * invReflDirLS; - float3 intersectAtMaxPlane = ( probeShapeHalfSize - posLS ) * invReflDirLS; + midf3 invReflDirLS = midf3_c( 1.0, 1.0, 1.0 ) / reflDirLS; + midf3 intersectAtMinPlane = ( 
-probeShapeHalfSize - posLS ) * invReflDirLS; + midf3 intersectAtMaxPlane = ( probeShapeHalfSize - posLS ) * invReflDirLS; //Get the largest intersection values (we are not intersted in negative values) - float3 largestIntersect = max( intersectAtMaxPlane.xyz, intersectAtMinPlane.xyz ); + midf3 largestIntersect = max( intersectAtMaxPlane.xyz, intersectAtMinPlane.xyz ); //Get the closest of all solutions - float distance = min( min( largestIntersect.x, largestIntersect.y ), largestIntersect.z ); + midf distance = min( min( largestIntersect.x, largestIntersect.y ), largestIntersect.z ); //Get the intersection position - float3 intersectPositionLS = posLS.xyz + reflDirLS.xyz * distance; + midf3 intersectPositionLS = posLS.xyz + reflDirLS.xyz * distance; //Get corrected vector - float3 localCorrectedVec = intersectPositionLS.xyz - probe.cubemapPosLS.xyz; + midf3 localCorrectedVec = intersectPositionLS.xyz - midf3_c( probe.cubemapPosLS.xyz ); //Make it left-handed. localCorrectedVec.z = -localCorrectedVec.z; - return float4( localCorrectedVec, distance ); + return midf4_c( localCorrectedVec, distance ); } /** Converts a position from view space to probe's local space. @@ -93,20 +93,20 @@ INLINE float4 localCorrect( float3 reflDir, float3 posLS, CubemapProbe probe ) /// 1 means being at the center of the probe. /// 0 means being at the edge of the probe /// <0 means position is outside the probe. 
-@property( syntax == metal )inline @end float getProbeFade( float3 posLS, CubemapProbe probe ) +@property( syntax == metal )inline @end midf getProbeFade( midf3 posLS, CubemapProbe probe ) { - float3 vDiff = ( probe.halfSize.xyz - abs( posLS.xyz ) ) / probe.halfSize.xyz; + midf3 vDiff = ( midf3_c( probe.halfSize.xyz ) - abs( posLS.xyz ) ) / midf3_c( probe.halfSize.xyz ); return min( min( vDiff.x, vDiff.y ), vDiff.z ); } -INLINE float getProbeNDF( float3 posLS, float3 probeToAreaCenterOffsetLS, - float3 innerRange, float3 outerRange ) +INLINE midf getProbeNDF( midf3 posLS, midf3 probeToAreaCenterOffsetLS, + midf3 innerRange, midf3 outerRange ) { - float3 areaPosLS = posLS.xyz - probeToAreaCenterOffsetLS.xyz; - //float3 areaPosLS = posLS.xyz - float3( 0, 0, -5 ); - float3 dist = abs( areaPosLS.xyz ); + midf3 areaPosLS = posLS.xyz - probeToAreaCenterOffsetLS.xyz; + //midf3 areaPosLS = posLS.xyz - midf3_c( 0, 0, -5 ); + midf3 dist = abs( areaPosLS.xyz ); //1e-6f avoids division by zero. - float3 ndf = (dist - innerRange) / (outerRange - innerRange + 1e-6f); + midf3 ndf = (dist - innerRange) / (outerRange - innerRange + _h( 1e-6f )); return max3( ndf.x, ndf.y, ndf.z ); } diff --git a/ogre2/src/media/Hlms/Common/Any/DualParaboloid_piece_ps.any b/ogre2/src/media/Hlms/Common/Any/DualParaboloid_piece_ps.any index 024e6f60d..73b81d0e4 100644 --- a/ogre2/src/media/Hlms/Common/Any/DualParaboloid_piece_ps.any +++ b/ogre2/src/media/Hlms/Common/Any/DualParaboloid_piece_ps.any @@ -4,13 +4,13 @@ /// Converts UVW coordinates that would be used for sampling a cubemap, /// and returns UV coordinates to sample a dual paraboloid in 2D -INLINE float2 mapCubemapToDpm( float3 cubemapDir ) +INLINE midf2 mapCubemapToDpm( midf3 cubemapDir ) { cubemapDir = normalize( cubemapDir ); - float2 retVal; - retVal.x = (cubemapDir.x / (1.0 + abs( cubemapDir.z ))) * 0.25 + - (cubemapDir.z < 0.0 ? 
0.75 : 0.25 ); - retVal.y = (cubemapDir.y / (1.0 + abs( cubemapDir.z ))) * 0.5 + 0.5; + midf2 retVal; + retVal.x = (cubemapDir.x / (_h( 1.0 ) + abs( cubemapDir.z ))) * _h( 0.25 ) + + (cubemapDir.z < _h( 0.0 ) ? _h( 0.75 ) : _h( 0.25 ) ); + retVal.y = (cubemapDir.y / (_h( 1.0 ) + abs( cubemapDir.z ))) * _h( 0.5 ) + _h( 0.5 ); return retVal; } diff --git a/ogre2/src/media/Hlms/Common/GLSL/CrossPlatformSettings_piece_all.glsl b/ogre2/src/media/Hlms/Common/GLSL/CrossPlatformSettings_piece_all.glsl index 049e3e5b5..a67029741 100644 --- a/ogre2/src/media/Hlms/Common/GLSL/CrossPlatformSettings_piece_all.glsl +++ b/ogre2/src/media/Hlms/Common/GLSL/CrossPlatformSettings_piece_all.glsl @@ -15,9 +15,14 @@ @insertpiece( CustomGlslExtensions ) +@property( precision_mode == midf16 && syntax == glslvk ) + #extension GL_EXT_shader_16bit_storage: require + #extension GL_EXT_shader_explicit_arithmetic_types_float16: require +@end + @property( GL_ARB_shading_language_420pack ) - #extension GL_ARB_shading_language_420pack: require - #define layout_constbuffer(x) layout( std140, x ) + #extension GL_ARB_shading_language_420pack: require + #define layout_constbuffer(x) layout( std140, x ) @else #define layout_constbuffer(x) layout( std140 ) @end @@ -70,8 +75,137 @@ #define toFloat3x3( x ) mat3( x ) #define buildFloat3x3( row0, row1, row2 ) mat3( row0, row1, row2 ) +// Let's explain this madness: +// +// We use the keyword "midf" because "half" is already taken on Metal. +// +// When precision_mode == full32 midf is float. Nothing weird +// +// When precision_mode == midf16, midf and midf_c map both to float16_t. It's similar to full32 +// but literals need to be prefixed with _h() +// +// Thus, what happens if we resolve some of the macros, we end up with: +// float16_t a = 1.0f; // Error +// float16_t b = _h( 1.0f ); // OK! +// float16_t c = float16_t( someFloat ); // OK! 
+// +// But when precision_mode == relaxed; we have the following problem: +// mediump float a = 1.0f; // Error +// mediump float b = _h( 1.0f ); // OK! +// mediump float c = mediump float( someFloat ); // Invalid syntax! +// +// That's where 'midf_c' comes into play. The "_c" means cast or construct. Hence we do instead: +// midf c = midf( someFloat ); // Will turn into invalid syntax on relaxed! +// midf c = midf_c( someFloat ); // OK! +// +// Therefore datatypes are declared with midf. And casts and constructors are with midf_c +// Proper usage is as follows: +// midf b = _h( 1.0f ); +// midf b = midf_c( someFloat ); +// midf c = midf3_c( 1.0f, 2.0f, 3.0f ); +// +// Using this convention ensures that code will compile with all 3 precision modes. +// Breaking this convention means one or more of the modes (except full32) will not compile. +@property( precision_mode == full32 ) + #define _h(x) (x) + + #define midf float + #define midf2 vec2 + #define midf3 vec3 + #define midf4 vec4 + #define midf2x2 mat2 + #define midf3x3 mat3 + #define midf4x4 mat4 + + #define midf_c float + #define midf2_c vec2 + #define midf3_c vec3 + #define midf4_c vec4 + #define midf2x2_c mat2 + #define midf3x3_c mat3 + #define midf4x4_c mat4 + + #define midf_tex + + #define toMidf3x3( x ) mat3( x ) + #define buildMidf3x3( row0, row1, row2 ) mat3( row0, row1, row2 ) + + #define ensureValidRangeF16(x) + + #define saturate(x) clamp( (x), 0.0, 1.0 ) +@end +@property( precision_mode == midf16 ) + #define _h(x) float16_t(x) + + // TODO: Do the same with ushort + #define midf float16_t + #define midf2 f16vec2 + #define midf3 f16vec3 + #define midf4 f16vec4 + #define midf2x2 f16mat2x2 + #define midf3x3 f16mat3x3 + #define midf4x4 f16mat4x4 + + #define midf_c float16_t + #define midf2_c f16vec2 + #define midf3_c f16vec3 + #define midf4_c f16vec4 + #define midf2x2_c f16mat2x2 + #define midf3x3_c f16mat3x3 + #define midf4x4_c f16mat4x4 + + #define midf_tex mediump + + #define toMidf3x3( x ) f16mat3x3( 
x ) + #define buildMidf3x3( row0, row1, row2 ) f16mat3x3( row0, row1, row2 ) + + #define ensureValidRangeF16(x) x = min(x, _h( 65504.0 )) + + float saturate( float x ) { return clamp( x, 0.0, 1.0 ); } + vec2 saturate( vec2 x ) { return clamp( x, vec2( 0.0 ), vec2( 1.0 ) ); } + vec3 saturate( vec3 x ) { return clamp( x, vec3( 0.0 ), vec3( 1.0 ) ); } + vec4 saturate( vec4 x ) { return clamp( x, vec4( 0.0 ), vec4( 1.0 ) ); } + + midf saturate( midf x ) { return clamp( x, midf( 0.0 ), midf( 1.0 ) ); } + midf2 saturate( midf2 x ) { return clamp( x, midf2( 0.0 ), midf2( 1.0 ) ); } + midf3 saturate( midf3 x ) { return clamp( x, midf3( 0.0 ), midf3( 1.0 ) ); } + midf4 saturate( midf4 x ) { return clamp( x, midf4( 0.0 ), midf4( 1.0 ) ); } +@end +@property( precision_mode == relaxed ) + precision highp int; // Silence warning about default is highp + precision highp float; // Silence warning about default is highp + + #define _h(x) (x) + + #define midf mediump float + #define midf2 mediump vec2 + #define midf3 mediump vec3 + #define midf4 mediump vec4 + #define midf2x2 mediump mat2 + #define midf3x3 mediump mat3 + #define midf4x4 mediump mat4 + + // For casting to midf + #define midf_c float + #define midf2_c vec2 + #define midf3_c vec3 + #define midf4_c vec4 + #define midf2x2_c mat2 + #define midf3x3_c mat3 + #define midf4x4_c mat4 + + #define midf_tex mediump + + #define toMidf3x3( x ) mat3( x ) + #define buildMidf3x3( row0, row1, row2 ) mat3( row0, row1, row2 ) + + mediump float saturate( mediump float x ) { return clamp( x, 0.0, 1.0 ); } + mediump vec2 saturate( mediump vec2 x ) { return clamp( x, vec2( 0.0 ), vec2( 1.0 ) ); } + mediump vec3 saturate( mediump vec3 x ) { return clamp( x, vec3( 0.0 ), vec3( 1.0 ) ); } + mediump vec4 saturate( mediump vec4 x ) { return clamp( x, vec4( 0.0 ), vec4( 1.0 ) ); } +@end + #define mul( x, y ) ((x) * (y)) -#define saturate(x) clamp( (x), 0.0, 1.0 ) #define lerp mix #define rsqrt inversesqrt #define INLINE @@ -109,7 +243,11 @@ 
#define outVs_Position gl_Position #define outVs_viewportIndex gl_ViewportIndex +@property( hlms_emulate_clip_distances ) +#define outVs_clipDistance0 outVs.clipDistance0 +@else #define outVs_clipDistance0 gl_ClipDistance[0] +@end #define gl_SampleMaskIn0 gl_SampleMaskIn[0] #define reversebits bitfieldReverse @@ -129,12 +267,30 @@ #define texture3D sampler3D #define textureCube samplerCube #define textureCubeArray samplerCubeArray + + #define OGRE_Load2DF16( tex, iuv, lod ) midf4_c( texelFetch( tex, ivec2( iuv ), lod ) ) + #define OGRE_Load2DMSF16( tex, iuv, subsample ) midf4_c( texelFetch( tex, iuv, subsample ) ) + #define OGRE_SampleF16( tex, sampler, uv ) midf4_c( texture( tex, uv ) ) + #define OGRE_SampleLevelF16( tex, sampler, uv, lod ) midf4_c( textureLod( tex, uv, lod ) ) + #define OGRE_SampleArray2DF16( tex, sampler, uv, arrayIdx ) midf4_c( texture( tex, vec3( uv, arrayIdx ) ) ) + #define OGRE_SampleArray2DLevelF16( tex, sampler, uv, arrayIdx, lod ) midf4_c( textureLod( tex, vec3( uv, arrayIdx ), lod ) ) + #define OGRE_SampleArrayCubeLevelF16( tex, sampler, uv, arrayIdx, lod ) midf4_c( textureLod( tex, vec4( uv, arrayIdx ), lod ) ) + #define OGRE_SampleGradF16( tex, sampler, uv, ddx, ddy ) midf4_c( textureGrad( tex, uv, ddx, ddy ) ) + #define OGRE_SampleArray2DGradF16( tex, sampler, uv, arrayIdx, ddx, ddy ) midf4_c( textureGrad( tex, vec3( uv, arrayIdx ), ddx, ddy ) ) @else + #define OGRE_SampleArray2D( tex, sampler, uv, arrayIdx ) texture( sampler2DArray( tex, sampler ), vec3( uv, arrayIdx ) ) #define OGRE_SampleArray2DLevel( tex, sampler, uv, arrayIdx, lod ) textureLod( sampler2DArray( tex, sampler ), vec3( uv, arrayIdx ), lod ) #define OGRE_SampleArrayCubeLevel( tex, sampler, uv, arrayIdx, lod ) textureLod( samplerCubeArray( tex, sampler ), vec4( uv, arrayIdx ), lod ) #define OGRE_SampleArray2DGrad( tex, sampler, uv, arrayIdx, ddx, ddy ) textureGrad( sampler2DArray( tex, sampler ), vec3( uv, arrayIdx ), ddx, ddy ) + #define OGRE_Load2DF16( tex, iuv, lod 
) midf4_c( texelFetch( tex, ivec2( iuv ), lod ) ) + #define OGRE_Load2DMSF16( tex, iuv, subsample ) midf4_c( texelFetch( tex, iuv, subsample ) ) + #define OGRE_SampleArray2DF16( tex, sampler, uv, arrayIdx ) midf4_c( texture( sampler2DArray( tex, sampler ), vec3( uv, arrayIdx ) ) ) + #define OGRE_SampleArray2DLevelF16( tex, sampler, uv, arrayIdx, lod ) midf4_c( textureLod( sampler2DArray( tex, sampler ), vec3( uv, arrayIdx ), lod ) ) + #define OGRE_SampleArrayCubeLevelF16( tex, sampler, uv, arrayIdx, lod ) midf4_c( textureLod( samplerCubeArray( tex, sampler ), vec4( uv, arrayIdx ), lod ) ) + #define OGRE_SampleArray2DGradF16( tex, sampler, uv, arrayIdx, ddx, ddy ) midf4_c( textureGrad( sampler2DArray( tex, sampler ), vec3( uv, arrayIdx ), ddx, ddy ) ) + float4 OGRE_Sample( texture2D t, sampler s, float2 uv ) { return texture( sampler2D( t, s ), uv ); } float4 OGRE_Sample( texture3D t, sampler s, float3 uv ) { return texture( sampler3D( t, s ), uv ); } float4 OGRE_Sample( textureCube t, sampler s, float3 uv ) { return texture( samplerCube( t, s ), uv ); } @@ -146,6 +302,18 @@ float4 OGRE_SampleGrad( texture2D t, sampler s, float2 uv, float2 myDdx, float2 myDdy ) { return textureGrad( sampler2D( t, s ), uv, myDdx, myDdy ); } float4 OGRE_SampleGrad( texture3D t, sampler s, float3 uv, float3 myDdx, float3 myDdy ) { return textureGrad( sampler3D( t, s ), uv, myDdx, myDdy ); } float4 OGRE_SampleGrad( textureCube t, sampler s, float3 uv, float3 myDdx, float3 myDdy ) { return textureGrad( samplerCube( t, s ), uv, myDdx, myDdy ); } + + midf4 OGRE_SampleF16( texture2D t, sampler s, float2 uv ) { return midf4_c( texture( sampler2D( t, s ), uv ) ); } + midf4 OGRE_SampleF16( texture3D t, sampler s, float3 uv ) { return midf4_c( texture( sampler3D( t, s ), uv ) ); } + midf4 OGRE_SampleF16( textureCube t, sampler s, float3 uv ) { return midf4_c( texture( samplerCube( t, s ), uv ) ); } + + midf4 OGRE_SampleLevelF16( texture2D t, sampler s, float2 uv, float lod ) { return midf4_c( 
textureLod( sampler2D( t, s ), uv, lod ) ); } + midf4 OGRE_SampleLevelF16( texture3D t, sampler s, float3 uv, float lod ) { return midf4_c( textureLod( sampler3D( t, s ), uv, lod ) ); } + midf4 OGRE_SampleLevelF16( textureCube t, sampler s, float3 uv, float lod ) { return midf4_c( textureLod( samplerCube( t, s ), uv, lod ) ); } + + midf4 OGRE_SampleGradF16( texture2D t, sampler s, float2 uv, float2 myDdx, float2 myDdy ) { return midf4_c( textureGrad( sampler2D( t, s ), uv, myDdx, myDdy ) ); } + midf4 OGRE_SampleGradF16( texture3D t, sampler s, float3 uv, float3 myDdx, float3 myDdy ) { return midf4_c( textureGrad( sampler3D( t, s ), uv, myDdx, myDdy ) ); } + midf4 OGRE_SampleGradF16( textureCube t, sampler s, float3 uv, float3 myDdx, float3 myDdy ) { return midf4_c( textureGrad( samplerCube( t, s ), uv, myDdx, myDdy ) ); } @end #define OGRE_ddx( val ) dFdx( val ) #define OGRE_ddy( val ) dFdy( val ) @@ -155,7 +323,9 @@ #define OGRE_Load3D( tex, iuv, lod ) texelFetch( tex, ivec3( iuv ), lod ) -#define bufferFetch1( buffer, idx ) texelFetch( buffer, idx ).x +@property( GL_ARB_texture_buffer_range ) + #define bufferFetch1( buffer, idx ) texelFetch( buffer, idx ).x +@end @property( syntax != glslvk ) #define OGRE_SAMPLER_ARG_DECL( samplerName ) @@ -210,24 +380,40 @@ @property( !GL_ARB_texture_buffer_range || !GL_ARB_shading_language_420pack ) @piece( SetCompatibilityLayer ) @property( !GL_ARB_texture_buffer_range ) - #define samplerBuffer sampler2D - #define isamplerBuffer isampler2D - #define usamplerBuffer usampler2D - vec4 bufferFetch( in sampler2D sampl, in int pixelIdx ) - { - ivec2 pos = ivec2( mod( pixelIdx, 2048 ), int( uint(pixelIdx) >> 11u ) ); - return texelFetch( sampl, pos, 0 ); - } - ivec4 bufferFetch(in isampler2D sampl, in int pixelIdx) - { - ivec2 pos = ivec2( mod( pixelIdx, 2048 ), int( uint(pixelIdx) >> 11u ) ); - return texelFetch( sampl, pos, 0 ); - } - uvec4 bufferFetch( in usampler2D sampl, in int pixelIdx ) - { - ivec2 pos = ivec2( mod( pixelIdx, 
2048 ), int( uint(pixelIdx) >> 11u ) ); - return texelFetch( sampl, pos, 0 ); - } + #define samplerBuffer sampler2D + #define isamplerBuffer isampler2D + #define usamplerBuffer usampler2D + vec4 bufferFetch( in sampler2D sampl, in int pixelIdx ) + { + ivec2 pos = ivec2( mod( pixelIdx, 2048 ), int( uint(pixelIdx) >> 11u ) ); + return texelFetch( sampl, pos, 0 ); + } + ivec4 bufferFetch(in isampler2D sampl, in int pixelIdx) + { + ivec2 pos = ivec2( mod( pixelIdx, 2048 ), int( uint(pixelIdx) >> 11u ) ); + return texelFetch( sampl, pos, 0 ); + } + uvec4 bufferFetch( in usampler2D sampl, in int pixelIdx ) + { + ivec2 pos = ivec2( mod( pixelIdx, 2048 ), int( uint(pixelIdx) >> 11u ) ); + return texelFetch( sampl, pos, 0 ); + } + + float bufferFetch1( in sampler2D sampl, in int pixelIdx ) + { + ivec2 pos = ivec2( mod( pixelIdx, 2048 ), int( uint(pixelIdx) >> 11u ) ); + return texelFetch( sampl, pos, 0 ).x; + } + int bufferFetch1(in isampler2D sampl, in int pixelIdx) + { + ivec2 pos = ivec2( mod( pixelIdx, 2048 ), int( uint(pixelIdx) >> 11u ) ); + return texelFetch( sampl, pos, 0 ).x; + } + uint bufferFetch1( in usampler2D sampl, in int pixelIdx ) + { + ivec2 pos = ivec2( mod( pixelIdx, 2048 ), int( uint(pixelIdx) >> 11u ) ); + return texelFetch( sampl, pos, 0 ).x; + } @end @end @end diff --git a/ogre2/src/media/Hlms/Common/GLSL/QuaternionCode_piece_all.glsl b/ogre2/src/media/Hlms/Common/GLSL/QuaternionCode_piece_all.glsl index b72defabf..01f0fe59a 100644 --- a/ogre2/src/media/Hlms/Common/GLSL/QuaternionCode_piece_all.glsl +++ b/ogre2/src/media/Hlms/Common/GLSL/QuaternionCode_piece_all.glsl @@ -1,51 +1,104 @@ @piece( DeclQuat_xAxis ) -vec3 xAxis( vec4 qQuat ) -{ - float fTy = 2.0 * qQuat.y; - float fTz = 2.0 * qQuat.z; - float fTwy = fTy * qQuat.w; - float fTwz = fTz * qQuat.w; - float fTxy = fTy * qQuat.x; - float fTxz = fTz * qQuat.x; - float fTyy = fTy * qQuat.y; - float fTzz = fTz * qQuat.z; - - return vec3( 1.0-(fTyy+fTzz), fTxy+fTwz, fTxz-fTwy ); -} + @property( 
precision_mode != relaxed ) + vec3 xAxis( vec4 qQuat ) + { + float fTy = 2.0 * qQuat.y; + float fTz = 2.0 * qQuat.z; + float fTwy = fTy * qQuat.w; + float fTwz = fTz * qQuat.w; + float fTxy = fTy * qQuat.x; + float fTxz = fTz * qQuat.x; + float fTyy = fTy * qQuat.y; + float fTzz = fTz * qQuat.z; + + return vec3( 1.0-(fTyy+fTzz), fTxy+fTwz, fTxz-fTwy ); + } + @end + @property( precision_mode != full32 ) + midf3 xAxis( midf4 qQuat ) + { + midf fTy = _h( 2.0 ) * qQuat.y; + midf fTz = _h( 2.0 ) * qQuat.z; + midf fTwy = fTy * qQuat.w; + midf fTwz = fTz * qQuat.w; + midf fTxy = fTy * qQuat.x; + midf fTxz = fTz * qQuat.x; + midf fTyy = fTy * qQuat.y; + midf fTzz = fTz * qQuat.z; + + return midf3_c( _h( 1.0 )-(fTyy+fTzz), fTxy+fTwz, fTxz-fTwy ); + } + @end @end @piece( DeclQuat_yAxis ) -vec3 yAxis( vec4 qQuat ) -{ - float fTx = 2.0 * qQuat.x; - float fTy = 2.0 * qQuat.y; - float fTz = 2.0 * qQuat.z; - float fTwx = fTx * qQuat.w; - float fTwz = fTz * qQuat.w; - float fTxx = fTx * qQuat.x; - float fTxy = fTy * qQuat.x; - float fTyz = fTz * qQuat.y; - float fTzz = fTz * qQuat.z; - - return vec3( fTxy-fTwz, 1.0-(fTxx+fTzz), fTyz+fTwx ); -} + @property( precision_mode != relaxed ) + vec3 yAxis( vec4 qQuat ) + { + float fTx = 2.0 * qQuat.x; + float fTy = 2.0 * qQuat.y; + float fTz = 2.0 * qQuat.z; + float fTwx = fTx * qQuat.w; + float fTwz = fTz * qQuat.w; + float fTxx = fTx * qQuat.x; + float fTxy = fTy * qQuat.x; + float fTyz = fTz * qQuat.y; + float fTzz = fTz * qQuat.z; + + return vec3( fTxy-fTwz, 1.0-(fTxx+fTzz), fTyz+fTwx ); + } + @end + @property( precision_mode != full32 ) + midf3 yAxis( midf4 qQuat ) + { + midf fTx = _h( 2.0 ) * qQuat.x; + midf fTy = _h( 2.0 ) * qQuat.y; + midf fTz = _h( 2.0 ) * qQuat.z; + midf fTwx = fTx * qQuat.w; + midf fTwz = fTz * qQuat.w; + midf fTxx = fTx * qQuat.x; + midf fTxy = fTy * qQuat.x; + midf fTyz = fTz * qQuat.y; + midf fTzz = fTz * qQuat.z; + + return midf3_c( fTxy-fTwz, _h( 1.0 )-(fTxx+fTzz), fTyz+fTwx ); + } + @end @end @piece( 
DeclQuat_zAxis ) -vec3 zAxis( vec4 qQuat ) -{ - float fTx = 2.0 * qQuat.x; - float fTy = 2.0 * qQuat.y; - float fTz = 2.0 * qQuat.z; - float fTwx = fTx * qQuat.w; - float fTwy = fTy * qQuat.w; - float fTxx = fTx * qQuat.x; - float fTxz = fTz * qQuat.x; - float fTyy = fTy * qQuat.y; - float fTyz = fTz * qQuat.y; - - return vec3( fTxz+fTwy, fTyz-fTwx, 1.0-(fTxx+fTyy) ); -} + @property( precision_mode != relaxed ) + vec3 zAxis( vec4 qQuat ) + { + float fTx = 2.0 * qQuat.x; + float fTy = 2.0 * qQuat.y; + float fTz = 2.0 * qQuat.z; + float fTwx = fTx * qQuat.w; + float fTwy = fTy * qQuat.w; + float fTxx = fTx * qQuat.x; + float fTxz = fTz * qQuat.x; + float fTyy = fTy * qQuat.y; + float fTyz = fTz * qQuat.y; + + return vec3( fTxz+fTwy, fTyz-fTwx, 1.0-(fTxx+fTyy) ); + } + @end + @property( precision_mode != full32 ) + midf3 zAxis( midf4 qQuat ) + { + midf fTx = _h( 2.0 ) * qQuat.x; + midf fTy = _h( 2.0 ) * qQuat.y; + midf fTz = _h( 2.0 ) * qQuat.z; + midf fTwx = fTx * qQuat.w; + midf fTwy = fTy * qQuat.w; + midf fTxx = fTx * qQuat.x; + midf fTxz = fTz * qQuat.x; + midf fTyy = fTy * qQuat.y; + midf fTyz = fTz * qQuat.y; + + return midf3_c( fTxz+fTwy, fTyz-fTwx, _h( 1.0 )-(fTxx+fTyy) ); + } + @end @end @piece( DeclQuat_AllAxis ) diff --git a/ogre2/src/media/Hlms/Common/HLSL/CrossPlatformSettings_piece_all.hlsl b/ogre2/src/media/Hlms/Common/HLSL/CrossPlatformSettings_piece_all.hlsl index b0f14fa76..c5b7bc3d1 100644 --- a/ogre2/src/media/Hlms/Common/HLSL/CrossPlatformSettings_piece_all.hlsl +++ b/ogre2/src/media/Hlms/Common/HLSL/CrossPlatformSettings_piece_all.hlsl @@ -15,6 +15,57 @@ #define toFloat3x3( x ) ((float3x3)(x)) #define buildFloat3x3( row0, row1, row2 ) transpose( float3x3( row0, row1, row2 ) ) +// See CrossPlatformSettings_piece_all.glsl for an explanation +@property( precision_mode == full32 ) + #define _h(x) (x) + + #define midf float + #define midf2 float2 + #define midf3 float3 + #define midf4 float4 + #define midf2x2 float2x2 + #define midf3x3 float3x3 + 
#define midf4x4 float4x4 + + #define midf_c float + #define midf2_c float2 + #define midf3_c float3 + #define midf4_c float4 + #define midf2x2_c float2x2 + #define midf3x3_c float3x3 + #define midf4x4_c float4x4 + + #define toMidf3x3( x ) ((float3x3)( x )) + #define buildMidf3x3( row0, row1, row2 ) transpose( float3x3( row0, row1, row2 ) ) + + #define ensureValidRangeF16(x) +@end +@property( precision_mode == relaxed ) + #define _h(x) min16float((x)) + + #define midf min16float + #define midf2 min16float2 + #define midf3 min16float3 + #define midf4 min16float4 + #define midf2x2 min16float2x2 + #define midf3x3 min16float3x3 + #define midf4x4 min16float4x4 + + // For casting to midf + #define midf_c min16float + #define midf2_c min16float2 + #define midf3_c min16float3 + #define midf4_c min16float4 + #define midf2x2_c min16float2x2 + #define midf3x3_c min16float3x3 + #define midf4x4_c min16float4x4 + + #define toMidf3x3( x ) ((min16float3x3)( x )) + #define buildMidf3x3( row0, row1, row2 ) transpose( min16float3x3( row0, row1, row2 ) ) + + #define ensureValidRangeF16(x) x = min(x, min16float(65504.0)) +@end + #define min3( a, b, c ) min( a, min( b, c ) ) #define max3( a, b, c ) max( a, max( b, c ) ) @@ -78,6 +129,16 @@ #define OGRE_Load3D( tex, iuv, lod ) tex.Load( int4( iuv, lod ) ) +#define OGRE_Load2DF16( tex, iuv, lod ) tex.Load( int3( iuv, lod ) ) +#define OGRE_Load2DMSF16( tex, iuv, subsample ) tex.Load( iuv, subsample ) +#define OGRE_SampleF16( tex, sampler, uv ) tex.Sample( sampler, uv ) +#define OGRE_SampleLevelF16( tex, sampler, uv, lod ) tex.SampleLevel( sampler, uv, lod ) +#define OGRE_SampleArray2DF16( tex, sampler, uv, arrayIdx ) tex.Sample( sampler, float3( uv, arrayIdx ) ) +#define OGRE_SampleArray2DLevelF16( tex, sampler, uv, arrayIdx, lod ) tex.SampleLevel( sampler, float3( uv, arrayIdx ), lod ) +#define OGRE_SampleArrayCubeLevelF16( tex, sampler, uv, arrayIdx, lod ) tex.SampleLevel( sampler, float4( uv, arrayIdx ), lod ) +#define 
OGRE_SampleGradF16( tex, sampler, uv, ddx, ddy ) tex.SampleGrad( sampler, uv, ddx, ddy ) +#define OGRE_SampleArray2DGradF16( tex, sampler, uv, arrayIdx, ddx, ddy ) tex.SampleGrad( sampler, float3( uv, arrayIdx ), ddx, ddy ) + #define bufferFetch( buffer, idx ) buffer.Load( idx ) #define bufferFetch1( buffer, idx ) buffer.Load( idx ).x diff --git a/ogre2/src/media/Hlms/Common/Metal/CrossPlatformSettings_piece_all.metal b/ogre2/src/media/Hlms/Common/Metal/CrossPlatformSettings_piece_all.metal index e32d00d86..3fe63c9c9 100644 --- a/ogre2/src/media/Hlms/Common/Metal/CrossPlatformSettings_piece_all.metal +++ b/ogre2/src/media/Hlms/Common/Metal/CrossPlatformSettings_piece_all.metal @@ -11,13 +11,30 @@ struct float1 inline float3x3 toMat3x3( float4x4 m ) { - return float3x3( m[0].xyz, m[1].xyz, m[2].xyz ); + return float3x3( m[0].xyz, m[1].xyz, m[2].xyz ); } inline float3x3 toMat3x3( float3x4 m ) { return float3x3( m[0].xyz, m[1].xyz, m[2].xyz ); } +inline half3x3 toMatHalf3x3( half4x4 m ) +{ + return half3x3( m[0].xyz, m[1].xyz, m[2].xyz ); +} +inline half3x3 toMatHalf3x3( half3x4 m ) +{ + return half3x3( m[0].xyz, m[1].xyz, m[2].xyz ); +} +inline half3x3 toMatHalf3x3( float4x4 m ) +{ + return half3x3( half3( m[0].xyz ), half3( m[1].xyz ), half3( m[2].xyz ) ); +} +inline half3x3 toMatHalf3x3( float3x4 m ) +{ + return half3x3( half3( m[0].xyz ), half3( m[1].xyz ), half3( m[2].xyz ) ); +} + #define ogre_float4x3 float3x4 //Short used for read operations. It's an int in GLSL & HLSL. An ushort in Metal @@ -29,7 +46,61 @@ inline float3x3 toMat3x3( float3x4 m ) #define wshort3 ushort3 #define toFloat3x3( x ) toMat3x3( x ) -#define buildFloat3x3( row0, row1, row2 ) float3x3( row0, row1, row2 ) +#define buildFloat3x3( row0, row1, row2 ) float3x3( float3( row0 ), float3( row1 ), float3( row2 ) ) + +// See CrossPlatformSettings_piece_all.glsl for an explanation +@property( precision_mode == full32 ) + // In Metal 'half' is an actual datatype. 
It should be OK to override it + // as long as we do it before including metal_stdlib + #define _h(x) (x) + + #define midf float + #define midf2 float2 + #define midf3 float3 + #define midf4 float4 + #define midf2x2 float2x2 + #define midf3x3 float3x3 + #define midf4x4 float4x4 + + #define midf_c float + #define midf2_c float2 + #define midf3_c float3 + #define midf4_c float4 + #define midf2x2_c float2x2 + #define midf3x3_c float3x3 + #define midf4x4_c float4x4 + + #define toMidf3x3( x ) toMat3x3( x ) + #define buildMidf3x3( row0, row1, row2 ) float3x3( row0, row1, row2 ) + + #define ensureValidRangeF16(x) +@end +@property( precision_mode == midf16 ) + // In Metal 'half' is an actual datatype. It should be OK to override it + // as long as we do it before including metal_stdlib + #define _h(x) half(x) + + #define midf half + #define midf2 half2 + #define midf3 half3 + #define midf4 half4 + #define midf2x2 half2x2 + #define midf3x3 half3x3 + #define midf4x4 half4x4 + + #define midf_c half + #define midf2_c half2 + #define midf3_c half3 + #define midf4_c half4 + #define midf2x2_c half2x2 + #define midf3x3_c half3x3 + #define midf4x4_c half4x4 + + #define toMidf3x3( x ) toMatHalf3x3( x ) + #define buildMidf3x3( row0, row1, row2 ) half3x3( half3( row0 ), half3( row1 ), half3( row2 ) ) + + #define ensureValidRangeF16(x) x = min(x, 65504.0h) +@end #define min3( a, b, c ) min( a, min( b, c ) ) #define max3( a, b, c ) max( a, max( b, c ) ) @@ -101,6 +172,16 @@ inline float3x3 toMat3x3( float3x4 m ) #define OGRE_Load3D( tex, iuv, lod ) tex.read( ushort3( iuv ), lod ) +#define OGRE_Load2DF16( tex, iuv, lod ) tex.read( iuv, lod ) +#define OGRE_Load2DMSF16( tex, iuv, subsample ) tex.read( iuv, subsample ) +#define OGRE_SampleF16( tex, sampler, uv ) tex.sample( sampler, uv ) +#define OGRE_SampleLevelF16( tex, sampler, uv, lod ) tex.sample( sampler, uv, level( lod ) ) +#define OGRE_SampleArray2DF16( tex, sampler, uv, arrayIdx ) tex.sample( sampler, float2( uv ), arrayIdx ) 
+#define OGRE_SampleArray2DLevelF16( tex, sampler, uv, arrayIdx, lod ) tex.sample( sampler, float2( uv ), ushort( arrayIdx ), level( lod ) ) +#define OGRE_SampleArrayCubeLevelF16( tex, sampler, uv, arrayIdx, lod ) tex.sample( sampler, float3( uv ), ushort( arrayIdx ), level( lod ) ) +#define OGRE_SampleGradF16( tex, sampler, uv, ddx, ddy ) tex.sample( sampler, uv, gradient2d( ddx, ddy ) ) +#define OGRE_SampleArray2DGradF16( tex, sampler, uv, arrayIdx, ddx, ddy ) tex.sample( sampler, uv, ushort( arrayIdx ), gradient2d( ddx, ddy ) ) + #define bufferFetch( buffer, idx ) buffer[idx] #define bufferFetch1( buffer, idx ) buffer[idx] #define readOnlyFetch( bufferVar, idx ) bufferVar[idx] diff --git a/ogre2/src/media/Hlms/Common/Metal/QuaternionCode_piece_all.metal b/ogre2/src/media/Hlms/Common/Metal/QuaternionCode_piece_all.metal index c703afafe..2c94baa18 100644 --- a/ogre2/src/media/Hlms/Common/Metal/QuaternionCode_piece_all.metal +++ b/ogre2/src/media/Hlms/Common/Metal/QuaternionCode_piece_all.metal @@ -1,51 +1,104 @@ @piece( DeclQuat_xAxis ) -inline float3 xAxis( float4 qQuat ) -{ - float fTy = 2.0 * qQuat.y; - float fTz = 2.0 * qQuat.z; - float fTwy = fTy * qQuat.w; - float fTwz = fTz * qQuat.w; - float fTxy = fTy * qQuat.x; - float fTxz = fTz * qQuat.x; - float fTyy = fTy * qQuat.y; - float fTzz = fTz * qQuat.z; - - return float3( 1.0-(fTyy+fTzz), fTxy+fTwz, fTxz-fTwy ); -} + @property( precision_mode != relaxed ) + float3 xAxis( float4 qQuat ) + { + float fTy = 2.0 * qQuat.y; + float fTz = 2.0 * qQuat.z; + float fTwy = fTy * qQuat.w; + float fTwz = fTz * qQuat.w; + float fTxy = fTy * qQuat.x; + float fTxz = fTz * qQuat.x; + float fTyy = fTy * qQuat.y; + float fTzz = fTz * qQuat.z; + + return float3( 1.0-(fTyy+fTzz), fTxy+fTwz, fTxz-fTwy ); + } + @end + @property( precision_mode != full32 ) + midf3 xAxis( midf4 qQuat ) + { + midf fTy = _h( 2.0 ) * qQuat.y; + midf fTz = _h( 2.0 ) * qQuat.z; + midf fTwy = fTy * qQuat.w; + midf fTwz = fTz * qQuat.w; + midf fTxy = fTy * 
qQuat.x; + midf fTxz = fTz * qQuat.x; + midf fTyy = fTy * qQuat.y; + midf fTzz = fTz * qQuat.z; + + return midf3_c( _h( 1.0 )-(fTyy+fTzz), fTxy+fTwz, fTxz-fTwy ); + } + @end @end @piece( DeclQuat_yAxis ) -inline float3 yAxis( float4 qQuat ) -{ - float fTx = 2.0 * qQuat.x; - float fTy = 2.0 * qQuat.y; - float fTz = 2.0 * qQuat.z; - float fTwx = fTx * qQuat.w; - float fTwz = fTz * qQuat.w; - float fTxx = fTx * qQuat.x; - float fTxy = fTy * qQuat.x; - float fTyz = fTz * qQuat.y; - float fTzz = fTz * qQuat.z; - - return float3( fTxy-fTwz, 1.0-(fTxx+fTzz), fTyz+fTwx ); -} + @property( precision_mode != relaxed ) + float3 yAxis( float4 qQuat ) + { + float fTx = 2.0 * qQuat.x; + float fTy = 2.0 * qQuat.y; + float fTz = 2.0 * qQuat.z; + float fTwx = fTx * qQuat.w; + float fTwz = fTz * qQuat.w; + float fTxx = fTx * qQuat.x; + float fTxy = fTy * qQuat.x; + float fTyz = fTz * qQuat.y; + float fTzz = fTz * qQuat.z; + + return float3( fTxy-fTwz, 1.0-(fTxx+fTzz), fTyz+fTwx ); + } + @end + @property( precision_mode != full32 ) + midf3 yAxis( midf4 qQuat ) + { + midf fTx = _h( 2.0 ) * qQuat.x; + midf fTy = _h( 2.0 ) * qQuat.y; + midf fTz = _h( 2.0 ) * qQuat.z; + midf fTwx = fTx * qQuat.w; + midf fTwz = fTz * qQuat.w; + midf fTxx = fTx * qQuat.x; + midf fTxy = fTy * qQuat.x; + midf fTyz = fTz * qQuat.y; + midf fTzz = fTz * qQuat.z; + + return midf3_c( fTxy-fTwz, _h( 1.0 )-(fTxx+fTzz), fTyz+fTwx ); + } + @end @end @piece( DeclQuat_zAxis ) -inline float3 zAxis( float4 qQuat ) -{ - float fTx = 2.0 * qQuat.x; - float fTy = 2.0 * qQuat.y; - float fTz = 2.0 * qQuat.z; - float fTwx = fTx * qQuat.w; - float fTwy = fTy * qQuat.w; - float fTxx = fTx * qQuat.x; - float fTxz = fTz * qQuat.x; - float fTyy = fTy * qQuat.y; - float fTyz = fTz * qQuat.y; - - return float3( fTxz+fTwy, fTyz-fTwx, 1.0-(fTxx+fTyy) ); -} + @property( precision_mode != relaxed ) + float3 zAxis( float4 qQuat ) + { + float fTx = 2.0 * qQuat.x; + float fTy = 2.0 * qQuat.y; + float fTz = 2.0 * qQuat.z; + float fTwx = fTx * 
qQuat.w; + float fTwy = fTy * qQuat.w; + float fTxx = fTx * qQuat.x; + float fTxz = fTz * qQuat.x; + float fTyy = fTy * qQuat.y; + float fTyz = fTz * qQuat.y; + + return float3( fTxz+fTwy, fTyz-fTwx, 1.0-(fTxx+fTyy) ); + } + @end + @property( precision_mode != full32 ) + midf3 zAxis( midf4 qQuat ) + { + midf fTx = _h( 2.0 ) * qQuat.x; + midf fTy = _h( 2.0 ) * qQuat.y; + midf fTz = _h( 2.0 ) * qQuat.z; + midf fTwx = fTx * qQuat.w; + midf fTwy = fTy * qQuat.w; + midf fTxx = fTx * qQuat.x; + midf fTxz = fTz * qQuat.x; + midf fTyy = fTy * qQuat.y; + midf fTyz = fTz * qQuat.y; + + return midf3_c( fTxz+fTwy, fTyz-fTwx, _h( 1.0 )-(fTxx+fTyy) ); + } + @end @end @piece( DeclQuat_AllAxis ) diff --git a/ogre2/src/media/Hlms/Common/Metal/RenderDepthOnly_piece_ps.metal b/ogre2/src/media/Hlms/Common/Metal/RenderDepthOnly_piece_ps.metal index 4c0127897..f01c8b75d 100644 --- a/ogre2/src/media/Hlms/Common/Metal/RenderDepthOnly_piece_ps.metal +++ b/ogre2/src/media/Hlms/Common/Metal/RenderDepthOnly_piece_ps.metal @@ -13,7 +13,7 @@ struct PS_OUTPUT { @property( !hlms_shadowcaster ) - float4 colour0 [[ color(@counter(rtv_target)) ]]; + midf4 colour0 [[ color(@counter(rtv_target)) ]]; @else @property( !hlms_render_depth_only ) float colour0 [[ color(@counter(rtv_target)) ]]; diff --git a/ogre2/src/media/Hlms/Pbs/Any/AmbientLighting_piece_ps.any b/ogre2/src/media/Hlms/Pbs/Any/AmbientLighting_piece_ps.any index 5bb8186c8..d7c8e1d0b 100644 --- a/ogre2/src/media/Hlms/Pbs/Any/AmbientLighting_piece_ps.any +++ b/ogre2/src/media/Hlms/Pbs/Any/AmbientLighting_piece_ps.any @@ -49,8 +49,10 @@ @property( ambient_hemisphere || vct_ambient_hemisphere ) @piece( DoAmbientHeader ) - float ambientWD = dot( passBuf.ambientHemisphereDir.xyz, pixelData.normal ) * 0.5 + 0.5; - float ambientWS = dot( passBuf.ambientHemisphereDir.xyz, pixelData.reflDir ) * 0.5 + 0.5; + midf ambientWD = + dot( midf3_c( passBuf.ambientHemisphereDir.xyz ), pixelData.normal ) * _h( 0.5 ) + _h( 0.5 ); + midf ambientWS = + dot( 
midf3_c( passBuf.ambientHemisphereDir.xyz ), pixelData.reflDir ) * _h( 0.5 ) + _h( 0.5 ); @end @end @@ -75,8 +77,10 @@ if( vctSpecular.w == 0 ) { @end - pixelData.envColourS += lerp( passBuf.ambientLowerHemi.xyz, passBuf.ambientUpperHemi.xyz, ambientWD ); - pixelData.envColourD += lerp( passBuf.ambientLowerHemi.xyz, passBuf.ambientUpperHemi.xyz, ambientWS ); + pixelData.envColourS += lerp( midf3_c( passBuf.ambientLowerHemi.xyz ), + midf3_c( passBuf.ambientUpperHemi.xyz ), ambientWD ); + pixelData.envColourD += lerp( midf3_c( passBuf.ambientLowerHemi.xyz ), + midf3_c( passBuf.ambientUpperHemi.xyz ), ambientWS ); @property( vct_num_probes ) } @end diff --git a/ogre2/src/media/Hlms/Pbs/Any/AreaLights_LTC_piece_ps.any b/ogre2/src/media/Hlms/Pbs/Any/AreaLights_LTC_piece_ps.any index 7367df1ec..584b349f3 100644 --- a/ogre2/src/media/Hlms/Pbs/Any/AreaLights_LTC_piece_ps.any +++ b/ogre2/src/media/Hlms/Pbs/Any/AreaLights_LTC_piece_ps.any @@ -48,11 +48,11 @@ INLINE float3 IntegrateEdgeVec( float3 v1, float3 v2 ) float x = dot(v1, v2); float y = abs(x); - float a = 0.8543985 + (0.4965155 + 0.0145206*y)*y; - float b = 3.4175940 + (4.1616724 + y)*y; + float a = 0.8543985 + ( 0.4965155 + 0.0145206 * y ) * y; + float b = 3.4175940 + ( 4.1616724 + y ) * y; float v = a / b; - float theta_sintheta = (x > 0.0) ? v : 0.5*rsqrt(max(1.0 - x*x, 1e-7)) - v; + float theta_sintheta = ( x > 0.0 ) ? 
v : 0.5 * rsqrt( max( 1.0 - x * x, 1e-7 ) ) - v; return cross( v1, v2 ) * theta_sintheta; } @@ -71,10 +71,10 @@ INLINE void ClipQuadToHorizon( inout float3 L[5], out int n ) { // detect clipping config int config = 0; - if (L[0].z > 0.0) config += 1; - if (L[1].z > 0.0) config += 2; - if (L[2].z > 0.0) config += 4; - if (L[3].z > 0.0) config += 8; + if (L[0].z > _h( 0.0 )) config += 1; + if (L[1].z > _h( 0.0 )) config += 2; + if (L[2].z > _h( 0.0 )) config += 4; + if (L[3].z > _h( 0.0 )) config += 8; // clip n = 0; @@ -178,12 +178,12 @@ INLINE void ClipQuadToHorizon( inout float3 L[5], out int n ) L[4] = L[0]; } -INLINE float LTC_Evaluate( float3 N, float3 V, float3 P, float3x3 Minv, - @property( syntax == metal )constant@end float4 points[4], - bool twoSided ) +INLINE midf LTC_Evaluate( midf3 N, midf3 V, float3 P, float3x3 Minv, + @property( syntax == metal )constant@end float4 points[4], + bool twoSided ) { // construct orthonormal basis around N - float3 T1, T2; + midf3 T1, T2; T1 = normalize( V - N*dot(V, N) ); T2 = cross(N, T1); @@ -205,16 +205,16 @@ INLINE float LTC_Evaluate( float3 N, float3 V, float3 P, float3x3 Minv, float sum = 0.0; @property( hlms_lights_ltc_clipless ) - float3 dir = points[0].xyz - P; - float3 lightNormal = cross( points[1].xyz - points[0].xyz, points[3].xyz - points[0].xyz ); - bool behind = (dot(dir, lightNormal) < 0.0); + midf3 dir = midf3_c( points[0].xyz - P ); + midf3 lightNormal = midf3_c( cross( points[1].xyz - points[0].xyz, points[3].xyz - points[0].xyz ) ); + bool behind = (dot(dir, lightNormal) < _h( 0.0 )); L[0] = normalize(L[0]); L[1] = normalize(L[1]); L[2] = normalize(L[2]); L[3] = normalize(L[3]); - float3 vsum = float3(0.0); + float3 vsum = midf3_c(0.0); vsum += IntegrateEdgeVec(L[0], L[1]); vsum += IntegrateEdgeVec(L[1], L[2]); @@ -230,19 +230,18 @@ INLINE float LTC_Evaluate( float3 N, float3 V, float3 P, float3x3 Minv, float2 uv = float2(z*0.5 + 0.5, len); uv = uv * LUT_SCALE + LUT_BIAS; - float scale = 
OGRE_SampleArray2DLevel( ltcMatrix, ltcSampler, uv, 1, 0 ).w; + float scale = float( OGRE_SampleArray2DLevelF16( ltcMatrix, ltcSampler, uv, 1, 0 ).w ); - sum = len*scale; + sum = len * scale; if( behind && !twoSided ) sum = 0.0; - @end - @property( !hlms_lights_ltc_clipless ) + @else int n; ClipQuadToHorizon( L, n ); if( n == 0 ) - return 0; + return _h( 0 ); // project onto sphere L[0] = normalize( L[0] ); L[1] = normalize( L[1] ); @@ -259,10 +258,10 @@ INLINE float LTC_Evaluate( float3 N, float3 V, float3 P, float3x3 Minv, if( n == 5 ) sum += IntegrateEdge( L[4], L[0] ); - sum = twoSided ? abs(sum) : max(0.0, sum); + sum = twoSided ? abs(sum) : max( 0.0f, sum ); @end - return sum; + return midf( sum ); } @end @end @@ -291,8 +290,8 @@ for( int i=0; i areaLightMasks : register(t@value(areaLightMasks)); + Texture2DArray areaLightMasks : register(t@value(areaLightMasks)); @else - Texture2DArray areaLightMasks : register(t@value(areaLightMasks)); + Texture2DArray areaLightMasks: register(t@value(areaLightMasks)); @end SamplerState areaLightMasksSampler : register(s@value(areaLightMasks)); @end @property( syntax == metal ) - , texture2d_array areaLightMasks [[texture(@value(areaLightMasks))]] + , texture2d_array areaLightMasks [[texture(@value(areaLightMasks))]] , sampler areaLightMasksSampler [[sampler(@value(areaLightMasks))]] @end @end @@ -43,9 +43,9 @@ fDistance = length( lightDir ); @property( obb_restraint_approx ) - float obbRestraintFade = getObbRestraintFade( light1Buf.areaApproxLights[i].obbRestraint, inPs.pos, - light1Buf.areaApproxLights[i].obbFadeFactorApprox.xyz ); - @piece( obbRestraintTestApprox )&& obbRestraintFade > 0.0@end + midf obbRestraintFade = getObbRestraintFade( light1Buf.areaApproxLights[i].obbRestraint, inPs.pos, + light1Buf.areaApproxLights[i].obbFadeFactorApprox.xyz ); + @piece( obbRestraintTestApprox )&& obbRestraintFade > _h( 0.0 )@end @end if( fDistance <= light1Buf.areaApproxLights[i].attenuation.x @@ -53,8 +53,8 @@ /*&& dot( 
-lightDir, light1Buf.areaApproxLights[i].direction.xyz ) > 0*/ @insertpiece( andObjAreaApproxLightMaskCmp ) ) { projectedPosInPlane.xyz -= light1Buf.areaApproxLights[i].position.xyz; - float3 areaLightBitangent = cross( light1Buf.areaApproxLights[i].direction.xyz, - light1Buf.areaApproxLights[i].tangent.xyz ); + float3 areaLightBitangent = cross( light1Buf.areaApproxLights[i].tangent.xyz, + light1Buf.areaApproxLights[i].direction.xyz ); float2 invHalfRectSize = float2( light1Buf.areaApproxLights[i].direction.w, light1Buf.areaApproxLights[i].tangent.w ); //lightUV is in light space, in range [-0.5; 0.5] @@ -67,18 +67,18 @@ //a surface is close and perpendicular to the light. This is fully a hack and //the values (e.g. 0.25) is completely eye balled. lightUVForTex.xy = lightUV.xy; - lightUV.xy += float2( dot( light1Buf.areaApproxLights[i].tangent.xyz, pixelData.normal ), - dot( areaLightBitangent, pixelData.normal ) ) * 3.75 * invHalfRectSize.xy; + lightUV.xy += float2( dot( ( light1Buf.areaApproxLights[i].tangent.xyz ), float3( pixelData.normal ) ), + dot( areaLightBitangent, float3( pixelData.normal ) ) ) * 3.75 * invHalfRectSize.xy; lightUV.xy = clamp( lightUV.xy, -0.5f, 0.5f ); lightUVForTex = clamp( lightUVForTex.xy, -0.5f, 0.5f ); // float booster = 1.0f - smoothstep( 0.2f, 1.9f, max( abs( lightUV.x ), abs( lightUV.y ) ) ); // booster = 1.0f + booster * 2.25f; - float booster = lerp( 1.0f, 4.0f, pixelData.roughness ); + midf booster = lerp( _h( 1.0f ), _h( 4.0f ), pixelData.roughness ); @property( !hlms_lights_area_tex_colour || !hlms_lights_area_tex_mask ) - float diffuseMask = 1.0f; + midf diffuseMask = _h( 1.0f ); @else - float3 diffuseMask = float3( 1.0f, 1.0f, 1.0f ); + midf3 diffuseMask = midf3_c( 1.0f, 1.0f, 1.0f ); @end @property( hlms_lights_area_tex_mask ) if( i < floatBitsToInt( light1Buf.numAreaApproxLightsWithMask ) ) @@ -86,11 +86,11 @@ // 1 / (1 - 0.02) = 1.020408163 float diffuseMipsLeft = light1Buf.areaLightNumMipmapsSpecFactor * 0.5 - 
light1Buf.areaLightDiffuseMipmapStart * 1.020408163f; - diffuseMask = OGRE_SampleArray2DLevel( areaLightMasks, areaLightMasksSampler, - lightUVForTex + 0.5f, - light1Buf.areaApproxLights[i].attenuation.w, - light1Buf.areaLightDiffuseMipmapStart + - (pixelData.roughness - 0.02f) * diffuseMipsLeft ).AREA_LIGHTS_TEX_SWIZZLE; + diffuseMask = OGRE_SampleArray2DLevelF16( areaLightMasks, areaLightMasksSampler, + lightUVForTex + 0.5f, + light1Buf.areaApproxLights[i].attenuation.w, + light1Buf.areaLightDiffuseMipmapStart + + (pixelData.roughness - 0.02f) * diffuseMipsLeft ).AREA_LIGHTS_TEX_SWIZZLE; } @end @@ -102,20 +102,20 @@ lightDir = closestPoint.xyz - inPs.pos; fDistance= length( lightDir ); - float3 toShapeLight = reflect( -pixelData.viewDir, pixelData.normal ); - float denom = dot( toShapeLight, -light1Buf.areaApproxLights[i].direction.xyz ); + midf3 toShapeLight = reflect( -pixelData.viewDir, pixelData.normal ); + midf denom = dot( toShapeLight, midf3_c( -light1Buf.areaApproxLights[i].direction.xyz ) ); @property( !hlms_lights_area_tex_mask || !hlms_lights_area_tex_colour ) - float specCol = 0; + midf specCol = _h( 0 ); @else - float3 specCol = float3( 0, 0, 0 ); + midf3 specCol = midf3_c( 0, 0, 0 ); @end if( denom > 1e-6f || light1Buf.areaApproxLights[i].doubleSided.x != 0.0f ) { float3 p0l0 = light1Buf.areaApproxLights[i].position.xyz - inPs.pos; - float t = dot( p0l0, -light1Buf.areaApproxLights[i].direction.xyz ) / denom; + float t = dot( p0l0, -light1Buf.areaApproxLights[i].direction.xyz ) / float( denom ); if( t >= 0 ) { - float3 posInShape = inPs.pos.xyz + toShapeLight.xyz * t - light1Buf.areaApproxLights[i].position.xyz; + float3 posInShape = inPs.pos.xyz + float3( toShapeLight.xyz ) * t - light1Buf.areaApproxLights[i].position.xyz; float2 reflClipSpace; reflClipSpace.x = dot( light1Buf.areaApproxLights[i].tangent.xyz, posInShape ); reflClipSpace.y = dot( areaLightBitangent, posInShape ); @@ -129,19 +129,19 @@ specVal = pow( specVal, areaPower ) * min( 
areaPower * areaPower, 1.0f ); @property( !hlms_lights_area_tex_mask || !hlms_lights_area_tex_colour ) - specCol = specVal; + specCol = midf_c( specVal ); @else - specCol = float3( specVal, specVal, specVal ); + specCol = midf3_c( specVal, specVal, specVal ); @end @property( hlms_lights_area_tex_mask ) if( i < floatBitsToInt( light1Buf.numAreaApproxLightsWithMask ) ) { - specCol *= OGRE_SampleArray2DLevel( areaLightMasks, areaLightMasksSampler, - reflClipSpace * invHalfRectSize + 0.5f, - light1Buf.areaApproxLights[i].attenuation.w, - (pixelData.roughness - 0.02f) * - light1Buf.areaLightNumMipmapsSpecFactor ).AREA_LIGHTS_TEX_SWIZZLE; + specCol *= OGRE_SampleArray2DLevelF16( areaLightMasks, areaLightMasksSampler, + reflClipSpace * invHalfRectSize + 0.5f, + light1Buf.areaApproxLights[i].attenuation.w, + (pixelData.roughness - _h( 0.02f )) * + light1Buf.areaLightNumMipmapsSpecFactor ).AREA_LIGHTS_TEX_SWIZZLE; } @end } @@ -151,13 +151,13 @@ //float fAreaW = dot( lightDir, -light1Buf.areaApproxLights[i].direction.xyz ) * 0.5f + 0.5f; //lightDir = (-light1Buf.areaApproxLights[i].direction.xyz + lightDir) * 0.50f; //lightDir = lerp( lightDir2, lightDir, fAreaW ); - float globalDot = saturate( dot( -lightDir, light1Buf.areaApproxLights[i].direction.xyz ) ); - globalDot = light1Buf.areaApproxLights[i].doubleSided.x != 0.0f ? 1.0f : globalDot; - tmpColour = BRDF_AreaLightApprox( lightDir, - light1Buf.areaApproxLights[i].diffuse.xyz * diffuseMask, - light1Buf.areaApproxLights[i].specular.xyz * specCol, + midf globalDot = midf_c( saturate( dot( -lightDir, light1Buf.areaApproxLights[i].direction.xyz ) ) ); + globalDot = light1Buf.areaApproxLights[i].doubleSided.x != 0.0f ? 
_h( 1.0f ) : globalDot; + tmpColour = BRDF_AreaLightApprox( midf3_c( lightDir ), + midf3_c( light1Buf.areaApproxLights[i].diffuse.xyz ) * diffuseMask, + midf3_c( light1Buf.areaApproxLights[i].specular.xyz ) * specCol, pixelData ) * ( globalDot * globalDot ) * booster; - float atten = 1.0 / (0.5 + (light1Buf.areaApproxLights[i].attenuation.y + light1Buf.areaApproxLights[i].attenuation.z * fDistance) * fDistance ); + midf atten = midf_c( 1.0 / (0.5 + (light1Buf.areaApproxLights[i].attenuation.y + light1Buf.areaApproxLights[i].attenuation.z * fDistance) * fDistance ) ); @property( obb_restraint_approx ) atten *= obbRestraintFade; @@ -173,38 +173,38 @@ @end @piece( DeclareBRDF_AreaLightApprox ) -INLINE float3 BRDF_AreaLightApprox +INLINE midf3 BRDF_AreaLightApprox ( - float3 lightDir, float3 lightDiffuse, float3 lightSpecular, PixelData pixelData + midf3 lightDir, midf3 lightDiffuse, midf3 lightSpecular, PixelData pixelData ) { - float3 halfWay= normalize( lightDir + pixelData.viewDir ); - float NdotL = saturate( dot( pixelData.normal, lightDir ) ); - float VdotH = saturate( dot( pixelData.viewDir, halfWay ) ); + midf3 halfWay= normalize( lightDir + pixelData.viewDir ); + midf NdotL = saturate( dot( pixelData.normal, lightDir ) ); + midf VdotH = saturate( dot( pixelData.viewDir, halfWay ) ); //Formula: // fresnelS = lerp( (1 - V*H)^5, 1, F0 ) float_fresnel fresnelS = @insertpiece( getSpecularFresnel ); //We should divide Rs by PI, but it was done inside G for performance - float3 Rs = fresnelS * pixelData.specular.xyz * lightSpecular; + midf3 Rs = fresnelS * pixelData.specular.xyz * lightSpecular; //Diffuse BRDF (*Normalized* Disney, see course_notes_moving_frostbite_to_pbr.pdf //"Moving Frostbite to Physically Based Rendering" Sebastien Lagarde & Charles de Rousiers) - float energyBias = pixelData.roughness * 0.5; - float energyFactor = lerp( 1.0, 1.0 / 1.51, pixelData.roughness ); - float fd90 = energyBias + 2.0 * VdotH * VdotH * pixelData.roughness; - float 
lightScatter = 1.0 + (fd90 - 1.0) * pow( 1.0 - NdotL, 5.0 ); - float viewScatter = 1.0 + (fd90 - 1.0) * pow( 1.0 - pixelData.NdotV, 5.0 ); + midf energyBias = pixelData.roughness * _h( 0.5 ); + midf energyFactor = lerp( _h( 1.0 ), _h( 1.0 / 1.51 ), pixelData.roughness ); + midf fd90 = energyBias + _h( 2.0 ) * VdotH * VdotH * pixelData.roughness; + midf lightScatter = _h( 1.0 ) + (fd90 - _h( 1.0 )) * pow( _h( 1.0 ) - NdotL, _h( 5.0 ) ); + midf viewScatter = _h( 1.0 ) + (fd90 - _h( 1.0 )) * pow( _h( 1.0 ) - pixelData.NdotV, _h( 5.0 ) ); @property( fresnel_separate_diffuse ) float_fresnel fresnelD = @insertpiece( getDiffuseFresnel ); @else - float fresnelD = 1.0f - @insertpiece( getMaxFresnelS ); + midf fresnelD = _h( 1.0f ) - @insertpiece( getMaxFresnelS ); @end //We should divide Rd by PI, but it is already included in kD - float3 Rd = (lightScatter * viewScatter * energyFactor * fresnelD) * pixelData.diffuse.xyz * lightDiffuse; + midf3 Rd = (lightScatter * viewScatter * energyFactor * fresnelD) * pixelData.diffuse.xyz * lightDiffuse; return NdotL * (Rs + Rd); } diff --git a/ogre2/src/media/Hlms/Pbs/Any/Atmosphere/200.AtmosphereNprSkyHlms_piece_all.any b/ogre2/src/media/Hlms/Pbs/Any/Atmosphere/200.AtmosphereNprSkyHlms_piece_all.any new file mode 100644 index 000000000..737e555bb --- /dev/null +++ b/ogre2/src/media/Hlms/Pbs/Any/Atmosphere/200.AtmosphereNprSkyHlms_piece_all.any @@ -0,0 +1,42 @@ +// NPR = Non-Physically-based Rendering Atmo + +//#include "SyntaxHighlightingMisc.h" + +@property( atmosky_npr ) + +@piece( AtmosphereNprSkyStructDecl ) + struct AtmoSettings + { + float densityCoeff; + float lightDensity; + float sunHeight; + float sunHeightWeight; + + float4 skyLightAbsorption; + float4 sunAbsorption; + float4 cameraDisplacement; + float4 packedParams1; + float4 packedParams2; + float4 packedParams3; + + float fogDensity; + float fogBreakMinBrightness; + float fogBreakFalloff; + float padding0; + }; + + @property( syntax != metal ) + CONST_BUFFER( 
AtmoSettingsBuf, @value(atmosky_npr) ) + { + AtmoSettings atmoSettings; + }; + @end +@end + +@property( syntax == metal ) + @piece( AtmosphereNprSkyDecl ) + , constant AtmoSettings &atmoSettings [[buffer(CONST_SLOT_START+@value(atmosky_npr))]] + @end +@end + +@end diff --git a/ogre2/src/media/Hlms/Pbs/Any/Atmosphere/200.AtmosphereNprSkyHlms_piece_vs.any b/ogre2/src/media/Hlms/Pbs/Any/Atmosphere/200.AtmosphereNprSkyHlms_piece_vs.any new file mode 100644 index 000000000..f266d3fe0 --- /dev/null +++ b/ogre2/src/media/Hlms/Pbs/Any/Atmosphere/200.AtmosphereNprSkyHlms_piece_vs.any @@ -0,0 +1,91 @@ +// NPR = Non-Physically-based Rendering Atmo + +//#include "SyntaxHighlightingMisc.h" + +// @property( atmosky_npr ) + +// @piece( DeclAtmosphereNprSkyFuncs ) + +// See https://en.wikipedia.org/wiki/Rayleigh_distribution +// It's inspired, not fully based. +// +// The formula also gives us the nice property that for inputs +// where absorption is in range [0; 1] the output i also in range [0; 1] +midf3 getSkyRayleighAbsorption( midf3 vDir, const midf density ) +{ + midf3 absorption = -density * vDir; + absorption = exp2( absorption ) * _h( 2.0 ); + return absorption; +} + +midf3 pow3( midf3 v, midf e ) +{ + return midf3_c( pow( v.x, e ), pow( v.y, e ), pow( v.z, e ) ); +} + +// @endpiece + +// @piece( DoAtmosphereNprSky ) +// clang-format off +#define p_densityCoeff midf_c( atmoSettings.densityCoeff ) +#define p_lightDensity midf_c( atmoSettings.lightDensity ) +#define p_sunHeight midf_c( atmoSettings.sunHeight ) +#define p_sunHeightWeight midf_c( atmoSettings.sunHeightWeight ) +#define p_skyLightAbsorption midf3_c( atmoSettings.skyLightAbsorption.xyz ) +#define p_sunAbsorption midf3_c( atmoSettings.sunAbsorption.xyz ) +#define p_cameraDisplacement midf3_c( atmoSettings.cameraDisplacement.xyz ) +#define p_mieAbsorption midf3_c( atmoSettings.packedParams1.xyz ) +#define p_finalMultiplier midf_c( atmoSettings.packedParams1.w ) +#define p_sunDir midf3_c( 
atmoSettings.packedParams2.xyz ) +#define p_borderLimit midf_c( atmoSettings.packedParams2.w ) +#define p_skyColour midf3_c( atmoSettings.packedParams3.xyz ) +#define p_densityDiffusion midf_c( atmoSettings.packedParams3.w ) +// clang-format on + +const float3 cameraPos = float3( atmoSettings.skyLightAbsorption.w, atmoSettings.sunAbsorption.w, + atmoSettings.cameraDisplacement.w ); +float3 cameraDir = worldPos.xyz - cameraPos; +float distToCamera = length( cameraDir ); +midf3 atmoCameraDir = midf3_c( cameraDir * ( 1.0f / distToCamera ) ); + +const midf LdotV = max( dot( atmoCameraDir, p_sunDir ), _h( 0.0 ) ); + +atmoCameraDir.y += + p_densityDiffusion * _h( 0.075 ) * ( _h( 1.0 ) - atmoCameraDir.y ) * ( _h( 1.0 ) - atmoCameraDir.y ); +atmoCameraDir += p_cameraDisplacement; +atmoCameraDir = normalize( atmoCameraDir ); + +atmoCameraDir.y = max( atmoCameraDir.y, p_borderLimit ); +atmoCameraDir.y = atmoCameraDir.y * _h( 0.9 ) + _h( 0.1 ); +atmoCameraDir = normalize( atmoCameraDir ); + +const midf LdotV360 = dot( atmoCameraDir, p_sunDir ) * _h( 0.5 ) + _h( 0.5 ); + +// ptDensity gets smaller as sunHeight gets bigger +// ptDensity gets smaller as atmoCameraDir.y gets bigger +const midf ptDensity = + p_densityCoeff / + pow( max( atmoCameraDir.y / ( _h( 1.0 ) - p_sunHeight ), _h( 0.0035 ) ), p_densityDiffusion ); + +const midf antiMie = max( p_sunHeightWeight, _h( 0.08 ) ); + +const midf3 skyAbsorption = getSkyRayleighAbsorption( p_skyColour, ptDensity ); +const midf3 skyColourGradient = pow3( exp2( -atmoCameraDir.y / p_skyColour ), _h( 1.5 ) ); + +const midf mie = LdotV360; + +midf3 atmoColour = midf3_c( 0.0f, 0.0f, 0.0f ); + +const midf3 sharedTerms = skyColourGradient * skyAbsorption; + +atmoColour += antiMie * sharedTerms * p_sunAbsorption; +atmoColour += ( mie * ptDensity * p_lightDensity ) * sharedTerms * p_skyLightAbsorption; +atmoColour += mie * p_mieAbsorption; +atmoColour *= p_lightDensity; + +atmoColour *= p_finalMultiplier; + +outVs.fog.xyz = atmoColour; +// 
@end + +// @end diff --git a/ogre2/src/media/Hlms/Pbs/Any/ForwardPlus_DecalsCubemaps_piece_ps.any b/ogre2/src/media/Hlms/Pbs/Any/ForwardPlus_DecalsCubemaps_piece_ps.any index 48e343dd0..da650d5b9 100644 --- a/ogre2/src/media/Hlms/Pbs/Any/ForwardPlus_DecalsCubemaps_piece_ps.any +++ b/ogre2/src/media/Hlms/Pbs/Any/ForwardPlus_DecalsCubemaps_piece_ps.any @@ -8,10 +8,10 @@ @insertpiece( forward3dHeader ) @property( hlms_decals_normals && normal_map ) - float3 finalDecalTsNormal = float3( 0.0f, 0.0f, 1.0f ); + midf3 finalDecalTsNormal = midf3_c( 0.0f, 0.0f, 1.0f ); @end @property( hlms_decals_emissive ) - float3 finalDecalEmissive = float3( 0.0f, 0.0f, 0.0f ); + midf3 finalDecalEmissive = midf3_c( 0.0f, 0.0f, 0.0f ); @end ushort numLightsInGrid = bufferFetch1( f3dGrid, int(sampleOffset + @value(hlms_forwardplus_decals_slot_offset)u) ); @@ -47,18 +47,18 @@ @property( hlms_decals_diffuse ) ushort decalDiffuseIdx = floatBitsToUint( texIndices.x ) & 0xFFFFu; - float4 decalDiffuse = OGRE_SampleArray2DGrad( decalsDiffuseTex, decalsSampler, decalUV.xy, - decalDiffuseIdx, decalUvDdx, decalUvDdy ).xyzw; + midf4 decalDiffuse = OGRE_SampleArray2DGradF16( decalsDiffuseTex, decalsSampler, decalUV.xy, + decalDiffuseIdx, decalUvDdx, decalUvDdy ).xyzw; @end @property( hlms_decals_normals && normal_map ) ushort decalNormalsIdx = floatBitsToUint( texIndices.x ) >> 16u; - float2 decalNormals = OGRE_SampleArray2DGrad( decalsNormalsTex, decalsSampler, decalUV.xy, - decalNormalsIdx, decalUvDdx, decalUvDdy ).xy; + midf2 decalNormals = OGRE_SampleArray2DGradF16( decalsNormalsTex, decalsSampler, decalUV.xy, + decalNormalsIdx, decalUvDdx, decalUvDdy ).xy; @end @property( hlms_decals_emissive ) ushort decalEmissiveIdx = floatBitsToUint( texIndices.y ) & 0xFFFFu; - float3 decalEmissive = OGRE_SampleArray2DGrad( decalsEmissiveTex, decalsSampler, decalUV.xy, - decalEmissiveIdx, decalUvDdx, decalUvDdy ).xyz; + midf3 decalEmissive = OGRE_SampleArray2DGradF16( decalsEmissiveTex, decalsSampler, 
decalUV.xy, + decalEmissiveIdx, decalUvDdx, decalUvDdy ).xyz; @end @property( hlms_decals_diffuse && (hlms_decals_normals || hlms_decals_emissive) ) @@ -77,27 +77,27 @@ // //Use a smooth fade to avoid flickering due to floating point precision when the normal //and the decal are perpendicular to each other. (tolerance set to 0.0002) - float3 decalDir = normalize( float3( invWorldView1.xyz ) ); + midf3 decalDir = normalize( midf3_c( invWorldView1.xyz ) ); //isOutsideDecal = dot( decalDir.xyz, inPs.normal.xyz ) <= 0.0 ? true : isOutsideDecal; - float normalAway = saturate( (dot( decalDir.xyz, inPs.normal.xyz ) + 0.0002) / 0.0002 ); - normalAway = isOutsideDecal ? 0.0f : normalAway; + midf normalAway = saturate( (dot( decalDir.xyz, inPs.normal.xyz ) + _h( 0.0002 ) ) / _h( 0.0002 ) ); + normalAway = isOutsideDecal ? _h( 0.0f ) : normalAway; - float decalMask = normalAway; + midf decalMask = normalAway; @property( hlms_decals_diffuse ) decalMask *= decalDiffuse.w; - float decalMetalness = texIndices.z; - float3 decalF0 = lerp( float3( 0.03f, 0.03f, 0.03f ), decalDiffuse.xyz, decalMetalness ); + midf decalMetalness = midf_c( texIndices.z ); + midf3 decalF0 = lerp( midf3_c( 0.03f, 0.03f, 0.03f ), decalDiffuse.xyz, decalMetalness ); decalDiffuse.xyz = decalDiffuse.xyz - decalDiffuse.xyz * decalMetalness; - pixelData.diffuse.xyz = lerp( pixelData.diffuse.xyz, decalDiffuse.xyz * 0.318309886f, decalMask ); - pixelData.roughness = lerp( pixelData.roughness, texIndices.w, decalMask ); + pixelData.diffuse.xyz = lerp( pixelData.diffuse.xyz, decalDiffuse.xyz * _h( 0.318309886f ), decalMask ); + pixelData.roughness = lerp( pixelData.roughness, midf_c( texIndices.w ), decalMask ); @property( !metallic_workflow && !fresnel_workflow && !fresnel_scalar ) pixelData.specular = lerp( pixelData.specular.xyz, decalF0, decalMask ); pixelData.F0 = lerp( pixelData.F0, decalMetalness, decalMask ); @else - pixelData.specular = lerp( pixelData.specular.xyz, float3( 1.0f, 1.0f, 1.0f ), decalMask 
); + pixelData.specular = lerp( pixelData.specular.xyz, midf3_c( 1.0f, 1.0f, 1.0f ), decalMask ); pixelData.F0.xyz = lerp( pixelData.F0.xyz, decalF0.xyz, decalMask ); @end @@ -111,7 +111,7 @@ @end @property( hlms_decals_emissive ) finalDecalEmissive += (absLocalPos.x > 0.5f || absLocalPos.y > 0.5f || - absLocalPos.z > 0.5f) ? float3( 0.0f, 0.0f, 0.0f ) : + absLocalPos.z > 0.5f) ? midf3_c( 0.0f, 0.0f, 0.0f ) : (decalEmissive.xyz * decalMask); @end @@ -140,13 +140,13 @@ @property( hlms_forwardplus_debug )totalNumLightsInGrid += numLightsInGrid;@end - float cubemapAccumWeight = 0; + midf cubemapAccumWeight = _h( 0 ); - float3 pccEnvS = float3( 0, 0, 0 ); - float3 pccEnvD = float3( 0, 0, 0 ); + midf3 pccEnvS = midf3_c( 0, 0, 0 ); + midf3 pccEnvD = midf3_c( 0, 0, 0 ); @property( clear_coat ) - float3 clearCoatPccEnvS = float3( 0, 0, 0 ); + midf3 clearCoatPccEnvS = midf3_c( 0, 0, 0 ); @end @property( vct_num_probes ) @@ -171,46 +171,46 @@ float4 probeInnerRange = readOnlyFetch( f3dLightList, int(idx + 6u) ).xyzw; float4 probeOuterRange = readOnlyFetch( f3dLightList, int(idx + 7u) ).xyzw; - float3 posInProbSpace = toProbeLocalSpace( inPs.pos, probe ); - float probeFade = getProbeFade( posInProbSpace, probe ); + midf3 posInProbSpace = midf3_c( toProbeLocalSpace( inPs.pos, probe ) ); + midf probeFade = getProbeFade( posInProbSpace, probe ); - if( probeFade > 0 ) + if( probeFade > _h( 0 ) ) { float2 cubemapIdx_priority = unpackUshort2ToFloat2( floatBitsToUint( probe.halfSize.w ) ); float probeCubemapIdx = cubemapIdx_priority.x; float probePriority = cubemapIdx_priority.y; - float3 probeToAreaCenterOffsetLS = float3( probe.cubemapPosLS.w, + midf3 probeToAreaCenterOffsetLS = midf3_c( probe.cubemapPosLS.w, probe.cubemapPosVS.w, probeInnerRange.w ); - float ndf = getProbeNDF( posInProbSpace.xyz, probeToAreaCenterOffsetLS.xyz, - probeInnerRange.xyz, probeOuterRange.xyz ); + midf ndf = getProbeNDF( posInProbSpace.xyz, probeToAreaCenterOffsetLS.xyz, + midf3_c( probeInnerRange.xyz 
), midf3_c( probeOuterRange.xyz ) ); ndf = saturate( ndf ); - probeFade = 1.0 - ndf; + probeFade = _h( 1.0 ) - ndf; probeFade = probeFade * probeFade; probeFade = probeFade * probeFade; - probeFade *= probePriority; + probeFade *= midf_c( probePriority ); @property( vct_num_probes ) - float4 reflDirLS_dist = localCorrect( pixelData.reflDir, posInProbSpace, probe ); - float3 reflDirLS = reflDirLS_dist.xyz; + midf4 reflDirLS_dist = localCorrect( pixelData.reflDir, posInProbSpace, probe ); + midf3 reflDirLS = reflDirLS_dist.xyz; @else - float3 reflDirLS = localCorrect( pixelData.reflDir, posInProbSpace, probe ).xyz; + midf3 reflDirLS = localCorrect( pixelData.reflDir, posInProbSpace, probe ).xyz; @end - float3 normalLS = localCorrect( pixelData.normal, posInProbSpace, probe ).xyz; + midf3 normalLS = localCorrect( pixelData.normal, posInProbSpace, probe ).xyz; - float4 pccSingleEnvS; + midf4 pccSingleEnvS; @property( clear_coat ) - float3 clearCoatPccSingleEnvS; + midf3 clearCoatPccSingleEnvS; @end @property( !hlms_cubemaps_use_dpm ) - pccSingleEnvS = OGRE_SampleArrayCubeLevel( - texEnvProbeMap, samplerState@value(envMapRegSampler), reflDirLS, + pccSingleEnvS = OGRE_SampleArrayCubeLevelF16( + texEnvProbeMap, samplerState@value(envMapRegSampler), float3( reflDirLS ), probeCubemapIdx, @insertpiece( envSpecularRoughness ) ); @property( cubemaps_as_diffuse_gi ) - pccEnvD += OGRE_SampleArrayCubeLevel( + pccEnvD += OGRE_SampleArrayCubeLevelF16( texEnvProbeMap, samplerState@value(envMapRegSampler), normalLS, probeCubemapIdx, 11.0 ).xyz @insertpiece( ApplyEnvMapScale ) * probeFade; @@ -218,34 +218,34 @@ @property( clear_coat ) clearCoatPccSingleEnvS = OGRE_SampleArrayCubeLevel( texEnvProbeMap, samplerState@value( envMapRegSampler ), - reflDirLS, + float3( reflDirLS ), probeCubemapIdx, @insertpiece( envSpecularRoughnessClearCoat ) ).xyz @insertpiece( ApplyEnvMapScale ); clearCoatPccSingleEnvS *= probeFade; @end @else - pccSingleEnvS = OGRE_SampleArray2DLevel( + pccSingleEnvS = 
OGRE_SampleArray2DLevelF16( texEnvProbeMap, samplerState@value(envMapRegSampler), mapCubemapToDpm( reflDirLS ), probeCubemapIdx, @insertpiece( envSpecularRoughness ) ); @property( cubemaps_as_diffuse_gi ) - pccEnvD += OGRE_SampleArray2DLevel( + pccEnvD += OGRE_SampleArray2DLevelF16( texEnvProbeMap, samplerState@value(envMapRegSampler), mapCubemapToDpm( normalLS ), probeCubemapIdx, 11.0 ).xyz @insertpiece( ApplyEnvMapScale ) * probeFade; @end @property( clear_coat ) - clearCoatPccSingleEnvS = OGRE_SampleArray2DLevel( texEnvProbeMap, samplerState@value( envMapRegSampler ), - mapCubemapToDpm( reflDirLS ), - probeCubemapIdx, - @insertpiece( envSpecularRoughnessClearCoat ) ).xyz @insertpiece( ApplyEnvMapScale ); + clearCoatPccSingleEnvS = OGRE_SampleArray2DLevelF16( texEnvProbeMap, samplerState@value( envMapRegSampler ), + mapCubemapToDpm( reflDirLS ), + probeCubemapIdx, + @insertpiece( envSpecularRoughnessClearCoat ) ).xyz @insertpiece( ApplyEnvMapScale ); clearCoatPccSingleEnvS *= probeFade; @end @end pccSingleEnvS.xyz *= probeFade; @property( envmap_scale ) - pccSingleEnvS.xyz *= passBuf.ambientUpperHemi.w; + pccSingleEnvS.xyz *= midf_c( passBuf.ambientUpperHemi.w ); @end @property( vct_num_probes ) @@ -278,9 +278,9 @@ } @property( cubemaps_as_diffuse_gi ) - pccEnvD.xyz *= cubemapAccumWeight == 0.0f ? 1.0f : (1.0f / cubemapAccumWeight); + pccEnvD.xyz *= cubemapAccumWeight == _h( 0.0f ) ? _h( 1.0f ) : (_h( 1.0f ) / cubemapAccumWeight); @end - pccEnvS.xyz *= cubemapAccumWeight == 0.0f ? 1.0f : (1.0f / cubemapAccumWeight); + pccEnvS.xyz *= cubemapAccumWeight == _h( 0.0f ) ? _h( 1.0f ) : (_h( 1.0f ) / cubemapAccumWeight); @property( clear_coat ) clearCoatPccEnvS *= cubemapAccumWeight == 0.0f ? 1.0f : ( 1.0f / cubemapAccumWeight ); @@ -292,7 +292,7 @@ pixelData.envColourS * (numProbesVctLerp - accumVctLerp) ) / numProbesVctLerp; @property( cubemaps_as_diffuse_gi ) - pixelData.envColourD += vctSpecular.w > 0 ? 
float3( 0, 0, 0 ) : pccEnvD; + pixelData.envColourD += vctSpecular.w > 0 ? midf3_c( 0, 0, 0 ) : pccEnvD; @end @property( clear_coat ) diff --git a/ogre2/src/media/Hlms/Pbs/Any/LightProfiles_piece_ps.any b/ogre2/src/media/Hlms/Pbs/Any/LightProfiles_piece_ps.any index e456d0903..24f452488 100644 --- a/ogre2/src/media/Hlms/Pbs/Any/LightProfiles_piece_ps.any +++ b/ogre2/src/media/Hlms/Pbs/Any/LightProfiles_piece_ps.any @@ -44,12 +44,12 @@ return (inX >= 0) ? res : 3.14159265359f - res; // Undo range reduction } - float getPhotometricAttenuation( float cosAngle, float profileIdx OGRE_PHOTOMETRIC_ARG_DECL ) + midf getPhotometricAttenuation( float cosAngle, float profileIdx OGRE_PHOTOMETRIC_ARG_DECL ) { //float angle = acos( clamp( cosAngle, -1.0, 1.0 ) ) * ( 1.0 / 3.14159265359f ); float angle = fast_acos( clamp( cosAngle, -1.0, 1.0 ) ) * ( 1.0 / 3.14159265359f ); - return OGRE_SampleLevel( lightProfiles, lightProfilesSampler, - float2( angle, profileIdx ), 0.0 ).r; + return OGRE_SampleLevelF16( lightProfiles, lightProfilesSampler, + float2( angle, profileIdx ), 0.0 ).r; } @end diff --git a/ogre2/src/media/Hlms/Pbs/Any/Main/200.BRDFs_piece_ps.any b/ogre2/src/media/Hlms/Pbs/Any/Main/200.BRDFs_piece_ps.any index 8908b3268..76a7a12c7 100644 --- a/ogre2/src/media/Hlms/Pbs/Any/Main/200.BRDFs_piece_ps.any +++ b/ogre2/src/media/Hlms/Pbs/Any/Main/200.BRDFs_piece_ps.any @@ -6,10 +6,10 @@ // getDiffuseFresnel = 1.0 - F0 + pow( 1.0 - NdotL, 5.0 ) * F0 // getSpecularFresnelWithRoughness = F0 + pow( 1.0 - VdotH, 5.0 ) * (max(roughness, (1.0 - F0)) - F0) // getDiffuseFresnelWithRoughness = max(roughness, (1.0 - F0) - F0 + pow( 1.0 - NdotL, 5.0 ) * F0 -@piece( getSpecularFresnel )pixelData.F0 + pow( 1.0 - VdotH, 5.0 ) * (1.0 - pixelData.F0)@end -@piece( getDiffuseFresnel )1.0 - pixelData.F0 + pow( 1.0 - NdotL, 5.0 ) * pixelData.F0@end -@piece( getSpecularFresnelWithRoughness )pixelData.F0 + pow( 1.0 - pixelData.NdotV, 5.0 ) * (max( make_float_fresnel( 1.0 - pixelData.roughness ), 
pixelData.F0 ) - pixelData.F0)@end -@piece( getDiffuseFresnelWithRoughness )max( make_float_fresnel( 1.0 - pixelData.roughness ), pixelData.F0 ) - pixelData.F0 + pow( 1.0 - NdotL, 5.0 ) * pixelData.F0@end +@piece( getSpecularFresnel )pixelData.F0 + pow( _h( 1.0 ) - VdotH, _h( 5.0 ) ) * (_h( 1.0 ) - pixelData.F0)@end +@piece( getDiffuseFresnel )_h( 1.0 ) - pixelData.F0 + pow( _h( 1.0 ) - NdotL, _h( 5.0 ) ) * pixelData.F0@end +@piece( getSpecularFresnelWithRoughness )pixelData.F0 + pow( _h( 1.0 ) - pixelData.NdotV, _h( 5.0 ) ) * (max( make_float_fresnel( _h( 1.0 ) - pixelData.perceptualRoughness ), pixelData.F0 ) - pixelData.F0)@end +@piece( getDiffuseFresnelWithRoughness )max( make_float_fresnel( _h( 1.0 ) - pixelData.perceptualRoughness ), pixelData.F0 ) - pixelData.F0 + pow( _h( 1.0 ) - NdotL, _h( 5.0 ) ) * pixelData.F0@end @property( !fresnel_scalar ) @piece( getMaxFresnelS )fresnelS@end @@ -20,45 +20,49 @@ @property( BRDF_BlinnPhong ) @piece( DeclareBRDF ) //Blinn-Phong -INLINE float3 BRDF( float3 lightDir, float3 lightDiffuse, float3 lightSpecular, PixelData pixelData ) +INLINE midf3 BRDF( midf3 lightDir, midf3 lightDiffuse, midf3 lightSpecular, PixelData pixelData PASSBUF_ARG_DECL ) { - float3 halfWay = normalize( lightDir + pixelData.viewDir ); - float NdotL = saturate( dot( pixelData.normal, lightDir ) ); //Diffuse (Lambert) - float NdotH = clamp( dot( pixelData.normal, halfWay ), 0.001, 1.0 ); //Specular + midf3 halfWay = normalize( lightDir + pixelData.viewDir ); + midf NdotL = saturate( dot( pixelData.normal, lightDir ) ); //Diffuse (Lambert) + midf NdotH = clamp( dot( pixelData.normal, halfWay ), _h( 0.001 ), _h( 1.0 ) ); //Specular @property( !legacy_math_brdf ) - float VdotH = clamp( dot( pixelData.viewDir, halfWay ), 0.001, 1.0 ); //Fresnel + midf VdotH = clamp( dot( pixelData.viewDir, halfWay ), _h( 0.001 ), _h( 1.0 ) ); //Fresnel //Fresnel term (Schlick's approximation) float_fresnel fresnelS = @insertpiece( getSpecularFresnel ); - @property( 
fresnel_separate_diffuse ) - float_fresnel fresnelD = @insertpiece( getDiffuseFresnel ); - @else - float fresnelD = 1.0f - @insertpiece( getMaxFresnelS ); + @property( fresnel_has_diffuse ) + @property( fresnel_separate_diffuse ) + float_fresnel fresnelD = @insertpiece( getDiffuseFresnel ); + @else + midf fresnelD = _h( 1.0f ) - @insertpiece( getMaxFresnelS ); + @end + @else + midf fresnelD = _h( 1.0f ); @end @end @property( !roughness_is_shininess ) - float shininess = exp2( 10.0 * (1.0 - pixelData.roughness) + 1.0 ) * 0.25; + midf shininess = exp2( _h( 10.0 ) * (_h( 1.0 ) - pixelData.roughness) + _h( 1.0 ) ) * _h( 0.25 ); @else - float shininess = pixelData.roughness; + midf shininess = pixelData.roughness; @end - float blinnPhong = pow( NdotH, shininess ); + midf blinnPhong = pow( NdotH, shininess ); @property( !legacy_math_brdf ) //Normalize Blinn-Phong using (n + 8) / (8 * pi) //Note this factor is an approximation. The real normalization is //*much* more expensive. See: //http://www.rorydriscoll.com/2009/01/25/energy-conservation-in-games/ - blinnPhong *= (shininess + 8.0) / (8.0 * 3.141592654); + blinnPhong *= ( shininess + _h( 8.0 ) ) / _h( 8.0 * 3.141592654 ); //Avoid very small denominators, they go to NaN or cause aliasing artifacts //Note: For blinn-phong we use larger denominators otherwise specular blows out of proportion - float_fresnel Rs = ( fresnelS * blinnPhong ) / max( 4.0 * pixelData.NdotV * NdotL, 0.75 ); + float_fresnel Rs = ( fresnelS * blinnPhong ) / max( _h( 4.0 ) * pixelData.NdotV * NdotL, _h( 0.75 ) ); //Make diffuse look closer to Default. 
- fresnelD *= lerp( 1.0, 1.0 / 1.51, pixelData.roughness ); + fresnelD *= lerp( _h( 1.0 ), _h( 1.0 / 1.51 ), pixelData.roughness ); @else - float Rs = blinnPhong; - float fresnelD = 1.0; + midf Rs = blinnPhong; + midf fresnelD = _h( 1.0 ); @end return NdotL * (pixelData.specular.xyz * lightSpecular * Rs + @@ -70,50 +74,69 @@ INLINE float3 BRDF( float3 lightDir, float3 lightDiffuse, float3 lightSpecular, @property( BRDF_CookTorrance ) @piece( DeclareBRDF ) //Cook-Torrance -INLINE float3 BRDF( float3 lightDir, float3 lightDiffuse, float3 lightSpecular, PixelData pixelData ) +INLINE midf3 BRDF( midf3 lightDir, midf3 lightDiffuse, midf3 lightSpecular, PixelData pixelData PASSBUF_ARG_DECL ) { - float3 halfWay = normalize( lightDir + pixelData.viewDir ); - float NdotL = saturate( dot( pixelData.normal, lightDir ) ); - float NdotH = clamp( dot( pixelData.normal, halfWay ), 0.001, 1.0 ); - float VdotH = clamp( dot( pixelData.viewDir, halfWay ), 0.001, 1.0 ); + midf3 halfWay = normalize( lightDir + pixelData.viewDir ); + midf NdotL = saturate( dot( pixelData.normal, lightDir ) ); + midf NdotH = clamp( dot( pixelData.normal, halfWay ), _h( 0.001 ), _h( 1.0 ) ); + midf VdotH = clamp( dot( pixelData.viewDir, halfWay ), _h( 0.001 ), _h( 1.0 ) ); - float sqR = pixelData.roughness * pixelData.roughness; + midf sqR = pixelData.roughness * pixelData.roughness; //Roughness/Distribution/NDF term (Beckmann distribution) //Formula: // Where alpha = NdotH and m = roughness // R = [ 1 / (m^2 x cos(alpha)^4 ] x [ e^( -tan(alpha)^2 / m^2 ) ] // R = [ 1 / (m^2 x cos(alpha)^4 ] x [ e^( ( cos(alpha)^2 - 1 ) / (m^2 cos(alpha)^2 ) ] - float NdotH_sq = NdotH * NdotH; - float roughness_a = 1.0 / ( 3.141592654 * sqR * NdotH_sq * NdotH_sq );//( 1 / (m^2 x cos(alpha)^4 ) - float roughness_b = NdotH_sq - 1.0; //( cos(alpha)^2 - 1 ) - float roughness_c = sqR * NdotH_sq; //( m^2 cos(alpha)^2 ) +@property( precision_mode == full32 ) + midf NdotH_sq = NdotH * NdotH; + midf roughness_b = NdotH_sq - _h( 
1.0 ); //( cos(alpha)^2 - 1 ) +@else + // Use Lagrange's identity to compute 1 - NdotH_sq with mediump + // ||a x b||^2 = ||a||^2 ||b||^2 - (a . b)^2 + // since N and H are unit vectors: ||N x H||^2 = 1.0 - NoH^2 + // + // See https://github.com/google/filament/blob/f40b08d826c69df9fca2711841f1a9ecb77386e8/shaders/src/brdf.fs#L55 + // for details + midf3 NcrossH = cross( pixelData.normal, halfWay ); + midf roughness_b = -dot( NcrossH, NcrossH );//( cos(alpha)^2 - 1 ) + midf NdotH_sq = roughness_b + _h( 1.0 ); +@end + midf roughness_a = _h( 1.0 ) / ( _h( 3.141592654 ) * sqR * NdotH_sq * NdotH_sq );//( 1 / (m^2 x cos(alpha)^4 ) + midf roughness_c = sqR * NdotH_sq; //( m^2 cos(alpha)^2 ) //Avoid Inf * 0 = NaN; we need Inf * 0 = 0 - float R = min( roughness_a, 65504.0 ) * exp( roughness_b / roughness_c ); + midf R = min( roughness_a, _h( 65504.0 ) ) * exp( roughness_b / roughness_c ); + + ensureValidRangeF16( R ); //Geometric/Visibility term (Cook Torrance) - float shared_geo = 2.0 * NdotH / VdotH; - float geo_b = shared_geo * pixelData.NdotV; - float geo_c = shared_geo * NdotL; - float G = min( 1.0, min( geo_b, geo_c ) ); + midf shared_geo = _h( 2.0 ) * NdotH / VdotH; + ensureValidRangeF16( shared_geo ); + midf geo_b = shared_geo * pixelData.NdotV; + midf geo_c = shared_geo * NdotL; + midf G = min( _h( 1.0 ), min( geo_b, geo_c ) ); //Fresnel term (Schlick's approximation) //Formula: // fresnelS = lerp( (1 - V*H)^5, 1, F0 ) // fresnelD = lerp( (1 - N*L)^5, 1, 1 - F0 ) [See s2010_course_note_practical_implementation_at_triace.pdf] - float_fresnel fresnelS = @insertpiece( getSpecularFresnel ); - @property( fresnel_separate_diffuse ) - float_fresnel fresnelD = @insertpiece( getDiffuseFresnel ); - @else - float fresnelD = 1.0f - @insertpiece( getMaxFresnelS ); - @end + float_fresnel fresnelS = @insertpiece( getSpecularFresnel ); + @property( fresnel_has_diffuse ) + @property( fresnel_separate_diffuse ) + float_fresnel fresnelD = @insertpiece( getDiffuseFresnel ); + @else + 
midf fresnelD = _h( 1.0f ) - @insertpiece( getMaxFresnelS ); + @end + @else + midf fresnelD = _h( 1.0f ); + @end //Avoid very small denominators, they go to NaN or cause aliasing artifacts - float_fresnel Rs = ( fresnelS * (R * G) ) / max( 4.0 * pixelData.NdotV * NdotL, 0.01 ); + float_fresnel Rs = ( fresnelS * (R * G) ) / max( _h( 4.0 ) * pixelData.NdotV * NdotL, _h( 0.01 ) ); - return NdotL * (pixelData.specular.xyz * lightSpecular * Rs + - pixelData.diffuse.xyz * lightDiffuse * fresnelD); + return NdotL * (pixelData.specular.xyz * lightSpecular * Rs + + pixelData.diffuse.xyz * lightDiffuse * fresnelD); } @end @end @@ -121,32 +144,50 @@ INLINE float3 BRDF( float3 lightDir, float3 lightDiffuse, float3 lightSpecular, @property( BRDF_Default ) @piece( DeclareBRDF ) //Default BRDF -INLINE float3 BRDF( float3 lightDir, float3 lightDiffuse, float3 lightSpecular, PixelData pixelData ) +INLINE midf3 BRDF( midf3 lightDir, midf3 lightDiffuse, midf3 lightSpecular, PixelData pixelData PASSBUF_ARG_DECL ) { - float3 halfWay = normalize( lightDir + pixelData.viewDir ); - float NdotL = saturate( dot( pixelData.normal, lightDir ) ); - float NdotH = saturate( dot( pixelData.normal, halfWay ) ); - float VdotH = saturate( dot( pixelData.viewDir, halfWay ) ); + midf3 halfWay = normalize( lightDir + pixelData.viewDir ); + midf NdotL = saturate( dot( pixelData.normal, lightDir ) ); + midf NdotH = saturate( dot( pixelData.normal, halfWay ) ); + midf VdotH = saturate( dot( pixelData.viewDir, halfWay ) ); - float sqR = pixelData.roughness * pixelData.roughness; + midf sqR = pixelData.roughness * pixelData.roughness; + //Geometric/Visibility term (Smith GGX Height-Correlated) +@property( GGX_height_correlated ) + midf Lambda_GGXV = NdotL * sqrt( (-pixelData.NdotV * sqR + pixelData.NdotV) * pixelData.NdotV + sqR ); + midf Lambda_GGXL = pixelData.NdotV * sqrt( (-NdotL * sqR + NdotL) * NdotL + sqR ); + + midf G = _h( 0.5 ) / (( Lambda_GGXV + Lambda_GGXL + _h( 1e-6f ) ) * _h( 3.141592654 )); 
+@else + midf k = ( pixelData.perceptualRoughness + _h( 1 ) ) * ( pixelData.perceptualRoughness + _h( 1 ) ) * _h( 0.125 ); + midf gL = NdotL * ( _h( 1 ) - k ) + k; + midf gV = pixelData.NdotV * ( _h( 1 ) - k ) + k; + midf G = _h( 1.0 ) / (( gL * gV + _h( 1e-4f ) ) * _h( 4 * 3.141592654 ) ); +@end + +@property( precision_mode == full32 ) //Roughness/Distribution/NDF term (GGX) //Formula: // Where alpha = roughness // R = alpha^2 / [ PI * [ ( NdotH^2 * (alpha^2 - 1) ) + 1 ]^2 ] - float f = ( NdotH * sqR - NdotH ) * NdotH + 1.0; - float R = sqR / (f * f); // f is guaranteed to not be 0 because we clamped pixelData.roughness + const midf f = ( NdotH * sqR - NdotH ) * NdotH + _h( 1.0 ); + midf R = sqR / (f * f); // f is guaranteed to not be 0 because we clamped pixelData.roughness - //Geometric/Visibility term (Smith GGX Height-Correlated) -@property( GGX_height_correlated ) - float Lambda_GGXV = NdotL * sqrt( (-pixelData.NdotV * sqR + pixelData.NdotV) * pixelData.NdotV + sqR ); - float Lambda_GGXL = pixelData.NdotV * sqrt( (-NdotL * sqR + NdotL) * NdotL + sqR ); - - float G = 0.5 / (( Lambda_GGXV + Lambda_GGXL + 1e-6f ) * 3.141592654); + const midf RG = R * G; @else - float gL = NdotL * (1-sqR) + sqR; - float gV = pixelData.NdotV * (1-sqR) + sqR; - float G = 1.0 / (( gL * gV + 1e-4f ) * 4 * 3.141592654); + // Lagrange identity + const midf3 NcrossH = cross( pixelData.normal, halfWay ); + const midf roughness_b = dot( NcrossH, NcrossH ); // 1.0 - NdotH * NdotH + + // We need to do 10000 / (100 * 100) because otherwise it flushes to 0, loosing a lot of info + const midf f = (-roughness_b * sqR + sqR + roughness_b) * _h( 100.0 ) ; + midf R = sqR * _h( 10000.0 ) / (f * f); + + // Avoid INF and NaNs + R = min( R, _h( 128.0 ) ); + G = min( G, _h( 128.0 ) ); + const midf RG = R * G; @end //Formula: @@ -154,38 +195,42 @@ INLINE float3 BRDF( float3 lightDir, float3 lightDiffuse, float3 lightSpecular, float_fresnel fresnelS = @insertpiece( getSpecularFresnel ); //We should 
divide Rs by PI, but it was done inside G for performance - float3 Rs = ( fresnelS * (R * G) ) * pixelData.specular.xyz; + midf3 Rs = ( fresnelS * RG ) * pixelData.specular.xyz; //Diffuse BRDF (*Normalized* Disney, see course_notes_moving_frostbite_to_pbr.pdf //"Moving Frostbite to Physically Based Rendering" Sebastien Lagarde & Charles de Rousiers) - float energyBias = pixelData.perceptualRoughness * 0.5; - float energyFactor = lerp( 1.0, 1.0 / 1.51, pixelData.perceptualRoughness ); - float fd90 = energyBias + 2.0 * VdotH * VdotH * pixelData.perceptualRoughness; - float lightScatter = 1.0 + (fd90 - 1.0) * pow( 1.0 - NdotL, 5.0 ); - float viewScatter = 1.0 + (fd90 - 1.0) * pow( 1.0 - pixelData.NdotV, 5.0 ); - - @property( fresnel_separate_diffuse ) - float_fresnel fresnelD = @insertpiece( getDiffuseFresnel ); - @else - float fresnelD = 1.0f - @insertpiece( getMaxFresnelS ); - @end + midf energyBias = pixelData.perceptualRoughness * _h( 0.5 ); + midf energyFactor = lerp( _h( 1.0 ), _h( 1.0 / 1.51 ), pixelData.perceptualRoughness ); + midf fd90 = energyBias + _h( 2.0 ) * VdotH * VdotH * pixelData.perceptualRoughness; + midf lightScatter = _h( 1.0 ) + (fd90 - _h( 1.0 )) * pow( _h( 1.0 ) - NdotL, _h( 5.0 ) ); + midf viewScatter = _h( 1.0 ) + (fd90 - _h( 1.0 )) * pow( _h( 1.0 ) - pixelData.NdotV, _h( 5.0 ) ); + + @property( fresnel_has_diffuse ) + @property( fresnel_separate_diffuse ) + float_fresnel fresnelD = @insertpiece( getDiffuseFresnel ); + @else + midf fresnelD = _h( 1.0f ) - @insertpiece( getMaxFresnelS ); + @end + @else + midf fresnelD = _h( 1.0f ); + @end //We should divide Rd by PI, but it is already included in kD - float3 Rd = (lightScatter * viewScatter * energyFactor * fresnelD) * pixelData.diffuse.xyz; + midf3 Rd = (lightScatter * viewScatter * energyFactor * fresnelD) * pixelData.diffuse.xyz; @property( clear_coat ) - float3 color = Rd + Rs; + midf3 color = Rd + Rs; - float Fcc; - float clearCoat = clearCoatLobe(pixelData, halfWay, NdotH, VdotH, Fcc); 
- float attenuation = 1.0 - Fcc; + midf Fcc; + midf clearCoat = clearCoatLobe(pixelData, halfWay, NdotH, VdotH, Fcc); + midf attenuation = _h( 1.0 ) - Fcc; @property( normal_map ) color *= attenuation * NdotL; // If the material has a normal map, we want to use the geometric normal // instead to avoid applying the normal map details to the clear coat layer - float clearCoatNoL = saturate(dot(pixelData.geomNormal, lightDir)); + midf clearCoatNoL = saturate(dot(pixelData.geomNormal, lightDir)); color += clearCoat * clearCoatNoL; return color * lightSpecular; @@ -196,7 +241,7 @@ INLINE float3 BRDF( float3 lightDir, float3 lightDiffuse, float3 lightSpecular, return color * lightSpecular * NdotL; @end @else - return NdotL * (Rs * lightSpecular + Rd * lightDiffuse); + return NdotL * (Rs * lightSpecular + Rd * lightDiffuse); @end } @end @@ -205,9 +250,9 @@ INLINE float3 BRDF( float3 lightDir, float3 lightDiffuse, float3 lightSpecular, @property( hlms_enable_vpls ) @piece( DeclareBRDF_InstantRadiosity ) //Simplified cheap BRDF for Instant Radiosity. 
-float3 BRDF_IR( float3 lightDir, float3 lightDiffuse, PixelData pixelData ) +midf3 BRDF_IR( midf3 lightDir, midf3 lightDiffuse, PixelData pixelData ) { - float NdotL = clamp( dot( pixelData.normal, lightDir ), 0.0, 1.0 ); + midf NdotL = saturate( dot( pixelData.normal, lightDir ) ); float_fresnel fresnelD = @insertpiece( getDiffuseFresnel ); //We should divide Rd by PI, but it is already included in kD @@ -239,22 +284,31 @@ float3 BRDF_IR( float3 lightDir, float3 lightDiffuse, PixelData pixelData ) @property( ltc_texture_available ) #define brdfLUT ltcMatrix - float2 envBRDF = OGRE_SampleArray2D( brdfLUT, ltcSampler, - float2( pixelData.NdotV, - 1.0 - pixelData.perceptualRoughness ), 2 ).xy; + midf3 envBRDF = OGRE_SampleArray2DF16( brdfLUT, ltcSampler, + float2( pixelData.NdotV, + 1.0 - pixelData.perceptualRoughness ), 2 ).xyz; @else - float2 envBRDF = float2( 1.0f, 0.0f ); + midf3 envBRDF = midf3_c( 1.0f, 0.0f, 1.0f ); @end - @property( fresnel_separate_diffuse ) - float NdotL = saturate( dot( pixelData.normal, pixelData.reflDir ) ); - float_fresnel fresnelD = @insertpiece( getDiffuseFresnelWithRoughness ); + @property( fresnel_has_diffuse ) + @property( fresnel_separate_diffuse ) + midf NdotL = saturate( dot( pixelData.normal, pixelData.reflDir ) ); + float_fresnel fresnelD = @insertpiece( getDiffuseFresnelWithRoughness ); + @else + midf fresnelD = _h( 1.0f ) - @insertpiece( getMaxFresnelS ); + @end @else - float fresnelD = 1.0f - @insertpiece( getMaxFresnelS ); + midf fresnelD = _h( 1.0f ); @end - float3 Rd = pixelData.envColourD * pixelData.diffuse.xyz * fresnelD; - float3 Rs = pixelData.envColourS * pixelData.specular.xyz * ( fresnelS * envBRDF.x + envBRDF.y ); + midf3 Rd = pixelData.envColourD * pixelData.diffuse.xyz * _h( 3.141592654 ) * fresnelD * envBRDF.z; + midf3 Rs = pixelData.envColourS * pixelData.specular.xyz * + @property( industry_compatible ) + ( pixelData.F0 * envBRDF.x + envBRDF.y ); + @else + ( fresnelS * envBRDF.x + envBRDF.y ); + @end 
@property( clear_coat ) @property( normal_map ) @@ -268,7 +322,16 @@ float3 BRDF_IR( float3 lightDir, float3 lightDiffuse, PixelData pixelData ) float attenuation = 1.0 - Fc; Rd *= attenuation; Rs *= attenuation; - Rs += pixelData.clearCoatEnvColourS * Fc; + + @property( ltc_texture_available ) + midf3 clearCoatEnvBRDF = OGRE_SampleArray2DF16( brdfLUT, ltcSampler, + float2( clearCoatNoV, + 1.0 - pixelData.clearCoatPerceptualRoughness ), 2 ).xyz; + @else + midf3 clearCoatEnvBRDF = midf3_c( 1.0f, 0.0f, 1.0f ); + @end + + Rs += pixelData.clearCoatEnvColourS * pixelData.specular.xyz * ( _h( 0.04 ) * clearCoatEnvBRDF.x + clearCoatEnvBRDF.y ) * pixelData.clearCoat; @end finalColour += Rd + Rs; @@ -290,6 +353,9 @@ float3 BRDF_IR( float3 lightDir, float3 lightDiffuse, PixelData pixelData ) @piece( ObjLightMaskCmpNonCasterLoopEnd )@add( fineMaskLightIdx, hlms_lights_directional_non_caster )@end @end @piece( andObjLightMaskCmp )&& ((objLightMask & floatBitsToUint( light0Buf.lights[@counter(fineMaskLightIdx)].position.w )) != 0u)@end + @property( hlms_static_branch_shadow_map_lights ) + @piece( andObjLightMaskCmp_light_idx )&& ((objLightMask & floatBitsToUint( light0Buf.lights[light_idx].position.w )) != 0u)@end + @end @piece( andObjAreaApproxLightMaskCmp )&& ((objLightMask & floatBitsToUint( light1Buf.areaApproxLights[i].position.w )) != 0u)@end @piece( andObjAreaLtcLightMaskCmp )&& ((objLightMask & floatBitsToUint( light2Buf.areaLtcLights[i].position.w )) != 0u)@end @else @@ -299,6 +365,9 @@ float3 BRDF_IR( float3 lightDir, float3 lightDiffuse, PixelData pixelData ) @piece( ObjLightMaskCmpNonCasterLoopEnd )@add( fineMaskLightIdx, hlms_lights_directional_non_caster )@end @end @piece( andObjLightMaskCmp )&& ((objLightMask & light0Buf.lights[@counter(fineMaskLightIdx)].lightMask) != 0u)@end + @property( hlms_static_branch_shadow_map_lights ) + @piece( andObjLightMaskCmp_light_idx )&& ((objLightMask & light0Buf.lights[light_idx].lightMask) != 0u)@end + @end @piece( 
andObjAreaApproxLightMaskCmp )&& ((objLightMask & light1Buf.areaApproxLights[i].lightMask) != 0u)@end @piece( andObjAreaLtcLightMaskCmp )&& ((objLightMask & light2Buf.areaLtcLights[i].lightMask) != 0u)@end @end diff --git a/ogre2/src/media/Hlms/Pbs/Any/Main/200.BlendModes_piece_ps.any b/ogre2/src/media/Hlms/Pbs/Any/Main/200.BlendModes_piece_ps.any index e5f133807..cc3cfe286 100644 --- a/ogre2/src/media/Hlms/Pbs/Any/Main/200.BlendModes_piece_ps.any +++ b/ogre2/src/media/Hlms/Pbs/Any/Main/200.BlendModes_piece_ps.any @@ -8,26 +8,26 @@ @piece( NormalNonPremul ) //Normal Non Premultiplied @value(t) pixelData.diffuse.xyz = lerp( pixelData.diffuse.xyz, detailCol@value(t).xyz, detailCol@value(t).a ); - pixelData.diffuse.w = lerp( pixelData.diffuse.w, 1.0, detailCol@value(t).w ); + pixelData.diffuse.w = lerp( pixelData.diffuse.w, _h( 1.0 ), detailCol@value(t).w ); @end @piece( NormalPremul ) //Normal Premultiplied @value(t) - pixelData.diffuse.xyz = (1.0 - detailCol@value(t).a) * pixelData.diffuse.xyz + detailCol@value(t).xyz; - pixelData.diffuse.w = lerp( pixelData.diffuse.w, 1.0, detailCol@value(t).w ); + pixelData.diffuse.xyz = (_h( 1.0 ) - detailCol@value(t).a) * pixelData.diffuse.xyz + detailCol@value(t).xyz; + pixelData.diffuse.w = lerp( pixelData.diffuse.w, _h( 1.0 ), detailCol@value(t).w ); @end @piece( Add ) //Add @value(t) pixelData.diffuse.xyz = lerp( pixelData.diffuse.xyz, - min( pixelData.diffuse.xyz + detailCol@value(t).xyz, float3(1.0, 1.0, 1.0) ), + min( pixelData.diffuse.xyz + detailCol@value(t).xyz, midf3_c(1.0, 1.0, 1.0) ), detailCol@value(t).a ); @end @piece( Subtract ) //Subtract @value(t) pixelData.diffuse.xyz = lerp( pixelData.diffuse.xyz, - max( pixelData.diffuse.xyz - detailCol@value(t).xyz, float3(0.0, 0.0, 0.0) ), + max( pixelData.diffuse.xyz - detailCol@value(t).xyz, midf3_c(0.0, 0.0, 0.0) ), detailCol@value(t).a ); @end @@ -41,21 +41,21 @@ @piece( Multiply2x ) //Multiply2x @value(t) pixelData.diffuse.xyz = lerp( pixelData.diffuse.xyz, - min( 
pixelData.diffuse.xyz * detailCol@value(t).xyz * 2.0, float3(1.0, 1.0, 1.0) ), + min( pixelData.diffuse.xyz * detailCol@value(t).xyz * 2.0, midf3_c(1.0, 1.0, 1.0) ), detailCol@value(t).a ); @end @piece( Screen ) //Screen @value(t) pixelData.diffuse.xyz = lerp( pixelData.diffuse.xyz, - 1.0 - (1.0 - pixelData.diffuse.xyz) * (1.0 - detailCol@value(t).xyz), + _h( 1.0 ) - (_h( 1.0 ) - pixelData.diffuse.xyz) * (_h( 1.0 ) - detailCol@value(t).xyz), detailCol@value(t).a ); @end @piece( Overlay ) //Overlay @value(t) pixelData.diffuse.xyz = lerp( pixelData.diffuse.xyz, - pixelData.diffuse.xyz * ( pixelData.diffuse.xyz + 2.0 * detailCol@value(t).xyz * (1.0 - pixelData.diffuse.xyz) ), + pixelData.diffuse.xyz * ( pixelData.diffuse.xyz + _h( 2.0 ) * detailCol@value(t).xyz * (_h( 1.0 ) - pixelData.diffuse.xyz) ), detailCol@value(t).a ); @end @@ -76,14 +76,14 @@ @piece( GrainExtract ) //GrainExtract @value(t) pixelData.diffuse.xyz = lerp( pixelData.diffuse.xyz, - (pixelData.diffuse.xyz - detailCol@value(t).xyz) + 0.5f, + (pixelData.diffuse.xyz - detailCol@value(t).xyz) + _h( 0.5f ), detailCol@value(t).a ); @end @piece( GrainMerge ) //GrainMerge @value(t) pixelData.diffuse.xyz = lerp( pixelData.diffuse.xyz, - (pixelData.diffuse.xyz + detailCol@value(t).xyz) - 0.5f, + (pixelData.diffuse.xyz + detailCol@value(t).xyz) - _h( 0.5f ), detailCol@value(t).a ); @end @@ -97,12 +97,12 @@ @piece( NormalNonPremul ) //Normal Non Premultiplied @value(t) - pixelData.diffuse = lerp( pixelData.diffuse, float4( 1.0, 1.0, 1.0,1.0 ), detailCol@value(t) ); + pixelData.diffuse = lerp( pixelData.diffuse, midf4_c( 1.0, 1.0, 1.0, 1.0 ), detailCol@value(t) ); @end @piece( NormalPremul ) //Normal Premultiplied @value(t) - pixelData.diffuse = lerp( pixelData.diffuse, float4( 1.0, 1.0, 1.0,1.0 ), detailCol@value(t) ); + pixelData.diffuse = lerp( pixelData.diffuse, midf4_c( 1.0, 1.0, 1.0, 1.0 ), detailCol@value(t) ); @end @end diff --git a/ogre2/src/media/Hlms/Pbs/Any/Main/200.ForwardPlus_piece_ps.any 
b/ogre2/src/media/Hlms/Pbs/Any/Main/200.ForwardPlus_piece_ps.any index 864e04fc3..605f8b725 100644 --- a/ogre2/src/media/Hlms/Pbs/Any/Main/200.ForwardPlus_piece_ps.any +++ b/ogre2/src/media/Hlms/Pbs/Any/Main/200.ForwardPlus_piece_ps.any @@ -121,11 +121,11 @@ float4 posAndType = readOnlyFetch( f3dLightList, int(idx) ); @property( !hlms_forwardplus_fine_light_mask ) - float3 lightDiffuse = readOnlyFetch( f3dLightList, int(idx + 1u) ).xyz; + midf3 lightDiffuse = midf3_c( readOnlyFetch( f3dLightList, int(idx + 1u) ).xyz ); @else float4 lightDiffuse = readOnlyFetch( f3dLightList, int(idx + 1u) ).xyzw; @end - float3 lightSpecular= readOnlyFetch( f3dLightList, int(idx + 2u) ).xyz; + midf3 lightSpecular = midf3_c( readOnlyFetch( f3dLightList, int(idx + 2u) ).xyz ); float4 attenuation = readOnlyFetch( f3dLightList, int(idx + 3u) ).xyzw; @property( light_profiles_texture ) float4 spotDirection= readOnlyFetch( f3dLightList, int(idx + 4u) ).xyzw; @@ -137,19 +137,20 @@ if( fDistance <= attenuation.x @insertpiece( andObjLightMaskFwdPlusCmp ) ) { lightDir *= 1.0 / fDistance; - float atten = 1.0 / (0.5 + (attenuation.y + attenuation.z * fDistance) * fDistance ); + midf atten = midf_c( 1.0 / (0.5 + (attenuation.y + attenuation.z * fDistance) * fDistance ) ); @property( hlms_forward_fade_attenuation_range ) - atten *= max( (attenuation.x - fDistance) * attenuation.w, 0.0f ); + atten *= midf_c( max( (attenuation.x - fDistance) * attenuation.w, 0.0f ) ); @end @property( light_profiles_texture ) - float spotCosAngle = dot( -lightDir, spotDirection.xyz ); + midf spotCosAngle = dot( midf3_c( -lightDir ), midf3_c( spotDirection.xyz ) ); atten *= getPhotometricAttenuation( spotCosAngle, spotDirection.w OGRE_PHOTOMETRIC_ARG ); @end //Point light - float3 tmpColour = BRDF( lightDir, lightDiffuse.xyz, lightSpecular, pixelData ); + midf3 tmpColour = + BRDF( midf3_c( lightDir ), midf3_c( lightDiffuse.xyz ), lightSpecular, pixelData PASSBUF_ARG ); finalColour += tmpColour * atten; } } @@ -168,18 
+169,18 @@ float4 posAndType = readOnlyFetch( f3dLightList, int(idx) ); @property( !hlms_forwardplus_fine_light_mask ) - float3 lightDiffuse = readOnlyFetch( f3dLightList, int(idx + 1u) ).xyz; + midf3 lightDiffuse = midf3_c( readOnlyFetch( f3dLightList, int(idx + 1u) ).xyz ); @else float4 lightDiffuse = readOnlyFetch( f3dLightList, int(idx + 1u) ).xyzw; @end - float3 lightSpecular = readOnlyFetch( f3dLightList, int(idx + 2u) ).xyz; + midf3 lightSpecular = midf3_c( readOnlyFetch( f3dLightList, int(idx + 2u) ).xyz ); float4 attenuation = readOnlyFetch( f3dLightList, int(idx + 3u) ).xyzw; @property( !light_profiles_texture ) float3 spotDirection = readOnlyFetch( f3dLightList, int(idx + 4u) ).xyz; @else float4 spotDirection = readOnlyFetch( f3dLightList, int(idx + 4u) ).xyzw; @end - float3 spotParams = readOnlyFetch( f3dLightList, int(idx + 5u) ).xyz; + midf3 spotParams = midf3_c( readOnlyFetch( f3dLightList, int(idx + 5u) ).xyz ); float3 lightDir = posAndType.xyz - inPs.pos; float fDistance = length( lightDir ); @@ -187,9 +188,9 @@ if( fDistance <= attenuation.x @insertpiece( andObjLightMaskFwdPlusCmp ) ) { lightDir *= 1.0 / fDistance; - float atten = 1.0 / (0.5 + (attenuation.y + attenuation.z * fDistance) * fDistance ); + midf atten = midf_c( 1.0 / (0.5 + (attenuation.y + attenuation.z * fDistance) * fDistance ) ); @property( hlms_forward_fade_attenuation_range ) - atten *= max( (attenuation.x - fDistance) * attenuation.w, 0.0f ); + atten *= midf_c( max( (attenuation.x - fDistance) * attenuation.w, 0.0f ) ); @end //spotParams.x = 1.0 / cos( InnerAngle ) - cos( OuterAngle ) @@ -197,9 +198,9 @@ //spotParams.z = falloff //Spot light - float spotCosAngle = dot( -lightDir, spotDirection.xyz ); + midf spotCosAngle = dot( midf3_c( -lightDir ), midf3_c( spotDirection.xyz ) ); - float spotAtten = saturate( (spotCosAngle - spotParams.y) * spotParams.x ); + midf spotAtten = saturate( (spotCosAngle - spotParams.y) * spotParams.x ); spotAtten = pow( spotAtten, spotParams.z ); 
atten *= spotAtten; @@ -210,7 +211,8 @@ if( spotCosAngle >= spotParams.y ) { - float3 tmpColour = BRDF( lightDir, lightDiffuse.xyz, lightSpecular, pixelData ); + midf3 tmpColour = + BRDF( midf3_c( lightDir ), midf3_c( lightDiffuse.xyz ), lightSpecular, pixelData PASSBUF_ARG ); finalColour += tmpColour * atten; } } @@ -230,7 +232,7 @@ //Get the light float4 posAndType = readOnlyFetch( f3dLightList, int(idx) ); - float3 lightDiffuse = readOnlyFetch( f3dLightList, int(idx + 1u) ).xyz; + midf3 lightDiffuse = midf3_c( readOnlyFetch( f3dLightList, int(idx + 1u) ).xyz ); float4 attenuation = readOnlyFetch( f3dLightList, int(idx + 3u) ).xyzw; float3 lightDir = posAndType.xyz - inPs.pos; @@ -239,35 +241,35 @@ if( fDistance <= attenuation.x ) { //lightDir *= 1.0 / fDistance; - float atten = 1.0 / (0.5 + (attenuation.y + attenuation.z * fDistance) * fDistance ); + midf atten = midf_c( 1.0 / (0.5 + (attenuation.y + attenuation.z * fDistance) * fDistance ) ); @property( hlms_forward_fade_attenuation_range ) - atten *= max( (attenuation.x - fDistance) * attenuation.w, 0.0f ); + atten *= midf_c( max( (attenuation.x - fDistance) * attenuation.w, 0.0f ) ); @end //float3 lightDir2 = posAndType.xyz - inPs.pos; //lightDir2 *= 1.0 / max( 1, fDistance ); //lightDir2 *= 1.0 / fDistance; - finalColour += BRDF_IR( lightDir, lightDiffuse, pixelData ) * atten; + finalColour += BRDF_IR( midf3_c( lightDir ), lightDiffuse, pixelData ) * atten; } } @end @property( hlms_forwardplus_debug ) @property( hlms_forwardplus == forward3d ) - float occupancy = (totalNumLightsInGrid / passBuf.f3dGridHWW[0].w); + midf occupancy = midf_c(totalNumLightsInGrid / passBuf.f3dGridHWW[0].w); @else - float occupancy = (totalNumLightsInGrid / float( @value( fwd_clustered_lights_per_cell ) )); + midf occupancy = midf_c(totalNumLightsInGrid / float( @value( fwd_clustered_lights_per_cell ) )); @end - float3 occupCol = float3( 0.0, 0.0, 0.0 ); - if( occupancy < 1.0 / 3.0 ) + midf3 occupCol = midf3_c( 0.0, 0.0, 0.0 ); + 
if( occupancy < _h( 1.0 / 3.0 ) ) occupCol.z = occupancy; - else if( occupancy < 2.0 / 3.0 ) + else if( occupancy < _h( 2.0 / 3.0 ) ) occupCol.y = occupancy; else occupCol.x = occupancy; - finalColour.xyz = lerp( finalColour.xyz, occupCol.xyz, 0.55f ) * 2; + finalColour.xyz = lerp( finalColour.xyz, occupCol.xyz, _h( 0.55f ) ) * _h( 2 ); @end @end @end diff --git a/ogre2/src/media/Hlms/Pbs/Any/Main/200.Textures_piece_ps.any b/ogre2/src/media/Hlms/Pbs/Any/Main/200.Textures_piece_ps.any index d730ecf05..0ffb34736 100644 --- a/ogre2/src/media/Hlms/Pbs/Any/Main/200.Textures_piece_ps.any +++ b/ogre2/src/media/Hlms/Pbs/Any/Main/200.Textures_piece_ps.any @@ -7,22 +7,22 @@ uniform sampler2DArray ltcMatrix; @end @property( syntax == glslvk ) - layout( ogre_t@value(ltcMatrix) ) uniform texture2DArray ltcMatrix; - layout( ogre_s@value(ltcMatrix) ) uniform sampler ltcSampler; + layout( ogre_t@value(ltcMatrix) ) midf_tex uniform texture2DArray ltcMatrix; + layout( ogre_s@value(ltcMatrix) ) midf_tex uniform sampler ltcSampler; @end @property( syntax == hlsl ) Texture2DArray ltcMatrix : register(t@value(ltcMatrix)); SamplerState ltcSampler : register(s@value(ltcMatrix)); @end @property( syntax == metal ) - , texture2d_array ltcMatrix [[texture(@value(ltcMatrix))]] + , texture2d_array ltcMatrix [[texture(@value(ltcMatrix))]] , sampler ltcSampler [[sampler(@value(ltcMatrix))]] @end @end @end @property( envmap_scale ) - @piece( ApplyEnvMapScale )* passBuf.ambientUpperHemi.w@end + @piece( ApplyEnvMapScale )* midf_c( passBuf.ambientUpperHemi.w )@end @end @property( use_envprobe_map ) diff --git a/ogre2/src/media/Hlms/Pbs/Any/Main/500.Structs_piece_vs_piece_ps.any b/ogre2/src/media/Hlms/Pbs/Any/Main/500.Structs_piece_vs_piece_ps.any index 40a493af1..c225d94af 100644 --- a/ogre2/src/media/Hlms/Pbs/Any/Main/500.Structs_piece_vs_piece_ps.any +++ b/ogre2/src/media/Hlms/Pbs/Any/Main/500.Structs_piece_vs_piece_ps.any @@ -168,6 +168,10 @@ CONST_BUFFER_STRUCT_BEGIN( PassBuffer, 0 ) float 
pssmBlendPoints@n;@end @end @property( hlms_pssm_fade ) float pssmFadePoint;@end +@property( hlms_static_branch_shadow_map_lights ) + float numShadowMapPointLights; + float numShadowMapSpotLights; +@end @property( !use_light_buffers ) @property( hlms_lights_spot )Light lights[@value(hlms_lights_spot)];@end @@ -226,19 +230,19 @@ CONST_BUFFER_STRUCT_END( passBuf ); @property( use_light_buffers ) -CONST_BUFFER_STRUCT_BEGIN( Light0Buffer, 4 ) +CONST_BUFFER_STRUCT_BEGIN( Light0Buffer, 3 ) { Light lights[16]; } CONST_BUFFER_STRUCT_END( light0Buf ); -CONST_BUFFER_STRUCT_BEGIN( Light1Buffer, 5 ) +CONST_BUFFER_STRUCT_BEGIN( Light1Buffer, 4 ) { AreaLight areaApproxLights[2]; } CONST_BUFFER_STRUCT_END( light1Buf ); -CONST_BUFFER_STRUCT_BEGIN( Light2Buffer, 6 ) +CONST_BUFFER_STRUCT_BEGIN( Light2Buffer, 5 ) { AreaLtcLight areaLtcLights[2]; } @@ -259,9 +263,9 @@ CONST_BUFFER_STRUCT_END( light2Buf ); @piece( PassDecl ) , constant PassBuffer &passBuf [[buffer(CONST_SLOT_START+0)]] @property( use_light_buffers ) - , constant Light0Buffer &light0Buf [[buffer(CONST_SLOT_START+4)]] - , constant Light1Buffer &light1Buf [[buffer(CONST_SLOT_START+5)]] - , constant Light2Buffer &light2Buf [[buffer(CONST_SLOT_START+6)]] + , constant Light0Buffer &light0Buf [[buffer(CONST_SLOT_START+3)]] + , constant Light1Buffer &light1Buf [[buffer(CONST_SLOT_START+4)]] + , constant Light2Buffer &light2Buf [[buffer(CONST_SLOT_START+5)]] @end // use_light_buffers @end @end @@ -368,12 +372,12 @@ struct Material @property( envprobe_map && envprobe_map != target_envprobe_map && use_parallax_correct_cubemaps && !hlms_enable_cubemaps_auto ) @piece( PccManualProbeDecl ) @property( syntax != metal ) - CONST_BUFFER( ManualProbe, 3 ) + CONST_BUFFER( ManualProbe, @value( num_pass_const_buffers ) ) { CubemapProbe manualProbe; }; @else - , constant CubemapProbe &manualProbe [[buffer(CONST_SLOT_START+3)]] + , constant CubemapProbe &manualProbe [[buffer(CONST_SLOT_START+@value( num_pass_const_buffers ))]] @end @end @end 
@@ -382,15 +386,15 @@ struct Material @pset( texcoord, 0 ) @piece( VStoPS_block ) - @property( syntax == metal ) + @property( syntax == metal || lower_gpu_overhead ) @property( (!hlms_shadowcaster || alpha_test) && !lower_gpu_overhead ) - ushort materialId [[flat]]; + FLAT_INTERPOLANT( ushort materialId, @counter(texcoord) ); @end @property( hlms_fine_light_mask || hlms_forwardplus_fine_light_mask ) - uint objLightMask [[flat]]; + FLAT_INTERPOLANT( uint objLightMask, @counter(texcoord) ); @end @property( use_planar_reflections ) - ushort planarReflectionIdx [[flat]]; + FLAT_INTERPOLANT( ushort planarReflectionIdx, @counter(texcoord) ); @end @else @property( (!hlms_shadowcaster || alpha_test) && !lower_gpu_overhead ) @@ -401,10 +405,10 @@ struct Material @property( !hlms_shadowcaster ) @property( hlms_normal || hlms_qtangent ) INTERPOLANT( float3 pos, @counter(texcoord) ); - INTERPOLANT( float3 normal, @counter(texcoord) ); + INTERPOLANT( midf3 normal, @counter(texcoord) ); @property( normal_map ) - INTERPOLANT( float3 tangent, @counter(texcoord) ); - @property( hlms_qtangent || hlms_tangent4 )FLAT_INTERPOLANT( float biNormalReflection, @counter(texcoord) );@end + INTERPOLANT( midf3 tangent, @counter(texcoord) ); + @property( hlms_qtangent || hlms_tangent4 )FLAT_INTERPOLANT( midf biNormalReflection, @counter(texcoord) );@end @end @end @foreach( hlms_uv_count, n ) @@ -420,7 +424,7 @@ struct Material @property( hlms_num_shadow_map_lights && !hlms_all_point_lights ) INTERPOLANT( float3 worldPos, @counter(texcoord) ); @property( hlms_normal || hlms_qtangent ) - INTERPOLANT( float3 worldNorm, @counter(texcoord) ); + INTERPOLANT( midf3 worldNorm, @counter(texcoord) ); @end @end @end @@ -433,6 +437,11 @@ struct Material @property( hlms_forwardplus && hlms_instanced_stereo ) INTERPOLANT( float3 cullCamPosXY, @counter(texcoord) ); @end + + @property( hlms_fog ) + // fog.xyz = colour + INTERPOLANT( midf3 fog, @counter(texcoord) ); + @end @else @property( alpha_test ) 
@foreach( hlms_uv_count, n ) @@ -448,5 +457,10 @@ struct Material @end @end @end + @property( hlms_emulate_clip_distances && hlms_pso_clip_distances ) + @foreach( hlms_pso_clip_distances, n ) + INTERPOLANT( float clipDistance@n, @counter(texcoord) ); + @end + @end @insertpiece( custom_VStoPS ) @end diff --git a/ogre2/src/media/Hlms/Pbs/Any/Main/800.PixelShader_piece_ps.any b/ogre2/src/media/Hlms/Pbs/Any/Main/800.PixelShader_piece_ps.any index da77c434f..99752eb78 100644 --- a/ogre2/src/media/Hlms/Pbs/Any/Main/800.PixelShader_piece_ps.any +++ b/ogre2/src/media/Hlms/Pbs/Any/Main/800.PixelShader_piece_ps.any @@ -1,5 +1,8 @@ -@piece( envSpecularRoughness ) pixelData.perceptualRoughness * passBuf.envMapNumMipmaps @end -@piece( envSpecularRoughnessClearCoat ) pixelData.clearCoatPerceptualRoughness * passBuf.envMapNumMipmaps @end +// The mapping below is a quadratic fit for log2(perceptualRoughness)+iblRoughnessOneLevel when +// iblRoughnessOneLevel is 4. We found empirically that this mapping works very well for +// a 256 cubemap with 5 levels used. But also scales well for other iblRoughnessOneLevel values. 
+@piece( envSpecularRoughness ) pixelData.perceptualRoughness * midf_c( passBuf.envMapNumMipmaps ) * ( _h( 2.0 ) - pixelData.perceptualRoughness ) @end +@piece( envSpecularRoughnessClearCoat ) pixelData.clearCoatPerceptualRoughness * midf_c( passBuf.envMapNumMipmaps ) * ( _h( 2.0 ) - pixelData.clearCoatPerceptualRoughness ) @end @piece( DefaultHeaderPS ) // START UNIFORM DECLARATION @@ -9,16 +12,19 @@ @end @insertpiece( MaterialStructDecl ) @insertpiece( InstanceStructDecl ) + @insertpiece( AtmosphereNprSkyStructDecl ) @end @insertpiece( custom_ps_uniformStructDeclaration ) // END UNIFORM DECLARATION @property( !fresnel_scalar ) - #define float_fresnel float - #define make_float_fresnel( x ) x + #define float_fresnel midf + #define float_fresnel_c( x ) midf_c( x ) + #define make_float_fresnel( x ) midf_c( x ) @else - #define float_fresnel float3 - #define make_float_fresnel( x ) float3( x, x, x ) + #define float_fresnel midf3 + #define float_fresnel_c( x ) midf3_c( x ) + #define make_float_fresnel( x ) midf3_c( x, x, x ) @end @insertpiece( DeclReverseDepthMacros ) @@ -38,64 +44,73 @@ struct PixelData { @property( !hlms_shadowcaster ) - float3 normal; + midf3 normal; @property( normal_map ) - float3 geomNormal; + midf3 geomNormal; @else #define geomNormal normal @end - float4 diffuse; - float3 specular; + @property( detail_triplanar ) + midf3 worldSpaceNormal; + @end + midf4 diffuse; + midf3 specular; @property( clear_coat ) - float clearCoat; - float clearCoatPerceptualRoughness; - float clearCoatRoughness; + midf clearCoat; + midf clearCoatPerceptualRoughness; + midf clearCoatRoughness; @end - float perceptualRoughness; - float roughness; + midf perceptualRoughness; + midf roughness; float_fresnel F0; @property( needs_view_dir ) - float3 viewDir; - float NdotV; + midf3 viewDir; + midf NdotV; @end @property( needs_refl_dir ) - float3 reflDir; + midf3 reflDir; @property( needs_env_brdf ) - float3 envColourS; - float3 envColourD; + midf3 envColourS; + midf3 
envColourD; @property( clear_coat ) - float3 clearCoatEnvColourS; + midf3 clearCoatEnvColourS; @end @end @end @else - float4 diffuse; //We only use the .w component, Alpha + midf4 diffuse; //We only use the .w component, Alpha @end + + @insertpiece( custom_ps_pixelData ) }; - #define SampleDetailWeightMap( tex, sampler, uv, arrayIdx ) OGRE_SampleArray2D( tex, sampler, uv, arrayIdx ) + #define SampleDetailWeightMap( tex, sampler, uv, arrayIdx ) OGRE_SampleArray2DF16( tex, sampler, uv, arrayIdx ) @foreach( detail_maps_diffuse, n ) - @property( detail_map@n )#define SampleDetailCol@n( tex, sampler, uv, arrayIdx ) OGRE_SampleArray2D( tex, sampler, uv, arrayIdx )@end + @property( detail_map@n )#define SampleDetailCol@n( tex, sampler, uv, arrayIdx ) OGRE_SampleArray2DF16( tex, sampler, uv, arrayIdx )@end @end @property( diffuse_map ) - #define SampleDiffuse( tex, sampler, uv, arrayIdx ) OGRE_SampleArray2D( tex, sampler, uv, arrayIdx ) @property( diffuse_map_grayscale ).rrra@end + #define SampleDiffuse( tex, sampler, uv, arrayIdx ) OGRE_SampleArray2DF16( tex, sampler, uv, arrayIdx ) @property( diffuse_map_grayscale ).rrra@end @end @property( specular_map ) - #define SampleSpecular( tex, sampler, uv, arrayIdx ) OGRE_SampleArray2D( tex, sampler, uv, arrayIdx ) + #define SampleSpecular( tex, sampler, uv, arrayIdx ) OGRE_SampleArray2DF16( tex, sampler, uv, arrayIdx ) @end @property( roughness_map ) - #define SampleRoughness( tex, sampler, uv, arrayIdx ) OGRE_SampleArray2D( tex, sampler, uv, arrayIdx ) + #define SampleRoughness( tex, sampler, uv, arrayIdx ) OGRE_SampleArray2DF16( tex, sampler, uv, arrayIdx ) @end @property( emissive_map ) - #define SampleEmissive( tex, sampler, uv, arrayIdx ) OGRE_SampleArray2D( tex, sampler, uv, arrayIdx ) @property( emissive_map_grayscale ).rrr@end + #define SampleEmissive( tex, sampler, uv, arrayIdx ) OGRE_SampleArray2DF16( tex, sampler, uv, arrayIdx ) @property( emissive_map_grayscale ).rrr@end @end @property( use_envprobe_map ) - #define 
SampleEnvProbe( tex, sampler, uv, lod ) OGRE_SampleLevel( tex, sampler, uv, lod ) + @property( syntax == metal ) + #define SampleEnvProbe( tex, sampler, uv, lod ) OGRE_SampleLevelF16( tex, sampler, float3( uv ), lod ) + @else + #define SampleEnvProbe( tex, sampler, uv, lod ) OGRE_SampleLevelF16( tex, sampler, uv, lod ) + @end @end @property( hlms_lights_spot_textured ) @@ -107,37 +122,37 @@ @end @property( normal_map_tex || detail_maps_normal ) - INLINE float3 reconstructZfromTSNormal( float2 tsNormal2 ) + INLINE midf3 reconstructZfromTSNormal( midf2 tsNormal2 ) { - float3 tsNormal; + midf3 tsNormal; tsNormal.xy = tsNormal2.xy; - tsNormal.z = sqrt( max( 0.0f, 1.0f - tsNormal.x * tsNormal.x - tsNormal.y * tsNormal.y ) ); + tsNormal.z = sqrt( max( _h(0.0f), _h(1.0f) - tsNormal.x * tsNormal.x - tsNormal.y * tsNormal.y ) ); return tsNormal.xyz; } @property( normal_sampling_format == normal_rg_snorm ) //Normal texture must be in UV8/RG8_SNORM or BC5S format! - #define getTSNormal( normalMap, samplerState, uv, normalIdx ) reconstructZfromTSNormal( OGRE_SampleArray2D( normalMap, samplerState, uv, normalIdx ).xy ) + #define getTSNormal( normalMap, samplerState, uv, normalIdx ) reconstructZfromTSNormal( OGRE_SampleArray2DF16( normalMap, samplerState, uv, normalIdx ).xy ) @end @property( normal_sampling_format == normal_rg_unorm ) //Normal texture must be in RG8_UNORM or similar format! - #define getTSNormal( normalMap, samplerState, uv, normalIdx ) reconstructZfromTSNormal( OGRE_SampleArray2D( normalMap, samplerState, uv, normalIdx ).xy * 2.0 - 1.0 ) + #define getTSNormal( normalMap, samplerState, uv, normalIdx ) reconstructZfromTSNormal( OGRE_SampleArray2DF16( normalMap, samplerState, uv, normalIdx ).xy * 2.0 - 1.0 ) @end @property( normal_sampling_format == normal_bc3_unorm ) //Normal texture must be in BC3 or similar format! 
- #define getTSNormal( normalMap, samplerState, uv, normalIdx ) reconstructZfromTSNormal( OGRE_SampleArray2D( normalMap, samplerState, uv, normalIdx ).yw * 2.0 - 1.0 ) + #define getTSNormal( normalMap, samplerState, uv, normalIdx ) reconstructZfromTSNormal( OGRE_SampleArray2DF16( normalMap, samplerState, uv, normalIdx ).yw * 2.0 - 1.0 ) @end @property( normal_sampling_format == normal_la ) //Normal texture must be in LA format! - #define getTSNormal( normalMap, samplerState, uv, normalIdx ) reconstructZfromTSNormal( OGRE_SampleArray2D( normalMap, samplerState, uv, normalIdx ).xw * 2.0 - 1.0 ) + #define getTSNormal( normalMap, samplerState, uv, normalIdx ) reconstructZfromTSNormal( OGRE_SampleArray2DF16( normalMap, samplerState, uv, normalIdx ).xw * 2.0 - 1.0 ) @end @end @property( obb_restraint_approx || obb_restraint_ltc ) /// Returns value in range [-inf; 1] /// Values <= 0 means 'pos' is outside the obb - float getObbRestraintFade( @property( syntax == metal )constant@end float4 obbRestraint[3], - float3 pos, float3 obbFadeFactors ) + midf getObbRestraintFade( @property( syntax == metal )constant@end float4 obbRestraint[3], + float3 pos, float3 obbFadeFactors ) { float3 obbDistToBounds; obbDistToBounds.x = dot( obbRestraint[0].xyzw, float4( pos.xyz, 1.0 ) ); @@ -147,7 +162,7 @@ obbDistToBounds = abs( obbDistToBounds ); float3 obbFade = (1.0 - obbDistToBounds) * obbFadeFactors; - return min( min3( obbFade.x, obbFade.y, obbFade.z ), 1.0 ); + return min( midf_c( min3( obbFade.x, obbFade.y, obbFade.z ) ), _h( 1.0 ) ); } @end @@ -229,16 +244,16 @@ @property( detail_maps_diffuse || detail_maps_normal ) //Prepare weight map for the detail maps. 
@property( detail_weight_map ) - float4 detailWeights = SampleDetailWeightMap( textureMaps@value(detail_weight_map_idx), + midf4 detailWeights = SampleDetailWeightMap( textureMaps@value(detail_weight_map_idx), samplerState@value(detail_weight_map_sampler), UV_DETAIL_WEIGHT( inPs.uv@value(uv_detail_weight).xy ), texIndex_weightMapIdx ); - @property( detail_weights )detailWeights *= material.cDetailWeights;@end + @property( detail_weights )detailWeights *= midf4_c( material.cDetailWeights );@end @else @property( detail_weights ) - float4 detailWeights = material.cDetailWeights; + midf4 detailWeights = midf4_c( material.cDetailWeights ); @else - float4 detailWeights = float4( 1.0, 1.0, 1.0, 1.0 ); + midf4 detailWeights = midf4_c( 1.0, 1.0, 1.0, 1.0 ); @end @end @end @@ -248,9 +263,9 @@ /// Sample detail maps and weight them against the weight map in the next foreach loop. @foreach( detail_maps_diffuse, n ) @property( detail_map@n ) - float4 detailCol@n = SampleDetailCol@n( textureMaps@value(detail_map@n_idx), + midf4 detailCol@n = SampleDetailCol@n@property( detail_triplanar_diffuse )Triplanar@end ( textureMaps@value(detail_map@n_idx), samplerState@value(detail_map@n_sampler), - UV_DETAIL@n( inPs.uv@value(uv_detail@n).xy@insertpiece( offsetDetail@n ) ), + UV_DETAIL@n( @property( detail_triplanar )GetTriplanarUVTp( inPs.worldPos, pixelData.worldSpaceNormal )@else inPs.uv@value(uv_detail@n).xy@end @insertpiece( offsetDetail@n ) ), texIndex_detailMapIdx@n ); detailWeights.@insertpiece(detail_swizzle@n) *= detailCol@n.w; detailCol@n.w = detailWeights.@insertpiece(detail_swizzle@n); @@ -267,7 +282,7 @@ texIndex_diffuseIdx ); @else /// If there are no diffuse maps, we must initialize it to some value. - pixelData.diffuse.xyzw = material.bgDiffuse.xyzw; + pixelData.diffuse.xyzw = midf4_c( material.bgDiffuse.xyzw ); @end /// Blend the detail diffuse maps with the main diffuse. 
@@ -275,7 +290,7 @@ @insertpiece( blend_mode_idx@n ) @add( t, 1 ) @end /// Apply the material's diffuse over the textures - pixelData.diffuse.xyz *= material.kD.xyz; + pixelData.diffuse.xyz *= midf3_c( material.kD.xyz ); @property( transparent_mode || hlms_screen_space_refractions ) pixelData.diffuse.xyz *= (pixelData.diffuse.w * pixelData.diffuse.w); @end @@ -288,9 +303,9 @@ @piece( SampleSpecularMap ) /// SPECUlAR MAP - pixelData.specular.xyz = material.kS.xyz; + pixelData.specular.xyz = midf3_c( material.kS.xyz ); @property( !metallic_workflow ) - pixelData.F0 = material.F0.@insertpiece( FresnelSwizzle ); + pixelData.F0 = float_fresnel_c( material.F0.@insertpiece( FresnelSwizzle ) ); @property( specular_map && !fresnel_workflow ) pixelData.specular.xyz *= SampleSpecular( textureMaps@value( specular_map_idx ), samplerState@value(specular_map_sampler), @@ -304,17 +319,17 @@ texIndex_specularIdx ).@insertpiece( FresnelSwizzle ); @end @else - float metalness = material.F0.x; + midf metalness = midf_c( material.F0.x ); @property( specular_map ) metalness *= SampleSpecular( textureMaps@value( specular_map_idx ), samplerState@value(specular_map_sampler), UV_SPECULAR( inPs.uv@value(uv_specular).xy ), texIndex_specularIdx ).x; @end - pixelData.F0 = lerp( make_float_fresnel( 0.03f ), pixelData.diffuse.xyz * 3.14159f, metalness ); + pixelData.F0 = lerp( make_float_fresnel( 0.04f ), pixelData.diffuse.xyz * _h( 3.14159f ), metalness ); pixelData.diffuse.xyz = pixelData.diffuse.xyz - pixelData.diffuse.xyz * metalness; @property( hlms_alphablend || hlms_screen_space_refractions ) - pixelData.F0 *= material.F0.w; ///Should this be done for non-metallic as well??? + pixelData.F0 *= midf_c( material.F0.w ); ///Should this be done for non-metallic as well??? 
@end @end @property( transparent_mode || hlms_screen_space_refractions ) @@ -324,7 +339,7 @@ @piece( SampleRoughnessMap ) /// ROUGHNESS MAP - pixelData.perceptualRoughness = material.kS.w; + pixelData.perceptualRoughness = midf_c( material.kS.w ); @property( roughness_map ) pixelData.perceptualRoughness *= SampleRoughness( textureMaps@value( roughness_map_idx ), @@ -343,9 +358,9 @@ @end @property( perceptual_roughness ) - pixelData.roughness = max( pixelData.perceptualRoughness * pixelData.perceptualRoughness, 0.001f ); + pixelData.roughness = max( pixelData.perceptualRoughness * pixelData.perceptualRoughness, _h( 0.001f ) ); @else - pixelData.roughness = max( pixelData.perceptualRoughness, 0.001f ); + pixelData.roughness = max( pixelData.perceptualRoughness, _h( 0.001f ) ); @end @end @@ -363,15 +378,15 @@ @else //Normal mapping. pixelData.geomNormal = normalize( inPs.normal ) @insertpiece( two_sided_flip_normal ); - float3 vTangent = normalize( inPs.tangent ); + midf3 vTangent = normalize( inPs.tangent ); @property( hlms_qtangent || hlms_tangent4 ) @piece( tbnApplyReflection ) * inPs.biNormalReflection@end @end //Get the TBN matrix - float3 vBinormal = normalize( cross( pixelData.geomNormal, vTangent )@insertpiece( tbnApplyReflection ) ); - float3x3 TBN = buildFloat3x3( vTangent, vBinormal, pixelData.geomNormal ); + midf3 vBinormal = normalize( cross( pixelData.geomNormal, vTangent )@insertpiece( tbnApplyReflection ) ); + midf3x3 TBN = buildMidf3x3( vTangent, vBinormal, pixelData.geomNormal ); @property( normal_map_tex ) pixelData.normal = getTSNormal( textureMaps@value( normal_map_tex_idx ), @@ -379,11 +394,11 @@ UV_NORMAL( inPs.uv@value(uv_normal).xy ), texIndex_normalIdx ); @else - pixelData.normal = float3( 0.0, 0.0, 1.0 ); + pixelData.normal = midf3_c( 0.0, 0.0, 1.0 ); @end @property( normal_weight_tex ) // Apply the weight to the main normal map - pixelData.normal = lerp( float3( 0.0, 0.0, 1.0 ), pixelData.normal, normalMapWeight ); + pixelData.normal = 
lerp( midf3_c( 0.0, 0.0, 1.0 ), pixelData.normal, normalMapWeight ); @end @end @end @@ -402,7 +417,7 @@ @property( detail_maps_normal ) @foreach( 4, n ) @property( normal_weight_detail@n ) - @piece( detail@n_nm_weight_mul ) * material.normalWeights.@insertpiece( detail_swizzle@n )@end + @piece( detail@n_nm_weight_mul ) * midf_c( material.normalWeights.@insertpiece( detail_swizzle@n ) )@end @end @end @end @@ -410,7 +425,7 @@ @foreach( detail_maps_normal, n ) @piece( SampleDetailMapNm@n )getTSNormal( textureMaps@value(detail_map_nm@n_idx), samplerState@value(detail_map_nm@n_sampler), - UV_DETAIL_NM@n( inPs.uv@value(uv_detail_nm@n).xy@insertpiece( offsetDetail@n ) ), + UV_DETAIL_NM@n( @property( detail_triplanar )GetTriplanarUVTp( inPs.worldPos, pixelData.worldSpaceNormal )@else inPs.uv@value(uv_detail_nm@n).xy@end @insertpiece( offsetDetail@n ) ), texIndex_detailNormMapIdx@n ) * detailWeights.@insertpiece(detail_swizzle@n) @insertpiece( detail@n_nm_weight_mul )@end @end @@ -433,30 +448,53 @@ //Everything's in Camera space @property( needs_view_dir ) @property( !hlms_instanced_stereo ) - pixelData.viewDir = normalize( -inPs.pos ); + pixelData.viewDir = midf3_c( normalize( -inPs.pos ) ); @else - pixelData.viewDir = -inPs.pos; if( gl_FragCoord.x > passBuf.rightEyePixelStartX ) - pixelData.viewDir += passBuf.leftToRightView.xyz; - pixelData.viewDir = normalize( pixelData.viewDir ); + pixelData.viewDir = midf3_c( normalize( -inPs.pos + passBuf.leftToRightView.xyz ) ); + else + pixelData.viewDir = midf3_c( normalize( -inPs.pos ) ); @end pixelData.NdotV = saturate( dot( pixelData.normal, pixelData.viewDir ) ); @end @property( !ambient_fixed || vct_num_probes ) - float3 finalColour = float3(0, 0, 0); + midf3 finalColour = midf3_c(0, 0, 0); @else - float3 finalColour = passBuf.ambientUpperHemi.xyz * pixelData.diffuse.xyz; + midf3 finalColour = midf3_c( passBuf.ambientUpperHemi.xyz ) * pixelData.diffuse.xyz; @end - @property( hlms_lights_point || hlms_lights_spot || 
hlms_lights_area_approx || hlms_lights_area_ltc ) + @property( hlms_static_branch_shadow_map_lights || hlms_lights_point || hlms_lights_spot || hlms_lights_area_approx || hlms_lights_area_ltc ) float3 lightDir; float fDistance; - float3 tmpColour; - float spotCosAngle; + midf3 tmpColour; + midf spotCosAngle; + @end + + @property( hlms_static_branch_shadow_map_lights ) + const float2 shadowmap_uv_min[@value(hlms_num_shadow_map_lights)] = + OGRE_ARRAY_START( float2 ) + hlms_shadowmap0_uv_min + @foreach( hlms_num_shadow_map_lights, n, 1 ) + , hlms_shadowmap@n_uv_min@end + OGRE_ARRAY_END; + const float2 shadowmap_uv_max[@value(hlms_num_shadow_map_lights)] = + OGRE_ARRAY_START( float2 ) + hlms_shadowmap0_uv_max + @foreach( hlms_num_shadow_map_lights, n, 1 ) + , hlms_shadowmap@n_uv_max@end + OGRE_ARRAY_END; + const float2 shadowmap_uv_length[@value(hlms_num_shadow_map_lights)] = + OGRE_ARRAY_START( float2 ) + hlms_shadowmap0_uv_length + @foreach( hlms_num_shadow_map_lights, n, 1 ) + , hlms_shadowmap@n_uv_length@end + OGRE_ARRAY_END; @end + @property( needs_refl_dir ) - pixelData.reflDir = 2.0 * dot( pixelData.viewDir, pixelData.normal ) * pixelData.normal - pixelData.viewDir; + pixelData.reflDir = _h( 2.0 ) * dot( pixelData.viewDir, pixelData.normal ) * pixelData.normal - + pixelData.viewDir; @end @insertpiece( DoAmbientHeader ) @@ -465,22 +503,30 @@ @piece( DoDirectionalLights ) @property( hlms_lights_directional ) @insertpiece( ObjLightMaskCmp ) - finalColour += BRDF( light0Buf.lights[0].position.xyz, light0Buf.lights[0].diffuse.xyz, light0Buf.lights[0].specular, pixelData ) @insertpiece( DarkenWithShadowFirstLight ); + finalColour += BRDF( midf3_c( light0Buf.lights[0].position.xyz ), + midf3_c( light0Buf.lights[0].diffuse.xyz ), + midf3_c( light0Buf.lights[0].specular ), pixelData PASSBUF_ARG ) @insertpiece( DarkenWithShadowFirstLight ); @end @foreach( hlms_lights_directional, n, 1 ) @insertpiece( ObjLightMaskCmp ) - finalColour += BRDF( 
light0Buf.lights[@n].position.xyz, light0Buf.lights[@n].diffuse.xyz, light0Buf.lights[@n].specular, pixelData )@insertpiece( DarkenWithShadow );@end + finalColour += BRDF( midf3_c( light0Buf.lights[@n].position.xyz ), + midf3_c( light0Buf.lights[@n].diffuse.xyz ), + midf3_c( light0Buf.lights[@n].specular ), pixelData PASSBUF_ARG )@insertpiece( DarkenWithShadow );@end @property( !hlms_static_branch_lights ) @foreach( hlms_lights_directional_non_caster, n, hlms_lights_directional ) @insertpiece( ObjLightMaskCmp ) - finalColour += BRDF( light0Buf.lights[@n].position.xyz, light0Buf.lights[@n].diffuse.xyz, light0Buf.lights[@n].specular, pixelData );@end + finalColour += BRDF( midf3_c( light0Buf.lights[@n].position.xyz ), + midf3_c( light0Buf.lights[@n].diffuse.xyz ), + midf3_c( light0Buf.lights[@n].specular ), pixelData PASSBUF_ARG );@end @else for( int n=0; n= light0Buf.lights[light_idx].spotParams.y @insertpiece( andObjLightMaskCmp_light_idx ) ) + { + midf spotAtten = saturate( (spotCosAngle - midf_c( light0Buf.lights[light_idx].spotParams.y )) * midf_c( light0Buf.lights[light_idx].spotParams.x ) ); + spotAtten = pow( spotAtten, midf_c( light0Buf.lights[light_idx].spotParams.z ) ); + + @property( light_profiles_texture ) + spotAtten *= getPhotometricAttenuation( spotCosAngle, + light0Buf.lights[light_idx].lightTexProfileIdx + OGRE_PHOTOMETRIC_ARG ); + @end + + tmpColour = BRDF( midf3_c( lightDir ), midf3_c( light0Buf.lights[light_idx].diffuse.xyz ), + midf3_c( light0Buf.lights[light_idx].specular ), + pixelData PASSBUF_ARG )@insertpiece( DarkenWithShadow_cur_shadow_map ); + midf atten = midf_c( 1.0 / (0.5 + (light0Buf.lights[light_idx].attenuation.y + light0Buf.lights[light_idx].attenuation.z * fDistance) * fDistance ) ); + finalColour += tmpColour * (atten * spotAtten); + } + cur_shadow_map++; + } +@else @foreach( hlms_lights_spot, n, hlms_lights_point ) lightDir = light0Buf.lights[@n].position.xyz - inPs.pos; fDistance= length( lightDir ); lightDir *= 1.0 / 
fDistance; @property( !hlms_lights_spot_textured ) - spotCosAngle = dot( -lightDir, light0Buf.lights[@n].spotDirection.xyz ); + spotCosAngle = dot( midf3_c( -lightDir ), midf3_c( light0Buf.lights[@n].spotDirection.xyz ) ); @else - spotCosAngle = dot( -lightDir, zAxis( light0Buf.lights[@n].spotQuaternion ) ); + spotCosAngle = dot( midf3_c( -lightDir ), zAxis( midf4_c( light0Buf.lights[@n].spotQuaternion ) ) ); @end if( fDistance <= light0Buf.lights[@n].attenuation.x && spotCosAngle >= light0Buf.lights[@n].spotParams.y @insertpiece( andObjLightMaskCmp ) ) { @property( hlms_lights_spot_textured ) float3 posInLightSpace = qmul( spotQuaternion[@value(spot_params)], inPs.pos ); - float spotAtten = texture( texSpotLight, normalize( posInLightSpace ).xy ).x; //TODO + midf spotAtten = texture( texSpotLight, normalize( posInLightSpace ).xy ).x; //TODO @else - float spotAtten = saturate( (spotCosAngle - light0Buf.lights[@n].spotParams.y) * light0Buf.lights[@n].spotParams.x ); - spotAtten = pow( spotAtten, light0Buf.lights[@n].spotParams.z ); + midf spotAtten = saturate( (spotCosAngle - midf_c( light0Buf.lights[@n].spotParams.y )) * midf_c( light0Buf.lights[@n].spotParams.x ) ); + spotAtten = pow( spotAtten, midf_c( light0Buf.lights[@n].spotParams.z ) ); @end @property( light_profiles_texture ) @@ -532,68 +631,71 @@ OGRE_PHOTOMETRIC_ARG ); @end - tmpColour = BRDF( lightDir, light0Buf.lights[@n].diffuse.xyz, light0Buf.lights[@n].specular, pixelData )@insertpiece( DarkenWithShadow ); - float atten = 1.0 / (0.5 + (light0Buf.lights[@n].attenuation.y + light0Buf.lights[@n].attenuation.z * fDistance) * fDistance ); + tmpColour = BRDF( midf3_c( lightDir ), midf3_c( light0Buf.lights[@n].diffuse.xyz ), + midf3_c( light0Buf.lights[@n].specular ), + pixelData PASSBUF_ARG )@insertpiece( DarkenWithShadow ); + midf atten = midf_c( 1.0 / (0.5 + (light0Buf.lights[@n].attenuation.y + light0Buf.lights[@n].attenuation.z * fDistance) * fDistance ) ); finalColour += tmpColour * (atten * 
spotAtten); } @end @end +@end @piece( DoEmissiveLight ) @property( emissive_map || emissive_constant ) ///Emissive is not part of PixelData because emissive can just be accumulated to finalColour @property( emissive_map ) - float3 emissiveCol = SampleEmissive( textureMaps@value( emissive_map_idx ), - samplerState@value(emissive_map_sampler), - UV_EMISSIVE( inPs.uv@value(uv_emissive).xy ), - texIndex_emissiveMapIdx ).xyz; + midf3 emissiveCol = SampleEmissive( textureMaps@value( emissive_map_idx ), + samplerState@value(emissive_map_sampler), + UV_EMISSIVE( inPs.uv@value(uv_emissive).xy ), + texIndex_emissiveMapIdx ).xyz; @property( emissive_constant ) - emissiveCol *= material.emissive.xyz; + emissiveCol *= midf3_c( material.emissive.xyz ); @end @property( emissive_as_lightmap ) emissiveCol *= pixelData.diffuse.xyz; @end finalColour += emissiveCol; @else - finalColour += material.emissive.xyz; + finalColour += midf3_c( material.emissive.xyz ); @end @end @end @piece( CubemapManualPcc ) - float3 posInProbSpace = toProbeLocalSpace( inPs.pos, @insertpiece( pccProbeSource ) ); - float probeFade = getProbeFade( posInProbSpace, @insertpiece( pccProbeSource ) ); + midf3 posInProbSpace = midf3_c( toProbeLocalSpace( inPs.pos, @insertpiece( pccProbeSource ) ) ); + midf probeFade = getProbeFade( posInProbSpace, @insertpiece( pccProbeSource ) ); @property( vct_num_probes ) - if( probeFade > 0 && (pixelData.roughness < 1.0f || vctSpecular.w == 0) ) + if( probeFade > _h( 0 ) && (pixelData.roughness < _h( 1.0f ) || vctSpecular.w == 0) ) @else - if( probeFade > 0 ) + if( probeFade > _h( 0 ) ) @end { - probeFade = saturate( probeFade * 200.0 ); + probeFade = saturate( probeFade * _h( 200.0 ) ); @property( vct_num_probes ) - float4 reflDirLS_dist = localCorrect( pixelData.reflDir, posInProbSpace, @insertpiece( pccProbeSource ) ); - float3 reflDirLS = reflDirLS_dist.xyz; + midf4 reflDirLS_dist = localCorrect( pixelData.reflDir, posInProbSpace, @insertpiece( pccProbeSource ) ); + midf3 
reflDirLS = reflDirLS_dist.xyz; @else - float3 reflDirLS = localCorrect( pixelData.reflDir, posInProbSpace, @insertpiece( pccProbeSource ) ).xyz; + midf3 reflDirLS = localCorrect( pixelData.reflDir, posInProbSpace, @insertpiece( pccProbeSource ) ).xyz; @end - float3 nNormalLS = localCorrect( pixelData.normal, posInProbSpace, @insertpiece( pccProbeSource ) ).xyz; - float4 envS = SampleEnvProbe( texEnvProbeMap, samplerState@value(envMapRegSampler), - reflDirLS, @insertpiece( envSpecularRoughness ) ); + midf3 nNormalLS = localCorrect( pixelData.normal, posInProbSpace, @insertpiece( pccProbeSource ) ).xyz; + midf4 envS = SampleEnvProbe( texEnvProbeMap, samplerState@value(envMapRegSampler), + reflDirLS, @insertpiece( envSpecularRoughness ) ); @property( envmap_scale ) - envS.xyz *= passBuf.ambientUpperHemi.w; + envS.xyz *= midf3_c( passBuf.ambientUpperHemi.w ); @end @property( cubemaps_as_diffuse_gi ) - float3 envD = SampleEnvProbe( texEnvProbeMap, samplerState@value(envMapRegSampler), - nNormalLS, 11.0 ).xyz @insertpiece( ApplyEnvMapScale ); + midf3 envD = SampleEnvProbe( texEnvProbeMap, samplerState@value(envMapRegSampler), + nNormalLS, 11.0 ).xyz @insertpiece( ApplyEnvMapScale ); envD.xyz *= probeFade; @end envS.xyz *= probeFade; @property( clear_coat ) - float3 clearCoatEnvS = SampleEnvProbe( texEnvProbeMap, samplerState@value( envMapRegSampler ), - reflDirLS, - @insertpiece( envSpecularRoughnessClearCoat ) ).xyz @insertpiece( ApplyEnvMapScale ); + midf3 clearCoatEnvS = SampleEnvProbe( texEnvProbeMap, samplerState@value( envMapRegSampler ), + reflDirLS, + @insertpiece( envSpecularRoughnessClearCoat ) ).xyz @insertpiece( ApplyEnvMapScale ); clearCoatEnvS *= probeFade; @end @@ -608,7 +710,7 @@ pixelData.envColourS = lerp( envS.xyz, pixelData.envColourS, vctLerp ); @property( cubemaps_as_diffuse_gi ) - pixelData.envColourD += vctSpecular.w > 0 ? float3( 0, 0, 0 ) : envD; + pixelData.envColourD += vctSpecular.w > 0 ? 
midf3_c( 0, 0, 0 ) : envD; @end @property( clear_coat ) @@ -629,23 +731,30 @@ @piece( CubemapGlobal ) pixelData.envColourS += SampleEnvProbe( texEnvProbeMap, samplerState@value(envMapRegSampler), - mul( pixelData.reflDir, passBuf.invViewMatCubemap ), + mul( pixelData.reflDir, midf3x3_c( passBuf.invViewMatCubemap ) ), @insertpiece( envSpecularRoughness ) ).xyz @insertpiece( ApplyEnvMapScale ); @property( cubemaps_as_diffuse_gi ) pixelData.envColourD += SampleEnvProbe( texEnvProbeMap, samplerState@value(envMapRegSampler), - mul( pixelData.normal, passBuf.invViewMatCubemap ), + mul( pixelData.normal, midf3x3_c( passBuf.invViewMatCubemap ) ), 11.0 ).xyz @insertpiece( ApplyEnvMapScale ); @end @property( clear_coat ) pixelData.clearCoatEnvColourS += SampleEnvProbe( texEnvProbeMap, samplerState@value( envMapRegSampler ), - mul( pixelData.reflDir, passBuf.invViewMatCubemap ), + mul( pixelData.reflDir, midf3x3_c( passBuf.invViewMatCubemap ) ), @insertpiece( envSpecularRoughnessClearCoat ) ).xyz @insertpiece( ApplyEnvMapScale ); @end @end @property( !hlms_shadowcaster ) @piece( DefaultBodyPS ) + @property( hlms_emulate_clip_distances && hlms_global_clip_planes && hlms_pso_clip_distances && syntax == glslvk ) + @foreach( hlms_pso_clip_distances, n ) + if( inPs.clipDistance@n < 0.0 ) + discard; + @end + @end + @property( hlms_screen_pos_uv ) float2 screenPosUv = gl_FragCoord.xy * passBuf.invWindowRes.xy; @end @@ -732,17 +841,17 @@ int gBufSubsample = int( findLSB( sampleMask ) ); - pixelData.normal = normalize( OGRE_Load2DMS( gBuf_normals, iFragCoord, gBufSubsample ).xyz * 2.0 - 1.0 ); - float2 shadowRoughness = OGRE_Load2DMS( gBuf_shadowRoughness, iFragCoord, gBufSubsample ).xy; + pixelData.normal = normalize( OGRE_Load2DMSF16( gBuf_normals, iFragCoord, gBufSubsample ).xyz * _h( 2.0 ) - _h( 1.0 ) ); + midf2 shadowRoughness = OGRE_Load2DMSF16( gBuf_shadowRoughness, iFragCoord, gBufSubsample ).xy; @else - pixelData.normal = normalize( OGRE_Load2D( gBuf_normals, iFragCoord, 0 
).xyz * 2.0 - 1.0 ); - float2 shadowRoughness = OGRE_Load2D( gBuf_shadowRoughness, iFragCoord, 0 ).xy; + pixelData.normal = normalize( OGRE_Load2DF16( gBuf_normals, iFragCoord, 0 ).xyz * _h( 2.0 ) - _h( 1.0 ) ); + midf2 shadowRoughness = OGRE_Load2DF16( gBuf_shadowRoughness, iFragCoord, 0 ).xy; @end - float fShadow = shadowRoughness.x; + midf fShadow = shadowRoughness.x; @property( roughness_map ) - pixelData.roughness = shadowRoughness.y * 0.98 + 0.02; + pixelData.roughness = shadowRoughness.y * _h( 0.98 ) + _h( 0.02 ); @end @end @@ -764,11 +873,11 @@ @insertpiece( forward3dLighting ) @property( needs_env_brdf ) - pixelData.envColourS = float3( 0, 0, 0 ); - pixelData.envColourD = float3( 0, 0, 0 ); + pixelData.envColourS = midf3_c( 0, 0, 0 ); + pixelData.envColourD = midf3_c( 0, 0, 0 ); @property( clear_coat ) - pixelData.clearCoatEnvColourS = float3( 0, 0, 0 ); + pixelData.clearCoatEnvColourS = midf3_c( 0, 0, 0 ); @end @end @@ -807,6 +916,18 @@ @end @insertpiece( applyRefractions ) + + @property( hlms_fog ) + const float distToCamera = length( inPs.pos.xyz ); + const midf luminance = dot( finalColour.xyz, + midf3_c( _h( 0.212655 ), _h( 0.715158 ), _h( 0.072187 ) ) ); + const midf lumFogWeight = max( exp2( atmoSettings.fogBreakFalloff * luminance + + atmoSettings.fogBreakMinBrightness ), + _h( 0.0 ) ); + midf fogWeight = midf_c( exp2( -distToCamera * atmoSettings.fogDensity ) ); + fogWeight = lerp( _h( 1.0 ), fogWeight, lumFogWeight ); + finalColour.xyz = lerp( inPs.fog.xyz, finalColour.xyz, fogWeight ); + @end @end ///!hlms_prepass @end ///!hlms_normal || hlms_qtangent @@ -823,32 +944,32 @@ @property( hlms_alphablend || hlms_alpha_to_coverage ) @property( use_texture_alpha ) - outPs_colour0.w = material.F0.w * pixelData.diffuse.w; + outPs_colour0.w = midf_c( material.F0.w ) * pixelData.diffuse.w; @else - outPs_colour0.w = material.F0.w; + outPs_colour0.w = midf_c( material.F0.w ); @end @else - outPs_colour0.w = 1.0; + outPs_colour0.w = _h( 1.0 ); @end @property( 
debug_pssm_splits ) - outPs_colour0.xyz = mix( outPs_colour0.xyz, debugPssmSplit.xyz, 0.2f ); + outPs_colour0.xyz = lerp( outPs_colour0.xyz, debugPssmSplit.xyz, _h( 0.2f ) ); @end @property( hlms_gen_normals_gbuffer ) - outPs_normals = float4( pixelData.normal * 0.5 + 0.5, 1.0 ); + outPs_normals = midf4_c( pixelData.normal * _h( 0.5 ) + _h( 0.5 ), 1.0 ); @end @else - outPs_colour0 = float4( 1.0, 1.0, 1.0, 1.0 ); + outPs_colour0 = midf4_c( 1.0, 1.0, 1.0, 1.0 ); @property( hlms_gen_normals_gbuffer ) - outPs_normals = float4( 0.5, 0.5, 1.0, 1.0 ); + outPs_normals = midf4_c( 0.5, 0.5, 1.0, 1.0 ); @end @end @else - outPs_normals = float4( pixelData.normal * 0.5 + 0.5, 1.0 ); + outPs_normals = midf4_c( pixelData.normal * _h( 0.5 ) + _h( 0.5 ), 1.0 ); @property( hlms_pssm_splits ) - outPs_shadowRoughness = float2( fShadow, (pixelData.roughness - 0.02) * 1.02040816 ); + outPs_shadowRoughness = midf2_c( fShadow, (pixelData.roughness - 0.02) * 1.02040816 ); @end @property( !hlms_pssm_splits ) - outPs_shadowRoughness = float2( 1.0, (pixelData.roughness - 0.02) * 1.02040816 ); + outPs_shadowRoughness = midf2_c( 1.0, (pixelData.roughness - 0.02) * 1.02040816 ); @end @end @end diff --git a/ogre2/src/media/Hlms/Pbs/Any/Main/800.VertexShader_piece_vs.any b/ogre2/src/media/Hlms/Pbs/Any/Main/800.VertexShader_piece_vs.any index 4e0f17728..1016983eb 100644 --- a/ogre2/src/media/Hlms/Pbs/Any/Main/800.VertexShader_piece_vs.any +++ b/ogre2/src/media/Hlms/Pbs/Any/Main/800.VertexShader_piece_vs.any @@ -13,7 +13,8 @@ // START UNIFORM DECLARATION @insertpiece( PassStructDecl ) - @property( hlms_skeleton || hlms_shadowcaster || hlms_pose )@insertpiece( InstanceStructDecl )@end + @property( hlms_skeleton || hlms_shadowcaster || hlms_pose || syntax == metal || lower_gpu_overhead )@insertpiece( InstanceStructDecl )@end + @insertpiece( AtmosphereNprSkyStructDecl ) @insertpiece( custom_vs_uniformStructDeclaration ) // END UNIFORM DECLARATION @@ -25,6 +26,24 @@ @end @insertpiece( 
DeclShadowMapMacros ) + @insertpiece( DeclAtmosphereNprSkyFuncs ) + + @property( accurate_non_uniform_scaled_normals ) + midf3x3 adjugate( midf3x3 m ) + { + midf3x3 n; + n[0][0] = m[1][1] * m[2][2] - m[1][2] * m[2][1]; + n[0][1] = m[0][2] * m[2][1] - m[0][1] * m[2][2]; + n[0][2] = m[0][1] * m[1][2] - m[0][2] * m[1][1]; + n[1][0] = m[1][2] * m[2][0] - m[1][0] * m[2][2]; + n[1][1] = m[0][0] * m[2][2] - m[0][2] * m[2][0]; + n[1][2] = m[0][2] * m[1][0] - m[0][0] * m[1][2]; + n[2][0] = m[1][0] * m[2][1] - m[2][0] * m[1][1]; + n[2][1] = m[0][1] * m[2][0] - m[0][0] * m[2][1]; + n[2][2] = m[0][0] * m[1][1] - m[0][1] * m[1][0]; + return n; + } + @end @end @property( !hlms_skeleton ) @@ -51,45 +70,46 @@ worldPos.z = dot( worldMat[2], inputPos ); worldPos.xyz *= inVs_blendWeights[0]; @property( hlms_normal || hlms_qtangent ) - float3 worldNorm; - worldNorm.x = dot( worldMat[0].xyz, inputNormal ); - worldNorm.y = dot( worldMat[1].xyz, inputNormal ); - worldNorm.z = dot( worldMat[2].xyz, inputNormal ); - worldNorm *= inVs_blendWeights[0]; + midf3 worldNorm; + worldNorm.x = dot( midf3_c( worldMat[0].xyz ), inputNormal ); + worldNorm.y = dot( midf3_c( worldMat[1].xyz ), inputNormal ); + worldNorm.z = dot( midf3_c( worldMat[2].xyz ), inputNormal ); + worldNorm *= midf_c( inVs_blendWeights[0] ); @end @property( normal_map ) - float3 worldTang; - worldTang.x = dot( worldMat[0].xyz, inputTangent ); - worldTang.y = dot( worldMat[1].xyz, inputTangent ); - worldTang.z = dot( worldMat[2].xyz, inputTangent ); - worldTang *= inVs_blendWeights[0]; + midf3 worldTang; + worldTang.x = dot( midf3_c( worldMat[0].xyz ), inputTangent ); + worldTang.y = dot( midf3_c( worldMat[1].xyz ), inputTangent ); + worldTang.z = dot( midf3_c( worldMat[2].xyz ), inputTangent ); + worldTang *= midf_c( inVs_blendWeights[0] ); @end @psub( NeedsMoreThan1BonePerVertex, hlms_bones_per_vertex, 1 ) @property( NeedsMoreThan1BonePerVertex ) - float4 tmp; - tmp.w = 1.0; + float4 tmp4; + tmp4.w = 1.0; + midf3 tmp3; @end 
//!NeedsMoreThan1BonePerVertex @foreach( hlms_bones_per_vertex, n, 1 ) _idx = (inVs_blendIndices[@n] << 1u) + inVs_blendIndices[@n]; //inVs_blendIndices[@n] * 3; a 32-bit int multiply is 4 cycles on GCN! (and mul24 is not exposed to GLSL...) worldMat[0] = readOnlyFetch( worldMatBuf, int(matStart + _idx + 0u) ); worldMat[1] = readOnlyFetch( worldMatBuf, int(matStart + _idx + 1u) ); worldMat[2] = readOnlyFetch( worldMatBuf, int(matStart + _idx + 2u) ); - tmp.x = dot( worldMat[0], inputPos ); - tmp.y = dot( worldMat[1], inputPos ); - tmp.z = dot( worldMat[2], inputPos ); - worldPos.xyz += (tmp * inVs_blendWeights[@n]).xyz; + tmp4.x = dot( worldMat[0], inputPos ); + tmp4.y = dot( worldMat[1], inputPos ); + tmp4.z = dot( worldMat[2], inputPos ); + worldPos.xyz += (tmp4 * inVs_blendWeights[@n]).xyz; @property( hlms_normal || hlms_qtangent ) - tmp.x = dot( worldMat[0].xyz, inputNormal ); - tmp.y = dot( worldMat[1].xyz, inputNormal ); - tmp.z = dot( worldMat[2].xyz, inputNormal ); - worldNorm += tmp.xyz * inVs_blendWeights[@n]; + tmp3.x = dot( midf3_c( worldMat[0].xyz ), inputNormal ); + tmp3.y = dot( midf3_c( worldMat[1].xyz ), inputNormal ); + tmp3.z = dot( midf3_c( worldMat[2].xyz ), inputNormal ); + worldNorm += tmp3.xyz * midf_c( inVs_blendWeights[@n] ); @end @property( normal_map ) - tmp.x = dot( worldMat[0].xyz, inputTangent ); - tmp.y = dot( worldMat[1].xyz, inputTangent ); - tmp.z = dot( worldMat[2].xyz, inputTangent ); - worldTang += tmp.xyz * inVs_blendWeights[@n]; + tmp3.x = dot( midf3_c( worldMat[0].xyz ), inputTangent ); + tmp3.y = dot( midf3_c( worldMat[1].xyz ), inputTangent ); + tmp3.z = dot( midf3_c( worldMat[2].xyz ), inputTangent ); + worldTang += tmp3.xyz * midf_c( inVs_blendWeights[@n] ); @end @end @@ -116,10 +136,10 @@ @psub( MoreThanOnePose, hlms_pose, 1 ) @property( !MoreThanOnePose ) float4 poseWeights = readOnlyFetch( worldMatBuf, int(poseDataStart + 1u) ); - float4 posePos = bufferFetch( poseBuf, int( vertexID @property( hlms_pose_normals )<< 
1u@end ) ); + float4 posePos = float4( bufferFetch( poseBuf, int( vertexID @property( hlms_pose_normals )<< 1u@end ) ) ); inputPos += posePos * poseWeights.x; @property( hlms_pose_normals && (hlms_normal || hlms_qtangent) ) - float4 poseNormal = bufferFetch( poseBuf, int( (vertexID << 1u) + 1u ) ); + float4 poseNormal = float4( bufferFetch( poseBuf, int( (vertexID << 1u) + 1u ) ) ); inputNormal += poseNormal.xyz * poseWeights.x; @end @pset( NumPoseWeightVectors, 1 ) @@ -135,9 +155,9 @@ @property( !MoreThanOnePoseWeightVector ) float4 poseWeights = readOnlyFetch( worldMatBuf, int( poseDataStart + 1u ) ); @foreach( hlms_pose, n ) - inputPos += bufferFetch( poseBuf, int( (vertexID + numVertices * @nu) @property( hlms_pose_normals )<< 1u@end ) ) * poseWeights[@n]; + inputPos += float4( bufferFetch( poseBuf, int( (vertexID + numVertices * @nu) @property( hlms_pose_normals )<< 1u@end ) ) ) * poseWeights[@n]; @property( hlms_pose_normals && (hlms_normal || hlms_qtangent) ) - inputNormal += bufferFetch( poseBuf, int( ((vertexID + numVertices * @nu) << 1u) + 1u ) ).xyz * poseWeights[@n]; + inputNormal += midf3_c( bufferFetch( poseBuf, int( ((vertexID + numVertices * @nu) << 1u) + 1u ) ).xyz * poseWeights[@n] ); @end @end @else @@ -150,9 +170,9 @@ poseWeights[@n * 4 + 3] = weights@n[3]; @end @foreach( hlms_pose, n ) - inputPos += bufferFetch( poseBuf, int( (vertexID + numVertices * @nu) @property( hlms_pose_normals )<< 1u@end ) ) * poseWeights[@n]; + inputPos += float4( bufferFetch( poseBuf, int( (vertexID + numVertices * @nu) @property( hlms_pose_normals )<< 1u@end ) ) ) * poseWeights[@n]; @property( hlms_pose_normals && (hlms_normal || hlms_qtangent) ) - inputNormal += bufferFetch( poseBuf, int( ((vertexID + numVertices * @nu) << 1u) + 1u ) ).xyz * poseWeights[@nu]; + inputNormal += midf3_c( bufferFetch( poseBuf, int( ((vertexID + numVertices * @nu) << 1u) + 1u ) ).xyz * poseWeights[@nu] ); @end @end @end @@ -190,8 +210,16 @@ @insertpiece( custom_vs_preTransform ) 
//Lighting is in view space @property( hlms_normal || hlms_qtangent ) outVs.pos = @insertpiece( CalculatePsPos );@end - @property( hlms_normal || hlms_qtangent ) outVs.normal = mul( @insertpiece(local_normal), toFloat3x3( worldViewMat ) );@end - @property( normal_map ) outVs.tangent = mul( @insertpiece(local_tangent), toFloat3x3( worldViewMat ) );@end + @property( hlms_normal || hlms_qtangent ) + midf3x3 worldMat3x3 = toMidf3x3( worldViewMat ); + @property( accurate_non_uniform_scaled_normals ) + midf3x3 normalMat = transpose( adjugate( worldMat3x3 ) ); + outVs.normal = normalize( mul( @insertpiece(local_normal), normalMat ) ); + @else + outVs.normal = mul( @insertpiece(local_normal), worldMat3x3 ); + @end + @end + @property( normal_map ) outVs.tangent = mul( @insertpiece(local_tangent), toMidf3x3( worldViewMat ) );@end @property( !hlms_dual_paraboloid_mapping ) @property( !hlms_use_uv_baking ) @property( !hlms_instanced_stereo ) @@ -233,24 +261,21 @@ // Define inputNormal and inputTangent using inVs_normal, inVs_tangent, inVs_qtangent @property( hlms_qtangent ) //Decode qTangent to TBN with reflection - float3 inputNormal = xAxis( normalize( inVs_qtangent ) ); + const midf4 qTangent = normalize( inVs_qtangent ); + midf3 inputNormal = xAxis( qTangent ); @property( normal_map ) - float3 inputTangent = yAxis( inVs_qtangent ); + midf3 inputTangent = yAxis( qTangent ); outVs.biNormalReflection = sign( inVs_qtangent.w ); //We ensure in C++ qtangent.w is never 0 @end @else - @property( hlms_normal && hlms_pose && hlms_pose_normals ) - float3 inputNormal = inVs_normal; // We need inputNormal as lvalue for PoseTransform - @else - #define inputNormal inVs_normal + @property( hlms_normal ) + midf3 inputNormal = midf3_c( inVs_normal ); // We need inputNormal as lvalue for PoseTransform @end - @property( hlms_tangent4 ) - #define inputTangent inVs_tangent.xyz - @property( normal_map ) - outVs.biNormalReflection = sign( inVs_tangent.w ); + @property( normal_map ) + midf3 
inputTangent = midf3_c( inVs_tangent.xyz ); + @property( hlms_tangent4 ) + outVs.biNormalReflection = sign( midf( inVs_tangent.w ) ); @end - @else - #define inputTangent inVs_tangent @end @end @@ -263,7 +288,7 @@ float4 worldPos = float4( mul(inVs_vertex, worldMat).xyz, 1.0f ); @property( ( hlms_normal || hlms_qtangent) && hlms_num_shadow_map_lights ) // We need worldNorm for normal offset bias - float3 worldNorm = mul( inputNormal, toFloat3x3( worldMat ) ).xyz; + midf3 worldNorm = mul( inputNormal, toMidf3x3( worldMat ) ).xyz; @end @end @@ -271,10 +296,10 @@ @property( !hlms_skeleton && hlms_pose && ( hlms_normal || hlms_qtangent) && hlms_num_shadow_map_lights ) // We need worldNorm for normal offset bias, special path when using poses - float3 worldNorm; - worldNorm.x = dot( worldMat[0].xyz, inputNormal ); - worldNorm.y = dot( worldMat[1].xyz, inputNormal ); - worldNorm.z = dot( worldMat[2].xyz, inputNormal ); + midf3 worldNorm; + worldNorm.x = dot( midf3_c( worldMat[0].xyz ), inputNormal ); + worldNorm.y = dot( midf3_c( worldMat[1].xyz ), inputNormal ); + worldNorm.z = dot( midf3_c( worldMat[2].xyz ), inputNormal ); @end @insertpiece( SkeletonTransform ) @@ -283,15 +308,13 @@ @insertpiece( DoShadowReceiveVS ) @insertpiece( DoShadowCasterVS ) + @insertpiece( DoAtmosphereNprSky ) + /// hlms_uv_count will be 0 on shadow caster passes w/out alpha test @foreach( hlms_uv_count, n ) outVs.uv@n = inVs_uv@n;@end -@property( syntax != metal ) - @property( (!hlms_shadowcaster || alpha_test) && !lower_gpu_overhead ) - outVs.drawId = inVs_drawId; - @end -@else +@property( syntax == metal || lower_gpu_overhead ) @property( (!hlms_shadowcaster || alpha_test) && !lower_gpu_overhead ) outVs.materialId = worldMaterialIdx[inVs_drawId].x & 0x1FFu; @end @@ -301,7 +324,11 @@ @end @property( use_planar_reflections ) - outVs.planarReflectionIdx = (ushort)(worldMaterialIdx[inVs_drawId].w); + outVs.planarReflectionIdx = ushort( worldMaterialIdx[inVs_drawId].w ); + @end +@else + 
@property( (!hlms_shadowcaster || alpha_test) && !lower_gpu_overhead ) + outVs.drawId = inVs_drawId; @end @end diff --git a/ogre2/src/media/Hlms/Pbs/Any/PlanarReflections_piece_ps.any b/ogre2/src/media/Hlms/Pbs/Any/PlanarReflections_piece_ps.any index 0e639a203..395a49f12 100644 --- a/ogre2/src/media/Hlms/Pbs/Any/PlanarReflections_piece_ps.any +++ b/ogre2/src/media/Hlms/Pbs/Any/PlanarReflections_piece_ps.any @@ -23,10 +23,10 @@ @end @piece( DoPlanarReflectionsPS ) - @property( syntax != metal ) - uint planarReflectionIdx = worldMaterialIdx[inPs.drawId].w; - @else + @property( syntax == metal || lower_gpu_overhead ) ushort planarReflectionIdx = inPs.planarReflectionIdx; + @else + uint planarReflectionIdx = worldMaterialIdx[inPs.drawId].w; @end float4 planarReflection = passBuf.planarReflections[planarReflectionIdx]; diff --git a/ogre2/src/media/Hlms/Pbs/Any/Refractions_piece_ps.any b/ogre2/src/media/Hlms/Pbs/Any/Refractions_piece_ps.any index 415ccbe30..e238c2610 100644 --- a/ogre2/src/media/Hlms/Pbs/Any/Refractions_piece_ps.any +++ b/ogre2/src/media/Hlms/Pbs/Any/Refractions_piece_ps.any @@ -3,12 +3,12 @@ @property( hlms_screen_space_refractions ) @piece( DeclRefractionsFuncs ) - float3 OGRE_refract( float3 viewDir, float3 normal, float refractionIndex, float NdotV ) + midf3 OGRE_refract( midf3 viewDir, midf3 normal, midf refractionIndex, midf NdotV ) { - float3 retVal; - float k = 1.0 - refractionIndex * refractionIndex * (1.0 - NdotV * NdotV); - if( k < 0.0 ) - retVal = float3( 0, 0, 0 ); + midf3 retVal; + midf k = _h( 1.0 ) - refractionIndex * refractionIndex * (_h( 1.0 ) - NdotV * NdotV); + if( k < _h( 0.0 ) ) + retVal = midf3_c( 0, 0, 0 ); else retVal = -refractionIndex * viewDir - (sqrt( k ) - refractionIndex * NdotV) * normal; return retVal; @@ -17,27 +17,27 @@ @piece( applyRefractions ) @property( !fresnel_scalar ) - float refractF0 = pixelData.F0; + midf refractF0 = pixelData.F0; @else - float refractF0 = max( pixelData.F0.x, pixelData.F0.y, pixelData.F0.z 
); + midf refractF0 = max( pixelData.F0.x, pixelData.F0.y, pixelData.F0.z ); @end // refractNormal must be in view space, and we ignore .z component - float2 refractNormal2d = OGRE_refract( pixelData.viewDir, pixelData.normal, - refractF0, pixelData.NdotV ).xy; - float2 refractUv = screenPosUv.xy + refractNormal2d.xy * + midf2 refractNormal2d = OGRE_refract( pixelData.viewDir, pixelData.normal, + refractF0, pixelData.NdotV ).xy; + float2 refractUv = screenPosUv.xy + float2( refractNormal2d.xy ) * float2( material.refractionStrength, material.refractionStrength * passBuf.aspectRatio ) / ( (-inPs.pos.z + 1.0) * (-inPs.pos.z + 1.0) ); - float3 refractionCol = OGRE_SampleLevel( refractionMap, refractionMapSampler, refractUv, 0 ).xyz; - float refractionDepth = OGRE_SampleLevel( depthTextureNoMsaa, refractionMapSampler, refractUv, 0 ).x; + midf3 refractionCol = OGRE_SampleLevelF16( refractionMap, refractionMapSampler, refractUv, 0 ).xyz; + midf refractionDepth = OGRE_SampleLevelF16( depthTextureNoMsaa, refractionMapSampler, refractUv, 0 ).x; // We may need to fallback to regular transparency if we're sampling to close to the edges // or the object being refracted is in front of us. - float3 fallbackRefractionCol = OGRE_Load2D( refractionMap, iFragCoord.xy, 0 ).xyz; + midf3 fallbackRefractionCol = OGRE_Load2DF16( refractionMap, iFragCoord.xy, 0 ).xyz; refractUv = saturate( abs( screenPosUv.xy * 2.0 - 1.0 ) * 10.0 - 9.0 ); - float fallbackRefrW = max( refractUv.x, refractUv.y ); + midf fallbackRefrW = midf_c( max( refractUv.x, refractUv.y ) ); fallbackRefrW = fallbackRefrW * fallbackRefrW; @property( hlms_no_reverse_depth ) @@ -47,17 +47,17 @@ @end { // We're trying to refract an object that is in front of us. We can't do that. 
- fallbackRefrW = 1.0; + fallbackRefrW = _h( 1.0 ); } refractionCol = lerp( refractionCol, fallbackRefractionCol, fallbackRefrW ); @property( use_texture_alpha ) - float refractionAlpha = material.F0.w * pixelData.diffuse.w; + midf refractionAlpha = midf_c( material.F0.w ) * pixelData.diffuse.w; @else - float refractionAlpha = material.F0.w; + midf refractionAlpha = midf_c( material.F0.w ); @end - finalColour += refractionCol.xyz * (1.0 - refractionAlpha); + finalColour += refractionCol.xyz * (_h( 1.0 ) - refractionAlpha); @end @end diff --git a/ogre2/src/media/Hlms/Pbs/Any/ShadowMapping_piece_all.any b/ogre2/src/media/Hlms/Pbs/Any/ShadowMapping_piece_all.any index cd6a57797..2d36bc9dc 100644 --- a/ogre2/src/media/Hlms/Pbs/Any/ShadowMapping_piece_all.any +++ b/ogre2/src/media/Hlms/Pbs/Any/ShadowMapping_piece_all.any @@ -5,7 +5,7 @@ @piece( DeclNormalOffsetBiasFunc ) @foreach( 2, m ) // Perform normal offset bias. See https://github.com/OGRECave/ogre-next/issues/100 - INLINE float3 getNormalOffsetBias( float3 worldNormal, float3 viewSpaceNormal, + INLINE float3 getNormalOffsetBias( midf3 worldNormal, midf3 viewSpaceNormal, float3 lightDir, float shadowMapTexSize, float depthRange, float normalOffsetBias @property( @m == 0 ) @@ -14,13 +14,13 @@ , float2 minUV, float2 maxUV ) @end { - float tmpNdotL = saturate( dot( lightDir.xyz, viewSpaceNormal.xyz ) ); + float tmpNdotL = saturate( dot( lightDir.xyz, float3( viewSpaceNormal.xyz ) ) ); @property( @m == 1 ) shadowMapTexSize /= maxUV.x - minUV.x; @end - return ( ( 1.0f - tmpNdotL ) * normalOffsetBias * worldNormal.xyz * shadowMapTexSize ); + return ( ( 1.0f - tmpNdotL ) * normalOffsetBias * float3( worldNormal.xyz ) * shadowMapTexSize ); } @end @end diff --git a/ogre2/src/media/Hlms/Pbs/Any/ShadowMapping_piece_ps.any b/ogre2/src/media/Hlms/Pbs/Any/ShadowMapping_piece_ps.any index 82d4c8a2c..92789f0c1 100644 --- a/ogre2/src/media/Hlms/Pbs/Any/ShadowMapping_piece_ps.any +++ 
b/ogre2/src/media/Hlms/Pbs/Any/ShadowMapping_piece_ps.any @@ -26,8 +26,10 @@ #define hlms_shadowmap@n_uv_min float2( @value( hlms_shadowmap@n_uv_min_x_int ).@value( hlms_shadowmap@n_uv_min_x_fract ), @value( hlms_shadowmap@n_uv_min_y_int ).@value( hlms_shadowmap@n_uv_min_y_fract ) ) #define hlms_shadowmap@n_uv_max float2( @value( hlms_shadowmap@n_uv_max_x_int ).@value( hlms_shadowmap@n_uv_max_x_fract ), @value( hlms_shadowmap@n_uv_max_y_int ).@value( hlms_shadowmap@n_uv_max_y_fract ) ) @property( hlms_shadowmap@n_uvs_fulltex ) - @property( hlms_shadowmap@n_is_point_light ) + @property( hlms_shadowmap@n_is_point_light || hlms_static_branch_shadow_map_lights ) #define hlms_shadowmap@n_uv_length float2( @value( hlms_shadowmap@n_uv_length_x_int ).@value( hlms_shadowmap@n_uv_length_x_fract ), @value( hlms_shadowmap@n_uv_length_y_int ).@value( hlms_shadowmap@n_uv_length_y_fract ) ) + @end + @property( hlms_shadowmap@n_is_point_light ) #define hlms_shadowmap@n_uv_param , hlms_shadowmap@n_uv_min, hlms_shadowmap@n_uv_max, hlms_shadowmap@n_uv_length @else #define hlms_shadowmap@n_uv_param , hlms_shadowmap@n_uv_min, hlms_shadowmap@n_uv_max @@ -38,22 +40,22 @@ @end @property( syntax == glsl || syntax == glsles ) - #define OGRE_SAMPLE_SHADOW( tex, sampler, uv, depth ) (OGRE_DEPTH_CMP_GE( depth, OGRE_DEPTH_DEFAULT_CLEAR ) ? 1.0 : texture( tex, vec3( uv, depth ) )) + #define OGRE_SAMPLE_SHADOW( tex, sampler, uv, depth ) (OGRE_DEPTH_CMP_GE( depth, OGRE_DEPTH_DEFAULT_CLEAR ) ? _h( 1.0 ) : midf_c( texture( tex, vec3( uv, depth ) ) )) #define OGRE_SAMPLE_SHADOW_ESM( tex, sampler, uv ) textureLod( tex, uv, 0 ).x @end @property( syntax == glslvk ) - #define OGRE_SAMPLE_SHADOW( tex, sampler, uv, depth ) (OGRE_DEPTH_CMP_GE( depth, OGRE_DEPTH_DEFAULT_CLEAR ) ? 1.0 : texture( sampler2DShadow( tex, sampler ), vec3( uv, depth ) ) ) + #define OGRE_SAMPLE_SHADOW( tex, sampler, uv, depth ) (OGRE_DEPTH_CMP_GE( depth, OGRE_DEPTH_DEFAULT_CLEAR ) ? 
_h( 1.0 ) : midf_c( texture( sampler2DShadow( tex, sampler ), vec3( uv, depth ) ) ) ) #define OGRE_SAMPLE_SHADOW_ESM( tex, sampler, uv ) textureLod( sampler2D( tex, sampler ), uv, 0 ).x @end @property( syntax == hlsl ) - #define OGRE_SAMPLE_SHADOW( tex, sampler, uv, depth ) (OGRE_DEPTH_CMP_GE( depth, OGRE_DEPTH_DEFAULT_CLEAR ) ? 1.0 : tex.SampleCmpLevelZero( sampler, uv.xy, depth ).x) + #define OGRE_SAMPLE_SHADOW( tex, sampler, uv, depth ) (OGRE_DEPTH_CMP_GE( depth, OGRE_DEPTH_DEFAULT_CLEAR ) ? _h( 1.0 ) : midf_c( tex.SampleCmpLevelZero( sampler, uv.xy, depth ).x )) #define OGRE_SAMPLE_SHADOW_ESM( tex, sampler, uv ) tex.SampleLevel( sampler, uv, 0 ).x @end @property( syntax == metal ) - #define OGRE_SAMPLE_SHADOW( tex, sampler, uv, depth ) (OGRE_DEPTH_CMP_GE( depth, OGRE_DEPTH_DEFAULT_CLEAR ) ? 1.0 : tex.sample_compare( sampler, float2( uv.xy ), depth )) + #define OGRE_SAMPLE_SHADOW( tex, sampler, uv, depth ) (OGRE_DEPTH_CMP_GE( depth, OGRE_DEPTH_DEFAULT_CLEAR ) ? _h( 1.0 ) : tex.sample_compare( sampler, float2( uv.xy ), depth )) #define OGRE_SAMPLE_SHADOW_ESM( tex, sampler, uv ) tex.sample( sampler, float2( uv.xy ), level(0) ).x @end @end @@ -90,19 +92,19 @@ @property( pcf < 3 ) //2x2 PCF: It's slightly faster to calculate this directly. - retVal += mix( - mix( c.w, c.z, fW.x ), - mix( c.x, c.y, fW.x ), + retVal += lerp( + lerp( c.w, c.z, fW.x ), + lerp( c.x, c.y, fW.x ), fW.y ); @else - row[0] += mix( c.w, c.z, fW.x ); - row[1] += mix( c.x, c.y, fW.x ); + row[0] += lerp( c.w, c.z, fW.x ); + row[1] += lerp( c.x, c.y, fW.x ); @end @end @property( pcf >= 3 ) //NxN PCF: It's much faster to leave the final mix out of the loop (when N > 2). 
- retVal = mix( row[0], row[1], fW.y ); + retVal = lerp( row[0], row[1], fW.y ); @end @end @end @@ -181,6 +183,15 @@ #define inPs_posL@n worldPosToSpotLightSpace( inPs.worldPos, passBuf.shadowRcv[@n], @insertpiece( shadowMapNormalOffsetBias@n ) ) @end @end + @property( hlms_static_branch_shadow_map_lights ) + @property( !skip_normal_offset_bias_vs ) + @piece( shadowMapNormalOffsetBias_cur_shadow_map )getNormalOffsetBias( inPs.worldNorm, pixelData.geomNormal, light0Buf.lights[light_idx].spotDirection.xyz, passBuf.shadowRcv[cur_shadow_map].invShadowMapSize.x, passBuf.shadowRcv[cur_shadow_map].shadowDepthRange.y, passBuf.shadowRcv[cur_shadow_map].normalOffsetBias, shadowmap_uv_min[cur_shadow_map], shadowmap_uv_max[cur_shadow_map] )@end + @else + @piece( shadowMapNormalOffsetBias_cur_shadow_map )float3( 0.0f, 0.0f, 0.0f )@end + @end + + #define inPs_posL_cur_shadow_map worldPosToSpotLightSpace( inPs.worldPos, passBuf.shadowRcv[cur_shadow_map], @insertpiece( shadowMapNormalOffsetBias_cur_shadow_map ) ) + @end @foreach( 2, m ) @property( @m == 0 ) @@ -224,7 +235,7 @@ @foreach( 2, m ) // Perform normal offset bias. 
See https://github.com/OGRECave/ogre-next/issues/100 - INLINE float3 getNormalOffsetBiasPoint( float3 geomNormal, float3 lightDir, + INLINE float3 getNormalOffsetBiasPoint( midf3 geomNormal, float3 lightDir, float normalOffsetBias, float shadowMapTexSize, float depthRange @property( @m == 0 ) @@ -233,30 +244,30 @@ , float2 minUV, float2 maxUV ) @end { - float tmpNdotL = saturate( dot( lightDir.xyz, geomNormal.xyz ) ); + float tmpNdotL = saturate( dot( lightDir.xyz, float3( geomNormal.xyz ) ) ); @property( @m == 1 ) shadowMapTexSize /= maxUV.x - minUV.x; @end - return ( ( 1.0f - tmpNdotL ) * normalOffsetBias * geomNormal.xyz * shadowMapTexSize ); + return ( ( 1.0f - tmpNdotL ) * normalOffsetBias * float3( geomNormal.xyz ) * shadowMapTexSize ); } @end @foreach( 4, m ) @property( @m == 0 ) - INLINE float getShadow( @insertpiece( TEXTURE2DSHADOW ) shadowMap, @insertpiece( SamplerShadow ) + INLINE midf getShadow( @insertpiece( TEXTURE2DSHADOW ) shadowMap, @insertpiece( SamplerShadow ) float4 psPosLN, float4 invShadowMapSize ) @end @property( @m == 1 ) - INLINE float getShadow( @insertpiece( TEXTURE2DSHADOW ) shadowMap, @insertpiece( SamplerShadow ) + INLINE midf getShadow( @insertpiece( TEXTURE2DSHADOW ) shadowMap, @insertpiece( SamplerShadow ) float4 psPosLN, float4 invShadowMapSize, float2 minUV, float2 maxUV ) @end @property( @m == 2 ) - INLINE float getShadowPoint( @insertpiece( TEXTURE2DSHADOW ) shadowMap, @insertpiece( SamplerShadow ) - float3 geomNormal, float normalOffsetBias, + INLINE midf getShadowPoint( @insertpiece( TEXTURE2DSHADOW ) shadowMap, @insertpiece( SamplerShadow ) + midf3 geomNormal, float normalOffsetBias, float3 posVS, float3 lightPos,float4 invShadowMapSize, float2 invDepthRange PASSBUF_ARG_DECL ) @end @property( @m == 3 ) - INLINE float getShadowPoint( @insertpiece( TEXTURE2DSHADOW ) shadowMap, @insertpiece( SamplerShadow ) - float3 geomNormal, float normalOffsetBias, + INLINE midf getShadowPoint( @insertpiece( TEXTURE2DSHADOW ) shadowMap, 
@insertpiece( SamplerShadow ) + midf3 geomNormal, float normalOffsetBias, float3 posVS, float3 lightPos, float4 invShadowMapSize, float2 invDepthRange, float2 minUV, float2 maxUV, float2 lengthUV PASSBUF_ARG_DECL ) @@ -302,7 +313,7 @@ @end @property( !exponential_shadow_maps ) - float retVal = 0.; + midf retVal = _h( 0. ); @property( pcf >= 3 ) float2 offsets[@value(pcf_iterations)] = @@ -397,30 +408,30 @@ @end @property( pcf == 3 ) - retVal *= 0.25; + retVal *= _h( 0.25 ); @end @property( pcf == 4 ) - retVal *= 0.11111111111111; + retVal *= _h( 0.11111111111111 ); @end @property( pcf == 5) - retVal *= 0.0625; + retVal *= _h( 0.0625 ); @end @property( pcf == 6 ) - retVal *= 0.04; + retVal *= _h( 0.04 ); @end @end ///! exponential_shadow_maps @property( exponential_shadow_maps ) float expDepth = OGRE_SAMPLE_SHADOW_ESM( shadowMap, shadowSampler, uv ); - float retVal = exp( @value( exponential_shadow_maps ).0 * (expDepth - fDepth) ); - retVal = min( retVal, 1.0 ); + float unclampedVal = exp( @value( exponential_shadow_maps ).0 * (expDepth - fDepth) ); + midf retVal = midf_c( min( unclampedVal, 1.0 ) ); @end ///! exponential_shadow_maps @property( (@m == 0 || @m == 2) && syntax == metal ) //Metal does not support clamp to border colour retVal = (uv.x <= 0.0h || uv.x >= 1.0h || - uv.y <= 0.0h || uv.y >= 1.0h) ? 1.0 : retVal; + uv.y <= 0.0h || uv.y >= 1.0h) ? _h( 1.0 ) : retVal; @end @property( @m == 1 || @m == 3 ) retVal = (uv.x <= minUV.x || uv.x >= maxUV.x || - uv.y <= minUV.y || uv.y >= maxUV.y) ? 1.0 : retVal; + uv.y <= minUV.y || uv.y >= maxUV.y) ? 
_h( 1.0 ) : retVal; @end return retVal; @@ -437,9 +448,9 @@ @property( debug_pssm_splits ) float3 debugPssmSplit = float3( 0, 0, 0 ); @end - float fShadow = 1.0; + midf fShadow = _h( 1.0 ); @property( hlms_pssm_blend ) - float fShadowBlend = 1.0; + midf fShadowBlend = _h( 1.0 ); @end @property( receive_shadows ) if( inPs.depth <= passBuf.pssmSplitPoints@value(CurrentShadowMap) ) @@ -455,9 +466,11 @@ inPs_posL1, passBuf.shadowRcv[@value(CurrentShadowMap)].invShadowMapSize hlms_shadowmap@value(CurrentShadowMap)_uv_param ); - fShadow = lerp( fShadow, fShadowBlend, - (inPs.depth - passBuf.pssmBlendPoints@value(CurrentShadowMapBlend)) / - (passBuf.pssmSplitPoints@value(CurrentShadowMapBlend) - passBuf.pssmBlendPoints@counter(CurrentShadowMapBlend)) ); + fShadow = lerp( + fShadow, fShadowBlend, + midf_c( ( inPs.depth - passBuf.pssmBlendPoints@value( CurrentShadowMapBlend ) ) / + ( passBuf.pssmSplitPoints@value( CurrentShadowMapBlend ) - + passBuf.pssmBlendPoints@counter( CurrentShadowMapBlend ) ) ) ); } @end @property( debug_pssm_splits ) @@ -478,16 +491,19 @@ inPs_posL@value(CurrentShadowMap), passBuf.shadowRcv[@value(CurrentShadowMap)].invShadowMapSize hlms_shadowmap@value(CurrentShadowMap)_uv_param ); - fShadow = lerp( fShadow, fShadowBlend, - (inPs.depth - passBuf.pssmBlendPoints@value(CurrentShadowMapBlend)) / - (passBuf.pssmSplitPoints@value(CurrentShadowMapBlend) - passBuf.pssmBlendPoints@counter(CurrentShadowMapBlend)) ); + fShadow = lerp( + fShadow, fShadowBlend, + midf_c( ( inPs.depth - passBuf.pssmBlendPoints@value( CurrentShadowMapBlend ) ) / + ( passBuf.pssmSplitPoints@value( CurrentShadowMapBlend ) - + passBuf.pssmBlendPoints@counter( CurrentShadowMapBlend ) ) ) ); } @end @property( hlms_pssm_fade && @n == hlms_pssm_splits_minus_one ) if( inPs.depth > passBuf.pssmFadePoint ) { - fShadow = lerp( fShadow, 1.0, - (inPs.depth - passBuf.pssmFadePoint) / - (passBuf.pssmSplitPoints@value(hlms_pssm_splits_minus_one) - passBuf.pssmFadePoint) ); + fShadow = lerp( 
fShadow, _h( 1.0 ), + midf_c( ( inPs.depth - passBuf.pssmFadePoint ) / + ( passBuf.pssmSplitPoints@value( hlms_pssm_splits_minus_one ) - + passBuf.pssmFadePoint ) ) ); } @end @property( debug_pssm_splits ) @@ -503,12 +519,12 @@ @end @end @property( !hlms_pssm_splits && hlms_num_shadow_map_lights && hlms_lights_directional ) @property( receive_shadows ) - float fShadow = getShadow( hlms_shadowmap@value(CurrentShadowMap), @insertpiece( UseSamplerShadow ) - inPs_posL0, - passBuf.shadowRcv[@value(CurrentShadowMap)].invShadowMapSize - hlms_shadowmap@counter(CurrentShadowMap)_uv_param ); + midf fShadow = getShadow( hlms_shadowmap@value(CurrentShadowMap), @insertpiece( UseSamplerShadow ) + inPs_posL0, + passBuf.shadowRcv[@value(CurrentShadowMap)].invShadowMapSize + hlms_shadowmap@counter(CurrentShadowMap)_uv_param ); @else - float fShadow = 1.0; + midf fShadow = _h( 1.0 ); @end @end @end @@ -516,6 +532,29 @@ @property( receive_shadows ) @piece( DarkenWithShadowFirstLight )* fShadow@end +@property( hlms_static_branch_shadow_map_lights ) + + // All point and spot lights must share the same hlms_shadowmap atlas + // See HlmsPbs::setStaticBranchingLights + @piece( DarkenWithShadow_cur_shadow_map ) + * getShadow( hlms_shadowmap@value(CurrentShadowMap), @insertpiece( UseSamplerShadow ) + inPs_posL_cur_shadow_map, + passBuf.shadowRcv[cur_shadow_map].invShadowMapSize + , shadowmap_uv_min[cur_shadow_map], shadowmap_uv_max[cur_shadow_map] ) + @end + + @piece( DarkenWithShadowPoint_cur_shadow_map ) + * getShadowPoint( hlms_shadowmap@value(CurrentShadowMap), @insertpiece( UseSamplerShadow ) + pixelData.geomNormal, + passBuf.shadowRcv[cur_shadow_map].normalOffsetBias, + inPs.pos.xyz, light0Buf.lights[light_idx].position.xyz, + passBuf.shadowRcv[cur_shadow_map].invShadowMapSize, + passBuf.shadowRcv[cur_shadow_map].shadowDepthRange.xy + , shadowmap_uv_min[cur_shadow_map], shadowmap_uv_max[cur_shadow_map], shadowmap_uv_length[cur_shadow_map] PASSBUF_ARG ) + @end + +@else + @piece( 
DarkenWithShadow ) * getShadow( hlms_shadowmap@value(CurrentShadowMap), @insertpiece( UseSamplerShadow ) inPs_posL@value(CurrentShadowMap), @@ -534,6 +573,9 @@ passBuf.shadowRcv[@value(CurrentShadowMap)].shadowDepthRange.xy hlms_shadowmap@counter(CurrentShadowMap)_uv_param PASSBUF_ARG ) @end + +@end + @end ///!receive_shadows @end diff --git a/ogre2/src/media/Hlms/Pbs/GLSL/Forward3D_piece_ps.glsl b/ogre2/src/media/Hlms/Pbs/GLSL/Forward3D_piece_ps.glsl index 27d4a0563..aae9c0cde 100644 --- a/ogre2/src/media/Hlms/Pbs/GLSL/Forward3D_piece_ps.glsl +++ b/ogre2/src/media/Hlms/Pbs/GLSL/Forward3D_piece_ps.glsl @@ -5,13 +5,13 @@ @property( syntax == glslvk ) layout( ogre_s@value(decalsSampler) ) uniform sampler decalsSampler; @end - @property( hlms_decals_diffuse )vulkan_layout( ogre_t@value(decalsDiffuseTex) ) uniform texture2DArray decalsDiffuseTex;@end - @property( hlms_decals_normals )vulkan_layout( ogre_t@value(decalsNormalsTex) ) uniform texture2DArray decalsNormalsTex;@end + @property( hlms_decals_diffuse )vulkan_layout( ogre_t@value(decalsDiffuseTex) ) midf_tex uniform texture2DArray decalsDiffuseTex;@end + @property( hlms_decals_normals )vulkan_layout( ogre_t@value(decalsNormalsTex) ) midf_tex uniform texture2DArray decalsNormalsTex;@end @property( hlms_decals_diffuse == hlms_decals_emissive ) #define decalsEmissiveTex decalsDiffuseTex @end @property( hlms_decals_emissive && hlms_decals_diffuse != hlms_decals_emissive ) - vulkan_layout( ogre_t@value(decalsEmissiveTex) ) uniform texture2DArray decalsEmissiveTex; + vulkan_layout( ogre_t@value(decalsEmissiveTex) ) midf_tex uniform texture2DArray decalsEmissiveTex; @end @end @end diff --git a/ogre2/src/media/Hlms/Pbs/GLSL/PixelShader_ps.glsl b/ogre2/src/media/Hlms/Pbs/GLSL/PixelShader_ps.glsl index 6a6b632fe..b2c3e7519 100644 --- a/ogre2/src/media/Hlms/Pbs/GLSL/PixelShader_ps.glsl +++ b/ogre2/src/media/Hlms/Pbs/GLSL/PixelShader_ps.glsl @@ -10,15 +10,15 @@ layout(std140) uniform; @property( !hlms_render_depth_only ) 
@property( !hlms_shadowcaster ) @property( !hlms_prepass ) - layout(location = @counter(rtv_target), index = 0) out vec4 outColour; + layout(location = @counter(rtv_target), index = 0) out midf4 outColour; @end @property( hlms_gen_normals_gbuffer ) #define outPs_normals outNormals - layout(location = @counter(rtv_target)) out vec4 outNormals; + layout(location = @counter(rtv_target)) out midf4 outNormals; @end @property( hlms_prepass ) #define outPs_shadowRoughness outShadowRoughness - layout(location = @counter(rtv_target)) out vec2 outShadowRoughness; + layout(location = @counter(rtv_target)) out midf2 outShadowRoughness; @end @else layout(location = @counter(rtv_target), index = 0) out float outColour; @@ -49,8 +49,8 @@ layout(std140) uniform; vulkan_layout( ogre_t@value(depthTextureNoMsaa) ) uniform texture2D depthTextureNoMsaa; @end @end - vulkan_layout( ogre_t@value(refractionMap) ) uniform texture2D refractionMap; - vulkan( layout( ogre_s@value(refractionMap) ) uniform sampler refractionMapSampler ); + vulkan_layout( ogre_t@value(refractionMap) ) midf_tex uniform texture2D refractionMap; + vulkan( layout( ogre_s@value(refractionMap) ) uniform sampler refractionMapSampler ); @end @insertpiece( DeclPlanarReflTextures ) @@ -82,21 +82,21 @@ vulkan_layout( location = 0 ) in block ReadOnlyBufferF( @value(f3dLightList), float4, f3dLightList ); @end @property( irradiance_volumes ) - vulkan_layout( ogre_t@value(irradianceVolume) ) uniform texture3D irradianceVolume; - vulkan( layout( ogre_s@value(irradianceVolume) )uniform sampler irradianceVolumeSampler ); + vulkan_layout( ogre_t@value(irradianceVolume) ) midf_tex uniform texture3D irradianceVolume; + vulkan( layout( ogre_s@value(irradianceVolume) )uniform sampler irradianceVolumeSampler ); @end @foreach( num_textures, n ) - vulkan_layout( ogre_t@value(textureMaps@n) ) uniform texture2DArray textureMaps@n;@end + vulkan_layout( ogre_t@value(textureMaps@n) ) midf_tex uniform texture2DArray textureMaps@n;@end 
@property( use_envprobe_map ) @property( !hlms_enable_cubemaps_auto ) - vulkan_layout( ogre_t@value(texEnvProbeMap) ) uniform textureCube texEnvProbeMap; + vulkan_layout( ogre_t@value(texEnvProbeMap) ) midf_tex uniform textureCube texEnvProbeMap; @else @property( !hlms_cubemaps_use_dpm ) - vulkan_layout( ogre_t@value(texEnvProbeMap) ) uniform textureCubeArray texEnvProbeMap; + vulkan_layout( ogre_t@value(texEnvProbeMap) ) midf_tex uniform textureCubeArray texEnvProbeMap; @else - vulkan_layout( ogre_t@value(texEnvProbeMap) ) uniform texture2DArray texEnvProbeMap; + vulkan_layout( ogre_t@value(texEnvProbeMap) ) midf_tex uniform texture2DArray texEnvProbeMap; @insertpiece( DeclDualParaboloidFunc ) @end @end @@ -140,7 +140,7 @@ void main() @property( alpha_test ) @foreach( num_textures, n ) - vulkan_layout( ogre_t@value(textureMaps@n) ) uniform texture2DArray textureMaps@n;@end + vulkan_layout( ogre_t@value(textureMaps@n) ) midf_tex uniform texture2DArray textureMaps@n;@end @property( syntax == glslvk ) @foreach( num_samplers, n ) diff --git a/ogre2/src/media/Hlms/Pbs/GLSL/VertexShader_vs.glsl b/ogre2/src/media/Hlms/Pbs/GLSL/VertexShader_vs.glsl index c33c951b0..3082b996c 100644 --- a/ogre2/src/media/Hlms/Pbs/GLSL/VertexShader_vs.glsl +++ b/ogre2/src/media/Hlms/Pbs/GLSL/VertexShader_vs.glsl @@ -4,7 +4,7 @@ out gl_PerVertex { vec4 gl_Position; -@property( hlms_pso_clip_distances ) +@property( hlms_pso_clip_distances && !hlms_emulate_clip_distances ) float gl_ClipDistance[@value(hlms_pso_clip_distances)]; @end }; @@ -16,13 +16,13 @@ layout(std140) uniform; vulkan_layout( OGRE_POSITION ) in vec4 vertex; -@property( hlms_normal )vulkan_layout( OGRE_NORMAL ) in vec3 normal;@end -@property( hlms_qtangent )vulkan_layout( OGRE_NORMAL ) in vec4 qtangent;@end +@property( hlms_normal )vulkan_layout( OGRE_NORMAL ) in float3 normal;@end +@property( hlms_qtangent )vulkan_layout( OGRE_NORMAL ) in midf4 qtangent;@end @property( normal_map && !hlms_qtangent ) - @property( hlms_tangent4 
)vulkan_layout( OGRE_TANGENT ) in vec4 tangent;@end - @property( !hlms_tangent4 )vulkan_layout( OGRE_TANGENT ) in vec3 tangent;@end - @property( hlms_binormal )vulkan_layout( OGRE_BIRNORMAL ) in vec3 binormal;@end + @property( hlms_tangent4 )vulkan_layout( OGRE_TANGENT ) in float4 tangent;@end + @property( !hlms_tangent4 )vulkan_layout( OGRE_TANGENT ) in float3 tangent;@end + @property( hlms_binormal )vulkan_layout( OGRE_BIRNORMAL ) in float3 binormal;@end @end @property( hlms_skeleton ) diff --git a/ogre2/src/media/Hlms/Pbs/HLSL/Textures_piece_ps.hlsl b/ogre2/src/media/Hlms/Pbs/HLSL/Textures_piece_ps.hlsl index 2e1df3879..c3ea30413 100644 --- a/ogre2/src/media/Hlms/Pbs/HLSL/Textures_piece_ps.hlsl +++ b/ogre2/src/media/Hlms/Pbs/HLSL/Textures_piece_ps.hlsl @@ -2,10 +2,10 @@ @property( !hlms_render_depth_only && !hlms_shadowcaster ) @piece( ExtraOutputTypes ) @property( hlms_gen_normals_gbuffer ) - float4 normals : SV_Target@counter(rtv_target); + midf4 normals : SV_Target@counter(rtv_target); @end @property( hlms_prepass ) - float2 shadowRoughness : SV_Target@counter(rtv_target); + midf2 shadowRoughness : SV_Target@counter(rtv_target); @end @end @end diff --git a/ogre2/src/media/Hlms/Pbs/Metal/Forward3D_piece_ps.metal b/ogre2/src/media/Hlms/Pbs/Metal/Forward3D_piece_ps.metal index c3ad0672e..bc9895245 100644 --- a/ogre2/src/media/Hlms/Pbs/Metal/Forward3D_piece_ps.metal +++ b/ogre2/src/media/Hlms/Pbs/Metal/Forward3D_piece_ps.metal @@ -3,13 +3,13 @@ @property( hlms_enable_decals ) @piece( DeclDecalsSamplers ) , sampler decalsSampler [[sampler(@value(decalsSampler))]] - @property( hlms_decals_diffuse ), texture2d_array decalsDiffuseTex [[texture(@value(decalsDiffuseTex))]]@end - @property( hlms_decals_normals ), texture2d_array decalsNormalsTex [[texture(@value(decalsNormalsTex))]]@end + @property( hlms_decals_diffuse ), texture2d_array decalsDiffuseTex [[texture(@value(decalsDiffuseTex))]]@end + @property( hlms_decals_normals ), texture2d_array decalsNormalsTex 
[[texture(@value(decalsNormalsTex))]]@end @property( hlms_decals_diffuse == hlms_decals_emissive ) #define decalsEmissiveTex decalsDiffuseTex @end @property( hlms_decals_emissive && hlms_decals_diffuse != hlms_decals_emissive ) - , texture2d_array decalsEmissiveTex [[texture(@value(decalsEmissiveTex))]] + , texture2d_array decalsEmissiveTex [[texture(@value(decalsEmissiveTex))]] @end @end @end diff --git a/ogre2/src/media/Hlms/Pbs/Metal/PixelShader_ps.metal b/ogre2/src/media/Hlms/Pbs/Metal/PixelShader_ps.metal index fbe5b4ca7..92ef17edd 100644 --- a/ogre2/src/media/Hlms/Pbs/Metal/PixelShader_ps.metal +++ b/ogre2/src/media/Hlms/Pbs/Metal/PixelShader_ps.metal @@ -67,6 +67,7 @@ fragment @insertpiece( output_type ) main_metal @end @insertpiece( MaterialDecl ) @insertpiece( PccManualProbeDecl ) + @insertpiece( AtmosphereNprSkyDecl ) @end @insertpiece( custom_ps_uniformDeclaration ) // END UNIFORM DECLARATION @@ -77,11 +78,11 @@ fragment @insertpiece( output_type ) main_metal @property( hlms_use_prepass ) @property( !hlms_use_prepass_msaa ) - , texture2d gBuf_normals [[texture(@value(gBuf_normals))]] - , texture2d gBuf_shadowRoughness [[texture(@value(gBuf_shadowRoughness))]] + , texture2d gBuf_normals [[texture(@value(gBuf_normals))]] + , texture2d gBuf_shadowRoughness [[texture(@value(gBuf_shadowRoughness))]] @end @property( hlms_use_prepass_msaa ) - , texture2d_ms gBuf_normals [[texture(@value(gBuf_normals))]] - , texture2d_ms gBuf_shadowRoughness[[texture(@value(gBuf_shadowRoughness))]] + , texture2d_ms gBuf_normals [[texture(@value(gBuf_normals))]] + , texture2d_ms gBuf_shadowRoughness[[texture(@value(gBuf_shadowRoughness))]] @end @property( hlms_use_ssr ) @@ -98,7 +99,7 @@ fragment @insertpiece( output_type ) main_metal , texture2d depthTextureNoMsaa [[texture(@value(depthTextureNoMsaa))]] @end @end - , texture2d refractionMap [[texture(@value(refractionMap))]] + , texture2d refractionMap [[texture(@value(refractionMap))]] , sampler refractionMapSampler 
[[sampler(@value(refractionMap))]] @end @@ -107,22 +108,22 @@ fragment @insertpiece( output_type ) main_metal @insertpiece( DeclLightProfilesTexture ) @property( irradiance_volumes ) - , texture3d irradianceVolume [[texture(@value(irradianceVolume))]] + , texture3d irradianceVolume [[texture(@value(irradianceVolume))]] , sampler irradianceVolumeSampler [[sampler(@value(irradianceVolume))]] @end @foreach( num_textures, n ) - , texture2d_array textureMaps@n [[texture(@value(textureMaps@n))]]@end + , texture2d_array textureMaps@n [[texture(@value(textureMaps@n))]]@end @property( use_envprobe_map ) @property( !hlms_enable_cubemaps_auto ) - , texturecube texEnvProbeMap [[texture(@value(texEnvProbeMap))]] + , texturecube texEnvProbeMap [[texture(@value(texEnvProbeMap))]] @end @property( hlms_enable_cubemaps_auto ) @property( !hlms_cubemaps_use_dpm ) - , texturecube_array texEnvProbeMap [[texture(@value(texEnvProbeMap))]] + , texturecube_array texEnvProbeMap [[texture(@value(texEnvProbeMap))]] @end @property( hlms_cubemaps_use_dpm ) - , texture2d_array texEnvProbeMap [[texture(@value(texEnvProbeMap))]] + , texture2d_array texEnvProbeMap [[texture(@value(texEnvProbeMap))]] @end @end @property( envMapRegSampler < samplerStateStart ) @@ -172,7 +173,7 @@ fragment @insertpiece( output_type ) main_metal // END UNIFORM DECLARATION @foreach( num_textures, n ) - , texture2d_array textureMaps@n [[texture(@value(textureMaps@n))]]@end + , texture2d_array textureMaps@n [[texture(@value(textureMaps@n))]]@end @foreach( num_samplers, n ) , sampler samplerState@value(samplerStateStart) [[sampler(@counter(samplerStateStart))]]@end ) diff --git a/ogre2/src/media/Hlms/Pbs/Metal/Textures_piece_ps.metal b/ogre2/src/media/Hlms/Pbs/Metal/Textures_piece_ps.metal index fb76fb715..4773328bc 100644 --- a/ogre2/src/media/Hlms/Pbs/Metal/Textures_piece_ps.metal +++ b/ogre2/src/media/Hlms/Pbs/Metal/Textures_piece_ps.metal @@ -2,10 +2,10 @@ @property( !hlms_render_depth_only && !hlms_shadowcaster ) 
@piece( ExtraOutputTypes ) @property( hlms_gen_normals_gbuffer ) - float4 normals [[ color(@counter(rtv_target)) ]]; + midf4 normals [[ color(@counter(rtv_target)) ]]; @end @property( hlms_prepass ) - float2 shadowRoughness [[ color(@counter(rtv_target)) ]]; + midf2 shadowRoughness [[ color(@counter(rtv_target)) ]]; @end @end @end diff --git a/ogre2/src/media/Hlms/Pbs/Metal/VertexShader_vs.metal b/ogre2/src/media/Hlms/Pbs/Metal/VertexShader_vs.metal index 575ccd561..6f8ad6adf 100644 --- a/ogre2/src/media/Hlms/Pbs/Metal/VertexShader_vs.metal +++ b/ogre2/src/media/Hlms/Pbs/Metal/VertexShader_vs.metal @@ -9,7 +9,7 @@ struct VS_INPUT { float4 position [[attribute(VES_POSITION)]]; @property( hlms_normal ) float3 normal [[attribute(VES_NORMAL)]];@end -@property( hlms_qtangent ) float4 qtangent [[attribute(VES_NORMAL)]];@end +@property( hlms_qtangent ) midf4 qtangent [[attribute(VES_NORMAL)]];@end @property( normal_map && !hlms_qtangent ) @property( hlms_tangent4 )float4 tangent [[attribute(VES_TANGENT)]];@end @@ -53,6 +53,7 @@ vertex PS_INPUT main_metal // START UNIFORM DECLARATION @insertpiece( PassDecl ) @insertpiece( InstanceDecl ) + @insertpiece( AtmosphereNprSkyDecl ) , device const float4 *worldMatBuf [[buffer(TEX_SLOT_START+0)]] @property( hlms_pose ) @property( !hlms_pose_half ) @@ -62,8 +63,8 @@ vertex PS_INPUT main_metal @end @end @property( hlms_vertex_id ) - , uint vertexId [[vertex_id]] - , uint baseVertex [[base_vertex]] + , uint inVs_vertexId [[vertex_id]] + , uint baseVertexID [[base_vertex]] @end @insertpiece( custom_vs_uniformDeclaration ) // END UNIFORM DECLARATION diff --git a/ogre2/src/media/Hlms/Terra/Any/500.Structs_piece_vs_piece_ps.any b/ogre2/src/media/Hlms/Terra/Any/500.Structs_piece_vs_piece_ps.any index c30ac6824..ea4e6717a 100644 --- a/ogre2/src/media/Hlms/Terra/Any/500.Structs_piece_vs_piece_ps.any +++ b/ogre2/src/media/Hlms/Terra/Any/500.Structs_piece_vs_piece_ps.any @@ -86,6 +86,11 @@ struct CellData @piece( Terra_VStoPS_block ) 
INTERPOLANT( float3 pos, @counter(texcoord) ); INTERPOLANT( float2 uv0, @counter(texcoord) ); + + @property( detail_triplanar ) + INTERPOLANT( float3 worldPos, @counter(texcoord) ); + @end + @insertpiece( VStoPS_block ) @end diff --git a/ogre2/src/media/Hlms/Terra/Any/550.DetailTriplanar_piece_ps.any b/ogre2/src/media/Hlms/Terra/Any/550.DetailTriplanar_piece_ps.any new file mode 100644 index 000000000..3baaa2c81 --- /dev/null +++ b/ogre2/src/media/Hlms/Terra/Any/550.DetailTriplanar_piece_ps.any @@ -0,0 +1,175 @@ +@piece( DeclDetailTriplanarFuncs ) + @property( detail_triplanar ) + // https://catlikecoding.com/unity/tutorials/advanced-rendering/triplanar-mapping/ + + // Side view projection + midf2 GetTriplanarUVSd( midf3 position, midf3 normal ) + { + midf2 uv = -position.zy; + + if( normal.x < _h( 0.0 ) ) + { + uv.x = -uv.x; + } + + return uv; + } + + // Top view projection + midf2 GetTriplanarUVTp( midf3 position, midf3 normal ) + { + return position.xz; + } + + // Front view projection + midf2 GetTriplanarUVFr( midf3 position, midf3 normal ) + { + midf2 uv = -position.xy; + + if( normal.z >= _h( 0.0 ) ) + { + uv.x = -uv.x; + } + + return uv; + } + @end + + @property( detail_triplanar_diffuse ) + @foreach( detail_maps_diffuse, n )@property( detail_map@n ) + #define SampleDetailCol@nTriplanar( tex, sampler, uv, arrayIdx ) (SampleDetailCol@n( tex, \ + sampler, \ + UV_DETAIL@n( GetTriplanarUVSd( inPs.worldPos, pixelData.worldSpaceNormal )@insertpiece( offsetDetail@n ) ), \ + arrayIdx ) * triplanarBlend.x + \ + \ + SampleDetailCol@n( tex, \ + sampler, \ + UV_DETAIL@n( GetTriplanarUVTp( inPs.worldPos, pixelData.worldSpaceNormal )@insertpiece( offsetDetail@n ) ), \ + arrayIdx ) * triplanarBlend.y + \ + \ + SampleDetailCol@n( tex, \ + sampler, \ + UV_DETAIL@n( GetTriplanarUVFr( inPs.worldPos, pixelData.worldSpaceNormal )@insertpiece( offsetDetail@n ) ), \ + arrayIdx ) * triplanarBlend.z) + @end @end + @end + + @property( detail_triplanar_normal ) + // 
https://catlikecoding.com/unity/tutorials/advanced-rendering/triplanar-mapping/ + midf3 BlendTriplanarNormal( midf3 mappedNormal, midf3 surfaceNormal ) + { + midf3 n; + n.xy = mappedNormal.xy + surfaceNormal.xy; + n.z = mappedNormal.z * surfaceNormal.z; + return n; + } + + @foreach( detail_maps_normal, n )@property( detail_map_nm@n ) + #define SampleDetailMapNm@nSd( tex, sampler, uv, arrayIdx ) SampleDetailMapNm@nSdFn( tex, sampler, uv, arrayIdx, detailWeights, pixelData.worldSpaceNormal ) + #define SampleDetailMapNm@nTp( tex, sampler, uv, arrayIdx ) SampleDetailMapNm@nTpFn( tex, sampler, uv, arrayIdx, detailWeights, pixelData.worldSpaceNormal ) + #define SampleDetailMapNm@nFr( tex, sampler, uv, arrayIdx ) SampleDetailMapNm@nFrFn( tex, sampler, uv, arrayIdx, detailWeights, pixelData.worldSpaceNormal ) + + // Side view projection + midf3 SampleDetailMapNm@nSdFn( Texture2DArray tex, SamplerState smp, midf2 uv, uint arrayIdx, midf4 weights, midf3 normal ) + { + midf3 tangentNormal = getTSNormal( tex, smp, uv, arrayIdx ) * weights.@insertpiece( detail_swizzle@n ); + + if( normal.x >= _h( 0.0 ) ) + { + tangentNormal.x = -tangentNormal.x; + } + + midf3 worldNormal = BlendTriplanarNormal( tangentNormal, normal.zyx ).zyx; + worldNormal.x += _h( 1.0 ) - weights.@insertpiece( detail_swizzle@n ); + + return worldNormal; + } + + // Top view projection + midf3 SampleDetailMapNm@nTpFn( Texture2DArray tex, SamplerState smp, midf2 uv, uint arrayIdx, midf4 weights, midf3 normal ) + { + midf3 tangentNormal = getTSNormal( tex, smp, uv, arrayIdx ) * weights.@insertpiece( detail_swizzle@n ); + tangentNormal.y = -tangentNormal.y; + + midf3 worldNormal = BlendTriplanarNormal( tangentNormal, normal.xzy ).xzy; + worldNormal.y += _h( 1.0 ) - weights.@insertpiece( detail_swizzle@n ); + + return worldNormal; + } + + // Front view projection + midf3 SampleDetailMapNm@nFrFn( Texture2DArray tex, SamplerState smp, midf2 uv, uint arrayIdx, midf4 weights, midf3 normal ) + { + midf3 tangentNormal = 
getTSNormal( tex, smp, uv, arrayIdx ) * weights.@insertpiece( detail_swizzle@n ); + + if( normal.z < _h( 0.0 ) ) + { + tangentNormal.x = -tangentNormal.x; + } + + midf3 worldNormal = BlendTriplanarNormal( tangentNormal, normal ); + worldNormal.z += _h( 1.0 ) - weights.@insertpiece( detail_swizzle@n ); + + return worldNormal; + } + @end @end + @end + + @property( detail_triplanar_roughness ) + @foreach( 4, n )@property( roughness_map@n ) + #define SampleRoughness@nTriplanar( tex, sampler, uv, arrayIdx ) (SampleRoughness@n( tex, \ + sampler, \ + GetTriplanarUVSd( inPs.worldPos, pixelData.worldSpaceNormal )@insertpiece( offsetDetail@n ), \ + arrayIdx ) * triplanarBlend.x + \ + \ + SampleRoughness@n( tex, \ + sampler, \ + GetTriplanarUVTp( inPs.worldPos, pixelData.worldSpaceNormal )@insertpiece( offsetDetail@n ), \ + arrayIdx ) * triplanarBlend.y + \ + \ + SampleRoughness@n( tex, \ + sampler, \ + GetTriplanarUVFr( inPs.worldPos, pixelData.worldSpaceNormal )@insertpiece( offsetDetail@n ), \ + arrayIdx ) * triplanarBlend.z) + @end @end + @end + + @property( detail_triplanar_metalness ) + @foreach( 4, n )@property( metalness_map@n ) + #define SampleMetalness@nTriplanar( tex, sampler, uv, arrayIdx ) (SampleMetalness@n( tex, \ + sampler, \ + GetTriplanarUVSd( inPs.worldPos, pixelData.worldSpaceNormal )@insertpiece( offsetDetail@n ), \ + arrayIdx ) * triplanarBlend.x + \ + \ + SampleMetalness@n( tex, \ + sampler, \ + GetTriplanarUVTp( inPs.worldPos, pixelData.worldSpaceNormal )@insertpiece( offsetDetail@n ), \ + arrayIdx ) * triplanarBlend.y + \ + \ + SampleMetalness@n( tex, \ + sampler, \ + GetTriplanarUVFr( inPs.worldPos, pixelData.worldSpaceNormal )@insertpiece( offsetDetail@n ), \ + arrayIdx ) * triplanarBlend.z) + @end @end + @end +@end + + +@piece( SampleAndApplyDetailNormalMapsTriplanar ) + @foreach( detail_maps_normal, n )@property( detail_map_nm@n ) + pixelData.normal = SampleDetailMapNm@nSd( textureMaps@value( detail_map_nm@n_idx ), + samplerState@value( 
detail_map_nm@n_sampler ), + UV_DETAIL_NM@n( GetTriplanarUVSd( inPs.worldPos, pixelData.worldSpaceNormal )@insertpiece( offsetDetail@n ) ), + texIndex_detailNormMapIdx@n ) * triplanarBlend.x + + + SampleDetailMapNm@nTp( textureMaps@value( detail_map_nm@n_idx ), + samplerState@value( detail_map_nm@n_sampler ), + UV_DETAIL_NM@n( GetTriplanarUVTp( inPs.worldPos, pixelData.worldSpaceNormal )@insertpiece( offsetDetail@n ) ), + texIndex_detailNormMapIdx@n ) * triplanarBlend.y + + + SampleDetailMapNm@nFr( textureMaps@value( detail_map_nm@n_idx ), + samplerState@value( detail_map_nm@n_sampler ), + UV_DETAIL_NM@n( GetTriplanarUVFr( inPs.worldPos, pixelData.worldSpaceNormal )@insertpiece( offsetDetail@n ) ), + texIndex_detailNormMapIdx@n ) * triplanarBlend.z; + @end @end +@end diff --git a/ogre2/src/media/Hlms/Terra/Any/800.PixelShader_piece_ps.any b/ogre2/src/media/Hlms/Terra/Any/800.PixelShader_piece_ps.any index fc67ebfb2..fff0d10f9 100644 --- a/ogre2/src/media/Hlms/Terra/Any/800.PixelShader_piece_ps.any +++ b/ogre2/src/media/Hlms/Terra/Any/800.PixelShader_piece_ps.any @@ -22,18 +22,27 @@ @insertpiece( DeclareBRDF_AreaLightApprox ) @end - #define SampleMetalness0( tex, sampler, uv, arrayIdx ) OGRE_SampleArray2D( tex, sampler, uv, arrayIdx ) - #define SampleMetalness1( tex, sampler, uv, arrayIdx ) OGRE_SampleArray2D( tex, sampler, uv, arrayIdx ) - #define SampleMetalness2( tex, sampler, uv, arrayIdx ) OGRE_SampleArray2D( tex, sampler, uv, arrayIdx ) - #define SampleMetalness3( tex, sampler, uv, arrayIdx ) OGRE_SampleArray2D( tex, sampler, uv, arrayIdx ) + #define SampleMetalness0( tex, sampler, uv, arrayIdx ) OGRE_SampleArray2DF16( tex, sampler, uv, arrayIdx ) + #define SampleMetalness1( tex, sampler, uv, arrayIdx ) OGRE_SampleArray2DF16( tex, sampler, uv, arrayIdx ) + #define SampleMetalness2( tex, sampler, uv, arrayIdx ) OGRE_SampleArray2DF16( tex, sampler, uv, arrayIdx ) + #define SampleMetalness3( tex, sampler, uv, arrayIdx ) OGRE_SampleArray2DF16( tex, sampler, uv, 
arrayIdx ) - #define SampleRoughness0( tex, sampler, uv, arrayIdx ) OGRE_SampleArray2D( tex, sampler, uv, arrayIdx ) - #define SampleRoughness1( tex, sampler, uv, arrayIdx ) OGRE_SampleArray2D( tex, sampler, uv, arrayIdx ) - #define SampleRoughness2( tex, sampler, uv, arrayIdx ) OGRE_SampleArray2D( tex, sampler, uv, arrayIdx ) - #define SampleRoughness3( tex, sampler, uv, arrayIdx ) OGRE_SampleArray2D( tex, sampler, uv, arrayIdx ) + #define SampleRoughness0( tex, sampler, uv, arrayIdx ) OGRE_SampleArray2DF16( tex, sampler, uv, arrayIdx ) + #define SampleRoughness1( tex, sampler, uv, arrayIdx ) OGRE_SampleArray2DF16( tex, sampler, uv, arrayIdx ) + #define SampleRoughness2( tex, sampler, uv, arrayIdx ) OGRE_SampleArray2DF16( tex, sampler, uv, arrayIdx ) + #define SampleRoughness3( tex, sampler, uv, arrayIdx ) OGRE_SampleArray2DF16( tex, sampler, uv, arrayIdx ) #define OGRE_UNPACK_X_2x16( packedInt ) ((packedInt) & 0x0000FFFFu) #define OGRE_UNPACK_Y_2x16( packedInt ) ((packedInt) >> 16u) + + @insertpiece( DeclDetailTriplanarFuncs ) + + @property( detail_triplanar ) + midf3 pow3( midf3 v, midf e ) + { + return midf3_c( pow( v.x, e ), pow( v.y, e ), pow( v.z, e ) ); + } + @end @end //----------------------------------------------------------------------------- @@ -103,27 +112,23 @@ texIndex_diffuseIdx ); @else /// If there are no diffuse maps, we must initialize it to some value. 
- pixelData.diffuse.xyzw = float4( 1, 1, 1, 1 ); + pixelData.diffuse.xyzw = midf4_c( 1, 1, 1, 1 ); @end @foreach( detail_maps_diffuse, n ) - @property( !detail_map@n )float3 detailCol@n = float3( 0.0f, 0.0f, 0.0f );@end + @property( !detail_map@n )midf3 detailCol@n = midf3_c( 0.0f, 0.0f, 0.0f );@end @end @property( !detail_maps_diffuse && !detail_maps_normal ) - float4 detailWeights = float4( 0.25f, 0.25f, 0.25f, 0.25f ); - @insertpiece( gz_weights ) + midf4 detailWeights = midf4_c( 0.25f, 0.25f, 0.25f, 0.25f ); @else - // GZ CUSTOMIZE BEGIN - //pixelData.diffuse.xyz *= (detailCol0.xyz * detailWeights.x + detailCol1.xyz * detailWeights.y) + - // (detailCol2.xyz * detailWeights.z + detailCol3.xyz * detailWeights.w); - @insertpiece( gz_weights ) - // GZ CUSTOMIZE END + pixelData.diffuse.xyz *= (detailCol0.xyz * detailWeights.x + detailCol1.xyz * detailWeights.y) + + (detailCol2.xyz * detailWeights.z + detailCol3.xyz * detailWeights.w); @end /// Apply the material's diffuse over the textures - pixelData.diffuse.xyz *= material.kD.xyz; + pixelData.diffuse.xyz *= midf3_c( material.kD.xyz ); @end @undefpiece( SampleSpecularMap ) @@ -131,31 +136,23 @@ /// SPECUlAR MAP @foreach( 4, n ) @property( metalness_map@n ) - float metalness@n = SampleMetalness@n( textureMaps@value( metalness_map@n_idx ), - samplerState@value(metalness_map@n_sampler), - inPs.uv0.xy * material.detailOffsetScale[@n].zw + - material.detailOffsetScale[@n].xy, - texIndex_detailMetalnessIdx@n ).x; + midf metalness@n = SampleMetalness@n@property( detail_triplanar_metalness )Triplanar@end ( textureMaps@value( metalness_map@n_idx ), + samplerState@value(metalness_map@n_sampler), + @property( detail_triplanar )GetTriplanarUVTp( inPs.worldPos, pixelData.worldSpaceNormal )@else inPs.uv0.xy@end * material.detailOffsetScale[@n].zw + + material.detailOffsetScale[@n].xy, + texIndex_detailMetalnessIdx@n ).x; @else - float metalness@n = 0; + midf metalness@n = _h( 1.0 ); @end @end - pixelData.specular.xyz = float3( 
1.0f, 1.0f, 1.0f ); - - // GZ CUSTOMIZE BEGIN - // float metalness = (metalness0 * detailWeights.x * material.metalness.x + - // metalness1 * detailWeights.y * material.metalness.y) + - // (metalness2 * detailWeights.z * material.metalness.z + - // metalness3 * detailWeights.w * material.metalness.w); - float metalness = 0.0f; - metalness = lerp( metalness, metalness0, detailWeights.x ); - metalness = lerp( metalness, metalness1, detailWeights.y ); - metalness = lerp( metalness, metalness2, detailWeights.z ); - metalness = lerp( metalness, metalness3, detailWeights.w ); - // GZ CUSTOMIZE END - - pixelData.F0 = lerp( float3( 0.03f, 0.03f, 0.03f ), pixelData.diffuse.xyz * 3.14159f, metalness ); + pixelData.specular.xyz = midf3_c( 1.0f, 1.0f, 1.0f ); + midf metalness = (metalness0 * detailWeights.x * midf_c( material.metalness.x ) + + metalness1 * detailWeights.y * midf_c( material.metalness.y )) + + (metalness2 * detailWeights.z * midf_c( material.metalness.z ) + + metalness3 * detailWeights.w * midf_c( material.metalness.w )); + + pixelData.F0 = lerp( midf3_c( 0.03f, 0.03f, 0.03f ), pixelData.diffuse.xyz * _h( 3.14159f ), metalness ); pixelData.diffuse.xyz = pixelData.diffuse.xyz - pixelData.diffuse.xyz * metalness; @end @@ -164,55 +161,63 @@ /// ROUGHNESS MAP @foreach( 4, n ) @property( roughness_map@n ) - float roughness@n = SampleRoughness@n( textureMaps@value( roughness_map@n_idx ), - samplerState@value(roughness_map@n_sampler), - inPs.uv0.xy * material.detailOffsetScale[@n].zw + - material.detailOffsetScale[@n].xy, - texIndex_detailRoughnessIdx@n ).x; + midf roughness@n = SampleRoughness@n@property( detail_triplanar_roughness )Triplanar@end ( textureMaps@value( roughness_map@n_idx ), + samplerState@value(roughness_map@n_sampler), + @property( detail_triplanar )GetTriplanarUVTp( inPs.worldPos, pixelData.worldSpaceNormal )@else inPs.uv0.xy@end * material.detailOffsetScale[@n].zw + + material.detailOffsetScale[@n].xy, + texIndex_detailRoughnessIdx@n ).x; @else - 
float roughness@n = 0; + midf roughness@n = _h( 1.0 ); @end @end - // GZ CUSTOMIZE BEGIN - // pixelData.perceptualRoughness = (roughness0 * detailWeights.x * material.roughness.x + - // roughness1 * detailWeights.y * material.roughness.y) + - // (roughness2 * detailWeights.z * material.roughness.z + - // roughness3 * detailWeights.w * material.roughness.w); - float roughness = 1.0f; - roughness = lerp( roughness, roughness0, detailWeights.x ); - roughness = lerp( roughness, roughness1, detailWeights.y ); - roughness = lerp( roughness, roughness2, detailWeights.z ); - roughness = lerp( roughness, roughness3, detailWeights.w ); - pixelData.perceptualRoughness = roughness; - // GZ CUSTOMIZE END + pixelData.perceptualRoughness = (roughness0 * detailWeights.x * midf_c( material.roughness.x ) + + roughness1 * detailWeights.y * midf_c( material.roughness.y )) + + (roughness2 * detailWeights.z * midf_c( material.roughness.z ) + + roughness3 * detailWeights.w * midf_c( material.roughness.w )); @property( perceptual_roughness ) - pixelData.roughness = max( pixelData.perceptualRoughness * pixelData.perceptualRoughness, 0.001f ); + pixelData.roughness = max( pixelData.perceptualRoughness * pixelData.perceptualRoughness, _h( 0.001f ) ); @else - pixelData.roughness = max( pixelData.perceptualRoughness, 0.001f ); + pixelData.roughness = max( pixelData.perceptualRoughness, _h( 0.001f ) ); @end @end @undefpiece( LoadNormalData ) @piece( LoadNormalData ) // Geometric normal - pixelData.geomNormal = OGRE_Sample( terrainNormals, samplerStateTerra, inPs.uv0.xy ).xyz * 2.0 - 1.0; + pixelData.geomNormal = OGRE_SampleF16( terrainNormals, samplerStateTerra, inPs.uv0.xy ).xyz * _h( 2.0 ) - _h( 1.0 ); @property( z_up ) - pixelData.geomNormal.yz = float2( -pixelData.geomNormal.z, pixelData.geomNormal.y ); + pixelData.geomNormal.yz = midf2_c( -pixelData.geomNormal.z, pixelData.geomNormal.y ); @end - pixelData.geomNormal = mul( pixelData.geomNormal, toFloat3x3( passBuf.view ) ); + + @property( 
detail_triplanar ) + pixelData.worldSpaceNormal = pixelData.geomNormal; + // https://bgolus.medium.com/normal-mapping-for-a-triplanar-shader-10bf39dca05a + midf3 triplanarBlend = pow3( pixelData.worldSpaceNormal, _h( 4.0 ) ); + triplanarBlend /= dot( triplanarBlend, midf3_c( 1.0, 1.0, 1.0 ) ); + @end + + pixelData.geomNormal = mul( pixelData.geomNormal, toMidf3x3( passBuf.view ) ); + @property( normal_map ) //Get the TBN matrix - float3 viewSpaceUnitX = mul( float3( 1, 0, 0 ) , toFloat3x3( passBuf.view ) ); - float3 vTangent = normalize( cross( pixelData.geomNormal, viewSpaceUnitX ) ); - float3 vBinormal = cross( vTangent, pixelData.geomNormal ); - float3x3 TBN = buildFloat3x3( vBinormal, vTangent, pixelData.geomNormal ); + midf3 viewSpaceUnitX = mul( midf3_c( 1, 0, 0 ), toMidf3x3( passBuf.view ) ); + midf3 vTangent = normalize( cross( pixelData.geomNormal, viewSpaceUnitX ) ); + midf3 vBinormal = cross( vTangent, pixelData.geomNormal ); + midf3x3 TBN = buildMidf3x3( vBinormal, vTangent, pixelData.geomNormal ); @end @end @piece( DefaultTerraBodyPS ) PixelData pixelData; + @property( hlms_emulate_clip_distances && hlms_global_clip_planes && hlms_pso_clip_distances && syntax == glslvk) + @foreach( hlms_pso_clip_distances, n ) + if( inPs.clipDistance@n < 0.0 ) + discard; + @end + @end + @insertpiece( LoadMaterial ) @insertpiece( UnpackTextureIndices0 ) @insertpiece( UnpackTextureIndices1 ) @@ -221,6 +226,11 @@ @insertpiece( LoadDetailWeights ) + @property( !hlms_use_prepass ) + @insertpiece( LoadNormalData ) + @insertpiece( SampleAndApplyDetailNormalMaps@property( detail_triplanar_normal )Triplanar@end ) + @end + @insertpiece( SampleDetailMaps ) @property( !hlms_prepass ) @@ -233,15 +243,16 @@ @insertpiece( forwardPlusDoDecals ) @property( !hlms_use_prepass ) - @insertpiece( LoadNormalData ) - @insertpiece( SampleAndApplyDetailNormalMaps ) - @insertpiece( custom_ps_posSampleNormal ) @insertpiece( forwardPlusApplyDecalsNormal ) @property( normal_map ) - pixelData.normal 
= normalize( mul( TBN, pixelData.normal ) ); + @property( detail_triplanar_normal ) + pixelData.normal = mul( pixelData.normal, toMidf3x3( passBuf.view ) ); + @else + pixelData.normal = normalize( mul( TBN, pixelData.normal ) ); + @end @end @insertpiece( DoDirectionalShadowMaps ) @@ -284,14 +295,14 @@ int gBufSubsample = int( findLSB( sampleMask ) ); - pixelData.normal = normalize( OGRE_Load2DMS( gBuf_normals, iFragCoord, gBufSubsample ).xyz * 2.0 - 1.0 ); - float2 shadowRoughness = OGRE_Load2DMS( gBuf_shadowRoughness, iFragCoord, gBufSubsample ).xy; + pixelData.normal = normalize( OGRE_Load2DMSF16( gBuf_normals, iFragCoord, gBufSubsample ).xyz * _h( 2.0 ) - _h( 1.0 ) ); + midf2 shadowRoughness = OGRE_Load2DMSF16( gBuf_shadowRoughness, iFragCoord, gBufSubsample ).xy; @else - pixelData.normal = normalize( OGRE_Load2D( gBuf_normals, iFragCoord, 0 ).xyz * 2.0 - 1.0 ); - float2 shadowRoughness = OGRE_Load2D( gBuf_shadowRoughness, iFragCoord, 0 ).xy; + pixelData.normal = normalize( OGRE_Load2DF16( gBuf_normals, iFragCoord, 0 ).xyz * _h( 2.0 ) - _h( 1.0 ) ); + midf2 shadowRoughness = OGRE_Load2DF16( gBuf_shadowRoughness, iFragCoord, 0 ).xy; @end - float fShadow = shadowRoughness.x; + midf fShadow = shadowRoughness.x; @property( roughness_map ) pixelData.roughness = shadowRoughness.y * 0.98 + 0.02; @@ -304,9 +315,9 @@ @insertpiece( custom_ps_preLights ) @property( !custom_disable_directional_lights ) - float fTerrainShadow = OGRE_Sample( terrainShadows, samplerStateTerra, inPs.uv0.xy ).x; + midf fTerrainShadow = OGRE_SampleF16( terrainShadows, samplerStateTerra, inPs.uv0.xy ).x; @property( !(hlms_pssm_splits || (!hlms_pssm_splits && hlms_num_shadow_map_lights && hlms_lights_directional)) ) - float fShadow = 1.0f; + midf fShadow = _h( 1.0f ); @end fShadow *= fTerrainShadow; @@ -322,8 +333,8 @@ @insertpiece( forward3dLighting ) @property( needs_env_brdf && (use_envprobe_map || hlms_use_ssr || use_planar_reflections || vct_num_probes) ) - pixelData.envColourS = float3( 0, 0, 
0 ); - pixelData.envColourD = float3( 0, 0, 0 ); + pixelData.envColourS = midf3_c( 0, 0, 0 ); + pixelData.envColourD = midf3_c( 0, 0, 0 ); @end @insertpiece( applyVoxelConeTracing ) @@ -360,25 +371,41 @@ if( vctSpecular.w == 0 ) { @end - pixelData.envColourS += lerp( passBuf.ambientLowerHemi.xyz, passBuf.ambientUpperHemi.xyz, ambientWD ); - pixelData.envColourD += lerp( passBuf.ambientLowerHemi.xyz, passBuf.ambientUpperHemi.xyz, ambientWS ); - @property( vct_num_probes ) + pixelData.envColourS += lerp( midf3_c( passBuf.ambientLowerHemi.xyz ), + midf3_c( passBuf.ambientUpperHemi.xyz ), ambientWD ); + pixelData.envColourD += lerp( midf3_c( passBuf.ambientLowerHemi.xyz ), + midf3_c( passBuf.ambientUpperHemi.xyz ), ambientWS ); + @property( vct_num_probes ) } @end - @else - pixelData.envColourS = lerp( passBuf.ambientLowerHemi.xyz, passBuf.ambientUpperHemi.xyz, ambientWD ); - pixelData.envColourD = lerp( passBuf.ambientLowerHemi.xyz, passBuf.ambientUpperHemi.xyz, ambientWS ); - @end - @end - @property( ambient_fixed && vct_num_probes ) - //Only use ambient lighting if object is outside any VCT probe - finalColour += vctSpecular.w == 0 ? float3( 0, 0, 0 ) : - (passBuf.ambientUpperHemi.xyz * pixelData.diffuse.xyz); + @else + pixelData.envColourS = lerp( midf3_c( passBuf.ambientLowerHemi.xyz ), + midf3_c( passBuf.ambientUpperHemi.xyz ), ambientWD ); + pixelData.envColourD = lerp( midf3_c( passBuf.ambientLowerHemi.xyz ), + midf3_c( passBuf.ambientUpperHemi.xyz ), ambientWS ); @end + @end + @property( ambient_fixed && vct_num_probes ) + //Only use ambient lighting if object is outside any VCT probe + finalColour += vctSpecular.w == 0 ? 
midf3_c( 0, 0, 0 ) : + (midf3_c( passBuf.ambientUpperHemi.xyz ) * pixelData.diffuse.xyz); + @end @property( needs_env_brdf ) @insertpiece( BRDF_EnvMap ) @end + + @property( hlms_fog ) + const float distToCamera = length( inPs.pos.xyz ); + const midf luminance = dot( finalColour.xyz, + midf3_c( _h( 0.212655 ), _h( 0.715158 ), _h( 0.072187 ) ) ); + const midf lumFogWeight = max( exp2( atmoSettings.fogBreakFalloff * luminance + + atmoSettings.fogBreakMinBrightness ), + _h( 0.0 ) ); + midf fogWeight = midf_c( exp2( -distToCamera * atmoSettings.fogDensity ) ); + fogWeight = lerp( _h( 1.0 ), fogWeight, lumFogWeight ); + finalColour.xyz = lerp( inPs.fog.xyz, finalColour.xyz, fogWeight ); + @end @end ///!hlms_prepass @property( !hlms_render_depth_only ) @@ -392,27 +419,27 @@ @property( hlms_alphablend ) @property( use_texture_alpha ) - outPs_colour0.w = material.F0.w * pixelData.diffuse.w; + outPs_colour0.w = _h( material.F0.w ) * pixelData.diffuse.w; @else - outPs_colour0.w = material.F0.w; + outPs_colour0.w = _h( material.F0.w ); @end @else - outPs_colour0.w = 1.0; + outPs_colour0.w = _h( 1.0 ); @end @property( debug_pssm_splits ) - outPs_colour0.xyz = mix( outPs_colour0.xyz, debugPssmSplit.xyz, 0.2f ); + outPs_colour0.xyz = lerp( outPs_colour0.xyz, debugPssmSplit.xyz, _h( 0.2f ) ); @end - @property( hlms_gen_normals_gbuffer ) - outPs_normals = float4( pixelData.normal * 0.5 + 0.5, 1.0 ); + @property( hlms_gen_normals_gbuffer ) + outPs_normals = midf4_c( pixelData.normal * _h( 0.5 ) + _h( 0.5 ), 1.0 ); @end @else - outPs_normals = float4( pixelData.normal * 0.5 + 0.5, 1.0 ); + outPs_normals = midf4_c( pixelData.normal * _h( 0.5 ) + _h( 0.5 ), 1.0 ); @property( hlms_pssm_splits ) - outPs_shadowRoughness = float2( fShadow, (pixelData.roughness - 0.02) * 1.02040816 ); + outPs_shadowRoughness = midf2_c( fShadow, (pixelData.roughness - 0.02) * 1.02040816 ); @end @property( !hlms_pssm_splits ) - outPs_shadowRoughness = float2( 1.0, (pixelData.roughness - 0.02) * 1.02040816 ); 
+ outPs_shadowRoughness = midf2_c( 1.0, (pixelData.roughness - 0.02) * 1.02040816 ); @end @end @end diff --git a/ogre2/src/media/Hlms/Terra/Any/800.VertexShader_piece_vs.any b/ogre2/src/media/Hlms/Terra/Any/800.VertexShader_piece_vs.any index 345a0d002..383e6e688 100644 --- a/ogre2/src/media/Hlms/Terra/Any/800.VertexShader_piece_vs.any +++ b/ogre2/src/media/Hlms/Terra/Any/800.VertexShader_piece_vs.any @@ -20,8 +20,11 @@ @insertpiece( TerraMaterialStructDecl ) #define material materialArray[0] @end + @insertpiece( AtmosphereNprSkyStructDecl ) @insertpiece( custom_vs_uniformStructDeclaration ) // END UNIFORM DECLARATION + + @insertpiece( DeclAtmosphereNprSkyFuncs ) @end @piece( VertexTerraTransform ) @@ -111,6 +114,10 @@ worldPos.yz = float2( -worldPos.z, worldPos.y ); @end + @property( detail_triplanar ) + outVs.worldPos = worldPos.xyz; + @end + @insertpiece( VertexTerraTransform ) outVs.uv0.xy = float2( uVertexPos.xy ) * float2( cellData.pos.w, cellData.scale.w ); @@ -125,11 +132,9 @@ @insertpiece( DoShadowCasterVS ) @end -@property( syntax != metal ) - @property( (!hlms_shadowcaster || alpha_test) && !lower_gpu_overhead ) - outVs.drawId = inVs_drawId; - @end -@else + @insertpiece( DoAtmosphereNprSky ) + +@property( syntax == metal || lower_gpu_overhead ) @property( hlms_fine_light_mask || hlms_forwardplus_fine_light_mask ) outVs.objLightMask = worldMaterialIdx[inVs_drawId].z; @end @@ -137,12 +142,20 @@ @property( use_planar_reflections ) outVs.planarReflectionIdx = (ushort)(worldMaterialIdx[inVs_drawId].w); @end +@else + @property( (!hlms_shadowcaster || alpha_test) && !lower_gpu_overhead ) + outVs.drawId = inVs_drawId; + @end @end @property( hlms_use_prepass_msaa > 1 ) outVs.zwDepth.xy = outVs_Position.zw; @end + @property( hlms_global_clip_planes ) + outVs_clipDistance0 = dot( float4( worldPos.xyz, 1.0 ), passBuf.clipPlane0.xyzw ); + @end + @property( hlms_instanced_stereo ) outVs_viewportIndex = int( inVs_stereoDrawId & 0x01u ); @end diff --git 
a/ogre2/src/media/Hlms/Terra/GLSL/PbsTerraShadows/PbsTerraShadows_piece_vs_piece_ps.glsl b/ogre2/src/media/Hlms/Terra/GLSL/PbsTerraShadows/PbsTerraShadows_piece_vs_piece_ps.glsl index 95b42365b..0f80f6605 100644 --- a/ogre2/src/media/Hlms/Terra/GLSL/PbsTerraShadows/PbsTerraShadows_piece_vs_piece_ps.glsl +++ b/ogre2/src/media/Hlms/Terra/GLSL/PbsTerraShadows/PbsTerraShadows_piece_vs_piece_ps.glsl @@ -1,12 +1,12 @@ @property( !hlms_shadowcaster && terra_enabled ) -@piece( custom_VStoPS_terra ) - float terrainShadow; +@piece( custom_VStoPS ) + midf terrainShadow; @end /// Extra per-pass global data we need for applying our /// shadows to regular objects, passed to all PBS shaders. -@piece( custom_passBuffer_terra ) +@piece( custom_passBuffer ) vec4 terraOrigin; //Normalized. i.e. -terrainOrigin / terrainDimensions //.xz = terrain 1.0 / XZ dimensions. //.y = 1.0 / terrainHeight; @@ -14,7 +14,7 @@ @end /// Add the shadows' texture to the vertex shader -@piece( custom_vs_uniformDeclaration_terra ) +@piece( custom_vs_uniformDeclaration ) vulkan_layout( ogre_t@value(terrainShadows) ) uniform texture2D terrainShadows; vulkan( layout( ogre_s@value(terrainShadows) ) uniform sampler terrainShadowSampler ); @end @@ -22,7 +22,7 @@ /// Evaluate the shadow based on world XZ position & height in the vertex shader. 
/// Doing it at the pixel shader level would be more accurate, but the difference /// is barely noticeable, and slower -@piece( custom_vs_posExecution_terra ) +@piece( custom_vs_posExecution ) @property( z_up ) vec3 terraWorldPos = vec3( worldPos.x, -worldPos.z, worldPos.y ); @else @@ -33,7 +33,7 @@ passBuf.terraOrigin.xz, 0 ).xyz; float terraHeightWeight = terraWorldPos.y * passBuf.invTerraBounds.y + passBuf.terraOrigin.y; terraHeightWeight = (terraHeightWeight - terraShadowData.y) * terraShadowData.z * 1023.0; - outVs.terrainShadow = mix( terraShadowData.x, 1.0, clamp( terraHeightWeight, 0.0, 1.0 ) ); + outVs.terrainShadow = mix( midf_c( terraShadowData.x ), _h( 1.0 ), midf_c( saturate( terraHeightWeight ) ) ); @end @property( hlms_lights_directional && hlms_num_shadow_map_lights ) diff --git a/ogre2/src/media/Hlms/Terra/GLSL/PixelShader_ps.glsl b/ogre2/src/media/Hlms/Terra/GLSL/PixelShader_ps.glsl index 186ec618f..7df903db0 100644 --- a/ogre2/src/media/Hlms/Terra/GLSL/PixelShader_ps.glsl +++ b/ogre2/src/media/Hlms/Terra/GLSL/PixelShader_ps.glsl @@ -28,7 +28,7 @@ in block @property( !hlms_render_depth_only ) @property( !hlms_shadowcaster ) @property( !hlms_prepass ) - layout(location = @counter(rtv_target), index = 0) out vec4 outColour; + layout(location = @counter(rtv_target), index = 0) out midf4 outColour; @end @property( hlms_gen_normals_gbuffer ) #define outPs_normals outNormals @@ -67,8 +67,8 @@ in block in vec4 gl_FragCoord; @end -vulkan_layout( ogre_t@value(terrainNormals) ) uniform texture2D terrainNormals; -vulkan_layout( ogre_t@value(terrainShadows) ) uniform texture2D terrainShadows; +vulkan_layout( ogre_t@value(terrainNormals) ) midf_tex uniform texture2D terrainNormals; +vulkan_layout( ogre_t@value(terrainShadows) ) midf_tex uniform texture2D terrainShadows; vulkan( layout( ogre_s@value(terrainNormals) ) uniform sampler samplerStateTerra ); @property( hlms_forwardplus ) @@ -76,21 +76,21 @@ vulkan( layout( ogre_s@value(terrainNormals) ) uniform 
sampler samplerStateTerra ReadOnlyBufferF( @value(f3dLightList), float4, f3dLightList ); @end @property( irradiance_volumes ) - vulkan_layout( ogre_t@value(irradianceVolume) ) uniform texture3D irradianceVolume; - vulkan( layout( ogre_s@value(irradianceVolume) )uniform sampler irradianceVolumeSampler ); + vulkan_layout( ogre_t@value(irradianceVolume) ) midf_tex uniform texture3D irradianceVolume; + vulkan( layout( ogre_s@value(irradianceVolume) )uniform sampler irradianceVolumeSampler ); @end @foreach( num_textures, n ) - vulkan_layout( ogre_t@value(textureMaps@n) ) uniform texture2DArray textureMaps@n;@end + vulkan_layout( ogre_t@value(textureMaps@n) ) midf_tex uniform texture2DArray textureMaps@n;@end @property( use_envprobe_map ) @property( !hlms_enable_cubemaps_auto ) - vulkan_layout( ogre_t@value(texEnvProbeMap) ) uniform textureCube texEnvProbeMap; + vulkan_layout( ogre_t@value(texEnvProbeMap) ) midf_tex uniform textureCube texEnvProbeMap; @else @property( !hlms_cubemaps_use_dpm ) - vulkan_layout( ogre_t@value(texEnvProbeMap) ) uniform textureCubeArray texEnvProbeMap; + vulkan_layout( ogre_t@value(texEnvProbeMap) ) midf_tex uniform textureCubeArray texEnvProbeMap; @else - vulkan_layout( ogre_t@value(texEnvProbeMap) ) uniform texture2DArray texEnvProbeMap; + vulkan_layout( ogre_t@value(texEnvProbeMap) ) midf_tex uniform texture2DArray texEnvProbeMap; @insertpiece( DeclDualParaboloidFunc ) @end @end @@ -108,28 +108,7 @@ vulkan( layout( ogre_s@value(terrainNormals) ) uniform sampler samplerStateTerra @insertpiece( DeclParallaxLocalCorrect ) @end -// The DeclDecalsSamplers insertpiece does not seem to do anything. 
-// The contents from DeclDecalsSamplers are manually pasted below -// This prevents a crash when there are decals in the scene -// @insertpiece( DeclDecalsSamplers ) -@property( hlms_forwardplus ) -@property( hlms_enable_decals ) - @piece( DeclDecalsSamplers ) - @property( syntax == glslvk ) - layout( ogre_s@value(decalsSampler) ) uniform sampler decalsSampler; - @end - @property( hlms_decals_diffuse ) vulkan_layout( ogre_t@value(decalsDiffuseTex) ) uniform texture2DArray decalsDiffuseTex;@end - @property( hlms_decals_normals )vulkan_layout( ogre_t@value(decalsNormalsTex) ) uniform texture2DArray decalsNormalsTex;@end - @property( hlms_decals_diffuse == hlms_decals_emissive ) - #define decalsEmissiveTex decalsDiffuseTex - @end - @property( hlms_decals_emissive && hlms_decals_diffuse != hlms_decals_emissive ) - vulkan_layout( ogre_t@value(decalsEmissiveTex) ) uniform texture2DArray decalsEmissiveTex; - @end - @end -@end -@end - +@insertpiece( DeclDecalsSamplers ) @insertpiece( DeclShadowMapMacros ) @insertpiece( DeclShadowSamplers ) diff --git a/ogre2/src/media/Hlms/Terra/GLSL/VertexShader_vs.glsl b/ogre2/src/media/Hlms/Terra/GLSL/VertexShader_vs.glsl index 6c8a1cee0..35d9bf77f 100644 --- a/ogre2/src/media/Hlms/Terra/GLSL/VertexShader_vs.glsl +++ b/ogre2/src/media/Hlms/Terra/GLSL/VertexShader_vs.glsl @@ -4,7 +4,7 @@ out gl_PerVertex { vec4 gl_Position; -@property( hlms_pso_clip_distances ) +@property( hlms_pso_clip_distances && !hlms_emulate_clip_distances ) float gl_ClipDistance[@value(hlms_pso_clip_distances)]; @end }; diff --git a/ogre2/src/media/Hlms/Terra/GLSLES/PbsTerraShadows/PbsTerraShadows_piece_vs_piece_ps.glsl b/ogre2/src/media/Hlms/Terra/GLSLES/PbsTerraShadows/PbsTerraShadows_piece_vs_piece_ps.glsl new file mode 100644 index 000000000..0a13d01d6 --- /dev/null +++ b/ogre2/src/media/Hlms/Terra/GLSLES/PbsTerraShadows/PbsTerraShadows_piece_vs_piece_ps.glsl @@ -0,0 +1,38 @@ +@property( !hlms_shadowcaster && terra_enabled ) + +@piece( custom_VStoPS ) + float 
terrainShadow; +@end + +/// Extra per-pass global data we need for applying our +/// shadows to regular objects, passed to all PBS shaders. +@piece( custom_passBuffer ) + vec4 terraOrigin; //Normalized. i.e. -terrainOrigin / terrainDimensions + //.xz = terrain 1.0 / XZ dimensions. + //.y = 1.0 / terrainHeight; + vec4 invTerraBounds; +@end + +/// Add the shadows' texture to the vertex shader +@piece( custom_vs_uniformDeclaration ) + uniform sampler2D terrainShadows; +@end + +/// Evaluate the shadow based on world XZ position & height in the vertex shader. +/// Doing it at the pixel shader level would be more accurate, but the difference +/// is barely noticeable, and slower +@piece( custom_vs_posExecution ) + vec3 terraShadowData = textureLod( terrainShadows, worldPos.xz * passBuf.invTerraBounds.xz + passBuf.terraOrigin.xz, 0 ).xyz; + float terraHeightWeight = worldPos.y * passBuf.invTerraBounds.y + passBuf.terraOrigin.y; + terraHeightWeight = (terraHeightWeight - terraShadowData.y) * terraShadowData.z * 1023.0; + outVs.terrainShadow = mix( terraShadowData.x, 1.0, clamp( terraHeightWeight, 0.0, 1.0 ) ); +@end + +@property( hlms_lights_directional ) + @piece( custom_ps_preLights )fShadow *= inPs.terrainShadow;@end +@end @property( !hlms_num_shadow_map_lights ) + @piece( custom_ps_preLights )float fShadow = inPs.terrainShadow;@end + @piece( DarkenWithShadowFirstLight )* fShadow@end +@end + +@end diff --git a/ogre2/src/media/Hlms/Terra/GLSLES/PixelShader_ps.glsl b/ogre2/src/media/Hlms/Terra/GLSLES/PixelShader_ps.glsl new file mode 100644 index 000000000..a30680158 --- /dev/null +++ b/ogre2/src/media/Hlms/Terra/GLSLES/PixelShader_ps.glsl @@ -0,0 +1,334 @@ +@property( false ) +@insertpiece( SetCrossPlatformSettings ) +@insertpiece( SetCompatibilityLayer ) + +layout(std140) uniform; +#define FRAG_COLOR 0 +layout(location = FRAG_COLOR) out vec4 outColour; + +uniform sampler2D terrainNormals; + +in block +{ +@insertpiece( Terra_VStoPS_block ) +} inPs; + +void main() +{ + 
outColour = vec4( inPs.uv0.xy, 0.0, 1.0 ); +} + +@end +@property( !false ) +@insertpiece( SetCrossPlatformSettings ) +@property( GL3+ < 430 ) + @property( hlms_tex_gather )#extension GL_ARB_texture_gather: require@end +@end + +@property( hlms_amd_trinary_minmax )#extension GL_AMD_shader_trinary_minmax: require@end +@insertpiece( SetCompatibilityLayer ) + +layout(std140) uniform; +#define FRAG_COLOR 0 +layout(location = FRAG_COLOR) out vec4 outColour; + +@property( hlms_vpos ) +in vec4 gl_FragCoord; +@end + +// START UNIFORM DECLARATION +@insertpiece( PassDecl ) +@insertpiece( TerraMaterialDecl ) +@insertpiece( TerraInstanceDecl ) +@insertpiece( custom_ps_uniformDeclaration ) +// END UNIFORM DECLARATION +in block +{ +@insertpiece( Terra_VStoPS_block ) +} inPs; + +uniform sampler2D terrainNormals; +uniform sampler2D terrainShadows; + +@property( hlms_forwardplus ) +/*layout(binding = 1) */uniform usamplerBuffer f3dGrid; +/*layout(binding = 2) */uniform samplerBuffer f3dLightList; +@end +@property( num_textures )uniform sampler2DArray textureMaps[@value( num_textures )];@end +@property( envprobe_map )uniform samplerCube texEnvProbeMap;@end + +vec4 diffuseCol; +@insertpiece( FresnelType ) F0; +float ROUGHNESS; + +vec3 nNormal; + +@property( hlms_lights_spot_textured )@insertpiece( DeclQuat_zAxis ) +vec3 qmul( vec4 q, vec3 v ) +{ + return v + 2.0 * cross( cross( v, q.xyz ) + q.w * v, q.xyz ); +} +@end + +@property( detail_maps_normal )vec3 getTSDetailNormal( sampler2DArray normalMap, vec3 uv ) +{ + vec3 tsNormal; +@property( signed_int_textures ) + //Normal texture must be in U8V8 or BC5 format! + tsNormal.xy = texture( normalMap, uv ).xy; +@end @property( !signed_int_textures ) + //Normal texture must be in LA format! 
+ tsNormal.xy = texture( normalMap, uv ).xw * 2.0 - 1.0; +@end + tsNormal.z = sqrt( max( 0.0, 1.0 - tsNormal.x * tsNormal.x - tsNormal.y * tsNormal.y ) ); + + return tsNormal; +} + @foreach( 4, n ) + @property( normal_weight_detail@n ) + @piece( detail@n_nm_weight_mul ) * material.normalWeights.@insertpiece( detail_swizzle@n )@end + @end + @end +@end + +@insertpiece( DeclareBRDF ) + +@insertpiece( DeclShadowMapMacros ) +@insertpiece( DeclShadowSamplers ) +@insertpiece( DeclShadowSamplingFuncs ) + +void main() +{ + @insertpiece( custom_ps_preExecution ) + + @insertpiece( custom_ps_posMaterialLoad ) + +//Prepare weight map for the detail maps. +@property( detail_weight_map ) + vec4 detailWeights = texture( textureMaps[@value( detail_weight_map_idx )], + vec3( inPs.uv0.xy, @value(detail_weight_map_idx_slice) ) ); +@end @property( !detail_weight_map ) + vec4 detailWeights = vec4( 1.0, 1.0, 1.0, 1.0 ); +@end + +@property( diffuse_map ) + diffuseCol = texture( textureMaps[@value( diffuse_map_idx )], vec3( inPs.uv0.xy, @value(diffuse_map_idx_slice) ) ); +@end + + /// Sample detail maps +@foreach( 4, n ) + @property( detail_map@n ) + vec3 detailCol@n = texture( textureMaps[@value(detail_map@n_idx)], + vec3( inPs.uv0.xy * material.detailOffsetScale[@value(currOffsetDetail)].zw + + material.detailOffsetScale[@value(currOffsetDetail)].xy, + @value(detail_map@n_idx_slice) ) ).xyz; + @end @property( !detail_map@n ) + vec3 detailCol@n = vec3( 0, 0, 0 ); + @end + + @property( metalness_map@n ) + float metalness@n = texture( textureMaps[@value( metalness_map@n_idx )], + vec3( inPs.uv0.xy * material.detailOffsetScale[@value(currOffsetDetail)].zw + + material.detailOffsetScale[@value(currOffsetDetail)].xy, + @value( metalness_map@n_idx_slice ) ) ).x; + @end @property( !metalness_map@n ) + float metalness@n = 0; + @end + + @property( roughness_map@n ) + float roughness@n = texture( textureMaps[@value( roughness_map@n_idx )], + vec3( inPs.uv0.xy * 
material.detailOffsetScale[@value(currOffsetDetail)].zw + + material.detailOffsetScale[@value(currOffsetDetail)].xy, + @value( roughness_map@n_idx_slice ) ) ).x; + @end @property( !roughness_map@n ) + float roughness@n = 0; + @end + + @add( currOffsetDetail, 1 ) +@end + + float metalness = (metalness0 * detailWeights.x * material.metalness.x + + metalness1 * detailWeights.y * material.metalness.y) + + (metalness2 * detailWeights.z * material.metalness.z + + metalness3 * detailWeights.w * material.metalness.w); + + ROUGHNESS = (roughness0 * detailWeights.x * material.roughness.x + + roughness1 * detailWeights.y * material.roughness.y) + + (roughness2 * detailWeights.z * material.roughness.z + + roughness3 * detailWeights.w * material.roughness.w); + ROUGHNESS = max( ROUGHNESS, 0.02 ); + +@property( diffuse_map ) + diffuseCol.xyz *= (detailCol0 * detailWeights.x + detailCol1 * detailWeights.y) + + (detailCol2 * detailWeights.z + detailCol3 * detailWeights.w); +@end @property( !diffuse_map ) + @property( detail_maps_diffuse ) + diffuseCol.xyz = (detailCol0 * detailWeights.x + detailCol1 * detailWeights.y) + + (detailCol2 * detailWeights.z + detailCol3 * detailWeights.w); + @end @property( !detail_maps_diffuse ) + diffuseCol.xyzw = vec4( 1, 1, 1, 1 ); + @end +@end + + /// Apply the material's diffuse over the textures + diffuseCol.xyz *= material.kD.xyz; + + //Calculate F0 from metalness, and dim kD as metalness gets bigger. 
+ F0 = mix( vec3( 0.03f ), @insertpiece( kD ).xyz * 3.14159f, metalness ); + @insertpiece( kD ).xyz = @insertpiece( kD ).xyz - @insertpiece( kD ).xyz * metalness; + +@property( !detail_maps_normal ) + // Geometric normal + nNormal = texture( terrainNormals, inPs.uv0.xy ).xyz * 2.0 - 1.0; + //nNormal.xz = texture( terrainNormals, inPs.uv0.xy ).xy; + //nNormal.y = sqrt( max( 1.0 - nNormal.x * nNormal.x - nNormal.z * nNormal.z, 0.0 ) ); + nNormal = nNormal * mat3(passBuf.view); +@end @property( detail_maps_normal ) + vec3 geomNormal = texture( terrainNormals, inPs.uv0.xy ).xyz * 2.0 - 1.0; + geomNormal = geomNormal * mat3(passBuf.view); + + //Get the TBN matrix + vec3 viewSpaceUnitX = vec3( passBuf.view[0].x, passBuf.view[1].x, passBuf.view[2].x ); + vec3 vTangent = normalize( cross( geomNormal, viewSpaceUnitX ) ); + vec3 vBinormal = cross( vTangent, geomNormal ); + mat3 TBN = mat3( vBinormal, vTangent, geomNormal ); +@end + + float fTerrainShadow = texture( terrainShadows, inPs.uv0.xy ).x; + @property( !(hlms_pssm_splits || (!hlms_pssm_splits && hlms_num_shadow_map_lights && hlms_lights_directional)) ) + float fShadow = 1.0f; + @end + @insertpiece( DoDirectionalShadowMaps ) + fShadow *= fTerrainShadow; + + /// The first iteration must initialize nNormal instead of try to merge with it. + /// Blend the detail normal maps with the main normal. 
+@foreach( second_valid_detail_map_nm, n, first_valid_detail_map_nm ) + vec3 vDetail = @insertpiece( SampleDetailMapNm@n ) * detailWeights.@insertpiece(detail_swizzle@n); + nNormal.xy = vDetail.xy; + nNormal.z = vDetail.z + 1.0 - detailWeights.@insertpiece(detail_swizzle@n);@end +@foreach( detail_maps_normal, n, second_valid_detail_map_nm )@property( detail_map_nm@n ) + vDetail = @insertpiece( SampleDetailMapNm@n ) * detailWeights.@insertpiece(detail_swizzle@n); + nNormal.xy += vDetail.xy; + nNormal.z *= vDetail.z + 1.0 - detailWeights.@insertpiece(detail_swizzle@n);@end @end + +@property( detail_maps_normal ) + nNormal = normalize( TBN * nNormal ); +@end + + //Everything's in Camera space +@property( hlms_lights_spot || ambient_hemisphere || envprobe_map || hlms_forwardplus ) + vec3 viewDir = normalize( -inPs.pos ); + float NdotV = clamp( dot( nNormal, viewDir ), 0.0, 1.0 );@end + +@property( !ambient_fixed ) + vec3 finalColour = vec3(0); +@end @property( ambient_fixed ) + vec3 finalColour = passBuf.ambientUpperHemi.xyz * @insertpiece( kD ).xyz; +@end + + @insertpiece( custom_ps_preLights ) + +@property( !custom_disable_directional_lights ) +@property( hlms_lights_directional ) + finalColour += BRDF( passBuf.lights[0].position.xyz, viewDir, NdotV, passBuf.lights[0].diffuse, passBuf.lights[0].specular ) @insertpiece(DarkenWithShadowFirstLight); +@end +@foreach( hlms_lights_directional, n, 1 ) + finalColour += BRDF( passBuf.lights[@n].position.xyz, viewDir, NdotV, passBuf.lights[@n].diffuse, passBuf.lights[@n].specular )@insertpiece( DarkenWithShadow );@end +@foreach( hlms_lights_directional_non_caster, n, hlms_lights_directional ) + finalColour += BRDF( passBuf.lights[@n].position.xyz, viewDir, NdotV, passBuf.lights[@n].diffuse, passBuf.lights[@n].specular );@end +@end + +@property( hlms_lights_point || hlms_lights_spot ) vec3 lightDir; + float fDistance; + vec3 tmpColour; + float spotCosAngle;@end + + //Point lights +@foreach( hlms_lights_point, n, 
hlms_lights_directional_non_caster ) + lightDir = passBuf.lights[@n].position.xyz - inPs.pos; + fDistance= length( lightDir ); + if( fDistance <= passBuf.lights[@n].attenuation.x ) + { + lightDir *= 1.0 / fDistance; + tmpColour = BRDF( lightDir, viewDir, NdotV, passBuf.lights[@n].diffuse, passBuf.lights[@n].specular )@insertpiece( DarkenWithShadowPoint ); + float atten = 1.0 / (0.5 + (passBuf.lights[@n].attenuation.y + passBuf.lights[@n].attenuation.z * fDistance) * fDistance ); + finalColour += tmpColour * atten; + }@end + + //Spot lights + //spotParams[@value(spot_params)].x = 1.0 / cos( InnerAngle ) - cos( OuterAngle ) + //spotParams[@value(spot_params)].y = cos( OuterAngle / 2 ) + //spotParams[@value(spot_params)].z = falloff +@foreach( hlms_lights_spot, n, hlms_lights_point ) + lightDir = passBuf.lights[@n].position.xyz - inPs.pos; + fDistance= length( lightDir ); +@property( !hlms_lights_spot_textured ) spotCosAngle = dot( normalize( inPs.pos - passBuf.lights[@n].position.xyz ), passBuf.lights[@n].spotDirection );@end +@property( hlms_lights_spot_textured ) spotCosAngle = dot( normalize( inPs.pos - passBuf.lights[@n].position.xyz ), zAxis( passBuf.lights[@n].spotQuaternion ) );@end + if( fDistance <= passBuf.lights[@n].attenuation.x && spotCosAngle >= passBuf.lights[@n].spotParams.y ) + { + lightDir *= 1.0 / fDistance; + @property( hlms_lights_spot_textured ) + vec3 posInLightSpace = qmul( spotQuaternion[@value(spot_params)], inPs.pos ); + float spotAtten = texture( texSpotLight, normalize( posInLightSpace ).xy ).x; + @end + @property( !hlms_lights_spot_textured ) + float spotAtten = clamp( (spotCosAngle - passBuf.lights[@n].spotParams.y) * passBuf.lights[@n].spotParams.x, 0.0, 1.0 ); + spotAtten = pow( spotAtten, passBuf.lights[@n].spotParams.z ); + @end + tmpColour = BRDF( lightDir, viewDir, NdotV, passBuf.lights[@n].diffuse, passBuf.lights[@n].specular )@insertpiece( DarkenWithShadow ); + float atten = 1.0 / (0.5 + (passBuf.lights[@n].attenuation.y + 
passBuf.lights[@n].attenuation.z * fDistance) * fDistance ); + finalColour += tmpColour * (atten * spotAtten); + }@end + +@insertpiece( forward3dLighting ) + +@property( envprobe_map || ambient_hemisphere ) + vec3 reflDir = 2.0 * dot( viewDir, nNormal ) * nNormal - viewDir; + + @property( envprobe_map ) + vec3 envColourS = textureLod( texEnvProbeMap, reflDir * passBuf.invViewMatCubemap, ROUGHNESS * 12.0 ).xyz @insertpiece( ApplyEnvMapScale );// * 0.0152587890625; + vec3 envColourD = textureLod( texEnvProbeMap, nNormal * passBuf.invViewMatCubemap, 11.0 ).xyz @insertpiece( ApplyEnvMapScale );// * 0.0152587890625; + @property( !hw_gamma_read ) //Gamma to linear space + envColourS = envColourS * envColourS; + envColourD = envColourD * envColourD; + @end + @end + @property( ambient_hemisphere ) + float ambientWD = dot( passBuf.ambientHemisphereDir.xyz, nNormal ) * 0.5 + 0.5; + float ambientWS = dot( passBuf.ambientHemisphereDir.xyz, reflDir ) * 0.5 + 0.5; + + @property( envprobe_map ) + envColourS += mix( passBuf.ambientLowerHemi.xyz, passBuf.ambientUpperHemi.xyz, ambientWD ); + envColourD += mix( passBuf.ambientLowerHemi.xyz, passBuf.ambientUpperHemi.xyz, ambientWS ); + @end @property( !envprobe_map ) + vec3 envColourS = mix( passBuf.ambientLowerHemi.xyz, passBuf.ambientUpperHemi.xyz, ambientWD ); + vec3 envColourD = mix( passBuf.ambientLowerHemi.xyz, passBuf.ambientUpperHemi.xyz, ambientWS ); + @end + @end + + @insertpiece( BRDF_EnvMap ) +@end +@property( !hw_gamma_write ) + //Linear to Gamma space + outColour.xyz = sqrt( finalColour ); +@end @property( hw_gamma_write ) + outColour.xyz = finalColour; +@end + +@property( hlms_alphablend ) + @property( use_texture_alpha ) + outColour.w = material.F0.w * diffuseCol.w; + @end @property( !use_texture_alpha ) + outColour.w = material.F0.w; + @end +@end @property( !hlms_alphablend ) + outColour.w = 1.0;@end + + @property( debug_pssm_splits ) + outColour.xyz = mix( outColour.xyz, debugPssmSplit.xyz, 0.2f ); + @end + + 
@insertpiece( custom_ps_posExecution ) +} +@end diff --git a/ogre2/src/media/Hlms/Terra/GLSLES/Structs_piece_vs_piece_ps.glsl b/ogre2/src/media/Hlms/Terra/GLSLES/Structs_piece_vs_piece_ps.glsl new file mode 100644 index 000000000..21f7c4fea --- /dev/null +++ b/ogre2/src/media/Hlms/Terra/GLSLES/Structs_piece_vs_piece_ps.glsl @@ -0,0 +1,46 @@ + +@piece( TerraMaterialDecl ) +layout_constbuffer(binding = 1) uniform MaterialBuf +{ + /* kD is already divided by PI to make it energy conserving. + (formula is finalDiffuse = NdotL * surfaceDiffuse / PI) + */ + vec4 kD; //kD.w is padding + vec4 roughness; + vec4 metalness; + vec4 detailOffsetScale[4]; + + @insertpiece( custom_materialBuffer ) +} material; +@end + +@piece( TerraInstanceDecl ) +struct CellData +{ + //.x = numVertsPerLine + //.y = lodLevel + //.z = vao->getPrimitiveCount() / m_verticesPerLine - 2u + //.w = skirtY (float) + uvec4 numVertsPerLine; + ivec4 xzTexPosBounds; //XZXZ + vec4 pos; //.w contains 1.0 / xzTexPosBounds.z + vec4 scale; //.w contains 1.0 / xzTexPosBounds.w +}; + +layout_constbuffer(binding = 2) uniform InstanceBuffer +{ + CellData cellData[256]; +} instance; +@end + +@piece( Terra_VStoPS_block ) + //flat uint drawId; + vec3 pos; + vec2 uv0; + + @foreach( hlms_num_shadow_map_lights, n ) + @property( !hlms_shadowmap@n_is_point_light ) + vec4 posL@n;@end @end + @property( hlms_pssm_splits )float depth;@end + @insertpiece( custom_VStoPS ) +@end diff --git a/ogre2/src/media/Hlms/Terra/GLSLES/Textures_piece_ps.glsl b/ogre2/src/media/Hlms/Terra/GLSLES/Textures_piece_ps.glsl new file mode 100644 index 000000000..5ae13440d --- /dev/null +++ b/ogre2/src/media/Hlms/Terra/GLSLES/Textures_piece_ps.glsl @@ -0,0 +1,12 @@ + +@undefpiece( kD ) +@piece( kD )diffuseCol@end + +@undefpiece( kS ) +@piece( kS )vec3( 1, 1, 1 )@end + +@foreach( detail_maps_normal, n ) + @undefpiece( SampleDetailMapNm@n ) + @piece( SampleDetailMapNm@n )getTSDetailNormal( textureMaps[@value(detail_map_nm@n_idx)], vec3( inPs.uv0.xy * 
material.detailOffsetScale[@n].zw + + material.detailOffsetScale[@n].xy , @value(detail_map_nm@n_idx_slice) ) )@end +@end diff --git a/ogre2/src/media/Hlms/Terra/GLSLES/VertexShader_vs.glsl b/ogre2/src/media/Hlms/Terra/GLSLES/VertexShader_vs.glsl new file mode 100644 index 000000000..2377a38b5 --- /dev/null +++ b/ogre2/src/media/Hlms/Terra/GLSLES/VertexShader_vs.glsl @@ -0,0 +1,126 @@ +@insertpiece( SetCrossPlatformSettings ) +@insertpiece( SetCompatibilityLayer ) + +@property( GL3+ ) +out gl_PerVertex +{ + vec4 gl_Position; +}; +@end + +layout(std140) uniform; + +//To render a 2x2 (quads) terrain: +//You'll normally need 6 vertices per line + 2 for degenerates. +//You'll need 8 vertices per line. +//So you'll need a total of 16 vertices. + +//To render a 4x2 (quads) terrain: +//You'll need 10 vertices per line. +//If we include degenerate vertices, you'll need 12 per line +//So you'll need a total of 24 vertices. +//in int gl_VertexID; + +@property( GL_ARB_base_instance ) + in uint drawId; +@end + +@insertpiece( custom_vs_attributes ) + +out block +{ +@insertpiece( Terra_VStoPS_block ) +} outVs; + +// START UNIFORM DECLARATION +@insertpiece( PassDecl ) +@insertpiece( TerraInstanceDecl ) +uniform sampler2D heightMap; +@insertpiece( custom_vs_uniformDeclaration ) +@property( !GL_ARB_base_instance )uniform uint baseInstance;@end +// END UNIFORM DECLARATION + +@piece( VertexTransform ) + //Lighting is in view space + outVs.pos = ( vec4(worldPos.xyz, 1.0f) * passBuf.view ).xyz; +@property( !hlms_dual_paraboloid_mapping ) + gl_Position = vec4(worldPos.xyz, 1.0f) * passBuf.viewProj;@end +@property( hlms_dual_paraboloid_mapping ) + //Dual Paraboloid Mapping + gl_Position.w = 1.0f; + gl_Position.xyz = outVs.pos; + float L = length( gl_Position.xyz ); + gl_Position.z += 1.0f; + gl_Position.xy /= gl_Position.z; + gl_Position.z = (L - NearPlane) / (FarPlane - NearPlane);@end +@end + +void main() +{ +@property( !GL_ARB_base_instance ) + uint drawId = baseInstance + uint( 
gl_InstanceID ); +@end + + @insertpiece( custom_vs_preExecution ) + + CellData cellData = instance.cellData[drawId]; + + //Map pointInLine from range [0; 12) to range [0; 9] so that it reads: + // 0 0 1 2 3 4 5 6 7 8 9 9 + uint pointInLine = uint(gl_VertexID) % (cellData.numVertsPerLine.x); //cellData.numVertsPerLine.x = 12 + pointInLine = uint(clamp( int(pointInLine) - 1, 0, int(cellData.numVertsPerLine.x - 3u) )); + + uvec2 uVertexPos; + + uVertexPos.x = pointInLine >> 1u; + //Even numbers are the next line, odd numbers are current line. + uVertexPos.y = (pointInLine & 0x01u) == 0u ? 1u : 0u; + uVertexPos.y += uint(gl_VertexID) / cellData.numVertsPerLine.x; + //uVertexPos.y += floor( (float)gl_VertexID / (float)cellData.numVertsPerLine ); Could be faster on GCN. + +@property( use_skirts ) + //Apply skirt. + bool isSkirt =( pointInLine.x <= 1u || + pointInLine.x >= (cellData.numVertsPerLine.x - 4u) || + uVertexPos.y == 0u || + uVertexPos.y == (cellData.numVertsPerLine.z + 2u) ); + + //Now shift X position for the left & right skirts + uVertexPos.x = uint( max( int(uVertexPos.x) - 1, 0 ) ); + uVertexPos.x = min( uVertexPos.x, ((cellData.numVertsPerLine.x - 7u) >> 1u) ); + // uVertexPos.x becomes: + // 0 0 0 1 1 2 2 3 3 4 4 4 + // 0 0 0 0 0 1 1 2 2 3 3 3 + // 0 0 0 0 0 1 1 2 2 2 2 2 + + //Now shift Y position for the front & back skirts + uVertexPos.y = uint( max( int(uVertexPos.y) - 1, 0 ) ); + uVertexPos.y = min( uVertexPos.y, cellData.numVertsPerLine.z ); +@end + + uint lodLevel = cellData.numVertsPerLine.y; + uVertexPos = uVertexPos << lodLevel; + + uVertexPos.xy = uvec2( clamp( ivec2(uVertexPos.xy) + cellData.xzTexPosBounds.xy, + ivec2( 0, 0 ), cellData.xzTexPosBounds.zw ) ); + + vec3 worldPos; + worldPos.y = texelFetch( heightMap, ivec2( uVertexPos.xy ), 0 ).x; +@property( use_skirts ) + worldPos.y = isSkirt ? 
uintBitsToFloat(cellData.numVertsPerLine.w) : worldPos.y; +@end + worldPos.xz = uVertexPos.xy; + worldPos.xyz = worldPos.xyz * cellData.scale.xyz + cellData.pos.xyz; + + @insertpiece( VertexTransform ) + + outVs.uv0.xy = vec2( uVertexPos.xy ) * vec2( cellData.pos.w, cellData.scale.w ); + + @insertpiece( DoShadowReceiveVS ) + +@property( hlms_pssm_splits ) outVs.depth = gl_Position.z;@end + + //outVs.drawId = drawId; + + @insertpiece( custom_vs_posExecution ) +} diff --git a/ogre2/src/media/Hlms/Terra/HLSL/PbsTerraShadows/PbsTerraShadows_piece_vs_piece_ps.hlsl b/ogre2/src/media/Hlms/Terra/HLSL/PbsTerraShadows/PbsTerraShadows_piece_vs_piece_ps.hlsl index daeadd43a..305e91568 100644 --- a/ogre2/src/media/Hlms/Terra/HLSL/PbsTerraShadows/PbsTerraShadows_piece_vs_piece_ps.hlsl +++ b/ogre2/src/media/Hlms/Terra/HLSL/PbsTerraShadows/PbsTerraShadows_piece_vs_piece_ps.hlsl @@ -1,12 +1,12 @@ @property( !hlms_shadowcaster && terra_enabled ) -@piece( custom_VStoPS_terra ) - float terrainShadow : TEXCOORD@counter(texcoord); +@piece( custom_VStoPS ) + midf terrainShadow : TEXCOORD@counter(texcoord); @end /// Extra per-pass global data we need for applying our /// shadows to regular objects, passed to all PBS shaders. -@piece( custom_passBuffer_terra ) +@piece( custom_passBuffer ) float4 terraOrigin; //Normalized. i.e. -terrainOrigin / terrainDimensions //.xz = terrain 1.0 / XZ dimensions. //.y = 1.0 / terrainHeight; @@ -14,7 +14,7 @@ @end /// Add the shadows' texture to the vertex shader -@piece( custom_vs_uniformDeclaration_terra ) +@piece( custom_vs_uniformDeclaration ) SamplerState terrainShadowSampler : register(s@value(terrainShadows)); Texture2D terrainShadows : register(t@value(terrainShadows)); @end @@ -22,7 +22,7 @@ /// Evaluate the shadow based on world XZ position & height in the vertex shader. 
/// Doing it at the pixel shader level would be more accurate, but the difference /// is barely noticeable, and slower -@piece( custom_vs_posExecution_terra ) +@piece( custom_vs_posExecution ) @property( z_up ) float3 terraWorldPos = float3( worldPos.x, -worldPos.z, worldPos.y ); @else @@ -33,7 +33,7 @@ 0 ).xyz; float terraHeightWeight = terraWorldPos.y * passBuf.invTerraBounds.y + passBuf.terraOrigin.y; terraHeightWeight = (terraHeightWeight - terraShadowData.y) * terraShadowData.z * 1023.0; - outVs.terrainShadow = lerp( terraShadowData.x, 1.0, saturate( terraHeightWeight ) ); + outVs.terrainShadow = lerp( midf_c( terraShadowData.x ), _h( 1.0 ), midf_c( saturate( terraHeightWeight ) ) ); @end @property( hlms_lights_directional && hlms_num_shadow_map_lights ) diff --git a/ogre2/src/media/Hlms/Terra/Metal/PbsTerraShadows/PbsTerraShadows_piece_vs_piece_ps.metal b/ogre2/src/media/Hlms/Terra/Metal/PbsTerraShadows/PbsTerraShadows_piece_vs_piece_ps.metal index 9633edece..e8e6548ae 100644 --- a/ogre2/src/media/Hlms/Terra/Metal/PbsTerraShadows/PbsTerraShadows_piece_vs_piece_ps.metal +++ b/ogre2/src/media/Hlms/Terra/Metal/PbsTerraShadows/PbsTerraShadows_piece_vs_piece_ps.metal @@ -1,12 +1,12 @@ @property( !hlms_shadowcaster && terra_enabled ) -@piece( custom_VStoPS_terra ) - float terrainShadow; +@piece( custom_VStoPS ) + midf terrainShadow; @end /// Extra per-pass global data we need for applying our /// shadows to regular objects, passed to all PBS shaders. -@piece( custom_passBuffer_terra ) +@piece( custom_passBuffer ) float4 terraOrigin; //Normalized. i.e. -terrainOrigin / terrainDimensions //.xz = terrain 1.0 / XZ dimensions. 
//.y = 1.0 / terrainHeight; @@ -14,7 +14,7 @@ @end /// Add the shadows' texture to the vertex shader -@piece( custom_vs_uniformDeclaration_terra ) +@piece( custom_vs_uniformDeclaration ) , sampler terrainShadowSampler [[sampler(@value(terrainShadows))]] , texture2d terrainShadows [[texture(@value(terrainShadows))]] @end @@ -22,7 +22,7 @@ /// Evaluate the shadow based on world XZ position & height in the vertex shader. /// Doing it at the pixel shader level would be more accurate, but the difference /// is barely noticeable, and slower -@piece( custom_vs_posExecution_terra ) +@piece( custom_vs_posExecution ) @property( z_up ) float3 terraWorldPos = float3( worldPos.x, -worldPos.z, worldPos.y ); @else @@ -33,7 +33,7 @@ level(0) ).xyz; float terraHeightWeight = terraWorldPos.y * passBuf.invTerraBounds.y + passBuf.terraOrigin.y; terraHeightWeight = (terraHeightWeight - terraShadowData.y) * terraShadowData.z * 1023.0; - outVs.terrainShadow = lerp( terraShadowData.x, 1.0, saturate( terraHeightWeight ) ); + outVs.terrainShadow = lerp( midf_c( terraShadowData.x ), _h( 1.0 ), midf_c( saturate( terraHeightWeight ) ) ); @end @property( hlms_lights_directional && hlms_num_shadow_map_lights ) diff --git a/ogre2/src/media/Hlms/Terra/Metal/PixelShader_ps.metal b/ogre2/src/media/Hlms/Terra/Metal/PixelShader_ps.metal index b70be5100..03c0522f7 100644 --- a/ogre2/src/media/Hlms/Terra/Metal/PixelShader_ps.metal +++ b/ogre2/src/media/Hlms/Terra/Metal/PixelShader_ps.metal @@ -72,11 +72,12 @@ fragment @insertpiece( output_type ) main_metal @insertpiece( TerraMaterialDecl ) @insertpiece( PccManualProbeDecl ) @end + @insertpiece( AtmosphereNprSkyDecl ) @insertpiece( custom_ps_uniformDeclaration ) // END UNIFORM DECLARATION - , texture2d terrainNormals [[texture(@value(terrainNormals))]] - , texture2d terrainShadows [[texture(@value(terrainShadows))]] + , texture2d terrainNormals [[texture(@value(terrainNormals))]] + , texture2d terrainShadows [[texture(@value(terrainShadows))]] , sampler 
samplerStateTerra [[sampler(@value(terrainNormals))]] @property( hlms_forwardplus ) @@ -86,11 +87,11 @@ fragment @insertpiece( output_type ) main_metal @property( hlms_use_prepass ) @property( !hlms_use_prepass_msaa ) - , texture2d gBuf_normals [[texture(@value(gBuf_normals))]] - , texture2d gBuf_shadowRoughness [[texture(@value(gBuf_shadowRoughness))]] + , texture2d gBuf_normals [[texture(@value(gBuf_normals))]] + , texture2d gBuf_shadowRoughness [[texture(@value(gBuf_shadowRoughness))]] @end @property( hlms_use_prepass_msaa ) - , texture2d_ms gBuf_normals [[texture(@value(gBuf_normals))]] - , texture2d_ms gBuf_shadowRoughness[[texture(@value(gBuf_shadowRoughness))]] + , texture2d_ms gBuf_normals [[texture(@value(gBuf_normals))]] + , texture2d_ms gBuf_shadowRoughness[[texture(@value(gBuf_shadowRoughness))]] @end @property( hlms_use_ssr ) @@ -103,20 +104,20 @@ fragment @insertpiece( output_type ) main_metal @property( irradiance_volumes ) - , texture3d irradianceVolume [[texture(@value(irradianceVolume))]] + , texture3d irradianceVolume [[texture(@value(irradianceVolume))]] , sampler irradianceVolumeSampler [[sampler(@value(irradianceVolume))]] @end @foreach( num_textures, n ) - , texture2d_array textureMaps@n [[texture(@value(textureMaps@n))]]@end + , texture2d_array textureMaps@n [[texture(@value(textureMaps@n))]]@end @property( use_envprobe_map ) @property( !hlms_enable_cubemaps_auto ) - , texturecube texEnvProbeMap [[texture(@value(texEnvProbeMap))]] + , texturecube texEnvProbeMap [[texture(@value(texEnvProbeMap))]] @else @property( !hlms_cubemaps_use_dpm ) - , texturecube_array texEnvProbeMap [[texture(@value(texEnvProbeMap))]] + , texturecube_array texEnvProbeMap [[texture(@value(texEnvProbeMap))]] @else - , texture2d_array texEnvProbeMap [[texture(@value(texEnvProbeMap))]] + , texture2d_array texEnvProbeMap [[texture(@value(texEnvProbeMap))]] @end @end @property( envMapRegSampler < samplerStateStart ) diff --git 
a/ogre2/src/media/Hlms/Terra/Metal/VertexShader_vs.metal b/ogre2/src/media/Hlms/Terra/Metal/VertexShader_vs.metal index 302272d11..b6eac9773 100644 --- a/ogre2/src/media/Hlms/Terra/Metal/VertexShader_vs.metal +++ b/ogre2/src/media/Hlms/Terra/Metal/VertexShader_vs.metal @@ -37,6 +37,7 @@ vertex PS_INPUT main_metal @property( hlms_shadowcaster ) @insertpiece( MaterialDecl ) @end + @insertpiece( AtmosphereNprSkyDecl ) @property( !terra_use_uint ) , texture2d heightMap [[texture(@value(heightMap))]] @else diff --git a/ogre2/src/media/Hlms/Unlit/Any/500.StructsUnlit_piece_all.any b/ogre2/src/media/Hlms/Unlit/Any/500.StructsUnlit_piece_all.any index 533f6a467..05bca8a58 100644 --- a/ogre2/src/media/Hlms/Unlit/Any/500.StructsUnlit_piece_all.any +++ b/ogre2/src/media/Hlms/Unlit/Any/500.StructsUnlit_piece_all.any @@ -99,7 +99,7 @@ @end @end @property( hlms_colour ) - INTERPOLANT( float4 colour, @counter(texcoord) ); + INTERPOLANT( midf4 colour, @counter(texcoord) ); @end @foreach( out_uv_half_count, n ) INTERPOLANT( float@value( out_uv_half_count@n ) uv@n, @counter(texcoord) );@end @@ -114,6 +114,11 @@ @end @end @end + @property( hlms_emulate_clip_distances && hlms_pso_clip_distances ) + @foreach( hlms_pso_clip_distances, n ) + INTERPOLANT( float clipDistance@n, @counter(texcoord) ); + @end + @end @insertpiece( custom_VStoPS ) @end diff --git a/ogre2/src/media/Hlms/Unlit/Any/700.BlendModes_piece_ps.any b/ogre2/src/media/Hlms/Unlit/Any/700.BlendModes_piece_ps.any index fe29e89c9..7ae5b16ae 100644 --- a/ogre2/src/media/Hlms/Unlit/Any/700.BlendModes_piece_ps.any +++ b/ogre2/src/media/Hlms/Unlit/Any/700.BlendModes_piece_ps.any @@ -4,26 +4,26 @@ @piece( NormalNonPremul ) //Normal Non Premultiplied @counter(t) diffuseCol.xyz = lerp( diffuseCol.xyz, topImage@value(t).xyz, topImage@value(t).a ); - diffuseCol.w = lerp( diffuseCol.w, 1.0, topImage@value(t).w ); + diffuseCol.w = lerp( diffuseCol.w, _h( 1.0 ), topImage@value(t).w ); @end @piece( NormalPremul ) //Normal Premultiplied 
@counter(t) - diffuseCol.xyz = (1.0 - topImage@value(t).a) * diffuseCol.xyz + topImage@value(t).xyz; - diffuseCol.w = lerp( diffuseCol.w, 1.0, topImage@value(t).w ); + diffuseCol.xyz = (_h( 1.0 ) - topImage@value(t).a) * diffuseCol.xyz + topImage@value(t).xyz; + diffuseCol.w = lerp( diffuseCol.w, _h( 1.0 ), topImage@value(t).w ); @end @piece( Add ) //Add @counter(t) diffuseCol.xyz = lerp( diffuseCol.xyz, - min( diffuseCol.xyz + topImage@value(t).xyz, float3(1.0,1.0,1.0) ), + min( diffuseCol.xyz + topImage@value(t).xyz, midf3_c(1.0,1.0,1.0) ), topImage@value(t).a ); @end @piece( Subtract ) //Subtract @counter(t) diffuseCol.xyz = lerp( diffuseCol.xyz, - max( diffuseCol.xyz - topImage@value(t).xyz, float3(0.0,0.0,0.0) ), + max( diffuseCol.xyz - topImage@value(t).xyz, midf3_c(0.0,0.0,0.0) ), topImage@value(t).a ); @end @@ -37,21 +37,21 @@ @piece( Multiply2x ) //Multiply2x @counter(t) diffuseCol.xyz = lerp( diffuseCol.xyz, - min( diffuseCol.xyz * topImage@value(t).xyz * 2.0, float3(1.0,1.0,1.0) ), + min( diffuseCol.xyz * topImage@value(t).xyz * _h( 2.0 ), midf3_c(1.0,1.0,1.0) ), topImage@value(t).a ); @end @piece( Screen ) //Screen @counter(t) diffuseCol.xyz = lerp( diffuseCol.xyz, - 1.0 - (1.0 - diffuseCol.xyz) * (1.0 - topImage@value(t).xyz), + _h( 1.0 ) - (_h( 1.0 ) - diffuseCol.xyz) * (_h( 1.0 ) - topImage@value(t).xyz), topImage@value(t).a ); @end @piece( Overlay ) //Overlay @counter(t) diffuseCol.xyz = lerp( diffuseCol.xyz, - diffuseCol.xyz * ( diffuseCol.xyz + 2.0 * topImage@value(t).xyz * (1.0 - diffuseCol.xyz) ), + diffuseCol.xyz * ( diffuseCol.xyz + _h( 2.0 ) * topImage@value(t).xyz * (_h( 1.0 ) - diffuseCol.xyz) ), topImage@value(t).a ); @end @@ -72,14 +72,14 @@ @piece( GrainExtract ) //GrainExtract @counter(t) diffuseCol.xyz = lerp( diffuseCol.xyz, - (diffuseCol.xyz - topImage@value(t).xyz) + 0.5f, + (diffuseCol.xyz - topImage@value(t).xyz) + _h( 0.5f ), topImage@value(t).a ); @end @piece( GrainMerge ) //GrainMerge @counter(t) diffuseCol.xyz = lerp( 
diffuseCol.xyz, - (diffuseCol.xyz + topImage@value(t).xyz) - 0.5f, + (diffuseCol.xyz + topImage@value(t).xyz) - _h( 0.5f ), topImage@value(t).a ); @end diff --git a/ogre2/src/media/Hlms/Unlit/Any/800.PixelShader_piece_ps.any b/ogre2/src/media/Hlms/Unlit/Any/800.PixelShader_piece_ps.any index 7e18e1bd3..5b061d9f7 100644 --- a/ogre2/src/media/Hlms/Unlit/Any/800.PixelShader_piece_ps.any +++ b/ogre2/src/media/Hlms/Unlit/Any/800.PixelShader_piece_ps.any @@ -5,11 +5,11 @@ @foreach( diffuse_map, n ) #define DiffuseSampler@n samplerState@value(diffuse_map@n_sampler) @property( diffuse_map@n_array ) - #define SampleDiffuse@n( tex, sampler, uv, d ) OGRE_SampleArray2D( tex, sampler, uv, d ) + #define SampleDiffuse@n( tex, sampler, uv, d ) OGRE_SampleArray2DF16( tex, sampler, uv, d ) #define DiffuseTexture@n textureMapsArray@value(diffuse_map@n_idx) @piece( diffuseIdx@n ), diffuseIdx@n@end @else - #define SampleDiffuse@n( tex, sampler, uv ) OGRE_Sample( tex, sampler, uv ) + #define SampleDiffuse@n( tex, sampler, uv ) OGRE_SampleF16( tex, sampler, uv ) #define DiffuseTexture@n textureMaps@value(diffuse_map@n_idx) @end @@ -54,7 +54,14 @@ @end @piece( DefaultBodyPS ) - float4 diffuseCol = float4( 1.0f, 1.0f, 1.0f, 1.0f ); + @property( hlms_emulate_clip_distances && hlms_global_clip_planes && hlms_pso_clip_distances && syntax == glslvk) + @foreach( hlms_pso_clip_distances, n ) + if( inPs.clipDistance@n < 0.0 ) + discard; + @end + @end + + midf4 diffuseCol = midf4_c( 1.0f, 1.0f, 1.0f, 1.0f ); @property( diffuse_map || alpha_test || diffuse ) @insertpiece( LoadMaterial ) @@ -86,8 +93,8 @@ // Load each additional layer and blend it @foreach( diffuse_map, n, 1 ) @property( diffuse_map@n ) - float4 topImage@n = SampleDiffuse@n( DiffuseTexture@n, DiffuseSampler@n, - DiffuseUV@n @insertpiece( diffuseIdx@n ) ).@insertpiece(diffuse_map@n_tex_swizzle); + midf4 topImage@n = SampleDiffuse@n( DiffuseTexture@n, DiffuseSampler@n, + DiffuseUV@n @insertpiece( diffuseIdx@n ) 
).@insertpiece(diffuse_map@n_tex_swizzle); @insertpiece( blend_mode_idx@n ) @end @end @@ -97,7 +104,7 @@ diffuseCol *= inPs.colour; @else @property( diffuse ) - diffuseCol *= material.diffuse; + diffuseCol *= midf4_c( material.diffuse ); @end @end diff --git a/ogre2/src/media/Hlms/Unlit/Any/800.VertexShader_piece_vs.any b/ogre2/src/media/Hlms/Unlit/Any/800.VertexShader_piece_vs.any index d6ac995b9..af62d3c7c 100644 --- a/ogre2/src/media/Hlms/Unlit/Any/800.VertexShader_piece_vs.any +++ b/ogre2/src/media/Hlms/Unlit/Any/800.VertexShader_piece_vs.any @@ -80,11 +80,11 @@ @property( !hlms_shadowcaster || alpha_test ) @property( hlms_colour ) - outVs.colour = inVs_colour; + outVs.colour = midf4_c( inVs_colour ); @property( diffuse ) @insertpiece( LoadMaterial ) @insertpiece( custom_vs_posMaterialLoad ) - outVs.colour *= material.diffuse; + outVs.colour *= midf4_c( material.diffuse ); @end @end diff --git a/ogre2/src/media/Hlms/Unlit/GLSL/PixelShader_ps.glsl b/ogre2/src/media/Hlms/Unlit/GLSL/PixelShader_ps.glsl index c90469c18..38f47c952 100644 --- a/ogre2/src/media/Hlms/Unlit/GLSL/PixelShader_ps.glsl +++ b/ogre2/src/media/Hlms/Unlit/GLSL/PixelShader_ps.glsl @@ -9,9 +9,9 @@ in vec4 gl_FragCoord; @end @property( !hlms_shadowcaster ) -layout(location = FRAG_COLOR, index = 0) out vec4 outColour; +layout(location = FRAG_COLOR, index = 0) out midf4 outColour; @end @property( hlms_shadowcaster ) -layout(location = FRAG_COLOR, index = 0) out float outColour; +layout(location = FRAG_COLOR, index = 0) out midf outColour; @end // START UNIFORM DECLARATION @@ -28,17 +28,17 @@ layout(location = FRAG_COLOR, index = 0) out float outColour; @property( syntax != glslvk ) @foreach( num_textures, n ) @property( is_texture@n_array ) - uniform sampler2DArray textureMapsArray@n; + midf_tex uniform sampler2DArray textureMapsArray@n; @else - uniform sampler2D textureMaps@n; + midf_tex uniform sampler2D textureMaps@n; @end @end @else @foreach( num_textures, n ) @property( is_texture@n_array ) - 
layout( ogre_t@value(textureMapsArray@n) ) uniform texture2DArray textureMapsArray@n; + layout( ogre_t@value(textureMapsArray@n) ) midf_tex uniform texture2DArray textureMapsArray@n; @else - layout( ogre_t@value(textureMaps@n) ) uniform texture2D textureMaps@n; + layout( ogre_t@value(textureMaps@n) ) midf_tex uniform texture2D textureMaps@n; @end @end @end diff --git a/ogre2/src/media/Hlms/Unlit/GLSL/VertexShader_vs.glsl b/ogre2/src/media/Hlms/Unlit/GLSL/VertexShader_vs.glsl index 9a6f4d57f..1978c8465 100644 --- a/ogre2/src/media/Hlms/Unlit/GLSL/VertexShader_vs.glsl +++ b/ogre2/src/media/Hlms/Unlit/GLSL/VertexShader_vs.glsl @@ -4,7 +4,7 @@ out gl_PerVertex { vec4 gl_Position; -@property( hlms_pso_clip_distances ) +@property( hlms_pso_clip_distances && !hlms_emulate_clip_distances ) float gl_ClipDistance[@value(hlms_pso_clip_distances)]; @end }; diff --git a/ogre2/src/media/Hlms/Unlit/Metal/PixelShader_ps.metal b/ogre2/src/media/Hlms/Unlit/Metal/PixelShader_ps.metal index 7296fa4d6..a49065067 100644 --- a/ogre2/src/media/Hlms/Unlit/Metal/PixelShader_ps.metal +++ b/ogre2/src/media/Hlms/Unlit/Metal/PixelShader_ps.metal @@ -30,9 +30,9 @@ fragment @insertpiece( output_type ) main_metal @property( !hlms_shadowcaster || alpha_test ) @foreach( num_textures, n ) @property( is_texture@n_array ) - , texture2d_array textureMapsArray@n [[texture(@value(textureMapsArray@n))]] + , texture2d_array textureMapsArray@n [[texture(@value(textureMapsArray@n))]] @else - , texture2d textureMaps@n [[texture(@value(textureMaps@n))]] + , texture2d textureMaps@n [[texture(@value(textureMaps@n))]] @end @end @end