From 97a52317189bc1fa70f0d62fc56afb0b1ea88305 Mon Sep 17 00:00:00 2001 From: Pavel Rojtberg Date: Sun, 2 Feb 2020 22:06:33 +0100 Subject: [PATCH 1/2] Main: factor out generic Buffer interface to use for dumb buffers --- OgreMain/include/OgreHardwareBuffer.h | 293 +++++++++++++------------- 1 file changed, 152 insertions(+), 141 deletions(-) diff --git a/OgreMain/include/OgreHardwareBuffer.h b/OgreMain/include/OgreHardwareBuffer.h index d9bc748fc97..326e5b3d721 100644 --- a/OgreMain/include/OgreHardwareBuffer.h +++ b/OgreMain/include/OgreHardwareBuffer.h @@ -75,6 +75,140 @@ namespace Ogre { */ HBU_CPU_TO_GPU = HBU_CPU_ONLY | HBU_DETAIL_WRITE_ONLY, }; + /** Abstract class defining common features of unstructured memory + + This class defines the core interface which is common to all + buffers, whether it be vertex buffers, index buffers, texture memory + or framebuffer memory etc. + */ + class _OgreExport Buffer + { + public: + /// Locking options + enum LockOptions + { + /** Normal mode, ie allows read/write and contents are preserved. + This kind of lock allows reading and writing from the buffer - it’s also the least + optimal because basically you’re telling the card you could be doing anything at + all. If you’re not using a shadow buffer, it requires the buffer to be transferred + from the card and back again. If you’re using a shadow buffer the effect is + minimal. + */ + HBL_NORMAL, + /** Discards the entire buffer while locking. + This means you are happy for the card to discard the entire current contents of the + buffer. Implicitly this means you are not going to read the data - it also means + that the card can avoid any stalls if the buffer is currently being rendered from, + because it will actually give you an entirely different one. Use this wherever + possible when you are locking a buffer which was not created with a shadow buffer. 
+ If you are using a shadow buffer it matters less, although with a shadow buffer it’s + preferable to lock the entire buffer at once, because that allows the shadow buffer + to use HBL_DISCARD when it uploads the updated contents to the real buffer. + @note Only useful on buffers created with the HBU_CPU_TO_GPU flag. + */ + HBL_DISCARD, + /** Lock the buffer for reading only. Not allowed in buffers which are created with + HBU_WRITE_ONLY. + Mandatory on static buffers, i.e. those created without the HBU_DYNAMIC flag. + */ + HBL_READ_ONLY, + /** As HBL_DISCARD, except the application guarantees not to overwrite any + region of the buffer which has already been used in this frame, can allow + some optimisation on some APIs. + @note Only useful on buffers with no shadow buffer.*/ + HBL_NO_OVERWRITE, + /** Lock the buffer for writing only.*/ + HBL_WRITE_ONLY + }; + Buffer(size_t sizeInBytes, int usage) : mSizeInBytes(sizeInBytes), mUsage(usage), mIsLocked(false) {} + + virtual ~Buffer() {} + /** Reads data from the buffer and places it in the memory pointed to by pDest. + @param offset The byte offset from the start of the buffer to read + @param length The size of the area to read, in bytes + @param pDest The area of memory in which to place the data, must be large enough to + accommodate the data! + */ + virtual void readData(size_t offset, size_t length, void* pDest) /* const */ = 0; + /** Writes data to the buffer from an area of system memory; note that you must + ensure that your buffer is big enough. + @param offset The byte offset from the start of the buffer to start writing + @param length The size of the data to write to, in bytes + @param pSource The source of the data to be written + @param discardWholeBuffer If true, this allows the driver to discard the entire buffer when writing, + such that DMA stalls can be avoided; use if you can. 
+ */ + virtual void writeData(size_t offset, size_t length, const void* pSource, bool discardWholeBuffer = false) = 0; + + /** Copy data from another buffer into this one. + @remarks + Note that the source buffer must not be created with the + usage HBU_WRITE_ONLY otherwise this will fail. + @param srcBuffer The buffer from which to read the copied data + @param srcOffset Offset in the source buffer at which to start reading + @param dstOffset Offset in the destination buffer to start writing + @param length Length of the data to copy, in bytes. + @param discardWholeBuffer If true, will discard the entire contents of this buffer before copying + */ + virtual void copyData(HardwareBuffer& srcBuffer, size_t srcOffset, size_t dstOffset, size_t length, + bool discardWholeBuffer = false) + { + auto& _srcBuffer = (Buffer&)srcBuffer; // backward compat + const void* srcData = _srcBuffer.lock(srcOffset, length, HBL_READ_ONLY); + this->writeData(dstOffset, length, srcData, discardWholeBuffer); + _srcBuffer.unlock(); + } + + /** Copy all data from another buffer into this one. + @remarks + Normally these buffers should be of identical size, but if they're + not, the routine will use the smallest of the two sizes. + */ + void copyData(HardwareBuffer& srcBuffer) + { + auto& _srcBuffer = (Buffer&)srcBuffer; // backward compat + size_t sz = std::min(getSizeInBytes(), _srcBuffer.getSizeInBytes()); + copyData(srcBuffer, 0, 0, sz, true); + } + + /** Lock the buffer for (potentially) reading / writing. + @param offset The byte offset from the start of the buffer to lock + @param length The size of the area to lock, in bytes + @param options Locking options + @return Pointer to the locked memory + */ + virtual void* lock(size_t offset, size_t length, LockOptions options) = 0; + + /// @overload + void* lock(LockOptions options) { return this->lock(0, mSizeInBytes, options); } + + /** Releases the lock on this buffer. 
+ @remarks + Locking and unlocking a buffer can, in some rare circumstances such as + switching video modes whilst the buffer is locked, corrupt the + contents of a buffer. This is pretty rare, but if it occurs, + this method will throw an exception, meaning you + must re-upload the data. + @par + Note that using the 'read' and 'write' forms of updating the buffer does not + suffer from this problem, so if you want to be 100% sure your + data will not be lost, use the 'read' and 'write' forms instead. + */ + virtual void unlock() = 0; + + /// Returns whether or not this buffer is currently locked. + virtual bool isLocked() const { return mIsLocked; } + /// Returns the size of this buffer in bytes + size_t getSizeInBytes(void) const { return mSizeInBytes; } + /// Returns the Usage flags with which this buffer was created + int getUsage(void) const { return mUsage; } + protected: + size_t mSizeInBytes; + int mUsage; + bool mIsLocked; + }; + + /** Abstract class defining common features of hardware buffers. @remarks A 'hardware buffer' is any area of memory held outside of core system ram, @@ -106,7 +240,7 @@ namespace Ogre { You should look for the 'useShadowBuffer' parameter on the creation methods used to create the buffer of the type you require (see HardwareBufferManager) to enable this feature. */ - class _OgreExport HardwareBuffer : public BufferAlloc + class _OgreExport HardwareBuffer : public Buffer { public: @@ -129,50 +263,10 @@ namespace Ogre { /// @deprecated do not use HBU_DYNAMIC_WRITE_ONLY_DISCARDABLE = HBU_CPU_TO_GPU, }; - /// Locking options - enum LockOptions - { - /** Normal mode, ie allows read/write and contents are preserved. - This kind of lock allows reading and writing from the buffer - it’s also the least - optimal because basically you’re telling the card you could be doing anything at - all. If you’re not using a shadow buffer, it requires the buffer to be transferred - from the card and back again. 
If you’re using a shadow buffer the effect is - minimal. - */ - HBL_NORMAL, - /** Discards the entire buffer while locking. - This means you are happy for the card to discard the entire current contents of the - buffer. Implicitly this means you are not going to read the data - it also means - that the card can avoid any stalls if the buffer is currently being rendered from, - because it will actually give you an entirely different one. Use this wherever - possible when you are locking a buffer which was not created with a shadow buffer. - If you are using a shadow buffer it matters less, although with a shadow buffer it’s - preferable to lock the entire buffer at once, because that allows the shadow buffer - to use HBL_DISCARD when it uploads the updated contents to the real buffer. - @note Only useful on buffers created with the HBU_CPU_TO_GPU flag. - */ - HBL_DISCARD, - /** Lock the buffer for reading only. Not allowed in buffers which are created with - HBU_WRITE_ONLY. - Mandatory on static buffers, i.e. those created without the HBU_DYNAMIC flag. - */ - HBL_READ_ONLY, - /** As HBL_DISCARD, except the application guarantees not to overwrite any - region of the buffer which has already been used in this frame, can allow - some optimisation on some APIs. 
- @note Only useful on buffers with no shadow buffer.*/ - HBL_NO_OVERWRITE, - /** Lock the buffer for writing only.*/ - HBL_WRITE_ONLY - - }; protected: - size_t mSizeInBytes; - Usage mUsage; - bool mIsLocked; size_t mLockStart; size_t mLockSize; - std::unique_ptr mShadowBuffer; + std::unique_ptr mShadowBuffer; bool mSystemMemory; bool mUseShadowBuffer; bool mShadowUpdated; @@ -186,7 +280,7 @@ namespace Ogre { public: /// Constructor, to be called by HardwareBufferManager only HardwareBuffer(Usage usage, bool systemMemory, bool useShadowBuffer) - : mSizeInBytes(0), mUsage(usage), mIsLocked(false), mLockStart(0), mLockSize(0), mSystemMemory(systemMemory), + : Buffer(0, usage), mLockStart(0), mLockSize(0), mSystemMemory(systemMemory), mUseShadowBuffer(useShadowBuffer), mShadowUpdated(false), mSuppressHardwareUpdate(false) { @@ -200,14 +294,10 @@ namespace Ogre { mUsage = HBU_STATIC_WRITE_ONLY; } } - virtual ~HardwareBuffer() {} - /** Lock the buffer for (potentially) reading / writing. - @param offset The byte offset from the start of the buffer to lock - @param length The size of the area to lock, in bytes - @param options Locking options - @return Pointer to the locked memory - */ - virtual void* lock(size_t offset, size_t length, LockOptions options) + + using Buffer::lock; + + void* lock(size_t offset, size_t length, LockOptions options) override { OgreAssert(!isLocked(), "Cannot lock this buffer: it is already locked"); OgreAssert((length + offset) <= mSizeInBytes, "Lock request out of bounds"); @@ -235,24 +325,7 @@ namespace Ogre { return ret; } - /// @overload - void* lock(LockOptions options) - { - return this->lock(0, mSizeInBytes, options); - } - /** Releases the lock on this buffer. - @remarks - Locking and unlocking a buffer can, in some rare circumstances such as - switching video modes whilst the buffer is locked, corrupt the - contents of a buffer. 
This is pretty rare, but if it occurs, - this method will throw an exception, meaning you - must re-upload the data. - @par - Note that using the 'read' and 'write' forms of updating the buffer does not - suffer from this problem, so if you want to be 100% sure your - data will not be lost, use the 'read' and 'write' forms instead. - */ - virtual void unlock(void) + void unlock(void) override { OgreAssert(isLocked(), "Cannot unlock this buffer: it is not locked"); @@ -272,89 +345,27 @@ namespace Ogre { } - /** Reads data from the buffer and places it in the memory pointed to by pDest. - @param offset The byte offset from the start of the buffer to read - @param length The size of the area to read, in bytes - @param pDest The area of memory in which to place the data, must be large enough to - accommodate the data! - */ - virtual void readData(size_t offset, size_t length, void* pDest) = 0; - /** Writes data to the buffer from an area of system memory; note that you must - ensure that your buffer is big enough. - @param offset The byte offset from the start of the buffer to start writing - @param length The size of the data to write to, in bytes - @param pSource The source of the data to be written - @param discardWholeBuffer If true, this allows the driver to discard the entire buffer when writing, - such that DMA stalls can be avoided; use if you can. - */ - virtual void writeData(size_t offset, size_t length, const void* pSource, - bool discardWholeBuffer = false) = 0; - - /** Copy data from another buffer into this one. - @remarks - Note that the source buffer must not be created with the - usage HBU_WRITE_ONLY otherwise this will fail. - @param srcBuffer The buffer from which to read the copied data - @param srcOffset Offset in the source buffer at which to start reading - @param dstOffset Offset in the destination buffer to start writing - @param length Length of the data to copy, in bytes. 
- @param discardWholeBuffer If true, will discard the entire contents of this buffer before copying - */ - virtual void copyData(HardwareBuffer& srcBuffer, size_t srcOffset, - size_t dstOffset, size_t length, bool discardWholeBuffer = false) - { - const void *srcData = srcBuffer.lock( - srcOffset, length, HBL_READ_ONLY); - this->writeData(dstOffset, length, srcData, discardWholeBuffer); - srcBuffer.unlock(); - } - - /** Copy all data from another buffer into this one. - @remarks - Normally these buffers should be of identical size, but if they're - not, the routine will use the smallest of the two sizes. - */ - virtual void copyData(HardwareBuffer& srcBuffer) - { - size_t sz = std::min(getSizeInBytes(), srcBuffer.getSizeInBytes()); - copyData(srcBuffer, 0, 0, sz, true); - } - /// Updates the real buffer from the shadow buffer, if required virtual void _updateFromShadow(void) { if (mUseShadowBuffer && mShadowUpdated && !mSuppressHardwareUpdate) { - // Do this manually to avoid locking problems - const void *srcData = mShadowBuffer->lockImpl( - mLockStart, mLockSize, HBL_READ_ONLY); // Lock with discard if the whole buffer was locked, otherwise w/o - LockOptions lockOpt; - if (mLockStart == 0 && mLockSize == mSizeInBytes) - lockOpt = HBL_DISCARD; - else - lockOpt = HBL_WRITE_ONLY; - - void *destData = this->lockImpl( - mLockStart, mLockSize, lockOpt); + LockOptions lockOpt = mLockSize == mSizeInBytes ? 
HBL_DISCARD : HBL_WRITE_ONLY; + // Do this manually to avoid locking problems + void* destData = this->lockImpl(mLockStart, mLockSize, lockOpt); // Copy shadow to real - memcpy(destData, srcData, mLockSize); + mShadowBuffer->readData(mLockStart, mLockSize, destData); this->unlockImpl(); - mShadowBuffer->unlockImpl(); mShadowUpdated = false; } } - /// Returns the size of this buffer in bytes - size_t getSizeInBytes(void) const { return mSizeInBytes; } - /// Returns the Usage flags with which this buffer was created - Usage getUsage(void) const { return mUsage; } /// Returns whether this buffer is held in system memory bool isSystemMemory(void) const { return mSystemMemory; } /// Returns whether this buffer has a system memory shadow for quicker reading bool hasShadowBuffer(void) const { return mUseShadowBuffer; } - /// Returns whether or not this buffer is currently locked. - bool isLocked(void) const { + bool isLocked(void) const override { return mIsLocked || (mUseShadowBuffer && mShadowBuffer->isLocked()); } /// Pass true to suppress hardware upload of shadow buffer changes @@ -375,10 +386,10 @@ namespace Ogre { { HardwareBufferLockGuard() : pBuf(0), pData(0) {} - HardwareBufferLockGuard(HardwareBuffer* p, HardwareBuffer::LockOptions options) + HardwareBufferLockGuard(Buffer* p, HardwareBuffer::LockOptions options) : pBuf(0), pData(0) { lock(p, options); } - HardwareBufferLockGuard(HardwareBuffer* p, size_t offset, size_t length, HardwareBuffer::LockOptions options) + HardwareBufferLockGuard(Buffer* p, size_t offset, size_t length, HardwareBuffer::LockOptions options) : pBuf(0), pData(0) { lock(p, offset, length, options); } template @@ -401,7 +412,7 @@ namespace Ogre { } } - void lock(HardwareBuffer* p, HardwareBuffer::LockOptions options) + void lock(Buffer* p, HardwareBuffer::LockOptions options) { assert(p); unlock(); @@ -409,7 +420,7 @@ namespace Ogre { pData = pBuf->lock(options); } - void lock(HardwareBuffer* p, size_t offset, size_t length, 
HardwareBuffer::LockOptions options) + void lock(Buffer* p, size_t offset, size_t length, HardwareBuffer::LockOptions options) { assert(p); unlock(); @@ -425,7 +436,7 @@ namespace Ogre { void lock(const SharedPtr& p, size_t offset, size_t length, HardwareBuffer::LockOptions options) { lock(p.get(), offset, length, options); } - HardwareBuffer* pBuf; + Buffer* pBuf; void* pData; }; From eb46da5d96031feb6e91a89271586c46e4e8c063 Mon Sep 17 00:00:00 2001 From: Pavel Rojtberg Date: Sun, 20 Dec 2020 21:16:19 +0100 Subject: [PATCH 2/2] Main: add lightweight DefaultBuffer class for shadow buffering --- .../OgreDefaultHardwareBufferManager.h | 16 +++++++++ .../src/OgreDefaultHardwareBufferManager.cpp | 33 +++++++++++++++++++ OgreMain/src/OgreHardwareIndexBuffer.cpp | 3 +- OgreMain/src/OgreHardwareUniformBuffer.cpp | 2 +- OgreMain/src/OgreHardwareVertexBuffer.cpp | 3 +- .../include/OgreD3D11HardwareBuffer.h | 2 +- .../src/OgreD3D11HardwareBuffer.cpp | 2 +- 7 files changed, 54 insertions(+), 7 deletions(-) diff --git a/OgreMain/include/OgreDefaultHardwareBufferManager.h b/OgreMain/include/OgreDefaultHardwareBufferManager.h index 22092efb8ee..5ac7e375691 100644 --- a/OgreMain/include/OgreDefaultHardwareBufferManager.h +++ b/OgreMain/include/OgreDefaultHardwareBufferManager.h @@ -43,6 +43,22 @@ namespace Ogre { * @{ */ + /// Specialisation of Buffer using malloc e.g. 
for use as shadow buffer + class _OgreExport DefaultBuffer : public Buffer + { + protected: + unsigned char* mData; + void* lockImpl(size_t offset, size_t length, LockOptions options); + void unlockImpl(void); + public: + explicit DefaultBuffer(size_t sizeInBytes); + ~DefaultBuffer(); + void readData(size_t offset, size_t length, void* pDest); + void writeData(size_t offset, size_t length, const void* pSource, bool discardWholeBuffer = false); + void* lock(size_t offset, size_t length, LockOptions options); + void unlock(void); + }; + /// Specialisation of HardwareVertexBuffer for emulation class _OgreExport DefaultHardwareVertexBuffer : public HardwareVertexBuffer { diff --git a/OgreMain/src/OgreDefaultHardwareBufferManager.cpp b/OgreMain/src/OgreDefaultHardwareBufferManager.cpp index 451f89cbd1e..f9376a2ccc8 100644 --- a/OgreMain/src/OgreDefaultHardwareBufferManager.cpp +++ b/OgreMain/src/OgreDefaultHardwareBufferManager.cpp @@ -29,6 +29,39 @@ THE SOFTWARE. #include "OgreDefaultHardwareBufferManager.h" namespace Ogre { + DefaultBuffer::DefaultBuffer(size_t sizeInBytes) + : Buffer(sizeInBytes, HBU_CPU_ONLY) + { + // Allocate aligned memory for better SIMD processing friendly. 
+ mData = static_cast(AlignedMemory::allocate(mSizeInBytes)); + } + //----------------------------------------------------------------------- + DefaultBuffer::~DefaultBuffer() { AlignedMemory::deallocate(mData); } + //----------------------------------------------------------------------- + void* DefaultBuffer::lockImpl(size_t offset, size_t length, LockOptions options) { return mData + offset; } + //----------------------------------------------------------------------- + void DefaultBuffer::unlockImpl() {} + //----------------------------------------------------------------------- + void* DefaultBuffer::lock(size_t offset, size_t length, LockOptions options) + { + mIsLocked = true; + return mData + offset; + } + void DefaultBuffer::unlock(void) { mIsLocked = false; } + //----------------------------------------------------------------------- + void DefaultBuffer::readData(size_t offset, size_t length, void* pDest) + { + assert((offset + length) <= mSizeInBytes); + memcpy(pDest, mData + offset, length); + } + //----------------------------------------------------------------------- + void DefaultBuffer::writeData(size_t offset, size_t length, const void* pSource, bool discardWholeBuffer) + { + assert((offset + length) <= mSizeInBytes); + // ignore discard, memory is not guaranteed to be zeroised + memcpy(mData + offset, pSource, length); + } + DefaultHardwareVertexBuffer::DefaultHardwareVertexBuffer(size_t vertexSize, size_t numVertices, HardwareBuffer::Usage usage) diff --git a/OgreMain/src/OgreHardwareIndexBuffer.cpp b/OgreMain/src/OgreHardwareIndexBuffer.cpp index f62b2fcf301..1b8b3fdb301 100644 --- a/OgreMain/src/OgreHardwareIndexBuffer.cpp +++ b/OgreMain/src/OgreHardwareIndexBuffer.cpp @@ -56,8 +56,7 @@ namespace Ogre { // Create a shadow buffer if required if (mUseShadowBuffer) { - mShadowBuffer.reset(new DefaultHardwareIndexBuffer(mIndexType, - mNumIndexes, HardwareBuffer::HBU_DYNAMIC)); + mShadowBuffer.reset(new DefaultBuffer(mSizeInBytes)); } diff --git 
a/OgreMain/src/OgreHardwareUniformBuffer.cpp b/OgreMain/src/OgreHardwareUniformBuffer.cpp index 1d835e20a7e..3a90ed772cb 100644 --- a/OgreMain/src/OgreHardwareUniformBuffer.cpp +++ b/OgreMain/src/OgreHardwareUniformBuffer.cpp @@ -44,7 +44,7 @@ namespace Ogre { // Create a shadow buffer if required if (mUseShadowBuffer) { - mShadowBuffer.reset(new DefaultHardwareUniformBuffer(mMgr, sizeBytes, HardwareBuffer::HBU_DYNAMIC, false)); + mShadowBuffer.reset(new DefaultBuffer(sizeBytes)); } } diff --git a/OgreMain/src/OgreHardwareVertexBuffer.cpp b/OgreMain/src/OgreHardwareVertexBuffer.cpp index f568c526872..1744afada61 100644 --- a/OgreMain/src/OgreHardwareVertexBuffer.cpp +++ b/OgreMain/src/OgreHardwareVertexBuffer.cpp @@ -48,8 +48,7 @@ namespace Ogre { // Create a shadow buffer if required if (mUseShadowBuffer) { - mShadowBuffer.reset(new DefaultHardwareVertexBuffer(mMgr, mVertexSize, - mNumVertices, HardwareBuffer::HBU_DYNAMIC)); + mShadowBuffer.reset(new DefaultBuffer(mSizeInBytes)); } } diff --git a/RenderSystems/Direct3D11/include/OgreD3D11HardwareBuffer.h b/RenderSystems/Direct3D11/include/OgreD3D11HardwareBuffer.h index 6ddb7b5114c..16cb6e3881f 100644 --- a/RenderSystems/Direct3D11/include/OgreD3D11HardwareBuffer.h +++ b/RenderSystems/Direct3D11/include/OgreD3D11HardwareBuffer.h @@ -72,7 +72,7 @@ namespace Ogre { /** See HardwareBuffer. We perform a hardware copy here. 
*/ void copyData(HardwareBuffer& srcBuffer, size_t srcOffset, size_t dstOffset, size_t length, bool discardWholeBuffer = false); - void copyDataImpl(HardwareBuffer& srcBuffer, size_t srcOffset, + void copyDataImpl(Buffer& srcBuffer, size_t srcOffset, size_t dstOffset, size_t length, bool discardWholeBuffer = false); /// Updates the real buffer from the shadow buffer, if required virtual void _updateFromShadow(void); diff --git a/RenderSystems/Direct3D11/src/OgreD3D11HardwareBuffer.cpp b/RenderSystems/Direct3D11/src/OgreD3D11HardwareBuffer.cpp index 1b847a1ef75..abcfc5bcda7 100644 --- a/RenderSystems/Direct3D11/src/OgreD3D11HardwareBuffer.cpp +++ b/RenderSystems/Direct3D11/src/OgreD3D11HardwareBuffer.cpp @@ -196,7 +196,7 @@ namespace Ogre { copyDataImpl(srcBuffer, srcOffset, dstOffset, length, discardWholeBuffer); } //--------------------------------------------------------------------- - void D3D11HardwareBuffer::copyDataImpl(HardwareBuffer& srcBuffer, size_t srcOffset, + void D3D11HardwareBuffer::copyDataImpl(Buffer& srcBuffer, size_t srcOffset, size_t dstOffset, size_t length, bool discardWholeBuffer) { // If we're copying same-size buffers in their entirety...