From b5ae14ecc74c4c76b1860500df616c8b83d651da Mon Sep 17 00:00:00 2001 From: Jamie Smith Date: Sun, 3 Dec 2023 23:49:45 -0800 Subject: [PATCH 1/8] Handle cache alignment in DMA SPI driver --- drivers/include/drivers/SPI.h | 50 +++-- drivers/source/SPI.cpp | 57 +++++- hal/include/hal/spi_api.h | 18 +- .../include/platform/CacheAlignedBuffer.h | 183 ++++++++++++++++++ .../COMPONENT_SD/include/SD/SDBlockDevice.h | 6 +- .../COMPONENT_SD/source/SDBlockDevice.cpp | 40 ++-- .../TARGET_Cypress/TARGET_PSOC6/cy_spi_api.c | 4 +- .../TARGET_MCU_K64F/spi_api.c | 4 +- .../TARGET_NRF5x/TARGET_NRF52/spi_api.c | 10 +- targets/TARGET_NUVOTON/TARGET_M2354/spi_api.c | 4 +- targets/TARGET_NUVOTON/TARGET_M251/spi_api.c | 4 +- targets/TARGET_NUVOTON/TARGET_M261/spi_api.c | 4 +- targets/TARGET_NUVOTON/TARGET_M451/spi_api.c | 4 +- targets/TARGET_NUVOTON/TARGET_M460/spi_api.c | 4 +- targets/TARGET_NUVOTON/TARGET_M480/spi_api.c | 4 +- .../TARGET_NUVOTON/TARGET_NANO100/spi_api.c | 4 +- .../TARGET_NUVOTON/TARGET_NUC472/spi_api.c | 4 +- .../TARGET_RENESAS/TARGET_RZ_A1XX/spi_api.c | 4 +- .../TARGET_RENESAS/TARGET_RZ_A2XX/spi_api.c | 4 +- .../stm32h7xx_hal_dma_ex.c | 13 +- targets/TARGET_STM/stm_spi_api.c | 68 +++---- targets/TARGET_STM/stm_spi_api.h | 1 + .../TARGET_EFM32/spi_api.c | 2 +- .../TARGET_TOSHIBA/TARGET_TMPM46B/spi_api.c | 4 +- .../TARGET_TOSHIBA/TARGET_TMPM4G9/spi_api.c | 4 +- .../TARGET_TOSHIBA/TARGET_TMPM4GR/spi_api.c | 4 +- .../TARGET_TOSHIBA/TARGET_TMPM4NR/spi_api.c | 4 +- 27 files changed, 414 insertions(+), 98 deletions(-) create mode 100644 platform/include/platform/CacheAlignedBuffer.h diff --git a/drivers/include/drivers/SPI.h b/drivers/include/drivers/SPI.h index 69df632d333..2c330266883 100644 --- a/drivers/include/drivers/SPI.h +++ b/drivers/include/drivers/SPI.h @@ -26,6 +26,7 @@ #include "drivers/DigitalOut.h" #include "platform/SingletonPtr.h" #include "platform/NonCopyable.h" +#include "platform/CacheAlignedBuffer.h" #if defined MBED_CONF_DRIVERS_SPI_COUNT_MAX && DEVICE_SPI_COUNT > MBED_CONF_DRIVERS_SPI_COUNT_MAX #define SPI_PERIPHERALS_USED MBED_CONF_DRIVERS_SPI_COUNT_MAX @@ -458,8 +459,9 @@ class SPI : private NonCopyable { * @param tx_buffer The TX buffer with data to be transferred. If NULL is passed, * the default %SPI value is sent. * @param tx_length The length of TX buffer in bytes. - * @param rx_buffer The RX buffer which is used for received data. If NULL is passed, - * received data are ignored. + * @param rx_buffer The RX buffer which is used for received data. Rather than a C array, a CacheAlignedBuffer + * structure must be passed so that cache alignment can be handled for data received from DMA. + * May be nullptr if rx_length is 0. * @param rx_length The length of RX buffer in bytes. * @param callback The event callback function. * @param event The logical OR of events to subscribe to. May be #SPI_EVENT_ALL, or some combination @@ -469,17 +471,17 @@ class SPI : private NonCopyable { * @retval 0 If the transfer has started. 
* @retval -1 if the transfer could not be enqueued (increase drivers.spi_transaction_queue_len option) */ - template<typename WordT> + template<typename WordT, size_t BufferSize> typename std::enable_if<std::is_integral<WordT>::value, int>::type - transfer(const WordT *tx_buffer, int tx_length, WordT *rx_buffer, int rx_length, const event_callback_t &callback, int event = SPI_EVENT_COMPLETE) + transfer(const WordT *tx_buffer, int tx_length, CacheAlignedBuffer<WordT, BufferSize> & rx_buffer, int rx_length, const event_callback_t &callback, int event = SPI_EVENT_COMPLETE) { - return transfer_internal(tx_buffer, tx_length, rx_buffer, rx_length, callback, event); + return transfer_internal(tx_buffer, tx_length, rx_buffer.data(), rx_length, callback, event); } // Overloads of the above to support passing nullptr - template<typename WordT> + template<typename WordT, size_t BufferSize> typename std::enable_if<std::is_integral<WordT>::value, int>::type - transfer(const std::nullptr_t *tx_buffer, int tx_length, WordT *rx_buffer, int rx_length, const event_callback_t &callback, int event = SPI_EVENT_COMPLETE) + transfer(const std::nullptr_t *tx_buffer, int tx_length, CacheAlignedBuffer<WordT, BufferSize> & rx_buffer, int rx_length, const event_callback_t &callback, int event = SPI_EVENT_COMPLETE) { return transfer_internal(tx_buffer, tx_length, rx_buffer, rx_length, callback, event); } @@ -502,8 +504,10 @@ class SPI : private NonCopyable { * Internally, the chip vendor may implement this function using either DMA or interrupts. * * @param tx_buffer The TX buffer with data to be transferred. May be nullptr if tx_length is 0. - * @param tx_length The length of TX buffer in bytes. If 0, no transmission is done. - * @param rx_buffer The RX buffer, which is used for received data. May be nullptr if tx_length is 0. + * @param tx_length The length of TX buffer in bytes. If 0, the default %SPI data value is sent when receiving data. + * @param rx_buffer The RX buffer which is used for received data. Rather than a C array, a CacheAlignedBuffer + * structure must be passed so that cache alignment can be handled for data received from DMA. + * May be nullptr if rx_length is 0. * @param rx_length The length of RX buffer in bytes If 0, no reception is done. * @param timeout timeout value. Use #rtos::Kernel::wait_for_u32_forever to wait forever (the default).
* @@ -513,19 +517,19 @@ class SPI : private NonCopyable { * @retval 2 on other error * @retval 0 on success */ - template + template typename std::enable_if::value, int>::type - transfer_and_wait(const WordT *tx_buffer, int tx_length, WordT *rx_buffer, int rx_length, rtos::Kernel::Clock::duration_u32 timeout = rtos::Kernel::wait_for_u32_forever) + transfer_and_wait(const WordT *tx_buffer, int tx_length, CacheAlignedBuffer & rx_buffer, int rx_length, rtos::Kernel::Clock::duration_u32 timeout = rtos::Kernel::wait_for_u32_forever) { - return transfer_and_wait_internal(tx_buffer, tx_length, rx_buffer, rx_length, timeout); + return transfer_and_wait_internal(tx_buffer, tx_length, rx_buffer.data(), rx_length, timeout); } // Overloads of the above to support passing nullptr - template + template typename std::enable_if::value, int>::type - transfer_and_wait(const std::nullptr_t *tx_buffer, int tx_length, WordT *rx_buffer, int rx_length, rtos::Kernel::Clock::duration_u32 timeout = rtos::Kernel::wait_for_u32_forever) + transfer_and_wait(const std::nullptr_t *tx_buffer, int tx_length, CacheAlignedBuffer & rx_buffer, int rx_length, rtos::Kernel::Clock::duration_u32 timeout = rtos::Kernel::wait_for_u32_forever) { - return transfer_and_wait_internal(tx_buffer, tx_length, rx_buffer, rx_length, timeout); + return transfer_and_wait_internal(tx_buffer, tx_length, rx_buffer.data(), rx_length, timeout); } template typename std::enable_if::value, int>::type @@ -574,7 +578,8 @@ class SPI : private NonCopyable { * the default SPI value is sent * @param tx_length The length of TX buffer in bytes. * @param rx_buffer The RX buffer which is used for received data. If NULL is passed, - * received data are ignored. + * received data are ignored. This buffer is guaranteed to be cache aligned + * if the MCU has a cache. * @param rx_length The length of RX buffer in bytes. * @param callback The event callback function. * @param event The event mask of events to modify. @@ -599,6 +604,7 @@ class SPI : private NonCopyable { * @param tx_buffer The TX buffer with data to be transferred. May be nullptr if tx_length is 0. * @param tx_length The length of TX buffer in bytes. If 0, no transmission is done. * @param rx_buffer The RX buffer, which is used for received data. May be nullptr if tx_length is 0. + * This buffer is guaranteed to be cache aligned if the MCU has a cache. * @param rx_length The length of RX buffer in bytes If 0, no reception is done. * @param timeout timeout value. Use #rtos::Kernel::wait_for_u32_forever to wait forever (the default). * @@ -722,6 +728,18 @@ class SPI : private NonCopyable { * iff start_transfer() has been called and the chip has been selected but irq_handler_asynch() * has NOT been called yet. */ volatile bool _transfer_in_progress = false; + + // If there is a transfer in progress, this indicates whether it used DMA and therefore requires a cache + // flush at the end + bool _transfer_in_progress_uses_dma; + +#if __DCACHE_PRESENT + // These variables store the location and length in bytes of the Rx buffer if an async transfer + // is in progress. They are used for invalidating the cache after the transfer completes. 
+ void * _transfer_in_progress_rx_buffer; + size_t _transfer_in_progress_rx_len; +#endif + /* Event flags used for transfer_and_wait() */ rtos::EventFlags _transfer_and_wait_flags; #endif // DEVICE_SPI_ASYNCH diff --git a/drivers/source/SPI.cpp b/drivers/source/SPI.cpp index 376ec7c708b..af7368e7b93 100644 --- a/drivers/source/SPI.cpp +++ b/drivers/source/SPI.cpp @@ -396,6 +396,17 @@ void SPI::abort_transfer() _set_ssel(1); } +#if __DCACHE_PRESENT + if(_transfer_in_progress_uses_dma) + { + // If the cache is present, invalidate the Rx data so it's loaded from main RAM. + // We only want to do this if DMA actually got used for the transfer because, if interrupts + // were used instead, the cache might have the correct data and NOT the main memory. + SCB_InvalidateDCache_by_Addr(_transfer_in_progress_rx_buffer, _transfer_in_progress_rx_len); + } +#endif + _transfer_in_progress = false; + #if MBED_CONF_DRIVERS_SPI_TRANSACTION_QUEUE_LEN dequeue_transaction(); #endif @@ -466,8 +477,33 @@ void SPI::start_transfer(const void *tx_buffer, int tx_length, void *rx_buffer, _callback = callback; _irq.callback(&SPI::irq_handler_asynch); + +#if __DCACHE_PRESENT + // On devices with a cache, we need to carefully manage the Tx and Rx buffer cache invalidation. + // We can assume that asynchronous SPI implementations might rely on DMA, and that DMA will + // not interoperate with the CPU cache. So, manual flushing/invalidation will be required. + // This page is very useful for how to do this correctly: + // https://community.st.com/t5/stm32-mcus-products/maintaining-cpu-data-cache-coherence-for-dma-buffers/td-p/95746 + if(tx_length > 0) + { + // For chips with a cache, we need to evict the Tx data from cache to main memory. + // This ensures that the DMA controller can see the most up-to-date copy of the data. + SCB_CleanDCache_by_Addr(const_cast(tx_buffer), tx_length); + } + + // Additionally, we have to make sure that there aren't any pending changes which could be written back + // to the Rx buffer memory by the cache at a later date, corrupting the DMA results + if(rx_length > 0) + { + SCB_CleanInvalidateDCache_by_Addr(rx_buffer, rx_length); + } + _transfer_in_progress_rx_buffer = rx_buffer; + _transfer_in_progress_rx_len = rx_length; +#endif + _transfer_in_progress = true; - spi_master_transfer(&_peripheral->spi, tx_buffer, tx_length, rx_buffer, rx_length, bit_width, _irq.entry(), event, _usage); + + _transfer_in_progress_uses_dma = spi_master_transfer(&_peripheral->spi, tx_buffer, tx_length, rx_buffer, rx_length, bit_width, _irq.entry(), event, _usage); } void SPI::lock_deep_sleep() @@ -508,7 +544,18 @@ void SPI::dequeue_transaction() void SPI::irq_handler_asynch(void) { int event = spi_irq_handler_asynch(&_peripheral->spi); - if (_callback && (event & SPI_EVENT_ALL)) { + if ((event & SPI_EVENT_ALL)) { + +#if __DCACHE_PRESENT + if(_transfer_in_progress_uses_dma) + { + // If the cache is present, invalidate the Rx data so it's loaded from main RAM. + // We only want to do this if DMA actually got used for the transfer because, if interrupts + // were used instead, the cache might have the correct data and NOT the main memory. + SCB_InvalidateDCache_by_Addr(_transfer_in_progress_rx_buffer, _transfer_in_progress_rx_len); + } +#endif + // If using GPIO CS mode, unless we were asked to keep the peripheral selected, deselect it. // If there's another transfer queued, we *do* want to deselect the peripheral now. // It will be reselected in start_transfer() which is called by dequeue_transaction() below. 
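For reference, the cache-coherence pattern these SPI.cpp changes implement is sketched below. The sketch is illustrative only: the SCB_* functions are the CMSIS-Core cache helpers used in the hunks, while start_dma_transfer() is a hypothetical stand-in for whatever vendor HAL call actually starts the transfer.

    // Before starting DMA: push any cached Tx data out to RAM so the DMA engine reads current bytes.
    SCB_CleanDCache_by_Addr(const_cast<void *>(tx_buffer), tx_length);
    // Clean+invalidate the Rx region so no dirty cache line can later be evicted on top of the DMA results.
    SCB_CleanInvalidateDCache_by_Addr(rx_buffer, rx_length);
    start_dma_transfer(tx_buffer, tx_length, rx_buffer, rx_length); // hypothetical HAL call
    // ...then, in the completion interrupt (only if DMA was really used):
    SCB_InvalidateDCache_by_Addr(rx_buffer, rx_length); // force the CPU to re-read the results from RAM
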
@@ -518,7 +565,11 @@ void SPI::irq_handler_asynch(void) _transfer_in_progress = false; unlock_deep_sleep(); - _callback.call(event & SPI_EVENT_ALL); + + if(_callback) + { + _callback.call(event & SPI_EVENT_ALL); + } } #if MBED_CONF_DRIVERS_SPI_TRANSACTION_QUEUE_LEN if (event & (SPI_EVENT_ALL | SPI_EVENT_INTERNAL_TRANSFER_COMPLETE)) { diff --git a/hal/include/hal/spi_api.h b/hal/include/hal/spi_api.h index 65853c69b3b..011073a02a7 100644 --- a/hal/include/hal/spi_api.h +++ b/hal/include/hal/spi_api.h @@ -25,6 +25,8 @@ #include "hal/dma_api.h" #include "hal/buffer.h" +#include + #if DEVICE_SPI /** @@ -393,7 +395,11 @@ const PinMap *spi_slave_cs_pinmap(void); * @{ */ -/** Begin the SPI transfer. Buffer pointers and lengths are specified in tx_buff and rx_buff +/** Begin the asynchronous SPI transfer. Buffer pointers and lengths are specified in tx_buff and rx_buff. + * + * If the device has a data cache, the Tx data is guaranteed to have been flushed from the cache + * to main memory already. Additionally, the Rx buffer is guaranteed to be cache aligned, and will + * be invalidated by the SPI layer after the transfer is complete. * * @param[in] obj The SPI object that holds the transfer information * @param[in] tx The transmit buffer @@ -404,8 +410,16 @@ const PinMap *spi_slave_cs_pinmap(void); * @param[in] event The logical OR of events to be registered * @param[in] handler SPI interrupt handler * @param[in] hint A suggestion for how to use DMA with this transfer + * + * @return True if DMA was actually used for the transfer, false otherwise (if interrupts or another CPU-based + * method is used to do the transfer). + * + * @note On MCUs with a data cache, the return value is used to determine if a cache invalidation needs to be done + * after the transfer is complete. If this function returns true, the driver layer will cache invalidate the Rx buffer under + * the assumption that the data needs to be re-read from main memory. Be careful, because if the read was not actually + * done by DMA, and the rx data is in the CPU cache, this invalidation will corrupt it. */ -void spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, size_t rx_length, uint8_t bit_width, uint32_t handler, uint32_t event, DMAUsage hint); +bool spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, size_t rx_length, uint8_t bit_width, uint32_t handler, uint32_t event, DMAUsage hint); /** The asynchronous IRQ handler * diff --git a/platform/include/platform/CacheAlignedBuffer.h b/platform/include/platform/CacheAlignedBuffer.h new file mode 100644 index 00000000000..61b6f09f6a0 --- /dev/null +++ b/platform/include/platform/CacheAlignedBuffer.h @@ -0,0 +1,183 @@ +/* mbed Microcontroller Library + * Copyright (c) 2006-2023 ARM Limited + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MBED_CACHEALIGNEDBUFFER_H +#define MBED_CACHEALIGNEDBUFFER_H + +#include + +#include "device.h" + +namespace mbed { + + +/** + * @brief CacheAlignedBuffer is used by Mbed in locations where we need a cache-aligned buffer. + * + * Cache alignment is desirable in several different situations in embedded programming; often + * when working with DMA or other peripherals which write their results back to main memory. + * In order to read those results from memory without risk of getting old data from the + * CPU cache, one needs to align the buffer so it takes up an integer number of cache lines, + * then invalidate the cache lines so that the data gets reread from RAM. + * + * CacheAlignedBuffer provides an easy way to allocate the correct amount of space so that + * a buffer of any size can be made cache-aligned. + * + * @tparam DataT Type of the data to store in the buffer. Note: %CacheAlignedBuffer is not designed for + * using class types as DataT, and will not call constructors. + * @tparam BufferSize Buffer size (number of elements) needed by the application for the buffer. + */ +template +struct +CacheAlignedBuffer { +private: +#if __DCACHE_PRESENT + // Allocate enough extra space that we can shift the start of the buffer forward to be on a cache line. + // The worst case for this is when the first byte is allocated 1 byte past the start of a cache line, so we + // will need an additional (cache line size - 1) bytes. + // Additionally, since we are going to be invalidating this buffer, we can't allow any other variables to be + // in the same cache lines, or they might get corrupted. + // So, we need to round up the backing buffer size to the nearest multiple of the cache line size. + // The math for rounding up can be found here: + // https://community.st.com/t5/stm32-mcus-products/maintaining-cpu-data-cache-coherence-for-dma-buffers/td-p/95746 + constexpr static size_t requiredSizeRoundedUp = (BufferSize * sizeof(DataT) + __SCB_DCACHE_LINE_SIZE - 1) & ~((__SCB_DCACHE_LINE_SIZE) - 1); + constexpr static size_t backingBufferSizeBytes = requiredSizeRoundedUp + __SCB_DCACHE_LINE_SIZE - 1; +#else + constexpr static size_t backingBufferSize = BufferSize * sizeof(DataT); +#endif + + uint8_t _backingBuffer[backingBufferSizeBytes]; + DataT * _alignedArrayPtr; + + /** + * Find and return the first location in the given buffer that starts on a cache line. + * If this MCU does not use a cache, this function is a no-op. + * + * @param buffer Pointer to buffer + * + * @return Pointer to first data item, aligned at the start of a cache line. + */ + inline DataT * findCacheLineStart(uint8_t * buffer) + { +#if __DCACHE_PRESENT + // Use integer division to divide the address down to the cache line size, which + // rounds to the cache line before the given address. + // So that we always go one cache line back even if the given address is on a cache line, + // subtract 1. + ptrdiff_t prevCacheLine = ((ptrdiff_t)(buffer - 1)) / __SCB_DCACHE_LINE_SIZE; + + // Now we just have to multiply up again to get an address (adding 1 to go forward by 1 cache line) + return reinterpret_cast((prevCacheLine + 1) * __SCB_DCACHE_LINE_SIZE); +#else + return reinterpret_cast(buffer); +#endif + } + +public: + + // Iterator types + typedef DataT * iterator; + typedef DataT const * const_iterator; + + /** + * @brief Construct new cache-aligned buffer. Buffer will be zero-initialized. 
+ */ + CacheAlignedBuffer(): + _backingBuffer{}, + _alignedArrayPtr(findCacheLineStart(_backingBuffer)) + {} + + /** + * @brief Copy from other cache-aligned buffer. Buffer memory will be copied. + */ + CacheAlignedBuffer(CacheAlignedBuffer const & other): + _alignedArrayPtr(findCacheLineStart(_backingBuffer)) + { + memcpy(this->_alignedArrayPtr, other._alignedArrayPtr, BufferSize * sizeof(DataT)); + } + + /** + * @brief Assign from other cache-aligned buffer. Buffer memory will be assigned. + */ + CacheAlignedBuffer & operator=(CacheAlignedBuffer const & other) + { + memcpy(this->_alignedArrayPtr, other._alignedArrayPtr, BufferSize * sizeof(DataT)); + } + + /** + * @brief Get a pointer to the aligned data array inside the buffer + */ + DataT * data() { return _alignedArrayPtr; } + + /** + * @brief Get a pointer to the aligned data array inside the buffer (const version) + */ + DataT const * data() const { return _alignedArrayPtr; } + + /** + * @brief Element access + */ + DataT & operator[](size_t index) { return _alignedArrayPtr[index]; } + + /** + * @brief Element access (const) + */ + DataT operator[](size_t index) const { return _alignedArrayPtr[index]; } + + /** + * @brief Get iterator for start of buffer + */ + iterator begin() { return _alignedArrayPtr; } + + /** + * @brief Get iterator for start of buffer + */ + const_iterator begin() const { return _alignedArrayPtr; } + + /** + * @brief Get iterator for end of buffer + */ + iterator end() { return _alignedArrayPtr + BufferSize; } + + /** + * @brief Get iterator for end of buffer + */ + const_iterator end() const { return _alignedArrayPtr + BufferSize; } + + /** + * @return The maximum amount of DataT elements that this buffer can hold + */ + constexpr size_t capacity() { return BufferSize; } + + /** + * @brief If this MCU has a data cache, this function _invalidates_ the buffer in the data cache. + * + * Invalidation means that the next time code access the buffer data, it is guaranteed to be fetched from + * main memory rather than from the CPU cache. If there are changes made to the data which have not been + * flushed back to main memory, those changes will be lost! + */ + void invalidate() + { +#if __DCACHE_PRESENT + SCB_InvalidateDCache_by_Addr(_alignedArrayPtr, BufferSize * sizeof(DataT)); +#endif + } +}; + +} + +#endif //MBED_CACHEALIGNEDBUFFER_H diff --git a/storage/blockdevice/COMPONENT_SD/include/SD/SDBlockDevice.h b/storage/blockdevice/COMPONENT_SD/include/SD/SDBlockDevice.h index b1c7562596f..246fed59538 100644 --- a/storage/blockdevice/COMPONENT_SD/include/SD/SDBlockDevice.h +++ b/storage/blockdevice/COMPONENT_SD/include/SD/SDBlockDevice.h @@ -57,6 +57,10 @@ * Access an SD Card using SPI bus */ class SDBlockDevice : public mbed::BlockDevice { + + // Only HC block size is supported. Making this a static constant reduces code size. 
+ static constexpr uint32_t _block_size = 512; /*!< Block size supported for SDHC card is 512 bytes */ + public: /** Creates an SDBlockDevice on a SPI bus specified by pins (using dynamic pin-map) * @@ -302,7 +306,6 @@ class SDBlockDevice : public mbed::BlockDevice { } rtos::Mutex _mutex; - static const uint32_t _block_size; uint32_t _erase_size; bool _is_initialized; bool _dbg; @@ -314,6 +317,7 @@ class SDBlockDevice : public mbed::BlockDevice { #if DEVICE_SPI_ASYNCH bool _async_spi_enabled = false; + mbed::CacheAlignedBuffer _async_data_buffer; #endif }; diff --git a/storage/blockdevice/COMPONENT_SD/source/SDBlockDevice.cpp b/storage/blockdevice/COMPONENT_SD/source/SDBlockDevice.cpp index 381071b6803..8e9ac5fb902 100644 --- a/storage/blockdevice/COMPONENT_SD/source/SDBlockDevice.cpp +++ b/storage/blockdevice/COMPONENT_SD/source/SDBlockDevice.cpp @@ -181,7 +181,6 @@ using namespace std::chrono; #define SD_BLOCK_DEVICE_ERROR_ERASE -5010 /*!< Erase error: reset/sequence */ #define SD_BLOCK_DEVICE_ERROR_WRITE -5011 /*!< SPI Write error: !SPI_DATA_ACCEPTED */ -#define BLOCK_SIZE_HC 512 /*!< Block size supported for SD card is 512 bytes */ #define WRITE_BL_PARTIAL 0 /*!< Partial block write - Not supported */ #define SPI_CMD(x) (0x40 | (x & 0x3f)) @@ -248,9 +247,6 @@ using namespace std::chrono; #define SPI_READ_ERROR_ECC_C (0x1 << 2) /*!< Card ECC failed */ #define SPI_READ_ERROR_OFR (0x1 << 3) /*!< Out of Range */ -// Only HC block size is supported. Making this a static constant reduces code size. -const uint32_t SDBlockDevice::_block_size = BLOCK_SIZE_HC; - #if MBED_CONF_SD_CRC_ENABLED SDBlockDevice::SDBlockDevice(PinName mosi, PinName miso, PinName sclk, PinName cs, uint64_t hz, bool crc_on) : _sectors(0), _spi(mosi, miso, sclk, cs, use_gpio_ssel), _is_initialized(0), @@ -269,7 +265,7 @@ SDBlockDevice::SDBlockDevice(PinName mosi, PinName miso, PinName sclk, PinName c _init_sck = MBED_CONF_SD_INIT_FREQUENCY; _transfer_sck = hz; - _erase_size = BLOCK_SIZE_HC; + _erase_size = _block_size; } #if MBED_CONF_SD_CRC_ENABLED @@ -290,7 +286,7 @@ SDBlockDevice::SDBlockDevice(const spi_pinmap_t &spi_pinmap, PinName cs, uint64_ _init_sck = MBED_CONF_SD_INIT_FREQUENCY; _transfer_sck = hz; - _erase_size = BLOCK_SIZE_HC; + _erase_size = _block_size; } SDBlockDevice::~SDBlockDevice() @@ -925,7 +921,16 @@ int SDBlockDevice::_read_bytes(uint8_t *buffer, uint32_t length) // read data #if DEVICE_SPI_ASYNCH if (_async_spi_enabled) { - _spi.transfer_and_wait(nullptr, 0, buffer, length); + if(length > _async_data_buffer.capacity()) + { + return SD_BLOCK_DEVICE_ERROR_PARAMETER; + } + + // Do read into cache aligned buffer, then copy data into application buffer + if (_spi.transfer_and_wait(nullptr, 0, _async_data_buffer, length) != 0) { + return SD_BLOCK_DEVICE_ERROR_WRITE; + } + memcpy(buffer, _async_data_buffer.data(), length); } else #endif { @@ -968,9 +973,16 @@ int SDBlockDevice::_read(uint8_t *buffer, uint32_t length) // read data #if DEVICE_SPI_ASYNCH if (_async_spi_enabled) { - if (_spi.transfer_and_wait(nullptr, 0, buffer, length) != 0) { + if(length > _async_data_buffer.capacity()) + { + return SD_BLOCK_DEVICE_ERROR_PARAMETER; + } + + // Do read into cache aligned buffer, then copy data into application buffer + if (_spi.transfer_and_wait(nullptr, 0, _async_data_buffer, length) != 0) { return SD_BLOCK_DEVICE_ERROR_WRITE; } + memcpy(buffer, _async_data_buffer.data(), length); } else #endif { @@ -1067,7 +1079,13 @@ bd_size_t SDBlockDevice::_sd_sectors() debug_if(SD_DBG, "Didn't get a response from the 
disk\n"); return 0; } +#ifdef DEVICE_SPI_ASYNCH + CacheAlignedBuffer csd_buffer; + uint8_t * csd = csd_buffer.data(); +#else uint8_t csd[16]; +#endif + if (_read_bytes(csd, 16) != 0) { debug_if(SD_DBG, "Couldn't read csd response from disk\n"); return 0; @@ -1091,10 +1109,10 @@ bd_size_t SDBlockDevice::_sd_sectors() // ERASE_BLK_EN = 1: Erase in multiple of 512 bytes supported if (ext_bits(csd, 46, 46)) { - _erase_size = BLOCK_SIZE_HC; + _erase_size = _block_size; } else { // ERASE_BLK_EN = 1: Erase in multiple of SECTOR_SIZE supported - _erase_size = BLOCK_SIZE_HC * (ext_bits(csd, 45, 39) + 1); + _erase_size = _block_size * (ext_bits(csd, 45, 39) + 1); } break; @@ -1105,7 +1123,7 @@ bd_size_t SDBlockDevice::_sd_sectors() debug_if(SD_DBG, "Sectors: 0x%" PRIx64 "x : %" PRIu64 "\n", blocks, blocks); debug_if(SD_DBG, "Capacity: %" PRIu64 " MB\n", (blocks / (2048U))); // ERASE_BLK_EN is fixed to 1, which means host can erase one or multiple of 512 bytes. - _erase_size = BLOCK_SIZE_HC; + _erase_size = _block_size; break; default: diff --git a/targets/TARGET_Cypress/TARGET_PSOC6/cy_spi_api.c b/targets/TARGET_Cypress/TARGET_PSOC6/cy_spi_api.c index 6780ecff399..de40eed7301 100644 --- a/targets/TARGET_Cypress/TARGET_PSOC6/cy_spi_api.c +++ b/targets/TARGET_Cypress/TARGET_PSOC6/cy_spi_api.c @@ -234,7 +234,7 @@ const PinMap *spi_slave_cs_pinmap(void) #if DEVICE_SPI_ASYNCH -void spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, size_t rx_length, MBED_UNUSED uint8_t bit_width, uint32_t handler, uint32_t event, MBED_UNUSED DMAUsage hint) +bool spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, size_t rx_length, MBED_UNUSED uint8_t bit_width, uint32_t handler, uint32_t event, MBED_UNUSED DMAUsage hint) { struct spi_s *spi = cy_get_spi(obj); spi->async_handler = (void (*)(void))handler; @@ -248,6 +248,8 @@ void spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, MBED_ERROR(MBED_MAKE_ERROR(MBED_MODULE_DRIVER_SPI, MBED_ERROR_CODE_FAILED_OPERATION), "cyhal_spi_transfer_async"); } spi->async_in_progress = true; + + return false; // Currently we always use interrupts, not DMA } uint32_t spi_irq_handler_asynch(spi_t *obj) diff --git a/targets/TARGET_Freescale/TARGET_MCUXpresso_MCUS/TARGET_MCU_K64F/spi_api.c b/targets/TARGET_Freescale/TARGET_MCUXpresso_MCUS/TARGET_MCU_K64F/spi_api.c index a35f0a9cdc0..1ac6a1856bb 100644 --- a/targets/TARGET_Freescale/TARGET_MCUXpresso_MCUS/TARGET_MCU_K64F/spi_api.c +++ b/targets/TARGET_Freescale/TARGET_MCUXpresso_MCUS/TARGET_MCU_K64F/spi_api.c @@ -358,7 +358,7 @@ static void spi_buffer_set(spi_t *obj, const void *tx, uint32_t tx_length, void obj->rx_buff.width = bit_width; } -void spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, size_t rx_length, uint8_t bit_width, uint32_t handler, uint32_t event, DMAUsage hint) +bool spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, size_t rx_length, uint8_t bit_width, uint32_t handler, uint32_t event, DMAUsage hint) { if (spi_active(obj)) { return; @@ -400,6 +400,8 @@ void spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, // Can't enter deep sleep as long as SPI transfer is active sleep_manager_lock_deep_sleep(); } + + return hint != DMA_USAGE_NEVER; // DMA will be used as long as the hint is not NEVER } uint32_t spi_irq_handler_asynch(spi_t *obj) diff --git a/targets/TARGET_NORDIC/TARGET_NRF5x/TARGET_NRF52/spi_api.c b/targets/TARGET_NORDIC/TARGET_NRF5x/TARGET_NRF52/spi_api.c index 
3cd900edca1..77b0e0a70f9 100644 --- a/targets/TARGET_NORDIC/TARGET_NRF5x/TARGET_NRF52/spi_api.c +++ b/targets/TARGET_NORDIC/TARGET_NRF5x/TARGET_NRF52/spi_api.c @@ -882,8 +882,10 @@ static void nordic_nrf5_spi_event_handler(nrfx_spi_evt_t const *p_event, void *p * Parameter event The logical OR of events to be registered * Parameter handler SPI interrupt handler * Parameter hint A suggestion for how to use DMA with this transfer + * + * Returns bool True if DMA was used for the transfer, false otherwise */ -void spi_master_transfer(spi_t *obj, +bool spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, @@ -949,6 +951,12 @@ void spi_master_transfer(spi_t *obj, /* Signal callback handler. */ callback(); } + +#if NRFX_CHECK(NRFX_SPIM_ENABLED) + return true; // We always use DMA when the SPIM peripheral is available +#else + return false; // We always use interrupts when the SPI peripheral is available +#endif } /** The asynchronous IRQ handler diff --git a/targets/TARGET_NUVOTON/TARGET_M2354/spi_api.c b/targets/TARGET_NUVOTON/TARGET_M2354/spi_api.c index 3e6342935aa..3a8cd3f1276 100644 --- a/targets/TARGET_NUVOTON/TARGET_M2354/spi_api.c +++ b/targets/TARGET_NUVOTON/TARGET_M2354/spi_api.c @@ -445,7 +445,7 @@ void spi_slave_write(spi_t *obj, int value) #endif #if DEVICE_SPI_ASYNCH -void spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, size_t rx_length, uint8_t bit_width, uint32_t handler, uint32_t event, DMAUsage hint) +bool spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, size_t rx_length, uint8_t bit_width, uint32_t handler, uint32_t event, DMAUsage hint) { SPI_T *spi_base = (SPI_T *) NU_MODBASE(obj->spi.spi); SPI_SET_DATA_WIDTH(spi_base, bit_width); @@ -573,6 +573,8 @@ void spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, /* Don't enable SPI TX/RX threshold interrupts as commented above */ } + + return obj->spi.dma_usage != DMA_USAGE_NEVER; } /** diff --git a/targets/TARGET_NUVOTON/TARGET_M251/spi_api.c b/targets/TARGET_NUVOTON/TARGET_M251/spi_api.c index d8e9d378aca..25eeb2d1c32 100644 --- a/targets/TARGET_NUVOTON/TARGET_M251/spi_api.c +++ b/targets/TARGET_NUVOTON/TARGET_M251/spi_api.c @@ -369,7 +369,7 @@ void spi_slave_write(spi_t *obj, int value) #endif #if DEVICE_SPI_ASYNCH -void spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, size_t rx_length, uint8_t bit_width, uint32_t handler, uint32_t event, DMAUsage hint) +bool spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, size_t rx_length, uint8_t bit_width, uint32_t handler, uint32_t event, DMAUsage hint) { SPI_T *spi_base = (SPI_T *) NU_MODBASE(obj->spi.spi); SPI_SET_DATA_WIDTH(spi_base, bit_width); @@ -497,6 +497,8 @@ void spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, /* Don't enable SPI TX/RX threshold interrupts as commented above */ } + + return obj->spi.dma_usage != DMA_USAGE_NEVER; } /** diff --git a/targets/TARGET_NUVOTON/TARGET_M261/spi_api.c b/targets/TARGET_NUVOTON/TARGET_M261/spi_api.c index ebf7da50f25..313781728b7 100644 --- a/targets/TARGET_NUVOTON/TARGET_M261/spi_api.c +++ b/targets/TARGET_NUVOTON/TARGET_M261/spi_api.c @@ -390,7 +390,7 @@ void spi_slave_write(spi_t *obj, int value) #endif #if DEVICE_SPI_ASYNCH -void spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, size_t rx_length, uint8_t bit_width, uint32_t handler, uint32_t event, DMAUsage hint) +bool spi_master_transfer(spi_t *obj, const void *tx, size_t
tx_length, void *rx, size_t rx_length, uint8_t bit_width, uint32_t handler, uint32_t event, DMAUsage hint) { SPI_T *spi_base = (SPI_T *) NU_MODBASE(obj->spi.spi); SPI_SET_DATA_WIDTH(spi_base, bit_width); @@ -518,6 +518,8 @@ void spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, /* Don't enable SPI TX/RX threshold interrupts as commented above */ } + + return obj->spi.dma_usage != DMA_USAGE_NEVER; } /** diff --git a/targets/TARGET_NUVOTON/TARGET_M451/spi_api.c b/targets/TARGET_NUVOTON/TARGET_M451/spi_api.c index 8b2de0ed2e4..2e80b1fb691 100644 --- a/targets/TARGET_NUVOTON/TARGET_M451/spi_api.c +++ b/targets/TARGET_NUVOTON/TARGET_M451/spi_api.c @@ -383,7 +383,7 @@ void spi_slave_write(spi_t *obj, int value) #endif #if DEVICE_SPI_ASYNCH -void spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, size_t rx_length, uint8_t bit_width, uint32_t handler, uint32_t event, DMAUsage hint) +bool spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, size_t rx_length, uint8_t bit_width, uint32_t handler, uint32_t event, DMAUsage hint) { SPI_T *spi_base = (SPI_T *) NU_MODBASE(obj->spi.spi); SPI_SET_DATA_WIDTH(spi_base, bit_width); @@ -501,6 +501,8 @@ void spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, /* Don't enable SPI TX/RX threshold interrupts as commented above */ } + + return obj->spi.dma_usage != DMA_USAGE_NEVER; } /** diff --git a/targets/TARGET_NUVOTON/TARGET_M460/spi_api.c b/targets/TARGET_NUVOTON/TARGET_M460/spi_api.c index c9f7d4802e8..126bde64428 100644 --- a/targets/TARGET_NUVOTON/TARGET_M460/spi_api.c +++ b/targets/TARGET_NUVOTON/TARGET_M460/spi_api.c @@ -488,7 +488,7 @@ void spi_slave_write(spi_t *obj, int value) #endif #if DEVICE_SPI_ASYNCH -void spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, size_t rx_length, uint8_t bit_width, uint32_t handler, uint32_t event, DMAUsage hint) +bool spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, size_t rx_length, uint8_t bit_width, uint32_t handler, uint32_t event, DMAUsage hint) { SPI_T *spi_base = (SPI_T *) NU_MODBASE(obj->spi.spi); SPI_SET_DATA_WIDTH(spi_base, bit_width); @@ -616,6 +616,8 @@ void spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, /* Don't enable SPI TX/RX threshold interrupts as commented above */ } + + return obj->spi.dma_usage != DMA_USAGE_NEVER; } /** diff --git a/targets/TARGET_NUVOTON/TARGET_M480/spi_api.c b/targets/TARGET_NUVOTON/TARGET_M480/spi_api.c index ca5cf79e8b4..e3d2b552e11 100644 --- a/targets/TARGET_NUVOTON/TARGET_M480/spi_api.c +++ b/targets/TARGET_NUVOTON/TARGET_M480/spi_api.c @@ -439,7 +439,7 @@ void spi_slave_write(spi_t *obj, int value) #endif #if DEVICE_SPI_ASYNCH -void spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, size_t rx_length, uint8_t bit_width, uint32_t handler, uint32_t event, DMAUsage hint) +bool spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, size_t rx_length, uint8_t bit_width, uint32_t handler, uint32_t event, DMAUsage hint) { SPI_T *spi_base = (SPI_T *) NU_MODBASE(obj->spi.spi); SPI_SET_DATA_WIDTH(spi_base, bit_width); @@ -567,6 +567,8 @@ void spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, /* Don't enable SPI TX/RX threshold interrupts as commented above */ } + + return obj->spi.dma_usage != DMA_USAGE_NEVER; } /** diff --git a/targets/TARGET_NUVOTON/TARGET_NANO100/spi_api.c b/targets/TARGET_NUVOTON/TARGET_NANO100/spi_api.c index 
3862edf5162..c546862fc93 100644 --- a/targets/TARGET_NUVOTON/TARGET_NANO100/spi_api.c +++ b/targets/TARGET_NUVOTON/TARGET_NANO100/spi_api.c @@ -434,7 +434,7 @@ void spi_slave_write(spi_t *obj, int value) #endif #if DEVICE_SPI_ASYNCH -void spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, size_t rx_length, uint8_t bit_width, uint32_t handler, uint32_t event, DMAUsage hint) +bool spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, size_t rx_length, uint8_t bit_width, uint32_t handler, uint32_t event, DMAUsage hint) { SPI_T *spi_base = (SPI_T *) NU_MODBASE(obj->spi.spi); SPI_SET_DATA_WIDTH(spi_base, bit_width); @@ -549,6 +549,8 @@ void spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, PDMA_Trigger(obj->spi.dma_chn_id_rx); PDMA_Trigger(obj->spi.dma_chn_id_tx); } + + return obj->spi.dma_usage != DMA_USAGE_NEVER; } /** diff --git a/targets/TARGET_NUVOTON/TARGET_NUC472/spi_api.c b/targets/TARGET_NUVOTON/TARGET_NUC472/spi_api.c index ddcf800de57..2c154dff563 100644 --- a/targets/TARGET_NUVOTON/TARGET_NUC472/spi_api.c +++ b/targets/TARGET_NUVOTON/TARGET_NUC472/spi_api.c @@ -389,7 +389,7 @@ void spi_slave_write(spi_t *obj, int value) #endif #if DEVICE_SPI_ASYNCH -void spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, size_t rx_length, uint8_t bit_width, uint32_t handler, uint32_t event, DMAUsage hint) +bool spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, size_t rx_length, uint8_t bit_width, uint32_t handler, uint32_t event, DMAUsage hint) { SPI_T *spi_base = (SPI_T *) NU_MODBASE(obj->spi.spi); SPI_SET_DATA_WIDTH(spi_base, bit_width); @@ -503,6 +503,8 @@ void spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, /* Don't enable SPI TX/RX threshold interrupts as commented above */ } + + return obj->spi.dma_usage != DMA_USAGE_NEVER; } /** diff --git a/targets/TARGET_RENESAS/TARGET_RZ_A1XX/spi_api.c b/targets/TARGET_RENESAS/TARGET_RZ_A1XX/spi_api.c index 294de6efba6..0b6a09c17b4 100644 --- a/targets/TARGET_RENESAS/TARGET_RZ_A1XX/spi_api.c +++ b/targets/TARGET_RENESAS/TARGET_RZ_A1XX/spi_api.c @@ -518,7 +518,7 @@ static void spi_async_read(spi_t *obj) * ASYNCHRONOUS HAL ******************************************************************************/ -void spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, size_t rx_length, uint8_t bit_width, uint32_t handler, uint32_t event, DMAUsage hint) +bool spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, size_t rx_length, uint8_t bit_width, uint32_t handler, uint32_t event, DMAUsage hint) { int i; MBED_ASSERT(obj); @@ -556,6 +556,8 @@ void spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, spi_irqs_set(obj, 1); spi_async_write(obj); + + return false; // Currently we always use interrupts, not DMA } uint32_t spi_irq_handler_asynch(spi_t *obj) diff --git a/targets/TARGET_RENESAS/TARGET_RZ_A2XX/spi_api.c b/targets/TARGET_RENESAS/TARGET_RZ_A2XX/spi_api.c index 295ae9c4876..4e402514e45 100644 --- a/targets/TARGET_RENESAS/TARGET_RZ_A2XX/spi_api.c +++ b/targets/TARGET_RENESAS/TARGET_RZ_A2XX/spi_api.c @@ -526,7 +526,7 @@ static void spi_async_read(spi_t *obj) * ASYNCHRONOUS HAL ******************************************************************************/ -void spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, size_t rx_length, uint8_t bit_width, uint32_t handler, uint32_t event, DMAUsage hint) +bool 
spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, size_t rx_length, uint8_t bit_width, uint32_t handler, uint32_t event, DMAUsage hint) { int i; MBED_ASSERT(obj); @@ -564,6 +564,8 @@ void spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, spi_irqs_set(obj, 1); spi_async_write(obj); + + return false; // Currently we always use interrupts, not DMA } uint32_t spi_irq_handler_asynch(spi_t *obj) diff --git a/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dma_ex.c b/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dma_ex.c index e55c3d54ceb..c5f2ef7d9d2 100644 --- a/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dma_ex.c +++ b/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dma_ex.c @@ -290,7 +290,18 @@ HAL_StatusTypeDef HAL_DMAEx_MultiBufferStart_IT(DMA_HandleTypeDef *hdma, uint32_ { /* Enable Common interrupts*/ MODIFY_REG(((DMA_Stream_TypeDef *)hdma->Instance)->CR, (DMA_IT_TC | DMA_IT_TE | DMA_IT_DME | DMA_IT_HT), (DMA_IT_TC | DMA_IT_TE | DMA_IT_DME)); - ((DMA_Stream_TypeDef *)hdma->Instance)->FCR |= DMA_IT_FE; + + /* Mbed CE mod: Only enable the FIFO Error interrupt if the FIFO is actually enabled. + * If it's not enabled, then this interrupt can trigger spuriously from memory bus + * stalls that the DMA engine encounters, and this creates random DMA failures. + * Reference forum thread here: + * https://community.st.com/t5/stm32-mcus-products/spi-dma-fifo-error-issue-feifx/td-p/537074 + * also: https://community.st.com/t5/stm32-mcus-touch-gfx-and-gui/spi-dma-error-is-occurred-when-the-other-dma-memory-to-memory-is/td-p/191590 + */ + if(((DMA_Stream_TypeDef *)hdma->Instance)->FCR & DMA_SxFCR_DMDIS) + { + ((DMA_Stream_TypeDef *)hdma->Instance)->FCR |= DMA_IT_FE; + } if((hdma->XferHalfCpltCallback != NULL) || (hdma->XferM1HalfCpltCallback != NULL)) { diff --git a/targets/TARGET_STM/stm_spi_api.c b/targets/TARGET_STM/stm_spi_api.c index f444afeb1e6..65d65e059cf 100644 --- a/targets/TARGET_STM/stm_spi_api.c +++ b/targets/TARGET_STM/stm_spi_api.c @@ -1410,13 +1410,15 @@ int spi_master_block_write(spi_t *obj, const char *tx_buffer, int tx_length, int total = (tx_length > rx_length) ? tx_length : rx_length; if (handle->Init.Direction == SPI_DIRECTION_2LINES) { + const int word_size = 0x01 << bitshift; + int write_fill_frame = write_fill; /* extend fill symbols for 16/32 bit modes */ - for (int i = 0; i < bitshift; i++) { + for (int i = 0; i < word_size; i++) { write_fill_frame = (write_fill_frame << 8) | write_fill; } - const int word_size = 0x01 << bitshift; + for (int i = 0; i < total; i += word_size) { int out = (i < tx_length) ? 
spi_get_word_from_buffer(tx_buffer + i, bitshift) : write_fill_frame; int in = spi_master_write(obj, out); @@ -1534,8 +1536,8 @@ typedef enum { } transfer_type_t; -/// @returns the number of bytes transferred, or `0` if nothing transferred -static int spi_master_start_asynch_transfer(spi_t *obj, transfer_type_t transfer_type, const void *tx, void *rx, size_t length, DMAUsage hint) +/// @returns True if DMA was used, false otherwise +static bool spi_master_start_asynch_transfer(spi_t *obj, transfer_type_t transfer_type, const void *tx, void *rx, size_t length, DMAUsage hint) { struct spi_s *spiobj = SPI_S(obj); SPI_HandleTypeDef *handle = &(spiobj->handle); @@ -1547,7 +1549,6 @@ static int spi_master_start_asynch_transfer(spi_t *obj, transfer_type_t transfer size_t words; obj->spi.transfer_type = transfer_type; - words = length >> bitshift; bool useDMA = false; @@ -1579,6 +1580,8 @@ static int spi_master_start_asynch_transfer(spi_t *obj, transfer_type_t transfer useDMA = true; } + + obj->spi.curr_transfer_uses_dma = useDMA; #endif DEBUG_PRINTF("SPI inst=0x%8X Start: type=%u, length=%u, DMA=%d\r\n", (int) handle->Instance, transfer_type, length, !!useDMA); @@ -1590,15 +1593,6 @@ static int spi_master_start_asynch_transfer(spi_t *obj, transfer_type_t transfer NVIC_SetPriority(irq_n, 1); NVIC_EnableIRQ(irq_n); -#if defined(STM32_SPI_CAPABILITY_DMA) && defined(__DCACHE_PRESENT) - if (useDMA && transfer_type != SPI_TRANSFER_TYPE_RX) - { - // For chips with a cache (e.g. Cortex-M7), we need to evict the Tx data from cache to main memory. - // This ensures that the DMA controller can see the most up-to-date copy of the data. - SCB_CleanDCache_by_Addr((volatile void*)tx, length); - } -#endif - // flush FIFO #if defined(SPI_FLAG_FRLVL) HAL_SPIEx_FlushRxFifo(handle); @@ -1635,7 +1629,7 @@ static int spi_master_start_asynch_transfer(spi_t *obj, transfer_type_t transfer if (useDMA) { #if defined(STM32_SPI_CAPABILITY_DMA) && defined(__DCACHE_PRESENT) - // For chips with a cache (e.g. Cortex-M7), we need to evict the Tx data from cache to main memory. + // For chips with a cache (e.g. Cortex-M7), we need to evict the Tx fill data from cache to main memory. // This ensures that the DMA controller can see the most up-to-date copy of the data. SCB_CleanDCache_by_Addr(rx, length); #endif @@ -1650,15 +1644,6 @@ static int spi_master_start_asynch_transfer(spi_t *obj, transfer_type_t transfer length = 0; } -#if defined(STM32_SPI_CAPABILITY_DMA) && defined(__DCACHE_PRESENT) - if (useDMA && transfer_type != SPI_TRANSFER_TYPE_TX) - { - // For chips with a cache (e.g. Cortex-M7), we need to invalidate the Rx data in cache. - // This ensures that the CPU will fetch the data from SRAM instead of using its cache. 
- SCB_InvalidateDCache_by_Addr(rx, length); - } -#endif - if (rc) { #if defined(SPI_IP_VERSION_V2) // enable SPI back in case of error @@ -1667,14 +1652,13 @@ static int spi_master_start_asynch_transfer(spi_t *obj, transfer_type_t transfer } #endif DEBUG_PRINTF("SPI: RC=%u\n", rc); - length = 0; } - return length; + return useDMA; } // asynchronous API -void spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, size_t rx_length, uint8_t bit_width, uint32_t handler, uint32_t event, DMAUsage hint) +bool spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, size_t rx_length, uint8_t bit_width, uint32_t handler, uint32_t event, DMAUsage hint) { struct spi_s *spiobj = SPI_S(obj); SPI_HandleTypeDef *handle = &(spiobj->handle); @@ -1687,7 +1671,7 @@ void spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, // don't do anything, if the buffers aren't valid if (!use_tx && !use_rx) { - return; + return false; } // copy the buffers to the SPI object @@ -1717,11 +1701,14 @@ void spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, obj->tx_buff.length = size; obj->rx_buff.length = size; } - spi_master_start_asynch_transfer(obj, SPI_TRANSFER_TYPE_TXRX, tx, rx, size, hint); + return spi_master_start_asynch_transfer(obj, SPI_TRANSFER_TYPE_TXRX, tx, rx, size, hint); } else if (use_tx) { - spi_master_start_asynch_transfer(obj, SPI_TRANSFER_TYPE_TX, tx, NULL, tx_length, hint); + return spi_master_start_asynch_transfer(obj, SPI_TRANSFER_TYPE_TX, tx, NULL, tx_length, hint); } else if (use_rx) { - spi_master_start_asynch_transfer(obj, SPI_TRANSFER_TYPE_RX, NULL, rx, rx_length, hint); + return spi_master_start_asynch_transfer(obj, SPI_TRANSFER_TYPE_RX, NULL, rx, rx_length, hint); + } + else { + return false; } } @@ -1830,21 +1817,10 @@ void spi_abort_asynch(spi_t *obj) NVIC_ClearPendingIRQ(irq_n); NVIC_DisableIRQ(irq_n); -#ifdef STM32_SPI_CAPABILITY_DMA - // Abort DMA transfers if DMA channels have been allocated - if(spiobj->txDMAInitialized) { - HAL_DMA_Abort_IT(spiobj->handle.hdmatx); - } - if(spiobj->rxDMAInitialized) { - HAL_DMA_Abort_IT(spiobj->handle.hdmarx); - } -#endif - - // clean-up - LL_SPI_Disable(SPI_INST(obj)); - HAL_SPI_DeInit(handle); - - init_spi(obj); + // Use HAL abort function. + // Conveniently, this is smart enough to automatically abort the DMA transfer + // if DMA was used. + HAL_SPI_Abort_IT(handle); } #endif //DEVICE_SPI_ASYNCH diff --git a/targets/TARGET_STM/stm_spi_api.h b/targets/TARGET_STM/stm_spi_api.h index 6fb22c93c4e..f34f7d17ef7 100644 --- a/targets/TARGET_STM/stm_spi_api.h +++ b/targets/TARGET_STM/stm_spi_api.h @@ -36,6 +36,7 @@ struct spi_s { #if DEVICE_SPI_ASYNCH uint32_t event; uint8_t transfer_type; + bool curr_transfer_uses_dma; // Callback function for when we get an interrupt on an async transfer. 
// This will point, through a bit of indirection, to SPI::irq_handler_asynch() diff --git a/targets/TARGET_Silicon_Labs/TARGET_EFM32/spi_api.c b/targets/TARGET_Silicon_Labs/TARGET_EFM32/spi_api.c index c7a2b76d09d..02d44822d2d 100644 --- a/targets/TARGET_Silicon_Labs/TARGET_EFM32/spi_api.c +++ b/targets/TARGET_Silicon_Labs/TARGET_EFM32/spi_api.c @@ -1168,7 +1168,7 @@ void spi_master_transfer_dma(spi_t *obj, const void *txdata, void *rxdata, int t * @param[in] handler SPI interrupt handler * @param[in] hint A suggestion for how to use DMA with this transfer */ -void spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, size_t rx_length, uint8_t bit_width, uint32_t handler, uint32_t event, DMAUsage hint) +bool spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, size_t rx_length, uint8_t bit_width, uint32_t handler, uint32_t event, DMAUsage hint) { if( spi_active(obj) ) return; diff --git a/targets/TARGET_TOSHIBA/TARGET_TMPM46B/spi_api.c b/targets/TARGET_TOSHIBA/TARGET_TMPM46B/spi_api.c index bd98132b0b1..ddce123061a 100644 --- a/targets/TARGET_TOSHIBA/TARGET_TMPM46B/spi_api.c +++ b/targets/TARGET_TOSHIBA/TARGET_TMPM46B/spi_api.c @@ -409,7 +409,7 @@ const PinMap *spi_slave_cs_pinmap() #ifdef DEVICE_SPI_ASYNCH -void spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, size_t rx_length, uint8_t bit_width, +bool spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, size_t rx_length, uint8_t bit_width, uint32_t handler, uint32_t event, DMAUsage hint) { struct spi_s *obj_s = SPI_S(obj); @@ -455,6 +455,8 @@ void spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, SSP_Enable(spi); NVIC_EnableIRQ(obj_s->irqn); + + return false; // Currently we always use interrupts, not DMA } uint32_t spi_irq_handler_asynch(spi_t *obj) diff --git a/targets/TARGET_TOSHIBA/TARGET_TMPM4G9/spi_api.c b/targets/TARGET_TOSHIBA/TARGET_TMPM4G9/spi_api.c index f8ebe48fa29..3e3cfe722e8 100644 --- a/targets/TARGET_TOSHIBA/TARGET_TMPM4G9/spi_api.c +++ b/targets/TARGET_TOSHIBA/TARGET_TMPM4G9/spi_api.c @@ -484,7 +484,7 @@ const PinMap *spi_slave_cs_pinmap() #ifdef DEVICE_SPI_ASYNCH -void spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, size_t rx_length, uint8_t bit_width, uint32_t handler, uint32_t event, DMAUsage hint) +bool spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, size_t rx_length, uint8_t bit_width, uint32_t handler, uint32_t event, DMAUsage hint) { struct spi_s *obj_s = SPI_S(obj); tspi_t *p_obj = &(obj_s->p_obj); @@ -535,6 +535,8 @@ void spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, NVIC_EnableIRQ(obj_s->rxirqn); } NVIC_EnableIRQ(obj_s->errirqn); + + return false; // Currently we always use interrupts, not DMA } uint32_t spi_irq_handler_asynch(spi_t *obj) diff --git a/targets/TARGET_TOSHIBA/TARGET_TMPM4GR/spi_api.c b/targets/TARGET_TOSHIBA/TARGET_TMPM4GR/spi_api.c index 7ebaef77823..1a997914003 100644 --- a/targets/TARGET_TOSHIBA/TARGET_TMPM4GR/spi_api.c +++ b/targets/TARGET_TOSHIBA/TARGET_TMPM4GR/spi_api.c @@ -517,7 +517,7 @@ const PinMap *spi_slave_cs_pinmap() #if DEVICE_SPI_ASYNCH -void spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, size_t rx_length, uint8_t bit_width, +bool spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, size_t rx_length, uint8_t bit_width, uint32_t handler, uint32_t event, DMAUsage hint) { struct spi_s *spiobj = SPI_S(obj); @@ -574,6 +574,8 @@ 
void spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, NVIC_EnableIRQ(p_irqn->Error); NVIC_EnableIRQ(p_irqn->Tx); NVIC_EnableIRQ(p_irqn->Rx); + + return false; // Currently we always use interrupts, not DMA } uint32_t spi_irq_handler_asynch(spi_t *obj) { diff --git a/targets/TARGET_TOSHIBA/TARGET_TMPM4NR/spi_api.c b/targets/TARGET_TOSHIBA/TARGET_TMPM4NR/spi_api.c index dc958ba927e..f288f5b3987 100644 --- a/targets/TARGET_TOSHIBA/TARGET_TMPM4NR/spi_api.c +++ b/targets/TARGET_TOSHIBA/TARGET_TMPM4NR/spi_api.c @@ -398,7 +398,7 @@ const PinMap *spi_slave_cs_pinmap() #if DEVICE_SPI_ASYNCH -void spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, size_t rx_length, uint8_t bit_width, +bool spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, size_t rx_length, uint8_t bit_width, uint32_t handler, uint32_t event, DMAUsage hint) { struct spi_s *spiobj = SPI_S(obj); @@ -455,6 +455,8 @@ void spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, NVIC_EnableIRQ(p_irqn->Error); NVIC_EnableIRQ(p_irqn->Tx); NVIC_EnableIRQ(p_irqn->Rx); + + return false; // Currently we always use interrupts, not DMA } uint32_t spi_irq_handler_asynch(spi_t *obj) { From cfbb8104e2fe4d75ca94a9eb7bd4bb8d3c22be64 Mon Sep 17 00:00:00 2001 From: Jamie Smith Date: Mon, 4 Dec 2023 00:39:55 -0800 Subject: [PATCH 2/8] Fix build on cache-less devices --- platform/include/platform/CacheAlignedBuffer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/platform/include/platform/CacheAlignedBuffer.h b/platform/include/platform/CacheAlignedBuffer.h index 61b6f09f6a0..97b5339a6a9 100644 --- a/platform/include/platform/CacheAlignedBuffer.h +++ b/platform/include/platform/CacheAlignedBuffer.h @@ -57,7 +57,7 @@ CacheAlignedBuffer { constexpr static size_t requiredSizeRoundedUp = (BufferSize * sizeof(DataT) + __SCB_DCACHE_LINE_SIZE - 1) & ~((__SCB_DCACHE_LINE_SIZE) - 1); constexpr static size_t backingBufferSizeBytes = requiredSizeRoundedUp + __SCB_DCACHE_LINE_SIZE - 1; #else - constexpr static size_t backingBufferSize = BufferSize * sizeof(DataT); + constexpr static size_t backingBufferSizeBytes = BufferSize * sizeof(DataT); #endif uint8_t _backingBuffer[backingBufferSizeBytes]; From b45357e3204880d4ee787ac30bd4a7c6207139de Mon Sep 17 00:00:00 2001 From: Jamie Smith Date: Mon, 4 Dec 2023 01:39:19 -0800 Subject: [PATCH 3/8] Fix a couple things I missed --- drivers/include/drivers/SPI.h | 9 +++++++-- drivers/source/SPI.cpp | 8 ++++---- targets/TARGET_Silicon_Labs/TARGET_EFM32/spi_api.c | 13 +++++++++++-- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/drivers/include/drivers/SPI.h b/drivers/include/drivers/SPI.h index 2c330266883..c12cd381cd9 100644 --- a/drivers/include/drivers/SPI.h +++ b/drivers/include/drivers/SPI.h @@ -195,6 +195,11 @@ const use_gpio_ssel_t use_gpio_ssel; * the transfer but others can execute). Here's a sample of how to send the same data as above * using the blocking async API:

* + *

Note that when using the asynchronous API, you must use the CacheAlignedBuffer class when declaring the + * receive buffer. This is because some processors' async SPI implementations require the received buffer to + * be at an address which is aligned to the processor cache line size. CacheAlignedBuffer takes care of this + * for you and provides functions (data(), begin(), end()) to access the underlying data in the buffer.

+ * * @code * #include "mbed.h" * @@ -204,8 +209,8 @@ const use_gpio_ssel_t use_gpio_ssel; * device.format(8, 0); * * uint8_t command[2] = {0x0A, 0x0B}; - * uint8_t response[2]; - * int result = device.transfer_and_wait(command, sizeof(command), response, sizeof(response)); + * CacheAlignedBuffer response; + * int result = device.transfer_and_wait(command, sizeof(command), response, sizeof(command)); * } * @endcode * diff --git a/drivers/source/SPI.cpp b/drivers/source/SPI.cpp index af7368e7b93..9c6a56dcbb0 100644 --- a/drivers/source/SPI.cpp +++ b/drivers/source/SPI.cpp @@ -397,7 +397,7 @@ void SPI::abort_transfer() } #if __DCACHE_PRESENT - if(_transfer_in_progress_uses_dma) + if(_transfer_in_progress_uses_dma && _transfer_in_progress_rx_len > 0) { // If the cache is present, invalidate the Rx data so it's loaded from main RAM. // We only want to do this if DMA actually got used for the transfer because, if interrupts @@ -492,10 +492,10 @@ void SPI::start_transfer(const void *tx_buffer, int tx_length, void *rx_buffer, } // Additionally, we have to make sure that there aren't any pending changes which could be written back - // to the Rx buffer memory by the cache at a later date, corrupting the DMA results + // to the Rx buffer memory by the cache at a later date, corrupting the DMA results. if(rx_length > 0) { - SCB_CleanInvalidateDCache_by_Addr(rx_buffer, rx_length); + SCB_InvalidateDCache_by_Addr(rx_buffer, rx_length); } _transfer_in_progress_rx_buffer = rx_buffer; _transfer_in_progress_rx_len = rx_length; @@ -547,7 +547,7 @@ void SPI::irq_handler_asynch(void) if ((event & SPI_EVENT_ALL)) { #if __DCACHE_PRESENT - if(_transfer_in_progress_uses_dma) + if(_transfer_in_progress_uses_dma && _transfer_in_progress_rx_len > 0) { // If the cache is present, invalidate the Rx data so it's loaded from main RAM. // We only want to do this if DMA actually got used for the transfer because, if interrupts diff --git a/targets/TARGET_Silicon_Labs/TARGET_EFM32/spi_api.c b/targets/TARGET_Silicon_Labs/TARGET_EFM32/spi_api.c index 02d44822d2d..0a852b5f9cb 100644 --- a/targets/TARGET_Silicon_Labs/TARGET_EFM32/spi_api.c +++ b/targets/TARGET_Silicon_Labs/TARGET_EFM32/spi_api.c @@ -1116,8 +1116,10 @@ static void spi_activate_dma(spi_t *obj, void* rxdata, const void* txdata, int t * * ALWAYS: use DMA if channels are available, and hold on to the channels after the transfer. * If the previous transfer has kept the channel, that channel will continue to get used. * +* Returns true if DMA was used, false otherwise. 
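+* +* As a quick sketch of the decision below (summarizing, not changing, the behavior): a hint of +* DMA_USAGE_NEVER always takes the IRQ path and returns false; any other hint returns true when +* DMA channels are (or were previously) acquired, and falls back to the IRQ path, returning +* false, when no channels are available.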
+* ********************************************************************/ -void spi_master_transfer_dma(spi_t *obj, const void *txdata, void *rxdata, int tx_length, int rx_length, void* cb, DMAUsage hint) +bool spi_master_transfer_dma(spi_t *obj, const void *txdata, void *rxdata, int tx_length, int rx_length, void* cb, DMAUsage hint) { /* Init DMA here to include it in the power figure */ dma_init(); @@ -1128,11 +1130,13 @@ void spi_master_transfer_dma(spi_t *obj, const void *txdata, void *rxdata, int t if (hint != DMA_USAGE_NEVER && obj->spi.dmaOptionsTX.dmaUsageState == DMA_USAGE_ALLOCATED) { /* setup has already been done, so just activate the transfer */ spi_activate_dma(obj, rxdata, txdata, tx_length, rx_length); + return true; } else if (hint == DMA_USAGE_NEVER) { /* use IRQ */ obj->spi.spi->IFC = 0xFFFFFFFF; spi_master_write_asynch(obj); spi_enable_interrupt(obj, (uint32_t)cb, true); + return false; } else { /* try to acquire channels */ dma_init(); @@ -1147,11 +1151,13 @@ void spi_master_transfer_dma(spi_t *obj, const void *txdata, void *rxdata, int t spi_master_dma_channel_setup(obj, cb); /* and activate the transfer */ spi_activate_dma(obj, rxdata, txdata, tx_length, rx_length); + return true; } else { /* DMA is unavailable, so fall back to IRQ */ obj->spi.spi->IFC = 0xFFFFFFFF; spi_master_write_asynch(obj); spi_enable_interrupt(obj, (uint32_t)cb, true); + return false; } } } @@ -1167,6 +1173,9 @@ void spi_master_transfer_dma(spi_t *obj, const void *txdata, void *rxdata, int t * @param[in] event The logical OR of events to be registered * @param[in] handler SPI interrupt handler * @param[in] hint A suggestion for how to use DMA with this transfer + * + * @return True if DMA was actually used for the transfer, false otherwise (if interrupts or another CPU-based + * method is used to do the transfer). 
*/ bool spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, size_t rx_length, uint8_t bit_width, uint32_t handler, uint32_t event, DMAUsage hint) { @@ -1190,7 +1199,7 @@ bool spi_master_transfer(spi_t *obj, const void *tx, size_t tx_length, void *rx, spi_enable_event(obj, event, true); /* And kick off the transfer */ - spi_master_transfer_dma(obj, tx, rx, tx_length, rx_length, (void*)handler, hint); + return spi_master_transfer_dma(obj, tx, rx, tx_length, rx_length, (void*)handler, hint); } From 86dbd364d35996ee97a45d0b84019456f38e10ad Mon Sep 17 00:00:00 2001 From: Jamie Smith Date: Mon, 4 Dec 2023 01:56:33 -0800 Subject: [PATCH 4/8] Run formatter, improve CacheAlignedBuffer docs --- drivers/include/drivers/SPI.h | 10 +-- drivers/source/SPI.cpp | 17 ++--- .../include/platform/CacheAlignedBuffer.h | 74 +++++++++++++------ .../COMPONENT_SD/source/SDBlockDevice.cpp | 8 +- 4 files changed, 66 insertions(+), 43 deletions(-) diff --git a/drivers/include/drivers/SPI.h b/drivers/include/drivers/SPI.h index c12cd381cd9..59f5f8befb7 100644 --- a/drivers/include/drivers/SPI.h +++ b/drivers/include/drivers/SPI.h @@ -478,7 +478,7 @@ class SPI : private NonCopyable { */ template typename std::enable_if::value, int>::type - transfer(const WordT *tx_buffer, int tx_length, CacheAlignedBuffer & rx_buffer, int rx_length, const event_callback_t &callback, int event = SPI_EVENT_COMPLETE) + transfer(const WordT *tx_buffer, int tx_length, CacheAlignedBuffer &rx_buffer, int rx_length, const event_callback_t &callback, int event = SPI_EVENT_COMPLETE) { return transfer_internal(tx_buffer, tx_length, rx_buffer.data(), rx_length, callback, event); } @@ -486,7 +486,7 @@ class SPI : private NonCopyable { // Overloads of the above to support passing nullptr template typename std::enable_if::value, int>::type - transfer(const std::nullptr_t *tx_buffer, int tx_length, CacheAlignedBuffer & rx_buffer, int rx_length, const event_callback_t &callback, int event = SPI_EVENT_COMPLETE) + transfer(const std::nullptr_t *tx_buffer, int tx_length, CacheAlignedBuffer &rx_buffer, int rx_length, const event_callback_t &callback, int event = SPI_EVENT_COMPLETE) { return transfer_internal(tx_buffer, tx_length, rx_buffer, rx_length, callback, event); } @@ -524,7 +524,7 @@ class SPI : private NonCopyable { */ template typename std::enable_if::value, int>::type - transfer_and_wait(const WordT *tx_buffer, int tx_length, CacheAlignedBuffer & rx_buffer, int rx_length, rtos::Kernel::Clock::duration_u32 timeout = rtos::Kernel::wait_for_u32_forever) + transfer_and_wait(const WordT *tx_buffer, int tx_length, CacheAlignedBuffer &rx_buffer, int rx_length, rtos::Kernel::Clock::duration_u32 timeout = rtos::Kernel::wait_for_u32_forever) { return transfer_and_wait_internal(tx_buffer, tx_length, rx_buffer.data(), rx_length, timeout); } @@ -532,7 +532,7 @@ class SPI : private NonCopyable { // Overloads of the above to support passing nullptr template typename std::enable_if::value, int>::type - transfer_and_wait(const std::nullptr_t *tx_buffer, int tx_length, CacheAlignedBuffer & rx_buffer, int rx_length, rtos::Kernel::Clock::duration_u32 timeout = rtos::Kernel::wait_for_u32_forever) + transfer_and_wait(const std::nullptr_t *tx_buffer, int tx_length, CacheAlignedBuffer &rx_buffer, int rx_length, rtos::Kernel::Clock::duration_u32 timeout = rtos::Kernel::wait_for_u32_forever) { return transfer_and_wait_internal(tx_buffer, tx_length, rx_buffer.data(), rx_length, timeout); } @@ -741,7 +741,7 @@ class SPI : private NonCopyable { #if 
__DCACHE_PRESENT // These variables store the location and length in bytes of the Rx buffer if an async transfer // is in progress. They are used for invalidating the cache after the transfer completes. - void * _transfer_in_progress_rx_buffer; + void *_transfer_in_progress_rx_buffer; size_t _transfer_in_progress_rx_len; #endif diff --git a/drivers/source/SPI.cpp b/drivers/source/SPI.cpp index 9c6a56dcbb0..a0339487215 100644 --- a/drivers/source/SPI.cpp +++ b/drivers/source/SPI.cpp @@ -397,8 +397,7 @@ void SPI::abort_transfer() } #if __DCACHE_PRESENT - if(_transfer_in_progress_uses_dma && _transfer_in_progress_rx_len > 0) - { + if (_transfer_in_progress_uses_dma && _transfer_in_progress_rx_len > 0) { // If the cache is present, invalidate the Rx data so it's loaded from main RAM. // We only want to do this if DMA actually got used for the transfer because, if interrupts // were used instead, the cache might have the correct data and NOT the main memory. @@ -484,17 +483,15 @@ void SPI::start_transfer(const void *tx_buffer, int tx_length, void *rx_buffer, // not interoperate with the CPU cache. So, manual flushing/invalidation will be required. // This page is very useful for how to do this correctly: // https://community.st.com/t5/stm32-mcus-products/maintaining-cpu-data-cache-coherence-for-dma-buffers/td-p/95746 - if(tx_length > 0) - { + if (tx_length > 0) { // For chips with a cache, we need to evict the Tx data from cache to main memory. // This ensures that the DMA controller can see the most up-to-date copy of the data. - SCB_CleanDCache_by_Addr(const_cast(tx_buffer), tx_length); + SCB_CleanDCache_by_Addr(const_cast(tx_buffer), tx_length); } // Additionally, we have to make sure that there aren't any pending changes which could be written back // to the Rx buffer memory by the cache at a later date, corrupting the DMA results. - if(rx_length > 0) - { + if (rx_length > 0) { SCB_InvalidateDCache_by_Addr(rx_buffer, rx_length); } _transfer_in_progress_rx_buffer = rx_buffer; @@ -547,8 +544,7 @@ void SPI::irq_handler_asynch(void) if ((event & SPI_EVENT_ALL)) { #if __DCACHE_PRESENT - if(_transfer_in_progress_uses_dma && _transfer_in_progress_rx_len > 0) - { + if (_transfer_in_progress_uses_dma && _transfer_in_progress_rx_len > 0) { // If the cache is present, invalidate the Rx data so it's loaded from main RAM. // We only want to do this if DMA actually got used for the transfer because, if interrupts // were used instead, the cache might have the correct data and NOT the main memory. @@ -566,8 +562,7 @@ void SPI::irq_handler_asynch(void) unlock_deep_sleep(); - if(_callback) - { + if (_callback) { _callback.call(event & SPI_EVENT_ALL); } } diff --git a/platform/include/platform/CacheAlignedBuffer.h b/platform/include/platform/CacheAlignedBuffer.h index 97b5339a6a9..6e8e56a66f3 100644 --- a/platform/include/platform/CacheAlignedBuffer.h +++ b/platform/include/platform/CacheAlignedBuffer.h @@ -28,8 +28,11 @@ namespace mbed { /** * @brief CacheAlignedBuffer is used by Mbed in locations where we need a cache-aligned buffer. * - * Cache alignment is desirable in several different situations in embedded programming; often - * when working with DMA or other peripherals which write their results back to main memory. + * Cache alignment is desirable in several different situations in embedded programming -- one common + * use is when working with DMA or other peripherals which write their results back to main memory. 
+ * After these peripherals do their work, the data will be correct in main memory, but the CPU cache + * might also contain a value cached from that memory which is now incorrect. + * * In order to read those results from memory without risk of getting old data from the * CPU cache, one needs to align the buffer so it takes up an integer number of cache lines, * then invalidate the cache lines so that the data gets reread from RAM. @@ -43,7 +46,7 @@ namespace mbed { */ template struct -CacheAlignedBuffer { + CacheAlignedBuffer { private: #if __DCACHE_PRESENT // Allocate enough extra space that we can shift the start of the buffer forward to be on a cache line. @@ -54,14 +57,14 @@ CacheAlignedBuffer { // So, we need to round up the backing buffer size to the nearest multiple of the cache line size. // The math for rounding up can be found here: // https://community.st.com/t5/stm32-mcus-products/maintaining-cpu-data-cache-coherence-for-dma-buffers/td-p/95746 - constexpr static size_t requiredSizeRoundedUp = (BufferSize * sizeof(DataT) + __SCB_DCACHE_LINE_SIZE - 1) & ~((__SCB_DCACHE_LINE_SIZE) - 1); + constexpr static size_t requiredSizeRoundedUp = (BufferSize *sizeof(DataT) + __SCB_DCACHE_LINE_SIZE - 1) & ~((__SCB_DCACHE_LINE_SIZE) - 1); constexpr static size_t backingBufferSizeBytes = requiredSizeRoundedUp + __SCB_DCACHE_LINE_SIZE - 1; #else constexpr static size_t backingBufferSizeBytes = BufferSize * sizeof(DataT); #endif uint8_t _backingBuffer[backingBufferSizeBytes]; - DataT * _alignedArrayPtr; + DataT *_alignedArrayPtr; /** * Find and return the first location in the given buffer that starts on a cache line. @@ -71,7 +74,7 @@ CacheAlignedBuffer { * * @return Pointer to first data item, aligned at the start of a cache line. */ - inline DataT * findCacheLineStart(uint8_t * buffer) + inline DataT *findCacheLineStart(uint8_t *buffer) { #if __DCACHE_PRESENT // Use integer division to divide the address down to the cache line size, which @@ -90,22 +93,22 @@ CacheAlignedBuffer { public: // Iterator types - typedef DataT * iterator; - typedef DataT const * const_iterator; + typedef DataT *iterator; + typedef DataT const *const_iterator; /** * @brief Construct new cache-aligned buffer. Buffer will be zero-initialized. */ CacheAlignedBuffer(): - _backingBuffer{}, - _alignedArrayPtr(findCacheLineStart(_backingBuffer)) + _backingBuffer{}, + _alignedArrayPtr(findCacheLineStart(_backingBuffer)) {} /** * @brief Copy from other cache-aligned buffer. Buffer memory will be copied. */ - CacheAlignedBuffer(CacheAlignedBuffer const & other): - _alignedArrayPtr(findCacheLineStart(_backingBuffer)) + CacheAlignedBuffer(CacheAlignedBuffer const &other): + _alignedArrayPtr(findCacheLineStart(_backingBuffer)) { memcpy(this->_alignedArrayPtr, other._alignedArrayPtr, BufferSize * sizeof(DataT)); } @@ -113,7 +116,7 @@ CacheAlignedBuffer { /** * @brief Assign from other cache-aligned buffer. Buffer memory will be assigned. 
*/ - CacheAlignedBuffer & operator=(CacheAlignedBuffer const & other) + CacheAlignedBuffer &operator=(CacheAlignedBuffer const &other) { memcpy(this->_alignedArrayPtr, other._alignedArrayPtr, BufferSize * sizeof(DataT)); } @@ -121,47 +124,74 @@ CacheAlignedBuffer { /** * @brief Get a pointer to the aligned data array inside the buffer */ - DataT * data() { return _alignedArrayPtr; } + DataT *data() + { + return _alignedArrayPtr; + } /** * @brief Get a pointer to the aligned data array inside the buffer (const version) */ - DataT const * data() const { return _alignedArrayPtr; } + DataT const *data() const + { + return _alignedArrayPtr; + } /** * @brief Element access */ - DataT & operator[](size_t index) { return _alignedArrayPtr[index]; } + DataT &operator[](size_t index) + { + return _alignedArrayPtr[index]; + } /** * @brief Element access (const) */ - DataT operator[](size_t index) const { return _alignedArrayPtr[index]; } + DataT operator[](size_t index) const + { + return _alignedArrayPtr[index]; + } /** * @brief Get iterator for start of buffer */ - iterator begin() { return _alignedArrayPtr; } + iterator begin() + { + return _alignedArrayPtr; + } /** * @brief Get iterator for start of buffer */ - const_iterator begin() const { return _alignedArrayPtr; } + const_iterator begin() const + { + return _alignedArrayPtr; + } /** * @brief Get iterator for end of buffer */ - iterator end() { return _alignedArrayPtr + BufferSize; } + iterator end() + { + return _alignedArrayPtr + BufferSize; + } /** * @brief Get iterator for end of buffer */ - const_iterator end() const { return _alignedArrayPtr + BufferSize; } + const_iterator end() const + { + return _alignedArrayPtr + BufferSize; + } /** * @return The maximum amount of DataT elements that this buffer can hold */ - constexpr size_t capacity() { return BufferSize; } + constexpr size_t capacity() + { + return BufferSize; + } /** * @brief If this MCU has a data cache, this function _invalidates_ the buffer in the data cache. 
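
The cache-maintenance choreography that the driver changes above implement is easier to see outside of
diff context. Below is a condensed sketch of the pattern followed around a DMA transfer (a sketch only,
assuming the CMSIS cache helpers and a core with a data cache such as a Cortex-M7; start_dma() and
wait_for_dma_complete() are hypothetical placeholders for the vendor HAL calls, not functions from this
patch set):

    #if __DCACHE_PRESENT
    // Hypothetical HAL hooks, declared only so the sketch is self-contained.
    void start_dma(const uint8_t *tx, uint8_t *rx, size_t len);
    void wait_for_dma_complete();

    void dma_transfer_with_cache_maintenance(const uint8_t *tx, uint8_t *rx, size_t len)
    {
        // Evict pending Tx writes from the cache so the DMA engine reads up-to-date data.
        SCB_CleanDCache_by_Addr(const_cast<uint8_t *>(tx), len);

        // Discard cached copies of the Rx region so no dirty line can be written back
        // over the DMA results while the transfer is running.
        SCB_InvalidateDCache_by_Addr(rx, len);

        start_dma(tx, rx, len);
        wait_for_dma_complete();

        // Invalidate again after completion: speculative reads may have refilled the Rx
        // lines mid-transfer, and the CPU must fetch the DMA-written data from main RAM.
        SCB_InvalidateDCache_by_Addr(rx, len);
    }
    #endif

Cleaning before the transfer and invalidating both before and after mirrors what SPI::start_transfer()
and SPI::irq_handler_asynch() do in the patches above.
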
diff --git a/storage/blockdevice/COMPONENT_SD/source/SDBlockDevice.cpp b/storage/blockdevice/COMPONENT_SD/source/SDBlockDevice.cpp index 8e9ac5fb902..21d5bb8657f 100644 --- a/storage/blockdevice/COMPONENT_SD/source/SDBlockDevice.cpp +++ b/storage/blockdevice/COMPONENT_SD/source/SDBlockDevice.cpp @@ -921,8 +921,7 @@ int SDBlockDevice::_read_bytes(uint8_t *buffer, uint32_t length) // read data #if DEVICE_SPI_ASYNCH if (_async_spi_enabled) { - if(length > _async_data_buffer.capacity()) - { + if (length > _async_data_buffer.capacity()) { return SD_BLOCK_DEVICE_ERROR_PARAMETER; } @@ -973,8 +972,7 @@ int SDBlockDevice::_read(uint8_t *buffer, uint32_t length) // read data #if DEVICE_SPI_ASYNCH if (_async_spi_enabled) { - if(length > _async_data_buffer.capacity()) - { + if (length > _async_data_buffer.capacity()) { return SD_BLOCK_DEVICE_ERROR_PARAMETER; } @@ -1081,7 +1079,7 @@ bd_size_t SDBlockDevice::_sd_sectors() } #ifdef DEVICE_SPI_ASYNCH CacheAlignedBuffer csd_buffer; - uint8_t * csd = csd_buffer.data(); + uint8_t *csd = csd_buffer.data(); #else uint8_t csd[16]; #endif From 1f2a9feb4e73059ce67bf731fadeef190b6385e2 Mon Sep 17 00:00:00 2001 From: Jamie Smith Date: Wed, 6 Dec 2023 19:01:52 -0800 Subject: [PATCH 5/8] Add missing license identifiers to source files --- targets/TARGET_NORDIC/TARGET_NRF5x/TARGET_NRF52/spi_api.c | 1 + targets/TARGET_NUVOTON/TARGET_M451/spi_api.c | 1 + targets/TARGET_NUVOTON/TARGET_NANO100/spi_api.c | 1 + targets/TARGET_NUVOTON/TARGET_NUC472/spi_api.c | 1 + targets/TARGET_STM/stm_spi_api.c | 1 + targets/TARGET_TOSHIBA/TARGET_TMPM46B/spi_api.c | 1 + 6 files changed, 6 insertions(+) diff --git a/targets/TARGET_NORDIC/TARGET_NRF5x/TARGET_NRF52/spi_api.c b/targets/TARGET_NORDIC/TARGET_NRF5x/TARGET_NRF52/spi_api.c index 77b0e0a70f9..56b2600ed9b 100644 --- a/targets/TARGET_NORDIC/TARGET_NRF5x/TARGET_NRF52/spi_api.c +++ b/targets/TARGET_NORDIC/TARGET_NRF5x/TARGET_NRF52/spi_api.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2017 Nordic Semiconductor ASA * All rights reserved. + * SPDX-License-Identifier: LicenseRef-Nordic-5-Clause * * Redistribution and use in source and binary forms, with or without modification, * are permitted provided that the following conditions are met: diff --git a/targets/TARGET_NUVOTON/TARGET_M451/spi_api.c b/targets/TARGET_NUVOTON/TARGET_M451/spi_api.c index 2e80b1fb691..a43b8756960 100644 --- a/targets/TARGET_NUVOTON/TARGET_M451/spi_api.c +++ b/targets/TARGET_NUVOTON/TARGET_M451/spi_api.c @@ -1,5 +1,6 @@ /* mbed Microcontroller Library * Copyright (c) 2015-2016 Nuvoton + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/targets/TARGET_NUVOTON/TARGET_NANO100/spi_api.c b/targets/TARGET_NUVOTON/TARGET_NANO100/spi_api.c index c546862fc93..fe8c4e79e61 100644 --- a/targets/TARGET_NUVOTON/TARGET_NANO100/spi_api.c +++ b/targets/TARGET_NUVOTON/TARGET_NANO100/spi_api.c @@ -1,5 +1,6 @@ /* mbed Microcontroller Library * Copyright (c) 2015-2017 Nuvoton + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/targets/TARGET_NUVOTON/TARGET_NUC472/spi_api.c b/targets/TARGET_NUVOTON/TARGET_NUC472/spi_api.c index 2c154dff563..6ce546a7967 100644 --- a/targets/TARGET_NUVOTON/TARGET_NUC472/spi_api.c +++ b/targets/TARGET_NUVOTON/TARGET_NUC472/spi_api.c @@ -1,5 +1,6 @@ /* mbed Microcontroller Library * Copyright (c) 2015-2016 Nuvoton + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/targets/TARGET_STM/stm_spi_api.c b/targets/TARGET_STM/stm_spi_api.c index 65d65e059cf..d5c49fbcd08 100644 --- a/targets/TARGET_STM/stm_spi_api.c +++ b/targets/TARGET_STM/stm_spi_api.c @@ -2,6 +2,7 @@ ******************************************************************************* * Copyright (c) 2015, STMicroelectronics * All rights reserved. + * SPDX-License-Identifier: BSD-3-Clause * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: diff --git a/targets/TARGET_TOSHIBA/TARGET_TMPM46B/spi_api.c b/targets/TARGET_TOSHIBA/TARGET_TMPM46B/spi_api.c index ddce123061a..9d8b846e582 100644 --- a/targets/TARGET_TOSHIBA/TARGET_TMPM46B/spi_api.c +++ b/targets/TARGET_TOSHIBA/TARGET_TMPM46B/spi_api.c @@ -2,6 +2,7 @@ ******************************************************************************* * (C)Copyright TOSHIBA ELECTRONIC DEVICES & STORAGE CORPORATION 2017 All rights reserved * All rights reserved. + * SPDX-License-Identifier: BSD-3-Clause * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: From 29e76a7e71432a09ec53ae85dc9e3ff17537154a Mon Sep 17 00:00:00 2001 From: Jamie Smith Date: Tue, 12 Dec 2023 23:22:48 -0800 Subject: [PATCH 6/8] Make CacheAlignedBuffer heap-allocatable, try and add exclusion for Nordic license in scancode_evaluate.py --- drivers/include/drivers/SPI.h | 20 +- .../include/platform/CacheAlignedBuffer.h | 260 +++++++++++++----- .../COMPONENT_SD/include/SD/SDBlockDevice.h | 2 +- .../COMPONENT_SD/source/SDBlockDevice.cpp | 2 +- tools/python/run_python_tests.sh | 2 +- .../scancode_evaluate/scancode_evaluate.py | 32 ++- 6 files changed, 230 insertions(+), 88 deletions(-) diff --git a/drivers/include/drivers/SPI.h b/drivers/include/drivers/SPI.h index 59f5f8befb7..58e79f9f16d 100644 --- a/drivers/include/drivers/SPI.h +++ b/drivers/include/drivers/SPI.h @@ -476,18 +476,20 @@ class SPI : private NonCopyable { * @retval 0 If the transfer has started. 
* @retval -1 if the transfer could not be enqueued (increase drivers.spi_transaction_queue_len option) */ - template + template typename std::enable_if::value, int>::type - transfer(const WordT *tx_buffer, int tx_length, CacheAlignedBuffer &rx_buffer, int rx_length, const event_callback_t &callback, int event = SPI_EVENT_COMPLETE) + transfer(const WordT *tx_buffer, int tx_length, CacheAlignedBuffer &rx_buffer, int rx_length, const event_callback_t &callback, int event = SPI_EVENT_COMPLETE) { + MBED_ASSERT(rx_length <= static_cast(rx_buffer.capacity())); return transfer_internal(tx_buffer, tx_length, rx_buffer.data(), rx_length, callback, event); } // Overloads of the above to support passing nullptr - template + template typename std::enable_if::value, int>::type - transfer(const std::nullptr_t *tx_buffer, int tx_length, CacheAlignedBuffer &rx_buffer, int rx_length, const event_callback_t &callback, int event = SPI_EVENT_COMPLETE) + transfer(const std::nullptr_t *tx_buffer, int tx_length, CacheAlignedBuffer &rx_buffer, int rx_length, const event_callback_t &callback, int event = SPI_EVENT_COMPLETE) { + MBED_ASSERT(rx_length <= static_cast(rx_buffer.capacity())); return transfer_internal(tx_buffer, tx_length, rx_buffer, rx_length, callback, event); } template @@ -522,18 +524,20 @@ class SPI : private NonCopyable { * @retval 2 on other error * @retval 0 on success */ - template + template typename std::enable_if::value, int>::type - transfer_and_wait(const WordT *tx_buffer, int tx_length, CacheAlignedBuffer &rx_buffer, int rx_length, rtos::Kernel::Clock::duration_u32 timeout = rtos::Kernel::wait_for_u32_forever) + transfer_and_wait(const WordT *tx_buffer, int tx_length, CacheAlignedBuffer &rx_buffer, int rx_length, rtos::Kernel::Clock::duration_u32 timeout = rtos::Kernel::wait_for_u32_forever) { + MBED_ASSERT(rx_length <= static_cast(rx_buffer.capacity())); return transfer_and_wait_internal(tx_buffer, tx_length, rx_buffer.data(), rx_length, timeout); } // Overloads of the above to support passing nullptr - template + template typename std::enable_if::value, int>::type - transfer_and_wait(const std::nullptr_t *tx_buffer, int tx_length, CacheAlignedBuffer &rx_buffer, int rx_length, rtos::Kernel::Clock::duration_u32 timeout = rtos::Kernel::wait_for_u32_forever) + transfer_and_wait(const std::nullptr_t *tx_buffer, int tx_length, CacheAlignedBuffer &rx_buffer, int rx_length, rtos::Kernel::Clock::duration_u32 timeout = rtos::Kernel::wait_for_u32_forever) { + MBED_ASSERT(rx_length <= static_cast(rx_buffer.capacity())); return transfer_and_wait_internal(tx_buffer, tx_length, rx_buffer.data(), rx_length, timeout); } template diff --git a/platform/include/platform/CacheAlignedBuffer.h b/platform/include/platform/CacheAlignedBuffer.h index 6e8e56a66f3..777d5aadb33 100644 --- a/platform/include/platform/CacheAlignedBuffer.h +++ b/platform/include/platform/CacheAlignedBuffer.h @@ -24,47 +24,90 @@ namespace mbed { +namespace detail::cab { + /** + * @brief Calculate the needed capacity for a cache aligned buffer's backing buffer based on the + * needed capacity and element size. + * + * @param neededCapacity Capacity needed for the buffer + * @param elementSize Size of each element + * + * @return Needed backing buffer size + */ + constexpr inline size_t getNeededBackingBufferSize(size_t neededCapacity, size_t elementSize) { +#if __DCACHE_PRESENT + // Allocate enough extra space that we can shift the start of the buffer towards higher addresses to be on a cache line. 
+ // The worst case for this is when the first byte is allocated 1 byte past the start of a cache line, so we + // will need an additional (cache line size - 1) bytes. + // Additionally, since we are going to be invalidating this buffer, we can't allow any other variables to be + // in the same cache lines, or they might get corrupted. + // So, we need to round up the backing buffer size to the nearest multiple of the cache line size. + // The math for rounding up can be found here: + // https://community.st.com/t5/stm32-mcus-products/maintaining-cpu-data-cache-coherence-for-dma-buffers/td-p/95746 + size_t requiredSizeRoundedUp = (neededCapacity * elementSize + __SCB_DCACHE_LINE_SIZE - 1) & ~((__SCB_DCACHE_LINE_SIZE) - 1); + return requiredSizeRoundedUp + __SCB_DCACHE_LINE_SIZE - 1; +#else + // No cache on this platform so don't need any extra space. + return neededCapacity * elementSize; +#endif + } +} /** * @brief CacheAlignedBuffer is used by Mbed in locations where we need a cache-aligned buffer. * - * Cache alignment is desirable in several different situations in embedded programming -- one common + *

<p>Cache alignment is desirable in several different situations in embedded programming -- one common
 * use is when working with DMA or other peripherals which write their results back to main memory.
 * After these peripherals do their work, the data will be correct in main memory, but the CPU cache
- * might also contain a value cached from that memory which is now incorrect.
+ * might also contain a value cached from that memory which is now incorrect.</p>
 *
- * In order to read those results from memory without risk of getting old data from the
+ * <p>In order to read those results from memory without risk of getting old data from the
 * CPU cache, one needs to align the buffer so it takes up an integer number of cache lines,
- * then invalidate the cache lines so that the data gets reread from RAM.
+ * then invalidate the cache lines so that the data gets reread from RAM.</p>
 *
- * CacheAlignedBuffer provides an easy way to allocate the correct amount of space so that
- * a buffer of any size can be made cache-aligned.
+ * <p>%CacheAlignedBuffer provides an easy way to allocate the correct amount of space so that
+ * a buffer of any size can be made cache-aligned. To instantiate a %CacheAlignedBuffer, create one of its
+ * subtypes, #StaticCacheAlignedBuffer or #DynamicCacheAlignedBuffer.</p>
+ *
+ * <h1>Converting Code to use CacheAlignedBuffer</h1>
+ * For code using static arrays, like this: + * \code{.cpp} + * uint8_t rxBuffer[16]; + * spi.transfer_and_wait(nullptr, 0, rxBuffer, 16); + * \endcode + * Use a StaticCacheAlignedBuffer: + * \code{.cpp} + * StaticCacheAlignedBuffer rxBuffer; + * spi.transfer_and_wait(nullptr, 0, rxBuffer, 16); + * \endcode + * For code using dynamic allocation to handle unknown buffer sizes, like: + * \code{.cpp} + * uint16_t * rxBuffer = new uint16_t[bufferSize]; + * spi.transfer_and_wait(nullptr, 0, rxBuffer, bufferSize); + * ... + * delete[] rxBuffer; + * \endcode + * use a DynamicCacheAlignedBuffer: + * \code{.cpp} + * DynamicCacheAlignedBuffer rxBuffer(bufferSize); + * spi.transfer_and_wait(nullptr, 0, rxBuffer, bufferSize); + * \endcode * * @tparam DataT Type of the data to store in the buffer. Note: %CacheAlignedBuffer is not designed for * using class types as DataT, and will not call constructors. - * @tparam BufferSize Buffer size (number of elements) needed by the application for the buffer. */ -template -struct - CacheAlignedBuffer { -private: -#if __DCACHE_PRESENT - // Allocate enough extra space that we can shift the start of the buffer forward to be on a cache line. - // The worst case for this is when the first byte is allocated 1 byte past the start of a cache line, so we - // will need an additional (cache line size - 1) bytes. - // Additionally, since we are going to be invalidating this buffer, we can't allow any other variables to be - // in the same cache lines, or they might get corrupted. - // So, we need to round up the backing buffer size to the nearest multiple of the cache line size. - // The math for rounding up can be found here: - // https://community.st.com/t5/stm32-mcus-products/maintaining-cpu-data-cache-coherence-for-dma-buffers/td-p/95746 - constexpr static size_t requiredSizeRoundedUp = (BufferSize *sizeof(DataT) + __SCB_DCACHE_LINE_SIZE - 1) & ~((__SCB_DCACHE_LINE_SIZE) - 1); - constexpr static size_t backingBufferSizeBytes = requiredSizeRoundedUp + __SCB_DCACHE_LINE_SIZE - 1; -#else - constexpr static size_t backingBufferSizeBytes = BufferSize * sizeof(DataT); -#endif +template +class CacheAlignedBuffer { + +protected: + /// Pointer to the aligned buffer. Must be set in each constructor of each subclass. + DataT * _alignedBufferPtr; + + /// Capacity of the aligned buffer, in terms of number of DataT elements + size_t _alignedBufferCapacity; - uint8_t _backingBuffer[backingBufferSizeBytes]; - DataT *_alignedArrayPtr; + // Protected constructor to block instantiation + CacheAlignedBuffer() = default; /** * Find and return the first location in the given buffer that starts on a cache line. @@ -74,7 +117,7 @@ struct * * @return Pointer to first data item, aligned at the start of a cache line. */ - inline DataT *findCacheLineStart(uint8_t *buffer) + static inline DataT *findCacheLineStart(uint8_t *buffer) { #if __DCACHE_PRESENT // Use integer division to divide the address down to the cache line size, which @@ -96,37 +139,12 @@ struct typedef DataT *iterator; typedef DataT const *const_iterator; - /** - * @brief Construct new cache-aligned buffer. Buffer will be zero-initialized. - */ - CacheAlignedBuffer(): - _backingBuffer{}, - _alignedArrayPtr(findCacheLineStart(_backingBuffer)) - {} - - /** - * @brief Copy from other cache-aligned buffer. Buffer memory will be copied. 
- */ - CacheAlignedBuffer(CacheAlignedBuffer const &other): - _alignedArrayPtr(findCacheLineStart(_backingBuffer)) - { - memcpy(this->_alignedArrayPtr, other._alignedArrayPtr, BufferSize * sizeof(DataT)); - } - - /** - * @brief Assign from other cache-aligned buffer. Buffer memory will be assigned. - */ - CacheAlignedBuffer &operator=(CacheAlignedBuffer const &other) - { - memcpy(this->_alignedArrayPtr, other._alignedArrayPtr, BufferSize * sizeof(DataT)); - } - /** * @brief Get a pointer to the aligned data array inside the buffer */ DataT *data() { - return _alignedArrayPtr; + return _alignedBufferPtr; } /** @@ -134,7 +152,7 @@ struct */ DataT const *data() const { - return _alignedArrayPtr; + return _alignedBufferPtr; } /** @@ -142,7 +160,7 @@ struct */ DataT &operator[](size_t index) { - return _alignedArrayPtr[index]; + return _alignedBufferPtr[index]; } /** @@ -150,7 +168,7 @@ struct */ DataT operator[](size_t index) const { - return _alignedArrayPtr[index]; + return _alignedBufferPtr[index]; } /** @@ -158,7 +176,7 @@ struct */ iterator begin() { - return _alignedArrayPtr; + return _alignedBufferPtr; } /** @@ -166,7 +184,7 @@ struct */ const_iterator begin() const { - return _alignedArrayPtr; + return _alignedBufferPtr; } /** @@ -174,7 +192,7 @@ struct */ iterator end() { - return _alignedArrayPtr + BufferSize; + return _alignedBufferPtr + _alignedBufferCapacity; } /** @@ -182,29 +200,129 @@ struct */ const_iterator end() const { - return _alignedArrayPtr + BufferSize; + return _alignedBufferPtr + _alignedBufferCapacity; } /** - * @return The maximum amount of DataT elements that this buffer can hold + * @return The maximum amount of DataT elements that this buffer can hold. */ constexpr size_t capacity() { - return BufferSize; + return _alignedBufferCapacity; + } +}; + +/** + * @brief CacheAlignedBuffer type designed for static allocation. + * + * Use a StaticCacheAlignedBuffer when you want to create a cache-aligned buffer with a fixed size + * at compile time. %StaticCacheAlignedBuffers can be declared globally, as local variables, or using + * new[] and delete[]. + * + * @tparam DataT Type of the data to store in the buffer. Note: %CacheAlignedBuffer is not designed for + * using class types as DataT, and will not call constructors. + * @tparam BufferSize Buffer size (number of elements) needed by the application for the buffer. + */ +template +class StaticCacheAlignedBuffer : public CacheAlignedBuffer { +private: + + uint8_t _backingBuffer[detail::cab::getNeededBackingBufferSize(BufferSize, sizeof(DataT))]; + +public: + + /** + * @brief Construct new cache-aligned buffer. Buffer will be zero-initialized. + */ + StaticCacheAlignedBuffer(): + _backingBuffer{} + { + this->_alignedBufferPtr = this->findCacheLineStart(_backingBuffer); + this->_alignedBufferCapacity = BufferSize; } /** - * @brief If this MCU has a data cache, this function _invalidates_ the buffer in the data cache. + * @brief Copy from other cache-aligned buffer. Buffer memory will be copied. + */ + StaticCacheAlignedBuffer(StaticCacheAlignedBuffer const &other) + { + this->_alignedBufferPtr = this->findCacheLineStart(_backingBuffer); + memcpy(this->_alignedBufferPtr, other._alignedBufferPtr, BufferSize * sizeof(DataT)); + this->_alignedBufferCapacity = BufferSize; + } + + /** + * @brief Assign from other cache-aligned buffer. Buffer memory will be assigned. * - * Invalidation means that the next time code access the buffer data, it is guaranteed to be fetched from - * main memory rather than from the CPU cache. 
If there are changes made to the data which have not been - * flushed back to main memory, those changes will be lost! + * Only a buffer with the same data type and size can be assigned. */ - void invalidate() + StaticCacheAlignedBuffer &operator=(StaticCacheAlignedBuffer const &other) { -#if __DCACHE_PRESENT - SCB_InvalidateDCache_by_Addr(_alignedArrayPtr, BufferSize * sizeof(DataT)); -#endif + memmove(this->_alignedBufferPtr, other._alignedBufferPtr, BufferSize * sizeof(DataT)); + } +}; + +/** + * @brief CacheAlignedBuffer type which allocates its backing buffer on the heap. + * + * Use a DynamicCacheAlignedBuffer when you want to create a cache-aligned buffer with a size + * known only at runtime. When constructed, %DynamicCacheAlignedBuffers allocate backing memory off the + * heap for the provided number of elements. The memory will be released when the buffer object is destroyed. + * + * @tparam DataT Type of the data to store in the buffer. Note: %CacheAlignedBuffer is not designed for + * using class types as DataT, and will not call constructors. + */ +template +class DynamicCacheAlignedBuffer : public CacheAlignedBuffer { + uint8_t * _heapMem; +public: + /** + * @brief Construct new cache-aligned buffer. Buffer will be zero-initialized and allocated from the heap. + * + * @param capacity Number of elements the buffer shall hold + */ + explicit DynamicCacheAlignedBuffer(size_t capacity): + _heapMem(new uint8_t[detail::cab::getNeededBackingBufferSize(capacity, sizeof(DataT))]()) + { + this->_alignedBufferPtr = this->findCacheLineStart(_heapMem); + this->_alignedBufferCapacity = capacity; + } + + /** + * @brief Copy from other cache-aligned buffer. A new backing buffer will be allocated on the heap and + * its data will be copied from the other buffer. + */ + DynamicCacheAlignedBuffer(DynamicCacheAlignedBuffer const &other): + _heapMem(new uint8_t[detail::cab::getNeededBackingBufferSize(other._alignedBufferCapacity, sizeof(DataT))]) + { + this->_alignedBufferCapacity = other._alignedBufferCapacity; + this->_alignedBufferPtr = this->findCacheLineStart(_heapMem); + memcpy(this->_alignedBufferPtr, other._alignedBufferPtr, this->_alignedBufferCapacity * sizeof(DataT)); + } + + // Destructor + ~DynamicCacheAlignedBuffer() + { + delete[] this->_heapMem; + } + + /** + * @brief Assign from other cache-aligned buffer with the same type. A new buffer will be allocated + * of the correct size. 
+ */ + DynamicCacheAlignedBuffer &operator=(DynamicCacheAlignedBuffer const &other) + { + // Check for self assignment + if(&other == this) + { + return *this; + } + + delete[] _heapMem; + _heapMem = new uint8_t[detail::cab::getNeededBackingBufferSize(other._alignedBufferCapacity, sizeof(DataT))]; + this->_alignedBufferPtr = this->findCacheLineStart(_heapMem); + + memcpy(this->_alignedBufferPtr, other._alignedBufferPtr, this->_alignedBufferCapacity * sizeof(DataT)); } }; diff --git a/storage/blockdevice/COMPONENT_SD/include/SD/SDBlockDevice.h b/storage/blockdevice/COMPONENT_SD/include/SD/SDBlockDevice.h index 246fed59538..e5aafcf609f 100644 --- a/storage/blockdevice/COMPONENT_SD/include/SD/SDBlockDevice.h +++ b/storage/blockdevice/COMPONENT_SD/include/SD/SDBlockDevice.h @@ -317,7 +317,7 @@ class SDBlockDevice : public mbed::BlockDevice { #if DEVICE_SPI_ASYNCH bool _async_spi_enabled = false; - mbed::CacheAlignedBuffer _async_data_buffer; + mbed::StaticCacheAlignedBuffer _async_data_buffer; #endif }; diff --git a/storage/blockdevice/COMPONENT_SD/source/SDBlockDevice.cpp b/storage/blockdevice/COMPONENT_SD/source/SDBlockDevice.cpp index 21d5bb8657f..73569fb6fbf 100644 --- a/storage/blockdevice/COMPONENT_SD/source/SDBlockDevice.cpp +++ b/storage/blockdevice/COMPONENT_SD/source/SDBlockDevice.cpp @@ -1078,7 +1078,7 @@ bd_size_t SDBlockDevice::_sd_sectors() return 0; } #ifdef DEVICE_SPI_ASYNCH - CacheAlignedBuffer csd_buffer; + StaticCacheAlignedBuffer csd_buffer; uint8_t *csd = csd_buffer.data(); #else uint8_t csd[16]; diff --git a/tools/python/run_python_tests.sh b/tools/python/run_python_tests.sh index b398106635e..208bd8b6cd7 100755 --- a/tools/python/run_python_tests.sh +++ b/tools/python/run_python_tests.sh @@ -11,7 +11,7 @@ set -e -PYTHON=python3 +PYTHON=python # Comma separated list of directories to exclude from coverage COVERAGE_EXCLUDES='--omit=python_tests/*' diff --git a/tools/python/scancode_evaluate/scancode_evaluate.py b/tools/python/scancode_evaluate/scancode_evaluate.py index 3cf24457f9d..3e1ebeb91df 100644 --- a/tools/python/scancode_evaluate/scancode_evaluate.py +++ b/tools/python/scancode_evaluate/scancode_evaluate.py @@ -72,14 +72,30 @@ def has_spdx_text_in_scancode_output(scancode_output_data_file_licenses): ) +SPDX_LICENSE_REGEX = re.compile(r"SPDX-License-Identifier: *(\S+)") + + def has_spdx_text_in_analysed_file(scanned_file_content): """Returns true if the file analysed by ScanCode contains SPDX identifier.""" - return bool(re.findall("SPDX-License-Identifier:?", scanned_file_content)) + return bool(SPDX_LICENSE_REGEX.findall(scanned_file_content)) - -def has_binary_license(scanned_file_content): - """Returns true if the file analysed by ScanCode contains a Permissive Binary License.""" - return bool(re.findall("Permissive Binary License", scanned_file_content)) +def has_exempted_spdx_identifier(scanned_file_content): + """ + Returns true if the file analysed by scancode contains an exempted SPDX identifier. 
+ (this is used for licenses like Nordic BSD which are not technically permissive licenses according + to SPDX but which are still OK for us) + """ + spdx_find_result = SPDX_LICENSE_REGEX.findall(scanned_file_content) + if len(spdx_find_result) == 1: + if spdx_find_result[0] == "LicenseRef-Nordic-5-Clause": + return True + if spdx_find_result[0] == "LicenseRef-PBL": + return True + else: + return False + else: + # Either 0 or multiple matches, ignore + return False def get_file_text(scancode_output_data_file): @@ -94,6 +110,7 @@ def get_file_text(scancode_output_data_file): except UnicodeDecodeError: userlog.warning("Unable to decode file text in: %s" % file_path) # Ignore files that cannot be decoded + return None def license_check(scancode_output_path): @@ -135,7 +152,10 @@ def license_check(scancode_output_path): if not has_permissive_text_in_scancode_output(scancode_output_data_file['licenses']): scanned_file_content = get_file_text(scancode_output_data_file) - if not (scanned_file_content and has_binary_license(scanned_file_content)): + if (scanned_file_content is None + or has_exempted_spdx_identifier(scanned_file_content)): + continue + else: scancode_output_data_file['fail_reason'] = MISSING_PERMISSIVE_LICENSE_TEXT license_offenders.append(scancode_output_data_file) From 3ec38f6cf7d6c146c27c5518c1811bcb2fa0f660 Mon Sep 17 00:00:00 2001 From: Jamie Smith Date: Tue, 12 Dec 2023 23:59:01 -0800 Subject: [PATCH 7/8] Formatting, docs, revert accidental change --- .../include/platform/CacheAlignedBuffer.h | 69 +++++++++++-------- tools/python/run_python_tests.sh | 2 +- 2 files changed, 40 insertions(+), 31 deletions(-) diff --git a/platform/include/platform/CacheAlignedBuffer.h b/platform/include/platform/CacheAlignedBuffer.h index 777d5aadb33..72477d14cff 100644 --- a/platform/include/platform/CacheAlignedBuffer.h +++ b/platform/include/platform/CacheAlignedBuffer.h @@ -25,34 +25,42 @@ namespace mbed { namespace detail::cab { - /** - * @brief Calculate the needed capacity for a cache aligned buffer's backing buffer based on the - * needed capacity and element size. - * - * @param neededCapacity Capacity needed for the buffer - * @param elementSize Size of each element - * - * @return Needed backing buffer size - */ - constexpr inline size_t getNeededBackingBufferSize(size_t neededCapacity, size_t elementSize) { +/** + * @brief Calculate the needed capacity for a cache aligned buffer's backing buffer based on the + * needed capacity and element size. + * + * @param neededCapacity Capacity needed for the buffer + * @param elementSize Size of each element + * + * @return Needed backing buffer size + */ +constexpr inline size_t getNeededBackingBufferSize(size_t neededCapacity, size_t elementSize) +{ #if __DCACHE_PRESENT - // Allocate enough extra space that we can shift the start of the buffer towards higher addresses to be on a cache line. - // The worst case for this is when the first byte is allocated 1 byte past the start of a cache line, so we - // will need an additional (cache line size - 1) bytes. - // Additionally, since we are going to be invalidating this buffer, we can't allow any other variables to be - // in the same cache lines, or they might get corrupted. - // So, we need to round up the backing buffer size to the nearest multiple of the cache line size. 
- // The math for rounding up can be found here: - // https://community.st.com/t5/stm32-mcus-products/maintaining-cpu-data-cache-coherence-for-dma-buffers/td-p/95746 - size_t requiredSizeRoundedUp = (neededCapacity * elementSize + __SCB_DCACHE_LINE_SIZE - 1) & ~((__SCB_DCACHE_LINE_SIZE) - 1); - return requiredSizeRoundedUp + __SCB_DCACHE_LINE_SIZE - 1; + // Allocate enough extra space that we can shift the start of the buffer towards higher addresses to be on a cache line. + // The worst case for this is when the first byte is allocated 1 byte past the start of a cache line, so we + // will need an additional (cache line size - 1) bytes. + // Additionally, since we are going to be invalidating this buffer, we can't allow any other variables to be + // in the same cache lines, or they might get corrupted. + // So, we need to round up the backing buffer size to the nearest multiple of the cache line size. + // The math for rounding up can be found here: + // https://community.st.com/t5/stm32-mcus-products/maintaining-cpu-data-cache-coherence-for-dma-buffers/td-p/95746 + size_t requiredSizeRoundedUp = (neededCapacity * elementSize + __SCB_DCACHE_LINE_SIZE - 1) & ~((__SCB_DCACHE_LINE_SIZE) - 1); + return requiredSizeRoundedUp + __SCB_DCACHE_LINE_SIZE - 1; #else - // No cache on this platform so don't need any extra space. - return neededCapacity * elementSize; + // No cache on this platform so don't need any extra space. + return neededCapacity * elementSize; #endif - } +} } +/** \addtogroup platform-public-api */ +/** @{*/ +/** + * \defgroup platform_CacheAlignedBuffer CacheAlignedBuffer class + * @{ + */ + /** * @brief CacheAlignedBuffer is used by Mbed in locations where we need a cache-aligned buffer. * @@ -67,7 +75,7 @@ namespace detail::cab { * *

<p>%CacheAlignedBuffer provides an easy way to allocate the correct amount of space so that
 * a buffer of any size can be made cache-aligned. To instantiate a %CacheAlignedBuffer, create one of its
- * subtypes, #StaticCacheAlignedBuffer or #DynamicCacheAlignedBuffer.</p>
+ * subtypes, StaticCacheAlignedBuffer or DynamicCacheAlignedBuffer.</p>
 *
 * <h1>Converting Code to use CacheAlignedBuffer</h1>
* For code using static arrays, like this: @@ -101,7 +109,7 @@ class CacheAlignedBuffer { protected: /// Pointer to the aligned buffer. Must be set in each constructor of each subclass. - DataT * _alignedBufferPtr; + DataT *_alignedBufferPtr; /// Capacity of the aligned buffer, in terms of number of DataT elements size_t _alignedBufferCapacity; @@ -274,7 +282,7 @@ class StaticCacheAlignedBuffer : public CacheAlignedBuffer { */ template class DynamicCacheAlignedBuffer : public CacheAlignedBuffer { - uint8_t * _heapMem; + uint8_t *_heapMem; public: /** * @brief Construct new cache-aligned buffer. Buffer will be zero-initialized and allocated from the heap. @@ -282,7 +290,7 @@ class DynamicCacheAlignedBuffer : public CacheAlignedBuffer { * @param capacity Number of elements the buffer shall hold */ explicit DynamicCacheAlignedBuffer(size_t capacity): - _heapMem(new uint8_t[detail::cab::getNeededBackingBufferSize(capacity, sizeof(DataT))]()) + _heapMem(new uint8_t[detail::cab::getNeededBackingBufferSize(capacity, sizeof(DataT))]()) { this->_alignedBufferPtr = this->findCacheLineStart(_heapMem); this->_alignedBufferCapacity = capacity; @@ -293,7 +301,7 @@ class DynamicCacheAlignedBuffer : public CacheAlignedBuffer { * its data will be copied from the other buffer. */ DynamicCacheAlignedBuffer(DynamicCacheAlignedBuffer const &other): - _heapMem(new uint8_t[detail::cab::getNeededBackingBufferSize(other._alignedBufferCapacity, sizeof(DataT))]) + _heapMem(new uint8_t[detail::cab::getNeededBackingBufferSize(other._alignedBufferCapacity, sizeof(DataT))]) { this->_alignedBufferCapacity = other._alignedBufferCapacity; this->_alignedBufferPtr = this->findCacheLineStart(_heapMem); @@ -313,8 +321,7 @@ class DynamicCacheAlignedBuffer : public CacheAlignedBuffer { DynamicCacheAlignedBuffer &operator=(DynamicCacheAlignedBuffer const &other) { // Check for self assignment - if(&other == this) - { + if (&other == this) { return *this; } @@ -326,6 +333,8 @@ class DynamicCacheAlignedBuffer : public CacheAlignedBuffer { } }; +/// @} + } #endif //MBED_CACHEALIGNEDBUFFER_H diff --git a/tools/python/run_python_tests.sh b/tools/python/run_python_tests.sh index 208bd8b6cd7..b398106635e 100755 --- a/tools/python/run_python_tests.sh +++ b/tools/python/run_python_tests.sh @@ -11,7 +11,7 @@ set -e -PYTHON=python +PYTHON=python3 # Comma separated list of directories to exclude from coverage COVERAGE_EXCLUDES='--omit=python_tests/*' From 9195ed83db38ef7f1f0ba38844d460568b409941 Mon Sep 17 00:00:00 2001 From: Jamie Smith Date: Wed, 13 Dec 2023 00:06:56 -0800 Subject: [PATCH 8/8] Update code blocks to pass spell checker --- platform/include/platform/CacheAlignedBuffer.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/platform/include/platform/CacheAlignedBuffer.h b/platform/include/platform/CacheAlignedBuffer.h index 72477d14cff..50b418b7de8 100644 --- a/platform/include/platform/CacheAlignedBuffer.h +++ b/platform/include/platform/CacheAlignedBuffer.h @@ -79,27 +79,27 @@ constexpr inline size_t getNeededBackingBufferSize(size_t neededCapacity, size_t * *

<h1>Converting Code to use CacheAlignedBuffer</h1>
 * For code using static arrays, like this:
- * \code{.cpp}
+ * @code
 * uint8_t rxBuffer[16];
 * spi.transfer_and_wait(nullptr, 0, rxBuffer, 16);
- * \endcode
+ * @endcode
 * Use a StaticCacheAlignedBuffer:
- * \code{.cpp}
+ * @code
 * StaticCacheAlignedBuffer<uint8_t, 16> rxBuffer;
 * spi.transfer_and_wait(nullptr, 0, rxBuffer, 16);
- * \endcode
+ * @endcode
 * For code using dynamic allocation to handle unknown buffer sizes, like:
- * \code{.cpp}
+ * @code
 * uint16_t * rxBuffer = new uint16_t[bufferSize];
 * spi.transfer_and_wait(nullptr, 0, rxBuffer, bufferSize);
 * ...
 * delete[] rxBuffer;
- * \endcode
+ * @endcode
 * use a DynamicCacheAlignedBuffer:
- * \code{.cpp}
+ * @code
 * DynamicCacheAlignedBuffer<uint16_t> rxBuffer(bufferSize);
 * spi.transfer_and_wait(nullptr, 0, rxBuffer, bufferSize);
- * \endcode
+ * @endcode
 *
 * @tparam DataT Type of the data to store in the buffer. Note: %CacheAlignedBuffer is not designed for
 * using class types as DataT, and will not call constructors.
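
Putting the series together, here is a minimal end-to-end sketch of the reworked asynchronous API
(not taken from the patches themselves; the pin names and the two-byte transfer are placeholder
choices for a board with DEVICE_SPI_ASYNCH enabled):

    #include "mbed.h"

    int main()
    {
        SPI device(PA_7, PA_6, PA_5); // MOSI, MISO, SCLK -- placeholder pins
        device.format(8, 0);

        uint8_t command[2] = {0x0A, 0x0B};

        // The Rx buffer is a CacheAlignedBuffer subtype, so the driver can invalidate its
        // cache lines after DMA completes without corrupting neighboring variables.
        StaticCacheAlignedBuffer<uint8_t, 2> response;

        int result = device.transfer_and_wait(command, sizeof(command), response, sizeof(command));
        if (result == 0) {
            printf("received 0x%02X 0x%02X\n", response[0], response[1]);
        }
    }

Because transfer_and_wait() takes the buffer by reference through its CacheAlignedBuffer<uint8_t>
base, the same call site works unchanged with a DynamicCacheAlignedBuffer<uint8_t> when the size is
only known at runtime.
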