Skip to content

Commit

Permalink
add some docs
Browse files Browse the repository at this point in the history
  • Loading branch information
rzblue committed Sep 19, 2024
1 parent 282d741 commit 76ae584
Showing 1 changed file with 69 additions and 23 deletions.
92 changes: 69 additions & 23 deletions hal/src/main/native/athena/AddressableLEDSimd.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,40 +7,41 @@

#include "hal/AddressableLEDTypes.h"
#include "simd/simd.h"
//https://developer.arm.com/documentation/ddi0409/i/instruction-timing/instruction-specific-scheduling/advanced-simd-load-store-instructions?lang=en
//https://developer.arm.com/documentation/ddi0406/c/Application-Level-Architecture/Instruction-Details/Alphabetical-list-of-instructions/VLD4--single-4-element-structure-to-one-lane-

// https://developer.arm.com/documentation/ddi0409/i/instruction-timing/instruction-specific-scheduling/advanced-simd-load-store-instructions?lang=en
// https://developer.arm.com/documentation/ddi0406/c/Application-Level-Architecture/Instruction-Details/Alphabetical-list-of-instructions/VLD4--single-4-element-structure-to-one-lane-

namespace {
using namespace Simd::Neon;

/**
 * Pointer to an in-place conversion routine that accepts a single argument of
 * type T and returns nothing. Used as a non-type template parameter so the
 * conversion routine is selected at compile time.
 *
 * @tparam T the argument type accepted by the conversion routine
 */
template <typename T>
using ConvertFunc = void (*)(T);

/**
 * Reorders the channels of val in place by exchanging elements 0 and 1.
 *
 * val is taken by value, so T must be a handle (e.g. a pointer) whose
 * operator[] yields references to the caller's storage; otherwise the swap
 * only affects a local copy.
 *
 * NOTE(review): the inline comment implies element 0 holds B and element 1
 * holds G - confirm against the pixel layout used by the callers.
 *
 * @tparam T indexable handle to at least 2 channel elements
 * @param val the channel elements to reorder
 */
template <typename T>
void RGBToRBG(T val) {
  // Exactly one exchange: performing it twice would restore the input order.
  std::swap(val[0], val[1]);  // swap G and B
}

/**
 * Reorders the channels of val in place by exchanging elements 0 and 2.
 *
 * val is taken by value, so T must be a handle (e.g. a pointer) whose
 * operator[] yields references to the caller's storage; otherwise the swap
 * only affects a local copy.
 *
 * NOTE(review): the inline comment implies elements 0 and 2 hold the B and R
 * channels - confirm against the pixel layout used by the callers.
 *
 * @tparam T indexable handle to at least 3 channel elements
 * @param val the channel elements to reorder
 */
template <typename T>
void RGBToBGR(T val) {
  // Exactly one exchange: performing it twice would restore the input order.
  std::swap(val[0], val[2]);  // swap R and B
}

/**
 * Reorders the channels of val in place with two exchanges: elements 0 and 2
 * first, then elements 0 and 1. The net effect is a rotation: the original
 * elements (a, b, c) end up as (b, c, a).
 *
 * val is taken by value, so T must be a handle (e.g. a pointer) whose
 * operator[] yields references to the caller's storage; otherwise the swaps
 * only affect a local copy.
 *
 * NOTE(review): the channel names in the inline comments imply a particular
 * in-memory channel order - confirm against the pixel layout used by the
 * callers.
 *
 * @tparam T indexable handle to at least 3 channel elements
 * @param val the channel elements to reorder
 */
template <typename T>
void RGBToBRG(T val) {
  // Each exchange must run exactly once; repeating the pair yields a
  // different permutation.
  std::swap(val[0], val[2]);  // swap R and B
  std::swap(val[0], val[1]);  // swap G and R
}

/**
 * Reorders the channels of val in place by exchanging elements 1 and 2.
 *
 * val is taken by value, so T must be a handle (e.g. a pointer) whose
 * operator[] yields references to the caller's storage; otherwise the swap
 * only affects a local copy.
 *
 * NOTE(review): the inline comment implies elements 1 and 2 hold the G and R
 * channels - confirm against the pixel layout used by the callers.
 *
 * @tparam T indexable handle to at least 3 channel elements
 * @param val the channel elements to reorder
 */
template <typename T>
void RGBToGRB(T val) {
  // Exactly one exchange: performing it twice would restore the input order.
  std::swap(val[1], val[2]);  // swap R and G
}

/**
 * Reorders the channels of val in place with two exchanges: elements 0 and 2
 * first, then elements 1 and 2. The net effect is a rotation: the original
 * elements (a, b, c) end up as (c, a, b).
 *
 * val is taken by value, so T must be a handle (e.g. a pointer) whose
 * operator[] yields references to the caller's storage; otherwise the swaps
 * only affect a local copy.
 *
 * NOTE(review): the channel names in the inline comments imply a particular
 * in-memory channel order - confirm against the pixel layout used by the
 * callers.
 *
 * @tparam T indexable handle to at least 3 channel elements
 * @param val the channel elements to reorder
 */
template <typename T>
void RGBToGBR(T val) {
  // Each exchange must run exactly once; repeating the pair yields a
  // different permutation.
  std::swap(val[0], val[2]);  // swap R and B
  std::swap(val[1], val[2]);  // swap B and G
}

template <bool inAlign, bool outAlign, ConvertFunc<uint8x16_t*> Convert>
Expand Down Expand Up @@ -78,6 +79,21 @@ void RGBConvert_16(const uint8_t* src, uint8_t* dst) {
}
}

/**
* Copies 8 pixels from src to dst, converting from RGB(?) to order. Optimizes
* based on alignment of input and output arrays specified by inAlign and
* outAlign
* @tparam order the color order to convert to
* @tparam inAlign whether src is aligned to the size of a NEON register (16
* bytes)
* @tparam outAlign whether dst is aligned to the size of a NEON register (16
* bytes)
* @param[in] src The source array
* @param[out] dst the destination array
* @pre src and dst must contain at least 32 bytes (8 pixels)
* @pre if inAlign is true, src must be 16 byte aligned
* @pre if outAlign is true, dst must be 16 byte aligned
*/
template <HAL_AddressableLEDColorOrder order, bool inAlign, bool outAlign>
void RGBConvert_8(const uint8_t* src, uint8_t* dst) {
switch (order) {
Expand All @@ -98,8 +114,15 @@ void RGBConvert_8(const uint8_t* src, uint8_t* dst) {
break;
}
}

void RGBConvert_1(HAL_AddressableLEDColorOrder order, const uint8_t* in, uint8_t* out) {
/**
* Copies 1 pixel from in to out, converting from RGB to the specified order.
* @param[in] order the color order to convert to
* @param[in] in the source array
* @param[out] out the destination array
* @pre in and out must contain at least 1 pixel.
*/
void RGBConvert_1(HAL_AddressableLEDColorOrder order, const uint8_t* in,
uint8_t* out) {
uint8_t tmp[4];
std::memcpy(tmp, in, 4);
switch (order) {
Expand All @@ -122,11 +145,27 @@ void RGBConvert_1(HAL_AddressableLEDColorOrder order, const uint8_t* in, uint8_t
std::memcpy(out, in, 4);
}
}

/**
* Copies len pixels from src to dst, converting from RGB(?) to order. Optimizes
* based on alignment of input and output arrays specified by inAlign and
* outAlign
* @tparam order the color order to convert to
* @tparam inAlign whether src is aligned to the size of a NEON register (16
* bytes)
* @tparam outAlign whether dst is aligned to the size of a NEON register (16
* bytes)
* @param[in] src The source array
* @param[out] dst the destination array
* @param[in] len the size (in pixels, len = (size in bytes) / 4)
* @pre src and dst must have at least len*4 capacity in bytes
* @pre if inAlign is true, src must be 16 byte aligned
* @pre if outAlign is true, dst must be 16 byte aligned
*/
template <HAL_AddressableLEDColorOrder order, bool inAlign, bool outAlign>
void RGBConvert(const uint8_t* src, uint8_t* dst, size_t len) {
if(len >= 16) {
constexpr size_t A4 = A * 4; // Stride of 1 16 pixel conversion operation. simd register size
if (len >= 16) {
constexpr size_t A4 =
A * 4; // Stride of 1 16 pixel conversion operation. simd register size
size_t size = len * 4;
size_t aligned = Simd::AlignLo(size, A4);
for (size_t i = 0; i < aligned; i += A4) {
Expand All @@ -137,30 +176,37 @@ void RGBConvert(const uint8_t* src, uint8_t* dst, size_t len) {
src + size - A4,
dst + size - A4); // copy last 16 pixels, possibly recopying.
}
} else if( len >=8 ) {
} else if (len >= 8) {
RGBConvert_8<order, inAlign, outAlign>(src, dst);
if(len > 8) {
if (len > 8) {
size_t recopyOffset = (len * 4) - (HA * 4);
RGBConvert_8<order, false, false>(src + recopyOffset, dst + recopyOffset); // copy last 8 pixels, possibly recopying
RGBConvert_8<order, false, false>(
src + recopyOffset,
dst + recopyOffset); // copy last 8 pixels, possibly recopying
}
} else {
for(size_t i = 0; i < len; i += 4) {
for (size_t i = 0; i < len; i += 4) {
RGBConvert_1(order, src + i, dst + i);
}
// we could also use neon single lane instructions
// https://developer.arm.com/documentation/ddi0406/c/Application-Level-Architecture/Instruction-Details/Alphabetical-list-of-instructions/VLD4--single-4-element-structure-to-all-lanes-
// https://developer.arm.com/documentation/102474/0100/Fundamentals-of-Armv8-Neon-technology/Registers--vectors--lanes-and-elements
// vld4_lane_u8
}

}

/**
* Copies pixelCount pixels from src to dst, converting from RGB to the
* specified order
*
*/
template <HAL_AddressableLEDColorOrder order>
void RGBConvert(const uint8_t* src, uint8_t* dst, size_t pixelCount) {
if (Aligned(src) && Aligned(dst)) {
RGBConvert<order, true, true>(src, dst, pixelCount);
} else if(Aligned(src)) {
} else if (Aligned(src)) {
RGBConvert<order, true, false>(src, dst, pixelCount);
} else if(Aligned(dst)) {
} else if (Aligned(dst)) {
RGBConvert<order, false, true>(src, dst, pixelCount);
} else {
RGBConvert<order, false, false>(src, dst, pixelCount);
Expand Down

0 comments on commit 76ae584

Please sign in to comment.