Skip to content

Commit

Permalink
Add NEON
Browse files: browse the repository at this point in the history
  • Loading branch information
stellar-aria committed Nov 29, 2024
1 parent c1321ad commit 2e6c77a
Showing 1 changed file with 45 additions and 2 deletions.
47 changes: 45 additions & 2 deletions src/deluge/dsp/memmove.c
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
#include "mem_functions.h"
#include <arm_neon.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

void* memmove(void* dst, const void* src, size_t len) {
void* my_memmove(void* dst, const void* src, size_t len) {
ptrdiff_t result;
asm("sub %0, %1, %2" : "=r"(result) : "r"(dst), "r"(src));
if (abs(result) >= (ptrdiff_t)len) {
Expand All @@ -30,9 +31,51 @@ void* memmove(void* dst, const void* src, size_t len) {
// copy backwards by word...
const uint32_t* s_32 = (const uint32_t*)s_16;
uint32_t* d_32 = (uint32_t*)d_16;
while ((intptr_t)d_32 > (intptr_t)dst) {
if (len % 8) {
*--d_32 = *--s_32;
}

// doubleword
s = (const char*)s_32;
d = (char*)d_32;
if (len % 16) {
s -= 8;
d -= 8;
vst1_u8(d, vld1_u8(s));
}

// quadword
if (len % 32) {
s -= 16;
d -= 16;
vst1q_u8(d, vld1q_u8(s));
}

// quadword x2
if (len % 64) {
s -= 32;
d -= 32;
vst1q_u8_x2(d, vld1q_u8_x2(s));
}

// quadword x4
if (len % 128) {
s -= 64;
d -= 64;
vst1q_u8_x4(d, vld1q_u8_x4(s));
}

// quadword x8
while ((intptr_t)d > (intptr_t)dst) {
s -= 64;
uint8x16x4_t ld1 = vld1q_u8_x4(s);
s -= 64;
uint8x16x4_t ld2 = vld1q_u8_x4(s);
d -= 64;
vst1q_u8_x4(d, ld1);
d -= 64;
vst1q_u8_x4(d, ld2);
}

return dst;
}

0 comments on commit 2e6c77a

Please sign in to comment.