-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #318 from 00pauln00/arm-crc32c
Arm crc32c
- Loading branch information
Showing
8 changed files
with
737 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
// Copyright 2017 The CRC32C Authors. All rights reserved. | ||
// Use of this source code is governed by a BSD-style license that can be | ||
// found in the LICENSE file. See the AUTHORS file for names of contributors. | ||
|
||
// In a separate source file to allow this accelerated CRC32C function to be | ||
// compiled with the appropriate compiler flags to enable ARM NEON CRC32C | ||
// instructions. | ||
|
||
// This implementation is based on https://github.com/google/leveldb/pull/490. | ||
|
||
|
||
#include <stddef.h> | ||
|
||
#include <arm_acle.h> | ||
#include <arm_neon.h> | ||
|
||
#define KBYTES 1032 | ||
#define SEGMENTBYTES 256 | ||
|
||
// compute 8bytes for each segment parallelly | ||
#define CRC32C32BYTES(P, IND) \ | ||
do { \ | ||
crc1 = __crc32cd( \ | ||
crc1, *((const uint64_t *)(P) + (SEGMENTBYTES / 8) * 1 + (IND))); \ | ||
crc2 = __crc32cd( \ | ||
crc2, *((const uint64_t *)(P) + (SEGMENTBYTES / 8) * 2 + (IND))); \ | ||
crc3 = __crc32cd( \ | ||
crc3, *((const uint64_t *)(P) + (SEGMENTBYTES / 8) * 3 + (IND))); \ | ||
crc0 = __crc32cd( \ | ||
crc0, *((const uint64_t *)(P) + (SEGMENTBYTES / 8) * 0 + (IND))); \ | ||
} while (0); | ||
|
||
// compute 8*8 bytes for each segment parallelly | ||
#define CRC32C256BYTES(P, IND) \ | ||
do { \ | ||
CRC32C32BYTES((P), (IND)*8 + 0) \ | ||
CRC32C32BYTES((P), (IND)*8 + 1) \ | ||
CRC32C32BYTES((P), (IND)*8 + 2) \ | ||
CRC32C32BYTES((P), (IND)*8 + 3) \ | ||
CRC32C32BYTES((P), (IND)*8 + 4) \ | ||
CRC32C32BYTES((P), (IND)*8 + 5) \ | ||
CRC32C32BYTES((P), (IND)*8 + 6) \ | ||
CRC32C32BYTES((P), (IND)*8 + 7) \ | ||
} while (0); | ||
|
||
// compute 4*8*8 bytes for each segment parallelly | ||
#define CRC32C1024BYTES(P) \ | ||
do { \ | ||
CRC32C256BYTES((P), 0) \ | ||
CRC32C256BYTES((P), 1) \ | ||
CRC32C256BYTES((P), 2) \ | ||
CRC32C256BYTES((P), 3) \ | ||
(P) += 4 * SEGMENTBYTES; \ | ||
} while (0) | ||
|
||
// niova-core uses linux convention of 0 xor | ||
#define kCRC32Xor 0x0 | ||
uint32_t crc32_arm(uint32_t crc, const uint8_t *data, size_t size) { | ||
int64_t length = size; | ||
uint32_t crc0, crc1, crc2, crc3; | ||
uint64_t t0, t1, t2; | ||
|
||
// k0=CRC(x^(3*SEGMENTBYTES*8)), k1=CRC(x^(2*SEGMENTBYTES*8)), | ||
// k2=CRC(x^(SEGMENTBYTES*8)) | ||
const poly64_t k0 = 0x8d96551c, k1 = 0xbd6f81f8, k2 = 0xdcb17aa4; | ||
|
||
crc = crc ^ kCRC32Xor; | ||
|
||
while (length >= KBYTES) { | ||
crc0 = crc; | ||
crc1 = 0; | ||
crc2 = 0; | ||
crc3 = 0; | ||
|
||
// Process 1024 bytes in parallel. | ||
CRC32C1024BYTES(data); | ||
|
||
// Merge the 4 partial CRC32C values. | ||
t2 = (uint64_t)vmull_p64(crc2, k2); | ||
t1 = (uint64_t)vmull_p64(crc1, k1); | ||
t0 = (uint64_t)vmull_p64(crc0, k0); | ||
crc = __crc32cd(crc3, *(uint64_t *)data); | ||
data += sizeof(uint64_t); | ||
crc ^= __crc32cd(0, t2); | ||
crc ^= __crc32cd(0, t1); | ||
crc ^= __crc32cd(0, t0); | ||
|
||
length -= KBYTES; | ||
} | ||
|
||
while (length >= 8) { | ||
crc = __crc32cd(crc, *(uint64_t *)data); | ||
data += 8; | ||
length -= 8; | ||
} | ||
|
||
if (length & 4) { | ||
crc = __crc32cw(crc, *(uint32_t *)data); | ||
data += 4; | ||
} | ||
|
||
if (length & 2) { | ||
crc = __crc32ch(crc, *(uint16_t *)data); | ||
data += 2; | ||
} | ||
|
||
if (length & 1) { | ||
crc = __crc32cb(crc, *data); | ||
} | ||
|
||
return crc ^ kCRC32Xor; | ||
} |
Oops, something went wrong.