Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ChaCha20: add optimized versions for amd64 (SSSE3 & AVX2) #392

Draft
wants to merge 1 commit into
base: 1.1
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions src/benchmark.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#ifndef TINC_BENCHMARK_H
#define TINC_BENCHMARK_H

#include "system.h"

static struct timespec start;
static struct timespec end;
static double elapsed;
static double rate;
static unsigned int count;

static void clock_start(void) {
count = 0;
clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);
}

static bool clock_countto(double seconds) {
clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end);
elapsed = (double) end.tv_sec + (double) end.tv_nsec * 1e-9
- (double) start.tv_sec - (double) start.tv_nsec * 1e-9;

if(elapsed < seconds) {
return ++count;
}

rate = count / elapsed;
return false;
}

#endif // TINC_BENCHMARK_H
61 changes: 61 additions & 0 deletions src/chacha-poly1305/bench_chacha.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#include "../system.h"

#include "../benchmark.h"
#include "../random.h"
#include "../crypto.h"
#include "../xalloc.h"
#include "chacha.h"

#define BUFFER_SIZE (1024 * 1024)

static FILE *dev_null;

static void benchmark(chacha_ctx *ctx, const uint8_t *plaintext, uint8_t *ciphertext, size_t len) {
for(clock_start(); clock_countto(5);) {
chacha_encrypt_bytes(ctx, plaintext, ciphertext, len);
}

// Prevent the compiler from optimizing out encryption
fwrite(ciphertext, len, 1, dev_null);
fprintf(stderr, "%8zu: %14.2lf op/s\n", len, rate);
}

const size_t block_sizes[] = {
32,
256,
512,
1024,
16 * 1024,
128 * 1024,
BUFFER_SIZE,
};

int main(void) {
dev_null = fopen("/dev/null", "w");
random_init();
chacha_resolve_functions();

uint8_t key[256 / 8];
uint8_t iv[8];
randomize(key, sizeof(key));
randomize(iv, sizeof(iv));

chacha_ctx ctx;
chacha_keysetup(&ctx, key, 256);
chacha_ivsetup(&ctx, iv, NULL);

uint8_t *plaintext = xmalloc(BUFFER_SIZE);
uint8_t *ciphertext = malloc(BUFFER_SIZE);
randomize(plaintext, BUFFER_SIZE);

for(size_t i = 0; i < sizeof(block_sizes) / sizeof(*block_sizes); ++i) {
benchmark(&ctx, plaintext, ciphertext, block_sizes[i]);
}

free(ciphertext);
free(plaintext);
random_exit();
fclose(dev_null);

return 0;
}
34 changes: 30 additions & 4 deletions src/chacha-poly1305/chacha.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@ Public domain.
#include "../system.h"

#include "chacha.h"

typedef struct chacha_ctx chacha_ctx;
#include "../cpu.h"

#define U8C(v) (v##U)
#define U32C(v) (v##U)
Expand Down Expand Up @@ -79,8 +78,7 @@ void chacha_ivsetup(chacha_ctx *x, const uint8_t *iv, const uint8_t *counter) {
x->input[15] = U8TO32_LITTLE(iv + 4);
}

void
chacha_encrypt_bytes(chacha_ctx *x, const uint8_t *m, uint8_t *c, uint32_t bytes) {
static void chacha_encrypt_bytes_generic(chacha_ctx *x, const uint8_t *m, uint8_t *c, uint32_t bytes) {
uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
uint32_t j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
uint8_t *ctarget = NULL;
Expand Down Expand Up @@ -222,3 +220,31 @@ chacha_encrypt_bytes(chacha_ctx *x, const uint8_t *m, uint8_t *c, uint32_t bytes
m += 64;
}
}

static chacha_encrypt_bytes_t *chacha_encrypt_bytes_impl;

void chacha_encrypt_bytes(struct chacha_ctx *x, const uint8_t *m, uint8_t *c, uint32_t bytes) {
chacha_encrypt_bytes_impl(x, m, c, bytes);
}

void chacha_resolve_functions(void) {
cpu_detect_features();

#ifdef HAVE_CPU_AVX2

if(cpu_supports(CPU_AVX2)) {
chacha_encrypt_bytes_impl = chacha_encrypt_bytes_avx2;
return;
}

#endif
#ifdef HAVE_CPU_SSSE3

if(cpu_supports(CPU_SSSE3)) {
chacha_encrypt_bytes_impl = chacha_encrypt_bytes_ssse3;
return;
}

#endif
chacha_encrypt_bytes_impl = chacha_encrypt_bytes_generic;
}
16 changes: 14 additions & 2 deletions src/chacha-poly1305/chacha.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,28 @@ Public domain.
#ifndef CHACHA_H
#define CHACHA_H

struct chacha_ctx {
typedef struct chacha_ctx {
uint32_t input[16];
};
} chacha_ctx;

#define ROUNDS 20

#define CHACHA_MINKEYLEN 16
#define CHACHA_NONCELEN 8
#define CHACHA_CTRLEN 8
#define CHACHA_STATELEN (CHACHA_NONCELEN+CHACHA_CTRLEN)
#define CHACHA_BLOCKLEN 64

typedef void (chacha_encrypt_bytes_t)(chacha_ctx *ctx, const uint8_t *m, uint8_t *c, uint32_t bytes);

#ifdef HAVE_CPU_AVX2
extern chacha_encrypt_bytes_t chacha_encrypt_bytes_avx2;
#endif
#ifdef HAVE_CPU_SSSE3
extern chacha_encrypt_bytes_t chacha_encrypt_bytes_ssse3;
#endif

void chacha_resolve_functions(void);
void chacha_keysetup(struct chacha_ctx *x, const uint8_t *k, uint32_t kbits);
void chacha_ivsetup(struct chacha_ctx *x, const uint8_t *iv, const uint8_t *ctr);
void chacha_encrypt_bytes(struct chacha_ctx *x, const uint8_t *m, uint8_t *c, uint32_t bytes);
Expand Down
27 changes: 27 additions & 0 deletions src/chacha-poly1305/chacha_avx2.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#include "../system.h"

#include "chacha.h"
#include "../xalloc.h"

#if defined(__clang__)
# pragma clang attribute push (__attribute__((target("sse2,ssse3,sse4.1,avx2"))), apply_to=function)
#elif defined(__GNUC__)
# pragma GCC target("sse2", "ssse3", "sse4.1", "avx2")
#endif

#include <immintrin.h>

void chacha_encrypt_bytes_avx2(chacha_ctx *ctx, const uint8_t *m, uint8_t *c, uint32_t bytes) {
uint32_t *x = &ctx->input[0];

if(!bytes) {
return;
}

#include "chacha_avx2.h"
#include "chacha_ssse3.h"
}

#ifdef __clang__
# pragma clang attribute pop
#endif
Loading