Skip to content

Commit

Permalink
working lstm
Browse files Browse the repository at this point in the history
  • Loading branch information
PABannier committed Oct 19, 2024
1 parent d1118e9 commit 13d6ed1
Show file tree
Hide file tree
Showing 4 changed files with 122 additions and 65 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,6 @@ encodec
*.th
.vscode/

build/
build/

*.wav
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ endif()

set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_CXX_FLAGS_RELEASE "-O3")
set(CMAKE_CXX_FLAGS_DEBUG "-g -O0")

set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)

if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
set(ENCODEC_STANDALONE ON)
Expand Down
178 changes: 115 additions & 63 deletions encodec.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "ggml-alloc.h"
#include "ggml-backend.h"
#include "ggml.h"
#include "ggml/src/ggml-impl.h"

#ifdef GGML_USE_CUBLAS
#include "ggml-cuda.h"
Expand Down Expand Up @@ -32,6 +33,7 @@
#include "quantizer.h"

#define ENCODEC_FILE_MAGIC 'ggml'
#define ENCODEC_MAX_NODES 80000

typedef enum {
// Run the end-to-end encoder-decoder pipeline
Expand Down Expand Up @@ -96,9 +98,28 @@ struct encodec_model {
std::map<std::string, struct ggml_tensor *> tensors;
};

// Custom deleter for a heap-allocated ggml_cgraph produced by
// encodec_ggml_cgraph_create. Frees every array owned by the graph and then
// the graph struct itself. free(NULL) is a defined no-op, so individual
// member null checks are unnecessary; only the cgraph pointer is guarded.
struct encodec_ggml_cgraph_deleter {
    void operator()(struct ggml_cgraph * cgraph) {
        if (!cgraph) {
            return;
        }
        free(cgraph->nodes);
        free(cgraph->leafs);
        free(cgraph->visited_hash_set.keys);
        // the bitset is allocated alongside the keys in
        // encodec_ggml_cgraph_create; it was previously leaked here
        free(cgraph->visited_hash_set.used);
        free(cgraph->grads);
        free(cgraph);
    }
};

struct encodec_context {
encodec_model model;

// computational graph stored on the heap to avoid stack overflows
// the computational graph grows with the sequence length (because of the LSTM)
// which requires a lot of nodes
std::unique_ptr<struct ggml_cgraph, encodec_ggml_cgraph_deleter> gf;

// buffer for model evaluation
ggml_backend_buffer_t buf_compute;

Expand Down Expand Up @@ -201,7 +222,6 @@ bool encodec_load_model_weights(std::ifstream &infile, encodec_model &model, int
#ifdef GGML_USE_METAL
if (n_gpu_layers > 0) {
fprintf(stderr, "%s: using Metal backend\n", __func__);
ggml_metal_log_set_callback(ggml_log_callback_default, nullptr);
model.backend = ggml_backend_metal_init();
if (!model.backend) {
fprintf(stderr, "%s: ggml_backend_metal_init() failed\n", __func__);
Expand Down Expand Up @@ -473,24 +493,65 @@ bool encodec_load_model_weights(std::ifstream &infile, encodec_model &model, int
model.n_loaded++;
}

printf("%s: model size = %8.2f MB\n", __func__, total_size / 1024.0 / 1024.0);
printf("%s: model size = %.2f MB\n", __func__, total_size / 1024.0 / 1024.0);
}

infile.close();

return true;
}

struct ggml_cgraph *encodec_build_graph(struct encodec_context *ectx,
const float * inp_audio,
const int n_samples,
const encodec_run_mode_t mode) {
// Heap-allocate a ggml_cgraph with capacity for `size` nodes and `size`
// leafs, bypassing the fixed-size ggml_context arena. The LSTM graph grows
// with sequence length, so it can need far more nodes than a default-sized
// context holds. The returned graph must be released with
// encodec_ggml_cgraph_deleter (see encodec_context::gf).
static struct ggml_cgraph * encodec_ggml_cgraph_create(size_t size) {
    struct ggml_cgraph * cgraph = (struct ggml_cgraph *)calloc(1, sizeof(struct ggml_cgraph));
    cgraph->size    = size;
    cgraph->n_nodes = 0;
    cgraph->n_leafs = 0;
    // calloc(count, elem_size) form lets the allocator check the
    // multiplication for overflow (identical allocation otherwise)
    cgraph->nodes = (struct ggml_tensor **)calloc(size, sizeof(struct ggml_tensor *));
    cgraph->leafs = (struct ggml_tensor **)calloc(size, sizeof(struct ggml_tensor *));

    // candidate hash-table sizes: the next prime after each power of two
    static const size_t primes[] = {
        2, 3, 5, 11, 17, 37, 67, 131, 257, 521, 1031,
        2053, 4099, 8209, 16411, 32771, 65537, 131101,
        262147, 524309, 1048583, 2097169, 4194319, 8388617,
        16777259, 33554467, 67108879, 134217757, 268435459,
        536870923, 1073741827, 2147483659
    };
    static const size_t n_primes = sizeof(primes)/sizeof(primes[0]);

    // binary-search the smallest prime that is >= 2*size, so the visited
    // hash set stays at most half full to keep collisions low
    size_t l = 0;
    size_t r = n_primes;
    while (l < r) {
        size_t m = (l + r)/2;
        if (primes[m] < size * 2) {
            l = m + 1;
        } else {
            r = m;
        }
    }
    // fallback past the table: 2*size + 1 keeps the load factor <= 0.5
    // (odd, though not necessarily prime)
    size_t hash_size = l < n_primes ? primes[l] : (size * 2 + 1);

    cgraph->visited_hash_set.size = hash_size;
    cgraph->visited_hash_set.keys = (struct ggml_tensor **)calloc(hash_size, sizeof(struct ggml_tensor *));
    cgraph->visited_hash_set.used = (ggml_bitset_t *)calloc(ggml_bitset_size(hash_size), sizeof(ggml_bitset_t));
    cgraph->order = GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT;

    return cgraph;
}

void encodec_build_graph(struct encodec_context *ectx,
const float * inp_audio,
const int n_samples,
const encodec_run_mode_t mode) {
assert(mode == encodec_run_mode_t::FULL || mode == encodec_run_mode_t::ENCODE);

const auto & model = ectx->model;
const auto & hparams = model.hparams;
const auto & allocr = ectx->allocr;

auto & gf = ectx->gf;

const int *ratios = hparams.ratios;
const int kernel_size = hparams.kernel_size;
const int res_kernel_sz = hparams.residual_kernel_size;
Expand All @@ -504,7 +565,7 @@ struct ggml_cgraph *encodec_build_graph(struct encodec_context *ectx,

// since we are using ggml-alloc, this buffer only needs enough space to hold the
// ggml_tensor and ggml_cgraph structs, but not the tensor data
static size_t buf_size = ggml_tensor_overhead() * GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead();
static size_t buf_size = ggml_tensor_overhead() * ENCODEC_MAX_NODES + ggml_graph_overhead();
static std::vector<uint8_t> buf(buf_size);

struct ggml_init_params ggml_params = {
Expand All @@ -515,7 +576,7 @@ struct ggml_cgraph *encodec_build_graph(struct encodec_context *ectx,

struct ggml_context *ctx0 = ggml_init(ggml_params);

struct ggml_cgraph *gf = ggml_new_graph(ctx0);
gf = std::unique_ptr<struct ggml_cgraph, encodec_ggml_cgraph_deleter>(encodec_ggml_cgraph_create(ENCODEC_MAX_NODES));

struct ggml_tensor *inp = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, n_samples);
ggml_set_name(inp, "inp");
Expand All @@ -541,19 +602,18 @@ struct ggml_cgraph *encodec_build_graph(struct encodec_context *ectx,
case encodec_run_mode_t::FULL: {
ggml_set_name(decoded, "decoded");
ggml_set_output(decoded);
ggml_build_forward_expand(gf, decoded);
ggml_build_forward_expand(gf.get(), decoded);
} break;
case encodec_run_mode_t::ENCODE: {
ggml_set_name(codes, "codes");
ggml_set_output(codes);
ggml_build_forward_expand(gf, codes);
ggml_build_forward_expand(gf.get(), codes);
} break;
case encodec_run_mode_t::DECODE: {
return NULL;
assert(false);
} break;
default: {
fprintf(stderr, "%s: unknown run mode\n", __func__);
return NULL;
} break;
}

Expand All @@ -562,18 +622,18 @@ struct ggml_cgraph *encodec_build_graph(struct encodec_context *ectx,
ectx->encoded = encoded;
ectx->codes = codes;
ectx->decoded = decoded;

return gf;
}

struct ggml_cgraph *encodec_build_graph(struct encodec_context *ectx, const int32_t *codes,
const int n_codes, const encodec_run_mode_t mode) {
void encodec_build_graph(struct encodec_context *ectx, const int32_t *codes,
const int n_codes, const encodec_run_mode_t mode) {
assert(mode == encodec_run_mode_t::DECODE);

const auto & model = ectx->model;
const auto & hparams = model.hparams;
const auto & allocr = ectx->allocr;

auto & gf = ectx->gf;

const int n_bins = hparams.n_bins;
const int sr = hparams.sr;
const int bandwidth = hparams.bandwidth;
Expand All @@ -589,12 +649,12 @@ struct ggml_cgraph *encodec_build_graph(struct encodec_context *ectx, const int3

if (n_codes % n_q != 0) {
fprintf(stderr, "%s: invalid number of codes\n", __func__);
return NULL;
assert(false);
}

const int N = n_codes / n_q;

static size_t buf_size = ggml_tensor_overhead() * GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead();
static size_t buf_size = ggml_tensor_overhead() * ENCODEC_MAX_NODES + ggml_graph_overhead();
static std::vector<uint8_t> buf(buf_size);

struct ggml_init_params ggml_params = {
Expand All @@ -605,7 +665,7 @@ struct ggml_cgraph *encodec_build_graph(struct encodec_context *ectx, const int3

struct ggml_context *ctx0 = ggml_init(ggml_params);

struct ggml_cgraph *gf = ggml_new_graph(ctx0);
gf = std::unique_ptr<struct ggml_cgraph, encodec_ggml_cgraph_deleter>(encodec_ggml_cgraph_create(ENCODEC_MAX_NODES));

struct ggml_tensor *inp_codes = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, N, n_q);
ggml_set_name(inp_codes, "inp_codes");
Expand All @@ -626,20 +686,18 @@ struct ggml_cgraph *encodec_build_graph(struct encodec_context *ectx, const int3
case encodec_run_mode_t::DECODE: {
ggml_set_name(decoded, "decoded");
ggml_set_output(decoded);
ggml_build_forward_expand(gf, decoded);
ggml_build_forward_expand(gf.get(), decoded);
} break;
default: {
fprintf(stderr, "%s: unknown run mode\n", __func__);
return NULL;
assert(false);
} break;
}

ggml_free(ctx0);

ectx->codes = inp_codes;
ectx->decoded = decoded;

return gf;
}

static void encodec_zero_tensor(struct ggml_cgraph *gf, const char *name) {
Expand All @@ -652,39 +710,36 @@ bool encodec_eval_internal(struct encodec_context *ectx, const float * raw_audio
const encodec_run_mode_t mode) {
auto & model = ectx->model;
auto & allocr = ectx->allocr;
auto & gf = ectx->gf;

struct ggml_cgraph *gf = encodec_build_graph(ectx, raw_audio, n_samples, mode);
encodec_build_graph(ectx, raw_audio, n_samples, mode);

// allocate the graph tensors
ggml_gallocr_alloc_graph(allocr, gf);
ggml_gallocr_alloc_graph(allocr, gf.get());

// set the graph inputs
struct ggml_tensor * inp = ggml_graph_get_tensor(gf, "inp");
struct ggml_tensor * inp = ggml_graph_get_tensor(gf.get(), "inp");
ggml_backend_tensor_set(inp, raw_audio, 0, n_samples * ggml_element_size(inp));

// make sure accumulation tensor are zeroed
encodec_zero_tensor(gf, "enc_l0_ht");
encodec_zero_tensor(gf, "enc_l1_ht");
encodec_zero_tensor(gf, "enc_l0_ct");
encodec_zero_tensor(gf, "enc_l1_ct");
encodec_zero_tensor(gf.get(), "enc_l0_ht");
encodec_zero_tensor(gf.get(), "enc_l1_ht");
encodec_zero_tensor(gf.get(), "enc_l0_ct");
encodec_zero_tensor(gf.get(), "enc_l1_ct");

encodec_zero_tensor(gf, "dec_l0_ht");
encodec_zero_tensor(gf, "dec_l1_ht");
encodec_zero_tensor(gf, "dec_l0_ct");
encodec_zero_tensor(gf, "dec_l1_ct");
encodec_zero_tensor(gf.get(), "dec_l0_ht");
encodec_zero_tensor(gf.get(), "dec_l1_ht");
encodec_zero_tensor(gf.get(), "dec_l0_ct");
encodec_zero_tensor(gf.get(), "dec_l1_ct");

encodec_zero_tensor(gf, "quantized_out");
encodec_zero_tensor(gf.get(), "quantized_out");

// run the computation
if (ggml_backend_is_cpu(model.backend)) {
ggml_backend_cpu_set_n_threads(model.backend, n_threads);
}
#ifdef GGML_USE_METAL
if (ggml_backend_is_metal(model.backend)) {
ggml_backend_metal_set_n_cb(model.backend, n_threads);
}
#endif
ggml_backend_graph_compute(model.backend, gf);

ggml_backend_graph_compute(model.backend, gf.get());

return true;
}
Expand All @@ -694,39 +749,36 @@ bool encodec_eval_internal(struct encodec_context *ectx, const int32_t *codes,
const encodec_run_mode_t mode) {
auto & model = ectx->model;
auto & allocr = ectx->allocr;
auto & gf = ectx->gf;

struct ggml_cgraph *gf = encodec_build_graph(ectx, codes, n_codes, mode);
encodec_build_graph(ectx, codes, n_codes, mode);

// allocate the graph tensors
ggml_gallocr_alloc_graph(allocr, gf);
ggml_gallocr_alloc_graph(allocr, gf.get());

// set the graph inputs
struct ggml_tensor * inp = ggml_graph_get_tensor(gf, "inp_codes");
struct ggml_tensor * inp = ggml_graph_get_tensor(gf.get(), "inp_codes");
ggml_backend_tensor_set(inp, codes, 0, n_codes * ggml_element_size(inp));

// make sure accumulation tensor are zeroed
encodec_zero_tensor(gf, "enc_l0_ht");
encodec_zero_tensor(gf, "enc_l1_ht");
encodec_zero_tensor(gf, "enc_l0_ct");
encodec_zero_tensor(gf, "enc_l1_ct");
encodec_zero_tensor(gf.get(), "enc_l0_ht");
encodec_zero_tensor(gf.get(), "enc_l1_ht");
encodec_zero_tensor(gf.get(), "enc_l0_ct");
encodec_zero_tensor(gf.get(), "enc_l1_ct");

encodec_zero_tensor(gf, "dec_l0_ht");
encodec_zero_tensor(gf, "dec_l1_ht");
encodec_zero_tensor(gf, "dec_l0_ct");
encodec_zero_tensor(gf, "dec_l1_ct");
encodec_zero_tensor(gf.get(), "dec_l0_ht");
encodec_zero_tensor(gf.get(), "dec_l1_ht");
encodec_zero_tensor(gf.get(), "dec_l0_ct");
encodec_zero_tensor(gf.get(), "dec_l1_ct");

encodec_zero_tensor(gf, "quantized_out");
encodec_zero_tensor(gf.get(), "quantized_out");

// run the computation
if (ggml_backend_is_cpu(model.backend)) {
ggml_backend_cpu_set_n_threads(model.backend, n_threads);
}
#ifdef GGML_USE_METAL
if (ggml_backend_is_metal(model.backend)) {
ggml_backend_metal_set_n_cb(model.backend, n_threads);
}
#endif
ggml_backend_graph_compute(model.backend, gf);

ggml_backend_graph_compute(model.backend, gf.get());

return true;
}
Expand All @@ -742,10 +794,10 @@ bool encodec_eval(struct encodec_context *ectx, const float *raw_audio,
ectx->allocr = ggml_gallocr_new(ggml_backend_get_default_buffer_type(ectx->model.backend));

// create the graph for memory usage estimation
struct ggml_cgraph *gf = encodec_build_graph(ectx, raw_audio, n_samples, mode);
encodec_build_graph(ectx, raw_audio, n_samples, mode);

// pre-allocate the compute buffer
ggml_gallocr_reserve(ectx->allocr, gf);
ggml_gallocr_reserve(ectx->allocr, ectx->gf.get());
size_t mem_size = ggml_gallocr_get_buffer_size(ectx->allocr, 0);
fprintf(stderr, "%s: compute buffer size: %.2f MB\n\n", __func__, mem_size / 1024.0 / 1024.0);
}
Expand All @@ -772,10 +824,10 @@ bool encodec_eval(struct encodec_context *ectx, const int32_t *codes,
ectx->allocr = ggml_gallocr_new(ggml_backend_get_default_buffer_type(ectx->model.backend));

// create the graph for memory usage estimation
struct ggml_cgraph *gf = encodec_build_graph(ectx, codes, n_codes, mode);
encodec_build_graph(ectx, codes, n_codes, mode);

// pre-allocate the compute buffer
ggml_gallocr_reserve(ectx->allocr, gf);
ggml_gallocr_reserve(ectx->allocr, ectx->gf.get());
size_t mem_size = ggml_gallocr_get_buffer_size(ectx->allocr, 0);
fprintf(stderr, "%s: compute buffer size: %.2f MB\n\n", __func__, mem_size / 1024.0 / 1024.0);
}
Expand Down
2 changes: 1 addition & 1 deletion lstm.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ struct ggml_tensor *forward_pass_lstm_unilayer(struct ggml_context *ctx0,

struct ggml_tensor *current = ggml_cont(ctx0, ggml_transpose(ctx0, inp));

for (int t = 0; t < 2; t++) {
for (int t = 0; t < seq_length; t++) {
struct ggml_tensor *x_t = ggml_view_1d(ctx0, current, input_dim, t * current->nb[1]);

struct ggml_tensor *inp_gates = ggml_mul_mat(ctx0, weight_ih, x_t);
Expand Down

0 comments on commit 13d6ed1

Please sign in to comment.