Skip to content

Commit

Permalink
add expression encoding for double
Browse files Browse the repository at this point in the history
  • Loading branch information
azimafroozeh committed Sep 16, 2024
1 parent 6428561 commit b088c4a
Show file tree
Hide file tree
Showing 15 changed files with 131 additions and 31 deletions.
7 changes: 7 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ include(CTest)

# Options : ------------------------------------------------------------------------------------------------------------
option(FLS_BUILD_FLS "Build FLS" ON)
option(FLS_BUILD_ALP "Build ALP" OFF)
option(FLS_BUILD_TESTING "Build Test" OFF)
option(FLS_BUILD_BENCHMARKING "Enable Benchmark Build" OFF)
option(FLS_BUILD_EXAMPLE "Build Example" OFF)
Expand Down Expand Up @@ -66,6 +67,12 @@ if (FLS_BUILD_TESTING OR FLS_BUILD_GPU)
endif ()


# ALP: ---------------------------------------------------------------------------------------------------------------
# Reports that the FLS_BUILD_ALP option is enabled. This block only prints a banner to the configure log;
# it does not add targets or definitions itself.
# NOTE(review): presumably the option is consumed elsewhere in the build -- confirm.
if (FLS_BUILD_ALP)
message("---------------------------------------------------------------------------------------------------------")
message("-- FLS: Build ALP.")
endif ()

# DATA : ---------------------------------------------------------------------------------------------------------------
if (FLS_BUILD_TESTING OR FLS_BUILD_BENCHMARKING OR FLS_BUILD_EXAMPLE)
message("---------------------------------------------------------------------------------------------------------")
Expand Down
1 change: 0 additions & 1 deletion example/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ target_link_libraries(cpp_example PUBLIC fastlanes)
target_compile_options(cpp_example PRIVATE "-fsanitize=address")
target_link_options(cpp_example PRIVATE "-fsanitize=address")


# C Example : ----------------------------------------------------------------------------------------------------------
add_executable(c_api ${CMAKE_CURRENT_SOURCE_DIR}/c_api.c)
target_link_libraries(c_api PUBLIC fastlanes)
Expand Down
66 changes: 61 additions & 5 deletions example/c_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,68 @@
#include <stdlib.h>

int main() {

// double: round-trip a constant-valued double column through every expression in the
// double expression pool, verify the decoded values, and print each compression ratio.
{
    const enum data_t              data_type             = DOUBLE;
    const enum dbl_expr_encoding_t expression_pool[]     = {DBL_UNCOMPRESSED, DBL_ALP};
    const char*                    expression_pool_str[] = {"DBL_UNCOMPRESSED", "DBL_ALP"};
    // Derived from the array so the loop bound cannot drift when an expression is added.
    const uint64_t expr_pool_size = sizeof(expression_pool) / sizeof(expression_pool[0]);

    // init
    // NOTE(review): N_TUP is not an integer constant expression in C, so the three buffers
    // below are variable-length arrays taking about 2 MiB of stack (512 KiB + 1 MiB + 512 KiB).
    const int64_t N_TUP      = 64 * 1024;
    const int64_t INPUT_SIZE = N_TUP * sizeof(double);
    const int64_t CAPACITY   = 2 * INPUT_SIZE; // encoded buffer is given twice the raw input size
    double        input_arr[N_TUP];
    uint8_t       encoded_buf[CAPACITY];
    double        output_arr[N_TUP];
    uint64_t      encoded_bsz = 0; // zeroed so a skipped write is not read as garbage
    uint64_t      mtd_bsz     = 0;

    // initialize values: every tuple holds the same constant.
    for (int64_t i = 0; i < N_TUP; ++i) {
        input_arr[i] = 1241.52;
    }

    printf("-- compression ratio of each expression: \n");
    for (uint64_t expr_idx = 0; expr_idx < expr_pool_size; expr_idx++) {
        const enum dbl_expr_encoding_t expression_encoding_type = expression_pool[expr_idx];

        // encode
        encode_from_memory(input_arr, //
                           N_TUP,
                           CAPACITY,
                           encoded_buf,
                           &encoded_bsz,
                           &mtd_bsz,
                           data_type,
                           expression_encoding_type);

        // decode
        decode_to_memory(encoded_buf, output_arr, data_type);

        // verify: the round trip must reproduce the input exactly, so compare with ==.
        for (int64_t i = 0; i < N_TUP; ++i) {
            if (output_arr[i] != input_arr[i]) {
                // The comparison is against input_arr[i], not the index, so say so.
                printf("--ERROR output_arr[%" PRId64 "] != input_arr[%" PRId64 "]\n", i, i);
                exit(EXIT_FAILURE);
            }
        }

        const double compression_ratio = (double)INPUT_SIZE / (double)encoded_bsz;
        printf("-- %s : %.2fX\n", expression_pool_str[expr_idx], compression_ratio);
    }
}

// int64_t
{
// expression pool for type int64_t
const enum data_t data_type = INT64;
const uint64_t expr_pool_size = 3;
const enum expression_encoding_t int64_t_expression_pool[] = {UNCOMPRESSED, FFOR_NO_PATCH, DELTA_NO_PATCH};
const char* int64_t_expression_pool_str[] = {"UNCOMPRESSED", "FFOR_NO_PATCH", "DELTA_NO_PATCH"};
const enum data_t data_type = INT64;
const uint64_t expr_pool_size = 3;
const enum i64_expr_encoding_t int64_t_expression_pool[] = {
I64_UNCOMPRESSED, I64_FFOR_NO_PATCH, I64_DELTA_NO_PATCH};
const char* int64_t_expression_pool_str[] = {"I64_UNCOMPRESSED", "I64_FFOR_NO_PATCH", "I64_DELTA_NO_PATCH"};
// int64_t

// init
Expand All @@ -31,7 +87,7 @@ int main() {

printf("-- compression ratio of each expression: \n");
for (uint64_t expr_idx = 0; expr_idx < expr_pool_size; expr_idx++) {
const enum expression_encoding_t expression_encoding_type = int64_t_expression_pool[expr_idx];
const enum i64_expr_encoding_t expression_encoding_type = int64_t_expression_pool[expr_idx];

// encode
encode_from_memory(input_arr, //
Expand Down
16 changes: 8 additions & 8 deletions example/cpp_example.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,24 +8,24 @@ int main() {

{
// example 1: single column encoding from memory:
constexpr int64_t N_TUP {64 * 1024};
array<int64_t, N_TUP> input_arr {};
array<int64_t, N_TUP> encoded_arr {};
array<int64_t, N_TUP> output_arr {};
bsz_t encoded_bsz = 0;
constexpr int64_t N_TUP {64 * 1024};
array<double, N_TUP> input_arr {};
array<uint8_t, N_TUP * 8 * 2> encoded_arr {};
array<double, N_TUP> output_arr {};
bsz_t encoded_bsz = 0;

for (size_t i = 0; i < N_TUP; ++i) {
input_arr[i] = 1370;
input_arr[i] = 1370.1;
}

Connection::encode_from_memory(input_arr.data(), //
N_TUP,
encoded_arr.size() * 8,
encoded_arr.data(),
&encoded_bsz,
DataType::INT64,
DataType::DOUBLE,
1);
Connection::decode_to_memory(encoded_arr.data(), output_arr.data(), DataType::INT64);
Connection::decode_to_memory(encoded_arr.data(), output_arr.data(), DataType::DOUBLE);

for (size_t i = 0; i < N_TUP; ++i) {
if (output_arr[i] != input_arr[i]) { throw std::runtime_error("decoding failed"); }
Expand Down
20 changes: 13 additions & 7 deletions include/fastlanes.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,17 @@ enum data_t : uint8_t {
FALLBACK = 18,
};

// encoding expression type
enum expression_encoding_t : uint8_t {
UNCOMPRESSED = 0,
FFOR_NO_PATCH = 1,
DELTA_NO_PATCH = 11,
// i64_t encoding expression type
// Expression identifiers the C example pairs with data type INT64.
// The values are explicit and not contiguous (0, 1, 11).
// NOTE(review): presumably they must match the library's internal expression ids -- confirm before renumbering.
// NOTE(review): an enum with a fixed underlying type in a header consumed from C needs C23 or a compiler
// extension -- confirm the supported toolchains.
enum i64_expr_encoding_t : uint8_t {
I64_UNCOMPRESSED = 0,
I64_FFOR_NO_PATCH = 1,
I64_DELTA_NO_PATCH = 11,
};

// double encoding expression type
// Expression identifiers the C example passes, together with data type DOUBLE, as the last argument
// of encode_from_memory (which takes the expression id as a plain uint8_t).
// NOTE(review): presumably the values must match the library's internal expression ids for double -- confirm
// before renumbering.
enum dbl_expr_encoding_t : uint8_t {
DBL_UNCOMPRESSED = 0,
DBL_ALP = 1,
};

// Opaque pointer type for Connection
Expand Down Expand Up @@ -69,8 +75,8 @@ bool encode_from_memory(void* in_data, // pointer to data.
uint8_t* out_encoded_data, // pointer to where the encoded data should be written.
uint64_t* out_encoded_size, // encoded size.
uint64_t* out_metadadata_size, // metadata size, metadata starts from out_encoded_data pointer.
enum data_t in_datatype,
enum expression_encoding_t in_expression_type);
enum data_t in_datatype,
uint8_t in_expression_type);

/*--------------------------------------------------------------------------------------------------------------------*\
* decode
Expand Down
4 changes: 3 additions & 1 deletion include/fls/primitive/untranspose/untranspose.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

namespace fastlanes {
void untranspose_i(const int64_t* __restrict in, int64_t* __restrict out);
}
void untranspose_i(const double* __restrict in, double* __restrict out);

} // namespace fastlanes

#endif // FLS_PRIMITIVE_UNTRANSPOSE_UNTRANSPOSE_HPP
16 changes: 8 additions & 8 deletions src/c_api/c_api_connector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@ void fls_cpi_connection_close(connection_cpi* fls_connection) {
delete reinterpret_cast<fastlanes::Connection*>(fls_connection);
}

bool encode_from_memory(void* in_data,
uint64_t in_n_input,
uint64_t in_capacity,
uint8_t* out_encoded_data,
uint64_t* out_encoded_size,
uint64_t* out_metadadata_size,
data_t in_datatype,
expression_encoding_t in_expression_type) {
bool encode_from_memory(void* in_data,
uint64_t in_n_input,
uint64_t in_capacity,
uint8_t* out_encoded_data,
uint64_t* out_encoded_size,
uint64_t* out_metadadata_size,
data_t in_datatype,
uint8_t in_expression_type) {

fastlanes::Connection::encode_from_memory(in_data,
in_n_input,
Expand Down
1 change: 1 addition & 0 deletions src/cor/eng/decompressor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ void Decompressor<T>::InitDict(const uint8_t* p, DecompressState& stt) {
case ExpT::BYTE_ARR:
case ExpT::FFOR:
case ExpT::DELTA:
case ExpT::ALP:
case ExpT::RLE: {
this->vec.dict_up = nullptr;
return;
Expand Down
2 changes: 2 additions & 0 deletions src/encoder/exp_col_decoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,6 @@ void ExpColDecoder<PT>::full_decode(span<std::byte> input_data, span<PT> output_
}

template class ExpColDecoder<i64_pt>;
template class ExpColDecoder<dbl_pt>;

} // namespace fastlanes
2 changes: 2 additions & 0 deletions src/encoder/materializer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,6 @@ void Materializer<PT>::Materialize(const Vec& vec) {
}

template class Materializer<i64_pt>;
template class Materializer<dbl_pt>;

} // namespace fastlanes
7 changes: 7 additions & 0 deletions src/encoder/single_col_decoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,13 @@ void SingleColDecoder::full_decode() const {
const auto output_span = IO::read<i64_pt>(m_output_io);
exp_i64_decoder.full_decode(encoded_span, output_span);

} break;
case DataType::DOUBLE: {
ExpColDecoder<double> expr_double_decoder;
const auto encoded_span = IO::read<std::byte>(m_encoded_io);
const auto output_span = IO::read<double>(m_output_io);
expr_double_decoder.full_decode(encoded_span, output_span);

} break;
case DataType::INVALID:
default:
Expand Down
8 changes: 8 additions & 0 deletions src/encoder/single_col_encoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,14 @@ void SingleColEncoder::encode() const {

res = i64_encoder.encode_span(span, expression);

} break;
case DataType::DOUBLE: {
ExpColEncoder<double> double_encoder(false);
const auto expression = ExpPool<double>::get_expression(m_exp_id);
const auto span = IO::read<double>(m_input_io);

res = double_encoder.encode_span(span, expression);

} break;
case DataType::INVALID:
default:
Expand Down
4 changes: 3 additions & 1 deletion src/expression/data_type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ uint64_t SizeOf(const DataType datatype) {
switch (datatype) {
case DataType::INT64:
return sizeof(i64_pt);
default:;
case DataType::DOUBLE:
return sizeof(double);
default:
FLS_IMPLEMENT_THIS()
}
}
Expand Down
1 change: 1 addition & 0 deletions src/io/io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ span<PT> IO::read(const io& io) {
}

template span<i64_pt> IO::read(const io& io);
template span<dbl_pt> IO::read(const io& io);
template span<std::byte> IO::read(const io& io);

} // namespace fastlanes
7 changes: 7 additions & 0 deletions src/primitive/untraspose/untranspose.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,11 @@ void untranspose_i(const int64_t* a_in, int64_t* a_out) {
generated::untranspose::fallback::scalar::untranspose_i(in, out);
}

// Untranspose overload for double columns.
// Both buffers are viewed as arrays of 64-bit unsigned words, and the work is forwarded to
// generated::untranspose::fallback::scalar::untranspose_i.
// NOTE(review): presumably the kernel only relocates whole 64-bit words, so the double values
// survive bit-for-bit -- confirm against the generated code.
void untranspose_i(const double* a_in, double* a_out) {
	auto* src_words = reinterpret_cast<const uint64_t*>(a_in);
	auto* dst_words = reinterpret_cast<uint64_t*>(a_out);

	generated::untranspose::fallback::scalar::untranspose_i(src_words, dst_words);
}

} // namespace fastlanes

0 comments on commit b088c4a

Please sign in to comment.