Skip to content

Commit

Permalink
Efficient storage of integer G3Map and G3Vector objects (#140)
Browse files Browse the repository at this point in the history
Use int64_t for in-memory representation of integer values, but store as 8-, 16-,
or 32-bit integers on disk, depending on bit depth of the underlying data.
Backwards compatible with v1 int32 G3Map objects.

Closes #122.
  • Loading branch information
arahlin authored Feb 20, 2024
1 parent e382a59 commit cc46e8d
Show file tree
Hide file tree
Showing 7 changed files with 334 additions and 45 deletions.
1 change: 1 addition & 0 deletions core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ add_spt3g_library(core SHARED
src/crc32.c ${CORE_EXTRA_SRCS}
src/G3NetworkSender.cxx src/G3SyslogLogger.cxx
src/G3PipelineInfo.cxx src/G3Quat.cxx
src/int_storage.cxx
)

# Link dependencies
Expand Down
16 changes: 14 additions & 2 deletions core/include/core/G3Map.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ class G3Map : public G3FrameObject, public std::map<Key, Value> {
cereal::base_class<std::map<Key, Value> >(this));
}

template <class A> void load(A &ar, unsigned v);
template <class A> void save(A &ar, unsigned v) const;

std::string Summary() const
{
if (this->size() < 5)
Expand Down Expand Up @@ -72,16 +75,25 @@ G3MAP_OF(std::string, double, G3MapDouble);
G3MAP_OF(std::string, G3MapDouble, G3MapMapDouble);
G3MAP_OF(std::string, std::vector<double>, G3MapVectorDouble);
G3MAP_OF(std::string, std::vector<bool>, G3MapVectorBool);
G3MAP_OF(std::string, std::vector<int32_t>, G3MapVectorInt);
G3MAP_OF(std::string, std::vector<std::string>, G3MapVectorString);
G3MAP_OF(std::string, G3VectorVectorString, G3MapVectorVectorString);
G3MAP_OF(std::string, std::vector<std::complex<double> >, G3MapVectorComplexDouble);
G3MAP_OF(std::string, G3VectorTime, G3MapVectorTime);
G3MAP_OF(std::string, int32_t, G3MapInt);
G3MAP_OF(std::string, std::string, G3MapString);
G3MAP_OF(std::string, quat, G3MapQuat);
G3MAP_OF(std::string, G3VectorQuat, G3MapVectorQuat);

/*
 * G3MAP_SPLIT(key, value, name, version)
 *
 * Declares `name` as a typedef of G3Map<key, value>, adds a
 * cereal::specialize entry for it that selects
 * cereal::specialization::member_load_save, and then invokes
 * G3_POINTERS(name) and G3_SERIALIZABLE(name, version).
 *
 * The load()/save() members that this selects are the ones declared as
 * templates on G3Map; they must be defined for each type declared here.
 */
#define G3MAP_SPLIT(key, value, name, version) \
typedef G3Map< key, value > name; \
namespace cereal { \
template <class A> struct specialize<A, name, cereal::specialization::member_load_save> {}; \
} \
G3_POINTERS(name); \
G3_SERIALIZABLE(name, version);

// Integer-valued maps hold int64_t in memory and are declared at
// serialization version 2.
G3MAP_SPLIT(std::string, std::vector<int64_t>, G3MapVectorInt, 2);
G3MAP_SPLIT(std::string, int64_t, G3MapInt, 2);

namespace cereal {
template <class A> struct specialize<A, G3MapFrameObject, cereal::specialization::member_load_save> {};
}
Expand Down
151 changes: 149 additions & 2 deletions core/src/G3Map.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,152 @@
#include <pybindings.h>
#include <container_pybindings.h>
#include <serialization.h>
#include "int_storage.h"

/* Special load/save for int64_t, using the same encoding as G3VectorInt */

/*
 * Deserialize a string -> int64_t map.
 *
 * The G3FrameObject base is read first.  From version 2 onwards the
 * archive then carries a "store_bits" field naming the integer width
 * used for the values; earlier versions have no such field and are read
 * as 32-bit values.  Values narrower than 64 bits are widened to int64_t
 * via load_as().
 *
 * Throws cereal::Exception if "store_bits" is not one of 8, 16, 32 or
 * 64, since nothing could be read for such a value and the archive
 * would otherwise be left out of step without any diagnostic.
 */
template <>
template <class A>
void G3Map<std::string, int64_t>::load(A &ar, const unsigned v)
{
	G3_CHECK_VERSION(v);

	ar & cereal::make_nvp("G3FrameObject",
	    cereal::base_class<G3FrameObject>(this));

	// Default for archives that predate the "store_bits" field.
	int store_bits = 32;
	if (v >= 2)
		ar & cereal::make_nvp("store_bits", store_bits);

	switch(store_bits) {
	case 64:
		// Full-width values are stored as the plain std::map base.
		ar & cereal::make_nvp("map",
		    cereal::base_class<std::map<std::string, int64_t> >(this));
		break;
	case 32:
		load_as<A, int32_t>(ar, *this);
		break;
	case 16:
		load_as<A, int16_t>(ar, *this);
		break;
	case 8:
		load_as<A, int8_t>(ar, *this);
		break;
	default:
		throw cereal::Exception(
		    "G3MapInt: unsupported store_bits value " +
		    std::to_string(store_bits));
	}
}

/*
 * Serialize a string -> int64_t map using the narrowest of 8, 16, 32 or
 * 64 bits that bit_count() reports as sufficient for every value.  The
 * chosen width is written as "store_bits" ahead of the values.
 */
template <>
template <class A>
void G3Map<std::string, int64_t>::save(A &ar, const unsigned v) const
{
	// Only the version 2 layout is ever written.
	ar & cereal::make_nvp("G3FrameObject",
	    cereal::base_class<G3FrameObject>(this));

	// Round the number of significant bits up to a power of two,
	// starting from one byte.
	const int needed_bits = bit_count(*this);
	int store_bits = 8;
	for (; store_bits < needed_bits; store_bits <<= 1) {
	}
	ar & cereal::make_nvp("store_bits", store_bits);

	if (store_bits == 8) {
		save_as<A, int8_t>(ar, *this);
	} else if (store_bits == 16) {
		save_as<A, int16_t>(ar, *this);
	} else if (store_bits == 32) {
		save_as<A, int32_t>(ar, *this);
	} else {
		// Anything wider goes out as the plain std::map base.
		ar & cereal::make_nvp("map",
		    cereal::base_class<std::map<std::string, int64_t> >(this));
	}
}

/*
 * Deserialize a string -> vector<int64_t> map.
 *
 * Version 1 archives hold the whole map with int32_t elements under the
 * name "map"; each vector is widened to int64_t on the way in.
 *
 * Later versions hold an entry count "len" followed, per entry, by a
 * "key", a "store_bits" width and the vector stored at that width, so
 * each vector may use a different width.
 *
 * Throws cereal::Exception if an entry's "store_bits" is not one of 8,
 * 16, 32 or 64, since no vector could be read for such a value and the
 * archive would otherwise be left out of step without any diagnostic.
 */
template <>
template <class A>
void G3Map<std::string, std::vector<int64_t> >::load(A &ar, const unsigned v)
{
	G3_CHECK_VERSION(v);

	ar & cereal::make_nvp("G3FrameObject",
	    cereal::base_class<G3FrameObject>(this));

	if (v == 1) {
		std::map<std::string, std::vector<int32_t> > temp;
		ar & cereal::make_nvp("map", temp);
		// Widen straight into the destination vector; this avoids a
		// second copy and a local that would shadow the version
		// argument.
		for (auto const &i: temp)
			(*this)[i.first].assign(i.second.begin(), i.second.end());

		return;
	}

	uint32_t len = 0;
	ar & cereal::make_nvp("len", len);

	for (uint32_t i = 0; i < len; i++) {
		std::pair<std::string, std::vector<int64_t> > item;
		ar & cereal::make_nvp("key", item.first);
		int store_bits = 0;
		ar & cereal::make_nvp("store_bits", store_bits);

		switch(store_bits) {
		case 64:
			ar & cereal::make_nvp("vector", item.second);
			break;
		case 32:
			load_as<A, int32_t>(ar, item.second);
			break;
		case 16:
			load_as<A, int16_t>(ar, item.second);
			break;
		case 8:
			load_as<A, int8_t>(ar, item.second);
			break;
		default:
			throw cereal::Exception(
			    "G3MapVectorInt: unsupported store_bits value " +
			    std::to_string(store_bits));
		}

		this->insert(item);
	}
}

/*
 * Serialize a string -> vector<int64_t> map.
 *
 * Writes the entry count as "len", then for every entry its "key", the
 * "store_bits" width chosen for that entry, and the vector stored at
 * that width.  The width is the narrowest of 8, 16, 32 or 64 bits that
 * bit_count() reports as sufficient for that particular vector.
 */
template <>
template <class A>
void G3Map<std::string, std::vector<int64_t> >::save(A &ar, const unsigned v) const
{
	// Only the version 2 layout is ever written.
	ar & cereal::make_nvp("G3FrameObject",
	    cereal::base_class<G3FrameObject>(this));

	uint32_t len = size();
	ar & cereal::make_nvp("len", len);

	for (auto entry = this->begin(); entry != this->end(); ++entry) {
		ar & cereal::make_nvp("key", entry->first);

		// Round the number of significant bits up to a power of two,
		// starting from one byte.
		const int needed_bits = bit_count(entry->second);
		int store_bits = 8;
		for (; store_bits < needed_bits; store_bits <<= 1) {
		}
		ar & cereal::make_nvp("store_bits", store_bits);

		if (store_bits == 8) {
			save_as<A, int8_t>(ar, entry->second);
		} else if (store_bits == 16) {
			save_as<A, int16_t>(ar, entry->second);
		} else if (store_bits == 32) {
			save_as<A, int32_t>(ar, entry->second);
		} else {
			// Anything wider is written at full width.
			ar & cereal::make_nvp("vector", entry->second);
		}
	}
}

template <class A> void G3MapFrameObject::save(A &ar, const unsigned v) const
{
Expand Down Expand Up @@ -70,20 +216,21 @@ std::string G3MapFrameObject::Description() const
return s.str();
}

G3_SERIALIZABLE_CODE(G3MapInt);
G3_SERIALIZABLE_CODE(G3MapDouble);
G3_SERIALIZABLE_CODE(G3MapMapDouble);
G3_SERIALIZABLE_CODE(G3MapString);
G3_SERIALIZABLE_CODE(G3MapQuat);
G3_SERIALIZABLE_CODE(G3MapVectorBool);
G3_SERIALIZABLE_CODE(G3MapVectorInt);
G3_SERIALIZABLE_CODE(G3MapVectorDouble);
G3_SERIALIZABLE_CODE(G3MapVectorString);
G3_SERIALIZABLE_CODE(G3MapVectorVectorString);
G3_SERIALIZABLE_CODE(G3MapVectorComplexDouble);
G3_SERIALIZABLE_CODE(G3MapVectorTime);
G3_SERIALIZABLE_CODE(G3MapVectorQuat);

G3_SPLIT_SERIALIZABLE_CODE(G3MapInt);
G3_SPLIT_SERIALIZABLE_CODE(G3MapVectorInt);

G3_SPLIT_SERIALIZABLE_CODE(G3MapFrameObject);

PYBINDINGS("core") {
Expand Down
48 changes: 7 additions & 41 deletions core/src/G3Vector.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <container_pybindings.h>
#include <G3Vector.h>
#include <complex>
#include "int_storage.h"

G3_SPLIT_SERIALIZABLE_CODE(G3VectorInt);
G3_SERIALIZABLE_CODE(G3VectorBool);
Expand All @@ -16,41 +17,6 @@ G3_SERIALIZABLE_CODE(G3VectorTime);

/* Special load/save for int64_t. */

static
int bit_count(std::vector<int64_t> const &d) {
	// Smallest N for which every element fits in a two's complement
	// intN_t.  The result lies between 1 and 64; an empty vector
	// gives 1.
	uint64_t seen = 0;
	for (std::vector<int64_t>::const_iterator it = d.begin(); it != d.end(); ++it) {
		// Complementing a negative value turns its redundant sign
		// bits into zeros, so only its significant bits are kept.
		const int64_t significant = (*it < 0) ? ~(*it) : *it;
		seen |= static_cast<uint64_t>(significant);
	}

	// One bit is always needed for the sign; add one for every
	// position up to the highest bit seen.
	int nbits = 1;
	for (; nbits < 64 && seen != 0; ++nbits)
		seen >>= 1;
	return nbits;
}

/*
 * Read a vector stored with FROM_TYPE elements under the name "vector"
 * and replace the contents of dest with those values converted to
 * TO_TYPE.
 */
template <class A, typename FROM_TYPE, typename TO_TYPE>
void load_as(A &ar, std::vector<TO_TYPE> &dest) {
	std::vector<FROM_TYPE> stored;
	ar & cereal::make_nvp("vector", stored);
	dest.assign(stored.begin(), stored.end());
}

/*
 * Write src under the name "vector" after converting every element from
 * FROM_TYPE to TO_TYPE.  Callers are expected to have chosen a TO_TYPE
 * wide enough for the values; no range check is done here.
 */
template <class A, typename FROM_TYPE, typename TO_TYPE>
void save_as(A &ar, const std::vector<FROM_TYPE> &src) {
	std::vector<TO_TYPE> converted;
	converted.assign(src.begin(), src.end());
	ar & cereal::make_nvp("vector", converted);
}

template <>
template <class A>
void G3Vector<int64_t>::load(A &ar, const unsigned v)
Expand All @@ -69,13 +35,13 @@ void G3Vector<int64_t>::load(A &ar, const unsigned v)
cereal::base_class<std::vector<int64_t> >(this));
break;
case 32:
load_as<A, int32_t, int64_t>(ar, *this);
load_as<A, int32_t>(ar, *this);
break;
case 16:
load_as<A, int16_t, int64_t>(ar, *this);
load_as<A, int16_t>(ar, *this);
break;
case 8:
load_as<A, int8_t, int64_t>(ar, *this);
load_as<A, int8_t>(ar, *this);
break;
}
}
Expand All @@ -95,13 +61,13 @@ void G3Vector<int64_t>::save(A &ar, const unsigned v) const
ar & cereal::make_nvp("store_bits", store_bits);
switch(store_bits) {
case 8:
save_as<A, int64_t, int8_t>(ar, *this);
save_as<A, int8_t>(ar, *this);
break;
case 16:
save_as<A, int64_t, int16_t>(ar, *this);
save_as<A, int16_t>(ar, *this);
break;
case 32:
save_as<A, int64_t, int32_t>(ar, *this);
save_as<A, int32_t>(ar, *this);
break;
default:
ar & cereal::make_nvp("vector",
Expand Down
55 changes: 55 additions & 0 deletions core/src/int_storage.cxx
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#include "int_storage.h"

int bit_count(const std::vector<int64_t> &d) {
	// Smallest N for which every element of the vector fits in a
	// two's complement intN_t.  The result lies between 1 and 64; an
	// empty vector gives 1.
	uint64_t folded = 0;
	for (const int64_t value : d) {
		// A negative value is complemented so that only its
		// significant bits contribute.
		folded |= static_cast<uint64_t>(value < 0 ? ~value : value);
	}

	// Start at one bit for the sign and add one per position up to
	// the highest bit that was set.
	int width = 1;
	while (folded != 0 && width < 64) {
		folded >>= 1;
		++width;
	}
	return width;
}

int bit_count(const std::map<std::string, int64_t> &d) {
	// Returns the smallest number N such that all values in the
	// map could be safely expressed as intN_t.  Assumes two's
	// complement integers.  Return value will be between 1 and
	// 64; an empty map gives 1.  Keys are ignored.
	uint64_t mask = 0;
	// Bind by reference: iterating by value would copy every
	// std::string key just to read the integer next to it.
	for (const auto &entry: d) {
		const int64_t value = entry.second;
		// A negative value is complemented so that only its
		// significant bits contribute.
		if (value < 0)
			mask |= ~value;
		else
			mask |= value;
	}
	// One bit is always needed for the sign; add one for every
	// position up to the highest bit set in the mask.
	for (int i=1; i<64; i++) {
		if (mask == 0)
			return i;
		mask >>= 1;
	}
	return 64;
}

/*
 * INT_SERIALIZABLE_CODE(inttype)
 *
 * Emits four declarations per use: load_as() for
 * cereal::PortableBinaryInputArchive and save_as() for
 * cereal::PortableBinaryOutputArchive, each for std::vector<int64_t>
 * and for std::map<std::string, int64_t>.  It is used below once for
 * each of the 8-, 16- and 32-bit integer types.
 *
 * NOTE(review): as written, the macro argument is substituted as the
 * *name* of the template parameter in `template <typename inttype>`, so
 * each expansion reads as a function template declaration rather than
 * an explicit instantiation for that integer type.  The call sites in
 * G3Map.cxx use the form load_as<A, int32_t>(...).  This may be an
 * artifact of how this listing was extracted -- confirm against
 * int_storage.h before relying on it.
 */
#define INT_SERIALIZABLE_CODE(inttype) \
template <typename inttype> \
void load_as(cereal::PortableBinaryInputArchive &, std::vector<int64_t> &dest); \
template <typename inttype> \
void load_as(cereal::PortableBinaryInputArchive &, std::map<std::string, int64_t> &dest); \
template <typename inttype> \
void save_as(cereal::PortableBinaryOutputArchive &, const std::vector<int64_t> &src); \
template <typename inttype> \
void save_as(cereal::PortableBinaryOutputArchive &, const std::map<std::string, int64_t> &src)

INT_SERIALIZABLE_CODE(int8_t);
INT_SERIALIZABLE_CODE(int16_t);
INT_SERIALIZABLE_CODE(int32_t);
Loading

0 comments on commit cc46e8d

Please sign in to comment.