Skip to content

Commit

Permalink
better serialize interface; fix typos in ffi; ffi for approxset. stil…
Browse files Browse the repository at this point in the history
…l todo deserialize; save/load to files; maps with bytes as output
  • Loading branch information
Mike Hamburg committed Apr 9, 2022
1 parent 13f4845 commit cd382f4
Show file tree
Hide file tree
Showing 4 changed files with 245 additions and 20 deletions.
5 changes: 3 additions & 2 deletions TODO.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Release items

* C / C++ interface / dynamic lib
* Demo app
* C interface / dylib: deserialize, save/load file, map with bytes output
* Demo apps
* Examples in doc

# Post 0.2 quality items
Expand All @@ -12,6 +12,7 @@

# Performance

* Reduce C dylib size?
* Why is SipHasher so slow on Intel?
* Multithread hashing even if we aren't multithreading bucketsort. (Using Rayon??)
* Improve optimization of the threaded version
Expand Down
2 changes: 1 addition & 1 deletion examples/ffi.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ int main(int argc, char **argv) {
end = now();
printf("Build compressed_random_map of %lld items: %0.3fs = %0.1f ns/item\n",
(long long)hashmap_len, end-start, (end-start)*1e9/hashmap_len);
size_t ser_size = cmap_compressed_random_map_u64_u64_ser_size(map);
size_t ser_size = cmap_compressed_random_map_u64_u64_encode(map,NULL,0);
printf("Size is %lld bytes = %0.1f bytes/item\n",
(long long)ser_size, (double)ser_size/hashmap_len);

Expand Down
256 changes: 240 additions & 16 deletions src/cffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,15 @@
* @author Mike Hamburg
* @copyright 2020-2022 Rambus Inc.
*
* C foreign function interface
* C foreign function interface.
* All boilerplate. Would be nice if GPT3 / macros could write this :-/
*/

use crate::{BuildOptions,CompressedRandomMap,ApproxSet,STD_BINCODE_CONFIG,DefaultHasher,CompressedMap,serialized_size};
use crate::{BuildOptions,CompressedRandomMap,ApproxSet,STD_BINCODE_CONFIG,CompressedMap,serialized_size};
use std::collections::{HashSet,HashMap};
use std::ptr::NonNull;
use core::ptr::NonNull;
use core::slice::from_raw_parts_mut;
use bincode::encode_into_slice;

/// Rust version of a vector of bytes
pub type Bytes = Box<[u8]>;
Expand All @@ -22,7 +25,7 @@ unsafe fn ptr_to_bytes(ptr: *const u8, len: usize) -> Bytes {
****************************************************************************/

#[no_mangle]
/// Create new HashMap<Bytes,u64>
/// Create new HashMap
pub extern fn cmap_hashmap_bytes_u64_new() -> *mut HashMap<Bytes,u64> {
Box::into_raw(Box::new(HashMap::new()))
}
Expand All @@ -34,19 +37,26 @@ pub unsafe extern fn cmap_hashmap_bytes_u64_len(ptr: NonNull<HashMap<Bytes,u64>>
}

#[no_mangle]
/// Insert an item into a hashmap
/// Insert an item into a HashMap
pub unsafe extern fn cmap_hashmap_bytes_u64_insert(mut ptr: NonNull<HashMap<Bytes,u64>>,
    key: *const u8, key_len: usize, value:u64) {
    // Turn the (pointer, length) pair into an owned `Bytes` key, then store the pair.
    // Any value previously stored under the same key is discarded.
    let map = ptr.as_mut();
    let owned_key = ptr_to_bytes(key,key_len);
    let _ = map.insert(owned_key,value);
}

#[no_mangle]
/// Does a HashMap contain a given key?
/// Remove an item from a HashMap
pub unsafe extern fn cmap_hashmap_bytes_u64_remove(mut ptr: NonNull<HashMap<Bytes,u64>>,
    key: *const u8, key_len: usize) {
    // Build an owned `Bytes` key for the lookup; the removed value (if any) is dropped.
    let map = ptr.as_mut();
    let lookup_key = ptr_to_bytes(key,key_len);
    let _ = map.remove(&lookup_key);
}

#[no_mangle]
/// Does this HashMap contain a given key?
///
/// The key is given as `key_len` bytes starting at `key`.  Returns `true` when the
/// map has an entry for those bytes.
///
/// # Safety
/// `ptr` must point to a live HashMap, and `key`/`key_len` must satisfy whatever
/// `ptr_to_bytes` requires of its arguments.
pub unsafe extern fn cmap_hashmap_bytes_u64_contains(ptr: NonNull<HashMap<Bytes,u64>>,
    key: *const u8, key_len: usize) -> bool {
    // Read-only query: a shared reference is enough, so no `&mut` is materialised.
    ptr.as_ref().contains_key(&ptr_to_bytes(key,key_len))
}

#[no_mangle]
/// Look up a key in a hashmap. Return true if it contains the key
pub unsafe extern fn cmap_hashmap_bytes_u64_get(ptr: NonNull<HashMap<Bytes,u64>>,
Expand Down Expand Up @@ -83,6 +93,20 @@ pub unsafe extern fn cmap_compressed_map_bytes_u64_query<'a>(
ptr.as_ref()[&ptr_to_bytes(key,key_len)]
}

#[no_mangle]
/// Serialize a CompressedMap into a caller-supplied buffer.
///
/// Always returns the number of bytes the serialized object occupies.  Nothing is
/// written when `output_buf` is NULL or `output_buf_size` is smaller than that
/// number, so calling with `(NULL, 0)` only queries the required size.
///
/// # Safety
/// `ptr` must point to a live CompressedMap.  A non-NULL `output_buf` must be valid
/// for writes of `output_buf_size` bytes.
///
/// # Panics
/// The results of `serialized_size` and `encode_into_slice` are unwrapped, so an
/// error from either one panics.
/// NOTE(review): this is an `extern` function; consider an error return instead.
pub unsafe extern fn cmap_compressed_map_bytes_u64_encode<'a>(
    ptr: NonNull<CompressedMap<'a,Bytes,u64>>,
    output_buf: *mut u8,
    output_buf_size: usize
) -> usize {
    let map = ptr.as_ref();
    let required_size = serialized_size(map,STD_BINCODE_CONFIG).unwrap();

    // Size query only, or the buffer is too small: report the size and write nothing.
    if output_buf.is_null() || output_buf_size < required_size {
        return required_size;
    }

    let out = from_raw_parts_mut(output_buf,output_buf_size);
    encode_into_slice(map, out, STD_BINCODE_CONFIG).unwrap();
    required_size
}

#[no_mangle]
/// Destroy and free a CompressedMap
pub unsafe extern fn cmap_compressed_map_bytes_u64_free<'a>(ptr: *mut CompressedMap<'a,Bytes,u64>) {
Expand All @@ -109,6 +133,20 @@ pub unsafe extern fn cmap_compressed_random_map_bytes_u64_query<'a>(
ptr.as_ref().query(&ptr_to_bytes(key,key_len))
}

#[no_mangle]
/// Serialize a CompressedRandomMap into a caller-supplied buffer.
///
/// Always returns the number of bytes the serialized object occupies.  Nothing is
/// written when `output_buf` is NULL or `output_buf_size` is smaller than that
/// number, so calling with `(NULL, 0)` only queries the required size.
///
/// # Safety
/// `ptr` must point to a live CompressedRandomMap.  A non-NULL `output_buf` must be
/// valid for writes of `output_buf_size` bytes.
///
/// # Panics
/// The results of `serialized_size` and `encode_into_slice` are unwrapped, so an
/// error from either one panics.
/// NOTE(review): this is an `extern` function; consider an error return instead.
pub unsafe extern fn cmap_compressed_random_map_bytes_u64_encode<'a>(
    ptr: NonNull<CompressedRandomMap<'a,Bytes,u64>>,
    output_buf: *mut u8,
    output_buf_size: usize
) -> usize {
    let map = ptr.as_ref();
    let required_size = serialized_size(map,STD_BINCODE_CONFIG).unwrap();

    // Size query only, or the buffer is too small: report the size and write nothing.
    if output_buf.is_null() || output_buf_size < required_size {
        return required_size;
    }

    let out = from_raw_parts_mut(output_buf,output_buf_size);
    encode_into_slice(map, out, STD_BINCODE_CONFIG).unwrap();
    required_size
}

#[no_mangle]
/// Destroy and free a CompressedRandomMap
pub unsafe extern fn cmap_compressed_random_map_bytes_u64_free<'a>(ptr: *mut CompressedRandomMap<'a,Bytes,u64>) {
Expand All @@ -122,7 +160,7 @@ pub unsafe extern fn cmap_compressed_random_map_bytes_u64_free<'a>(ptr: *mut Com


#[no_mangle]
/// Create new HashMap<u64,u64>
/// Create new HashMap
pub extern fn cmap_hashmap_u64_u64_new() -> *mut HashMap<u64,u64> {
    // Box an empty map and release it as a raw pointer; ownership passes to the caller.
    let empty: HashMap<u64,u64> = HashMap::new();
    Box::into_raw(Box::new(empty))
}
Expand All @@ -134,19 +172,27 @@ pub unsafe extern fn cmap_hashmap_u64_u64_len(ptr: NonNull<HashMap<u64,u64>>) ->
}

#[no_mangle]
/// Insert an item into a hashmap
/// Insert an item into a HashMap
pub unsafe extern fn cmap_hashmap_u64_u64_insert(mut ptr: NonNull<HashMap<u64,u64>>,
    key: u64, value:u64) {
    // Store the pair; any value previously held under `key` is replaced and dropped.
    let map = ptr.as_mut();
    let _ = map.insert(key,value);
}

#[no_mangle]
/// Does a HashMap contain a given key?
/// Remove an item from a HashMap
pub unsafe extern fn cmap_hashmap_u64_u64_remove(mut ptr: NonNull<HashMap<u64,u64>>,
    key: u64) {
    // Removing a key that is not present is a no-op; the removed value is dropped.
    let map = ptr.as_mut();
    let _ = map.remove(&key);
}

#[no_mangle]
/// Does this HashMap contain a given key?
///
/// # Safety
/// `ptr` must point to a live HashMap.
pub unsafe extern fn cmap_hashmap_u64_u64_contains(ptr: NonNull<HashMap<u64,u64>>,
    key: u64) -> bool {
    // Read-only query: a shared reference is enough, so no `&mut` is materialised.
    ptr.as_ref().contains_key(&key)
}

#[no_mangle]
/// Misspelled alias of `cmap_hashmap_u64_u64_contains` (note the stray `b` in `bu64`).
/// Kept so that callers already linking against this symbol continue to work.
///
/// # Safety
/// `ptr` must point to a live HashMap.
pub unsafe extern fn cmap_hashmap_bu64_u64_contains(ptr: NonNull<HashMap<u64,u64>>,
    key: u64) -> bool {
    cmap_hashmap_u64_u64_contains(ptr, key)
}


#[no_mangle]
/// Look up a key in a hashmap. Return true if it contains the key
pub unsafe extern fn cmap_hashmap_u64_u64_get(ptr: NonNull<HashMap<u64,u64>>,
Expand Down Expand Up @@ -184,10 +230,17 @@ pub unsafe extern fn cmap_compressed_map_u64_u64_query<'a>(
}

#[no_mangle]
/// Return serialized size of the map, in bytes
pub unsafe extern fn cmap_compressed_map_u64_u64_ser_size<'a>(ptr: NonNull<CompressedMap<'a,u64,u64>>)
-> usize {
serialized_size(ptr.as_ref(),STD_BINCODE_CONFIG).unwrap_or(0)
/// Encode to output_buf, if it's big enough. Return the serialized size of the object, in bytes.
pub unsafe extern fn cmap_compressed_map_u64_u64_encode<'a>(
    ptr: NonNull<CompressedMap<'a,u64,u64>>,
    output_buf: *mut u8,
    output_buf_size: usize
) -> usize {
    // The serialized size is always computed and returned; both results below are
    // unwrapped, so an error from either call panics.
    let map = ptr.as_ref();
    let required_size = serialized_size(map,STD_BINCODE_CONFIG).unwrap();

    // Size query only (NULL buffer), or the buffer is too small: write nothing.
    if output_buf.is_null() || output_buf_size < required_size {
        return required_size;
    }

    // The caller must supply `output_buf_size` writable bytes at `output_buf`.
    let out = from_raw_parts_mut(output_buf,output_buf_size);
    encode_into_slice(map, out, STD_BINCODE_CONFIG).unwrap();
    required_size
}

#[no_mangle]
Expand All @@ -209,9 +262,16 @@ pub unsafe extern fn cmap_compressed_random_map_u64_u64_build<'a>(ptr: NonNull<H

#[no_mangle]
/// Encode to output_buf, if it's big enough. Return the serialized size of the object, in bytes.
pub unsafe extern fn cmap_compressed_random_map_u64_u64_ser_size<'a>(ptr: NonNull<CompressedRandomMap<u64,u64>>)
-> usize {
serialized_size(ptr.as_ref(),STD_BINCODE_CONFIG).unwrap_or(0)
pub unsafe extern fn cmap_compressed_random_map_u64_u64_encode<'a>(
    ptr: NonNull<CompressedRandomMap<u64,u64>>,
    output_buf: *mut u8,
    output_buf_size: usize
) -> usize {
    // The serialized size is always computed and returned; both results below are
    // unwrapped, so an error from either call panics.
    let map = ptr.as_ref();
    let required_size = serialized_size(map,STD_BINCODE_CONFIG).unwrap();

    // Size query only (NULL buffer), or the buffer is too small: write nothing.
    if output_buf.is_null() || output_buf_size < required_size {
        return required_size;
    }

    // The caller must supply `output_buf_size` writable bytes at `output_buf`.
    let out = from_raw_parts_mut(output_buf,output_buf_size);
    encode_into_slice(map, out, STD_BINCODE_CONFIG).unwrap();
    required_size
}

#[no_mangle]
Expand All @@ -228,3 +288,167 @@ pub unsafe extern fn cmap_compressed_random_map_u64_u64_query<'a>(
pub unsafe extern fn cmap_compressed_random_map_u64_u64_free<'a>(ptr: *mut CompressedRandomMap<'a,u64,u64>) {
    // NULL is accepted and ignored.  Otherwise rebuild the Box from the raw pointer and
    // drop it explicitly (the result of `Box::from_raw` is `#[must_use]`).
    if !ptr.is_null() {
        drop(Box::from_raw(ptr));
    }
}


/****************************************************************************
* HashSet<Bytes>
****************************************************************************/

#[no_mangle]
/// Create new HashSet
pub extern fn cmap_hashset_bytes_new() -> *mut HashSet<Bytes> {
Box::into_raw(Box::new(HashSet::new()))
}

#[no_mangle]
/// Count the items in a HashMap
pub unsafe extern fn cmap_hashset_bytes_len(ptr: NonNull<HashSet<Bytes>>) -> usize {
ptr.as_ref().len()
}

#[no_mangle]
/// Add an item, given as `key_len` bytes starting at `key`, to a HashSet.
///
/// # Safety
/// `ptr` must point to a live HashSet, and `key`/`key_len` must satisfy whatever
/// `ptr_to_bytes` requires of its arguments.
pub unsafe extern fn cmap_hashset_bytes_insert(mut ptr: NonNull<HashSet<Bytes>>,
    key: *const u8, key_len: usize) {
    let set = ptr.as_mut();
    let owned_key = ptr_to_bytes(key,key_len);
    // Whether the item was newly added is not reported to the caller.
    let _ = set.insert(owned_key);
}

#[no_mangle]
/// Delete an item, given as `key_len` bytes starting at `key`, from a HashSet.
///
/// # Safety
/// `ptr` must point to a live HashSet, and `key`/`key_len` must satisfy whatever
/// `ptr_to_bytes` requires of its arguments.
pub unsafe extern fn cmap_hashset_bytes_remove(mut ptr: NonNull<HashSet<Bytes>>,
    key: *const u8, key_len: usize) {
    let set = ptr.as_mut();
    let lookup_key = ptr_to_bytes(key,key_len);
    // Whether the item was actually present is not reported to the caller.
    let _ = set.remove(&lookup_key);
}

#[no_mangle]
/// Does this HashSet contain a key?
///
/// The key is given as `key_len` bytes starting at `key`.
///
/// # Safety
/// `ptr` must point to a live HashSet, and `key`/`key_len` must satisfy whatever
/// `ptr_to_bytes` requires of its arguments.
pub unsafe extern fn cmap_hashset_bytes_contains(ptr: NonNull<HashSet<Bytes>>,
    key: *const u8, key_len: usize) -> bool {
    // Read-only query: a shared reference is enough, so no `&mut` is materialised.
    ptr.as_ref().contains(&ptr_to_bytes(key,key_len))
}

#[no_mangle]
/// Free a HashSet.  Passing NULL is allowed and does nothing.
///
/// # Safety
/// A non-NULL `ptr` must be a Box-allocated HashSet that has not been freed yet;
/// it must not be used after this call.
pub unsafe extern fn cmap_hashset_bytes_free(ptr: *mut HashSet<Bytes>) {
    if !ptr.is_null() {
        // Rebuild the Box and drop it explicitly (`Box::from_raw` is `#[must_use]`).
        drop(Box::from_raw(ptr));
    }
}

#[no_mangle]
/// Build an ApproxSet from the contents of a HashSet, using default `BuildOptions`.
///
/// On success the new set is boxed and returned as a raw pointer.
/// Returns NULL if the build does not produce a set.
///
/// # Safety
/// `ptr` must point to a live HashSet.
pub unsafe extern fn cmap_approxset_bytes_build<'a>(ptr: NonNull<HashSet<Bytes>>)
    -> *mut ApproxSet<'a, Bytes> {
    let mut options = BuildOptions::default();
    match ApproxSet::build(ptr.as_ref(),&mut options) {
        Some(aset) => Box::into_raw(Box::new(aset)),
        None => std::ptr::null_mut(),
    }
}

#[no_mangle]
/// Query an ApproxSet for a key given as `key_len` bytes starting at `key`.
/// Returns the result of the set's `probably_contains`.
///
/// # Safety
/// `ptr` must point to a live ApproxSet, and `key`/`key_len` must satisfy whatever
/// `ptr_to_bytes` requires of its arguments.
pub unsafe extern fn cmap_approxset_bytes_probably_contains<'a>(
    ptr: NonNull<ApproxSet<'a, Bytes>>,
    key: *const u8, key_len: usize
) -> bool {
    let aset = ptr.as_ref();
    let lookup_key = ptr_to_bytes(key,key_len);
    aset.probably_contains(&lookup_key)
}

#[no_mangle]
/// Serialize an ApproxSet into a caller-supplied buffer.
///
/// Always returns the number of bytes the serialized object occupies.  Nothing is
/// written when `output_buf` is NULL or `output_buf_size` is smaller than that
/// number, so calling with `(NULL, 0)` only queries the required size.
///
/// # Safety
/// `ptr` must point to a live ApproxSet.  A non-NULL `output_buf` must be valid for
/// writes of `output_buf_size` bytes.
///
/// # Panics
/// The results of `serialized_size` and `encode_into_slice` are unwrapped, so an
/// error from either one panics.
/// NOTE(review): this is an `extern` function; consider an error return instead.
pub unsafe extern fn cmap_approxset_bytes_encode<'a>(
    ptr: NonNull<ApproxSet<'a,Bytes>>,
    output_buf: *mut u8,
    output_buf_size: usize
) -> usize {
    let aset = ptr.as_ref();
    let required_size = serialized_size(aset,STD_BINCODE_CONFIG).unwrap();

    // Size query only, or the buffer is too small: report the size and write nothing.
    if output_buf.is_null() || output_buf_size < required_size {
        return required_size;
    }

    let out = from_raw_parts_mut(output_buf,output_buf_size);
    encode_into_slice(aset, out, STD_BINCODE_CONFIG).unwrap();
    required_size
}

#[no_mangle]
/// Destroy and free an ApproxSet.  Passing NULL is allowed and does nothing.
///
/// # Safety
/// A non-NULL `ptr` must be a Box-allocated ApproxSet that has not been freed yet;
/// it must not be used after this call.
pub unsafe extern fn cmap_approxset_bytes_free<'a>(ptr: *mut ApproxSet<'a,Bytes>) {
    if !ptr.is_null() {
        // Rebuild the Box and drop it explicitly (`Box::from_raw` is `#[must_use]`).
        drop(Box::from_raw(ptr));
    }
}

/****************************************************************************
* HashSet<u64>
****************************************************************************/

#[no_mangle]
/// Allocate a new, empty HashSet and return it as a raw pointer.
/// Ownership of the allocation passes to the caller.
pub extern fn cmap_hashset_u64_new() -> *mut HashSet<u64> {
    let empty: HashSet<u64> = HashSet::new();
    Box::into_raw(Box::new(empty))
}

#[no_mangle]
/// Count the items in a HashSet
///
/// # Safety
/// `ptr` must point to a live HashSet.
pub unsafe extern fn cmap_hashset_u64_len(ptr: NonNull<HashSet<u64>>) -> usize {
    let set: &HashSet<u64> = ptr.as_ref();
    set.len()
}

#[no_mangle]
/// Add an item to a HashSet.
///
/// # Safety
/// `ptr` must point to a live HashSet.
pub unsafe extern fn cmap_hashset_u64_insert(mut ptr: NonNull<HashSet<u64>>, key: u64) {
    let set = ptr.as_mut();
    // Whether the item was newly added is not reported to the caller.
    let _ = set.insert(key);
}

#[no_mangle]
/// Delete an item from a HashSet.
///
/// # Safety
/// `ptr` must point to a live HashSet.
pub unsafe extern fn cmap_hashset_u64_remove(mut ptr: NonNull<HashSet<u64>>, key: u64) {
    let set = ptr.as_mut();
    // Whether the item was actually present is not reported to the caller.
    let _ = set.remove(&key);
}

#[no_mangle]
/// Does this HashSet contain a key?
///
/// # Safety
/// `ptr` must point to a live HashSet.
pub unsafe extern fn cmap_hashset_u64_contains(ptr: NonNull<HashSet<u64>>, key: u64) -> bool {
    // Read-only query: a shared reference is enough, so no `&mut` is materialised.
    ptr.as_ref().contains(&key)
}

#[no_mangle]
/// Free a HashSet.  Passing NULL is allowed and does nothing.
///
/// # Safety
/// A non-NULL `ptr` must be a Box-allocated HashSet that has not been freed yet;
/// it must not be used after this call.
pub unsafe extern fn cmap_hashset_u64_free(ptr: *mut HashSet<u64>) {
    if !ptr.is_null() {
        // Rebuild the Box and drop it explicitly (`Box::from_raw` is `#[must_use]`).
        drop(Box::from_raw(ptr));
    }
}

#[no_mangle]
/// Build an ApproxSet from the contents of a HashSet, using default `BuildOptions`.
///
/// On success the new set is boxed and returned as a raw pointer.
/// Returns NULL if the build does not produce a set.
///
/// # Safety
/// `ptr` must point to a live HashSet.
pub unsafe extern fn cmap_approxset_u64_build<'a>(ptr: NonNull<HashSet<u64>>)
    -> *mut ApproxSet<'a, u64> {
    let mut options = BuildOptions::default();
    match ApproxSet::build(ptr.as_ref(),&mut options) {
        Some(aset) => Box::into_raw(Box::new(aset)),
        None => std::ptr::null_mut(),
    }
}

#[no_mangle]
/// Query an ApproxSet for a key.
/// Returns the result of the set's `probably_contains`.
///
/// # Safety
/// `ptr` must point to a live ApproxSet.
pub unsafe extern fn cmap_approxset_u64_probably_contains<'a>(
    ptr: NonNull<ApproxSet<'a, u64>>, key:u64
) -> bool {
    let aset = ptr.as_ref();
    aset.probably_contains(&key)
}

#[no_mangle]
/// Serialize an ApproxSet into a caller-supplied buffer.
///
/// Always returns the number of bytes the serialized object occupies.  Nothing is
/// written when `output_buf` is NULL or `output_buf_size` is smaller than that
/// number, so calling with `(NULL, 0)` only queries the required size.
///
/// # Safety
/// `ptr` must point to a live ApproxSet.  A non-NULL `output_buf` must be valid for
/// writes of `output_buf_size` bytes.
///
/// # Panics
/// The results of `serialized_size` and `encode_into_slice` are unwrapped, so an
/// error from either one panics.
/// NOTE(review): this is an `extern` function; consider an error return instead.
pub unsafe extern fn cmap_approxset_u64_encode<'a>(
    ptr: NonNull<ApproxSet<'a,u64>>,
    output_buf: *mut u8,
    output_buf_size: usize
) -> usize {
    let aset = ptr.as_ref();
    let required_size = serialized_size(aset,STD_BINCODE_CONFIG).unwrap();

    // Size query only, or the buffer is too small: report the size and write nothing.
    if output_buf.is_null() || output_buf_size < required_size {
        return required_size;
    }

    let out = from_raw_parts_mut(output_buf,output_buf_size);
    encode_into_slice(aset, out, STD_BINCODE_CONFIG).unwrap();
    required_size
}

#[no_mangle]
/// Destroy and free an ApproxSet.  Passing NULL is allowed and does nothing.
///
/// # Safety
/// A non-NULL `ptr` must be a Box-allocated ApproxSet that has not been freed yet;
/// it must not be used after this call.
pub unsafe extern fn cmap_approxset_u64_free<'a>(ptr: *mut ApproxSet<'a,u64>) {
    if !ptr.is_null() {
        // Rebuild the Box and drop it explicitly (`Box::from_raw` is `#[must_use]`).
        drop(Box::from_raw(ptr));
    }
}

2 changes: 1 addition & 1 deletion src/size.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* @author Mike Hamburg
* @copyright 2020-2022 Rambus Inc.
*
* Get size of serialized object. Should be part of bincode, dunno why it isn't.
* Get size of serialized object.
*/

use bincode::{Encode,config::Config,enc::EncoderImpl,error::EncodeError,enc::write::Writer};
Expand Down

0 comments on commit cd382f4

Please sign in to comment.