From cd382f46574735ffd07cf3b1c690481442f7f9d6 Mon Sep 17 00:00:00 2001 From: Mike Hamburg Date: Sat, 9 Apr 2022 16:30:17 +0100 Subject: [PATCH] better serialize interface; fix typos in ffi; ffi for approxset. still todo deserialize; save/load to files; maps with bytes as output --- TODO.md | 5 +- examples/ffi.c | 2 +- src/cffi.rs | 256 +++++++++++++++++++++++++++++++++++++++++++++---- src/size.rs | 2 +- 4 files changed, 245 insertions(+), 20 deletions(-) diff --git a/TODO.md b/TODO.md index 1e4db23..fb909af 100644 --- a/TODO.md +++ b/TODO.md @@ -1,7 +1,7 @@ # Release items -* C / C++ interface / dynamic lib -* Demo app +* C interface / dylib: deserialize, save/load file, map with bytes output +* Demo apps * Examples in doc # Post 0.2 quality items @@ -12,6 +12,7 @@ # Performance +* Reduce C dylib size? * Why is SipHasher so slow on Intel? * Multithread hashing even if we aren't multithreading bucketsort. (Using Rayon??) * Improve optimization of the threaded version diff --git a/examples/ffi.c b/examples/ffi.c index 5d59155..cb03e1c 100644 --- a/examples/ffi.c +++ b/examples/ffi.c @@ -54,7 +54,7 @@ int main(int argc, char **argv) { end = now(); printf("Build compressed_random_map of %lld items: %0.3fs = %0.1f ns/item\n", (long long)hashmap_len, end-start, (end-start)*1e9/hashmap_len); - size_t ser_size = cmap_compressed_random_map_u64_u64_ser_size(map); + size_t ser_size = cmap_compressed_random_map_u64_u64_encode(map,NULL,0); printf("Size is %lld bytes = %0.1f bytes/item\n", (long long)ser_size, (double)ser_size/hashmap_len); diff --git a/src/cffi.rs b/src/cffi.rs index 98b334d..cb152af 100644 --- a/src/cffi.rs +++ b/src/cffi.rs @@ -3,12 +3,15 @@ * @author Mike Hamburg * @copyright 2020-2022 Rambus Inc. * - * C foreign function interface + * C foreign function interface. + * All boilerplate. 
Would be nice if GPT3 / macros could write this :-/ */ -use crate::{BuildOptions,CompressedRandomMap,ApproxSet,STD_BINCODE_CONFIG,DefaultHasher,CompressedMap,serialized_size}; +use crate::{BuildOptions,CompressedRandomMap,ApproxSet,STD_BINCODE_CONFIG,CompressedMap,serialized_size}; use std::collections::{HashSet,HashMap}; -use std::ptr::NonNull; +use core::ptr::NonNull; +use core::slice::from_raw_parts_mut; +use bincode::encode_into_slice; /// Rust version of a vector of bytes pub type Bytes = Box<[u8]>; @@ -22,7 +25,7 @@ unsafe fn ptr_to_bytes(ptr: *const u8, len: usize) -> Bytes { ****************************************************************************/ #[no_mangle] -/// Create new HashMap +/// Create new HashMap pub extern fn cmap_hashmap_bytes_u64_new() -> *mut HashMap { Box::into_raw(Box::new(HashMap::new())) } @@ -34,19 +37,26 @@ pub unsafe extern fn cmap_hashmap_bytes_u64_len(ptr: NonNull> } #[no_mangle] -/// Insert an item into a hashmap +/// Insert an item into a HashMap pub unsafe extern fn cmap_hashmap_bytes_u64_insert(mut ptr: NonNull>, key: *const u8, key_len: usize, value:u64) { ptr.as_mut().insert(ptr_to_bytes(key,key_len),value); } #[no_mangle] -/// Does a HashMap contain a given key? +/// Remove an item from a HashMap pub unsafe extern fn cmap_hashmap_bytes_u64_remove(mut ptr: NonNull>, key: *const u8, key_len: usize) { ptr.as_mut().remove(&ptr_to_bytes(key,key_len)); } +#[no_mangle] +/// Does this HashMap contain a given key? +pub unsafe extern fn cmap_hashmap_bytes_u64_contains(mut ptr: NonNull>, + key: *const u8, key_len: usize) -> bool { + ptr.as_mut().contains_key(&ptr_to_bytes(key,key_len)) +} + #[no_mangle] /// Look up a key in a hashmap. Return true if it contains the key pub unsafe extern fn cmap_hashmap_bytes_u64_get(ptr: NonNull>, @@ -83,6 +93,20 @@ pub unsafe extern fn cmap_compressed_map_bytes_u64_query<'a>( ptr.as_ref()[&ptr_to_bytes(key,key_len)] } +#[no_mangle] +/// Encode to output_buf, if it's big enough. 
Return the serialized size of the object, in bytes. +pub unsafe extern fn cmap_compressed_map_bytes_u64_encode<'a>( + ptr: NonNull>, + output_buf: *mut u8, + output_buf_size: usize +) -> usize { + let required_size = serialized_size(ptr.as_ref(),STD_BINCODE_CONFIG).unwrap(); + if required_size <= output_buf_size && !output_buf.is_null() { + encode_into_slice(ptr.as_ref(), from_raw_parts_mut(output_buf,output_buf_size), STD_BINCODE_CONFIG).unwrap(); + } + required_size +} + #[no_mangle] /// Destroy and free a CompressedMap pub unsafe extern fn cmap_compressed_map_bytes_u64_free<'a>(ptr: *mut CompressedMap<'a,Bytes,u64>) { @@ -109,6 +133,20 @@ pub unsafe extern fn cmap_compressed_random_map_bytes_u64_query<'a>( ptr.as_ref().query(&ptr_to_bytes(key,key_len)) } +#[no_mangle] +/// Encode to output_buf, if it's big enough. Return the serialized size of the object, in bytes. +pub unsafe extern fn cmap_compressed_random_map_bytes_u64_encode<'a>( + ptr: NonNull>, + output_buf: *mut u8, + output_buf_size: usize +) -> usize { + let required_size = serialized_size(ptr.as_ref(),STD_BINCODE_CONFIG).unwrap(); + if required_size <= output_buf_size && !output_buf.is_null() { + encode_into_slice(ptr.as_ref(), from_raw_parts_mut(output_buf,output_buf_size), STD_BINCODE_CONFIG).unwrap(); + } + required_size +} + #[no_mangle] /// Destroy and free a CompressedRandomMap pub unsafe extern fn cmap_compressed_random_map_bytes_u64_free<'a>(ptr: *mut CompressedRandomMap<'a,Bytes,u64>) { @@ -122,7 +160,7 @@ pub unsafe extern fn cmap_compressed_random_map_bytes_u64_free<'a>(ptr: *mut Com #[no_mangle] -/// Create new HashMap +/// Create new HashMap pub extern fn cmap_hashmap_u64_u64_new() -> *mut HashMap { Box::into_raw(Box::new(HashMap::new())) } @@ -134,19 +172,27 @@ pub unsafe extern fn cmap_hashmap_u64_u64_len(ptr: NonNull>) -> } #[no_mangle] -/// Insert an item into a hashmap +/// Insert an item into a HashMap pub unsafe extern fn cmap_hashmap_u64_u64_insert(mut ptr: NonNull>, key: u64, 
value:u64) { ptr.as_mut().insert(key,value); } #[no_mangle] -/// Does a HashMap contain a given key? +/// Remove an item from a HashMap pub unsafe extern fn cmap_hashmap_u64_u64_remove(mut ptr: NonNull>, key: u64) { ptr.as_mut().remove(&key); } +#[no_mangle] +/// Does this HashMap contain a given key? +pub unsafe extern fn cmap_hashmap_bu64_u64_contains(mut ptr: NonNull>, + key: u64) -> bool { + ptr.as_mut().contains_key(&key) +} + + #[no_mangle] /// Look up a key in a hashmap. Return true if it contains the key pub unsafe extern fn cmap_hashmap_u64_u64_get(ptr: NonNull>, @@ -184,10 +230,17 @@ pub unsafe extern fn cmap_compressed_map_u64_u64_query<'a>( } #[no_mangle] -/// Return serialized size of the map, in bytes -pub unsafe extern fn cmap_compressed_map_u64_u64_ser_size<'a>(ptr: NonNull>) - -> usize { - serialized_size(ptr.as_ref(),STD_BINCODE_CONFIG).unwrap_or(0) +/// Encode to output_buf, if it's big enough. Return the serialized size of the object, in bytes. +pub unsafe extern fn cmap_compressed_map_u64_u64_encode<'a>( + ptr: NonNull>, + output_buf: *mut u8, + output_buf_size: usize +) -> usize { + let required_size = serialized_size(ptr.as_ref(),STD_BINCODE_CONFIG).unwrap(); + if required_size <= output_buf_size && !output_buf.is_null() { + encode_into_slice(ptr.as_ref(), from_raw_parts_mut(output_buf,output_buf_size), STD_BINCODE_CONFIG).unwrap(); + } + required_size } #[no_mangle] @@ -209,9 +262,16 @@ pub unsafe extern fn cmap_compressed_random_map_u64_u64_build<'a>(ptr: NonNull(ptr: NonNull>) - -> usize { - serialized_size(ptr.as_ref(),STD_BINCODE_CONFIG).unwrap_or(0) +pub unsafe extern fn cmap_compressed_random_map_u64_u64_encode<'a>( + ptr: NonNull>, + output_buf: *mut u8, + output_buf_size: usize +) -> usize { + let required_size = serialized_size(ptr.as_ref(),STD_BINCODE_CONFIG).unwrap(); + if required_size <= output_buf_size && !output_buf.is_null() { + encode_into_slice(ptr.as_ref(), from_raw_parts_mut(output_buf,output_buf_size), 
STD_BINCODE_CONFIG).unwrap(); + } + required_size } #[no_mangle] @@ -228,3 +288,167 @@ pub unsafe extern fn cmap_compressed_random_map_u64_u64_query<'a>( pub unsafe extern fn cmap_compressed_random_map_u64_u64_free<'a>(ptr: *mut CompressedRandomMap<'a,u64,u64>) { if !ptr.is_null() { Box::from_raw(ptr); } } + + +/**************************************************************************** + * HashSet + ****************************************************************************/ + +#[no_mangle] +/// Create new HashSet +pub extern fn cmap_hashset_bytes_new() -> *mut HashSet { + Box::into_raw(Box::new(HashSet::new())) +} + +#[no_mangle] +/// Count the items in a HashSet +pub unsafe extern fn cmap_hashset_bytes_len(ptr: NonNull>) -> usize { + ptr.as_ref().len() +} + +#[no_mangle] +/// Insert an item into a HashSet +pub unsafe extern fn cmap_hashset_bytes_insert(mut ptr: NonNull>, + key: *const u8, key_len: usize) { + ptr.as_mut().insert(ptr_to_bytes(key,key_len)); +} + +#[no_mangle] +/// Remove an item from a HashSet +pub unsafe extern fn cmap_hashset_bytes_remove(mut ptr: NonNull>, + key: *const u8, key_len: usize) { + ptr.as_mut().remove(&ptr_to_bytes(key,key_len)); +} + +#[no_mangle] +/// Does this HashSet contain a key +pub unsafe extern fn cmap_hashset_bytes_contains(mut ptr: NonNull>, + key: *const u8, key_len: usize) -> bool { + ptr.as_mut().contains(&ptr_to_bytes(key,key_len)) +} + +#[no_mangle] +/// Free a HashSet +pub unsafe extern fn cmap_hashset_bytes_free(ptr: *mut HashSet) { + if !ptr.is_null() { Box::from_raw(ptr); } +} + +#[no_mangle] +/// Build an ApproxSet. 
Return NULL on failure +pub unsafe extern fn cmap_approxset_bytes_build<'a>(ptr: NonNull>) + -> *mut ApproxSet<'a, Bytes> { + let mut options = BuildOptions::default(); + if let Some(aset) = ApproxSet::build(ptr.as_ref(),&mut options) { + return Box::into_raw(Box::new(aset)); + } + std::ptr::null_mut() +} + +#[no_mangle] +/// Look up a key in an ApproxSet +pub unsafe extern fn cmap_approxset_bytes_probably_contains<'a>( + ptr: NonNull>, + key: *const u8, key_len: usize +) -> bool { + ptr.as_ref().probably_contains(&ptr_to_bytes(key,key_len)) +} + +#[no_mangle] +/// Encode to output_buf, if it's big enough. Return the serialized size of the object, in bytes. +pub unsafe extern fn cmap_approxset_bytes_encode<'a>( + ptr: NonNull>, + output_buf: *mut u8, + output_buf_size: usize +) -> usize { + let required_size = serialized_size(ptr.as_ref(),STD_BINCODE_CONFIG).unwrap(); + if required_size <= output_buf_size && !output_buf.is_null() { + encode_into_slice(ptr.as_ref(), from_raw_parts_mut(output_buf,output_buf_size), STD_BINCODE_CONFIG).unwrap(); + } + required_size +} + +#[no_mangle] +/// Destroy and free an ApproxSet +pub unsafe extern fn cmap_approxset_bytes_free<'a>(ptr: *mut ApproxSet<'a,Bytes>) { + if !ptr.is_null() { Box::from_raw(ptr); } +} + +/**************************************************************************** + * HashSet + ****************************************************************************/ + +#[no_mangle] +/// Create new HashSet +pub extern fn cmap_hashset_u64_new() -> *mut HashSet { + Box::into_raw(Box::new(HashSet::new())) +} + +#[no_mangle] +/// Count the items in a HashSet +pub unsafe extern fn cmap_hashset_u64_len(ptr: NonNull>) -> usize { + ptr.as_ref().len() +} + +#[no_mangle] +/// Insert an item into a HashSet +pub unsafe extern fn cmap_hashset_u64_insert(mut ptr: NonNull>, key: u64) { + ptr.as_mut().insert(key); +} + +#[no_mangle] +/// Remove an item from a HashSet +pub unsafe extern fn cmap_hashset_u64_remove(mut ptr: NonNull>, 
key: u64) { + ptr.as_mut().remove(&key); +} + +#[no_mangle] +/// Does this HashSet contain a key +pub unsafe extern fn cmap_hashset_u64_contains(mut ptr: NonNull>, key: u64) -> bool { + ptr.as_mut().contains(&key) +} + +#[no_mangle] +/// Free a HashSet +pub unsafe extern fn cmap_hashset_u64_free(ptr: *mut HashSet) { + if !ptr.is_null() { Box::from_raw(ptr); } +} + +#[no_mangle] +/// Build an ApproxSet. Return NULL on failure +pub unsafe extern fn cmap_approxset_u64_build<'a>(ptr: NonNull>) + -> *mut ApproxSet<'a, u64> { + let mut options = BuildOptions::default(); + if let Some(aset) = ApproxSet::build(ptr.as_ref(),&mut options) { + return Box::into_raw(Box::new(aset)); + } + std::ptr::null_mut() +} + +#[no_mangle] +/// Look up a key in an ApproxSet +pub unsafe extern fn cmap_approxset_u64_probably_contains<'a>( + ptr: NonNull>, key:u64 +) -> bool { + ptr.as_ref().probably_contains(&key) +} + +#[no_mangle] +/// Encode to output_buf, if it's big enough. Return the serialized size of the object, in bytes. +pub unsafe extern fn cmap_approxset_u64_encode<'a>( + ptr: NonNull>, + output_buf: *mut u8, + output_buf_size: usize +) -> usize { + let required_size = serialized_size(ptr.as_ref(),STD_BINCODE_CONFIG).unwrap(); + if required_size <= output_buf_size && !output_buf.is_null() { + encode_into_slice(ptr.as_ref(), from_raw_parts_mut(output_buf,output_buf_size), STD_BINCODE_CONFIG).unwrap(); + } + required_size +} + +#[no_mangle] +/// Destroy and free an ApproxSet +pub unsafe extern fn cmap_approxset_u64_free<'a>(ptr: *mut ApproxSet<'a,u64>) { + if !ptr.is_null() { Box::from_raw(ptr); } +} + \ No newline at end of file diff --git a/src/size.rs b/src/size.rs index 9aab343..f8b47d8 100644 --- a/src/size.rs +++ b/src/size.rs @@ -3,7 +3,7 @@ * @author Mike Hamburg * @copyright 2020-2022 Rambus Inc. * - * Get size of serialized object. Should be part of bincode, dunno why it isn't. + * Get size of serialized object. 
*/ use bincode::{Encode,config::Config,enc::EncoderImpl,error::EncodeError,enc::write::Writer};