Skip to content

Commit

Permalink
feat: add bytes pool for metadata keys and values
Browse files Browse the repository at this point in the history
Add a bytes pool object to use a single allocation to store all bytes
used in rule metadatas.
  • Loading branch information
vthib committed Jul 28, 2024
1 parent 5efbfb3 commit bb1e4c6
Show file tree
Hide file tree
Showing 6 changed files with 134 additions and 17 deletions.
20 changes: 10 additions & 10 deletions boreal-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -531,11 +531,11 @@ fn scan_file(scanner: &Scanner, path: &Path, options: &ScanOptions) -> Result<()
let what = path.display().to_string();
match res {
Ok(res) => {
display_scan_results(res, &what, options);
display_scan_results(scanner, res, &what, options);
Ok(())
}
Err((err, res)) => {
display_scan_results(res, &what, options);
display_scan_results(scanner, res, &what, options);
Err(err)
}
}
Expand All @@ -545,17 +545,17 @@ fn scan_process(scanner: &Scanner, pid: u32, options: &ScanOptions) -> Result<()
let what = pid.to_string();
match scanner.scan_process(pid) {
Ok(res) => {
display_scan_results(res, &what, options);
display_scan_results(scanner, res, &what, options);
Ok(())
}
Err((err, res)) => {
display_scan_results(res, &what, options);
display_scan_results(scanner, res, &what, options);
Err(err)
}
}
}

fn display_scan_results(res: ScanResult, what: &str, options: &ScanOptions) {
fn display_scan_results(scanner: &Scanner, res: ScanResult, what: &str, options: &ScanOptions) {
// Print module data first
if options.print_module_data {
for (module_name, module_value) in res.module_values {
Expand Down Expand Up @@ -595,7 +595,7 @@ fn display_scan_results(res: ScanResult, what: &str, options: &ScanOptions) {
write!(stdout, " [{}]", rule.tags.join(",")).unwrap();
}
if options.print_metadata {
print_metadata(&mut stdout, rule.metadatas);
print_metadata(&mut stdout, scanner, rule.metadatas);
}
writeln!(stdout, " {}", what).unwrap();

Expand Down Expand Up @@ -624,17 +624,17 @@ fn display_scan_results(res: ScanResult, what: &str, options: &ScanOptions) {
}
}

fn print_metadata(stdout: &mut StdoutLock, metadatas: &[Metadata]) {
fn print_metadata(stdout: &mut StdoutLock, scanner: &Scanner, metadatas: &[Metadata]) {
write!(stdout, " [").unwrap();
for (i, meta) in metadatas.iter().enumerate() {
if i != 0 {
write!(stdout, ",").unwrap();
}
write!(stdout, "{}=", meta.name).unwrap();
match &meta.value {
write!(stdout, "{}=", scanner.get_string_symbol(meta.name)).unwrap();
match meta.value {
MetadataValue::Bytes(b) => {
write!(stdout, "\"").unwrap();
print_bytes(stdout, b);
print_bytes(stdout, scanner.get_bytes_symbol(b));
write!(stdout, "\"").unwrap();
}
MetadataValue::Integer(i) => {
Expand Down
76 changes: 76 additions & 0 deletions boreal/src/bytes_pool.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/// Bytes intern pool.
///
/// Every inserted value is appended to one contiguous buffer, so storing many
/// small byte strings costs a single (growing) allocation. Values are never
/// removed or modified once inserted: the buffer only grows.
#[derive(Default, Debug)]
pub(crate) struct BytesPool {
    /// Concatenation of every value inserted so far.
    buffer: Vec<u8>,
}

/// Symbol for a bytes string stored in a bytes intern pool.
///
/// A symbol is only meaningful for the pool that produced it.
#[derive(Copy, Clone, Debug)]
pub struct BytesSymbol {
    /// Offset of the first byte of the value in the pool buffer.
    from: usize,
    /// Offset one past the last byte of the value in the pool buffer.
    to: usize,
}

/// Symbol for a string stored in a bytes intern pool.
///
/// A symbol is only meaningful for the pool that produced it.
#[derive(Copy, Clone, Debug)]
pub struct StringSymbol {
    /// Offset of the first byte of the string in the pool buffer.
    from: usize,
    /// Offset one past the last byte of the string in the pool buffer.
    to: usize,
}

impl BytesPool {
    /// Insert bytes into the bytes pool.
    ///
    /// The returned symbol can then be used to retrieve those bytes from the pool.
    pub(crate) fn insert(&mut self, v: &[u8]) -> BytesSymbol {
        let from = self.buffer.len();
        self.buffer.extend_from_slice(v);

        BytesSymbol {
            from,
            to: self.buffer.len(),
        }
    }

    /// Insert a string into the bytes pool.
    ///
    /// The returned symbol can then be used to retrieve the string from the pool.
    pub(crate) fn insert_str(&mut self, v: &str) -> StringSymbol {
        // A string is stored exactly like its UTF-8 bytes; only the symbol type differs.
        let BytesSymbol { from, to } = self.insert(v.as_bytes());

        StringSymbol { from, to }
    }

    /// Get a byte string from the pool.
    ///
    /// # Panics
    ///
    /// Panics if the symbol range does not fit in this pool's buffer, which can
    /// only happen when the symbol was produced by a different pool.
    pub(crate) fn get(&self, symbol: BytesSymbol) -> &[u8] {
        &self.buffer[symbol.from..symbol.to]
    }

    /// Get a string from the pool.
    ///
    /// # Panics
    ///
    /// Panics if the symbol does not designate a string stored in this pool,
    /// i.e. if its range is out of bounds or does not cover valid UTF-8. This
    /// can only happen when the symbol was produced by a different pool.
    pub(crate) fn get_str(&self, symbol: StringSymbol) -> &str {
        // Symbols are `Copy` and not tied to the pool that created them, so a
        // symbol coming from another pool may select arbitrary bytes here.
        // Validate instead of trusting the range: a misuse becomes a panic
        // rather than undefined behavior.
        std::str::from_utf8(&self.buffer[symbol.from..symbol.to])
            .expect("string symbol does not refer to a string stored in this pool")
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_helpers::{test_type_traits, test_type_traits_non_clonable};

    /// Run the shared trait checks on every type declared in this module.
    #[test]
    fn test_types_traits() {
        // The pool itself goes through the "non clonable" variant of the helper.
        let pool = BytesPool::default();
        test_type_traits_non_clonable(pool);

        // Both symbol types go through the regular helper.
        let bytes_symbol = BytesSymbol { from: 0, to: 0 };
        test_type_traits(bytes_symbol);

        let string_symbol = StringSymbol { from: 0, to: 0 };
        test_type_traits(string_symbol);
    }
}
9 changes: 9 additions & 0 deletions boreal/src/compiler/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ pub use params::CompilerParams;
pub(crate) mod rule;
pub(crate) mod variable;

use crate::bytes_pool::BytesPool;
use crate::{statistics, Scanner};

/// Object used to compile rules.
Expand Down Expand Up @@ -59,6 +60,11 @@ pub struct Compiler {
/// Externally defined symbols.
external_symbols: Vec<external_symbol::ExternalSymbol>,

/// Bytes intern pool.
///
/// This is used to reduce memory footprint and share byte strings.
bytes_pool: BytesPool,

/// Compilation parameters
params: CompilerParams,
}
Expand Down Expand Up @@ -103,6 +109,7 @@ impl Default for Compiler {
available_modules: HashMap::new(),
imported_modules: Vec::new(),
external_symbols: Vec::new(),
bytes_pool: BytesPool::default(),
params: CompilerParams::default(),
}
}
Expand Down Expand Up @@ -395,6 +402,7 @@ impl Compiler {
&self.external_symbols,
&self.params,
parsed_contents,
&mut self.bytes_pool,
)
.map_err(|error| AddRuleError {
path: current_filepath.map(Path::to_path_buf),
Expand Down Expand Up @@ -517,6 +525,7 @@ impl Compiler {
self.imported_modules,
self.external_symbols,
namespaces,
self.bytes_pool,
)
}
}
Expand Down
25 changes: 18 additions & 7 deletions boreal/src/compiler/rule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ use boreal_parser::rule;
use super::expression::{compile_bool_expression, Expression, VariableIndex};
use super::external_symbol::ExternalSymbol;
use super::{variable, CompilationError, CompilerParams, Namespace};
use crate::bytes_pool::BytesPool;
use crate::bytes_pool::BytesSymbol;
use crate::bytes_pool::StringSymbol;
use crate::module::Type as ModuleType;
use crate::statistics;

Expand Down Expand Up @@ -42,18 +45,22 @@ pub(crate) struct Rule {
/// A metadata associated with a rule.
#[derive(Debug)]
pub struct Metadata {
/// Index of the name of the metadata in the metadata string table.
pub name: String,
/// Name of the metadata.
///
/// Use [`boreal::Scanner::get_string_symbol`] to retrieve the string.
pub name: StringSymbol,

/// The value of the metadata.
pub value: MetadataValue,
}

/// Value of a rule metadata.
#[derive(Clone, Debug, PartialEq, Eq)]
#[derive(Copy, Clone, Debug)]
pub enum MetadataValue {
/// Bytestring variant.
Bytes(Vec<u8>),
///
/// Use [`boreal::Scanner::get_bytes_symbol`] to retrieve the string.
Bytes(BytesSymbol),
/// Integer variant.
Integer(i64),
/// Boolean variant.
Expand Down Expand Up @@ -228,6 +235,7 @@ pub(super) fn compile_rule(
external_symbols: &Vec<ExternalSymbol>,
params: &CompilerParams,
parsed_contents: &str,
bytes_pool: &mut BytesPool,
) -> Result<CompiledRule, CompilationError> {
// Check duplication of tags
let mut tags_spans = HashMap::with_capacity(rule.tags.len());
Expand Down Expand Up @@ -271,9 +279,11 @@ pub(super) fn compile_rule(
.metadatas
.into_iter()
.map(|rule::Metadata { name, value }| Metadata {
name,
name: bytes_pool.insert_str(&name),
value: match value {
rule::MetadataValue::Bytes(v) => MetadataValue::Bytes(v),
rule::MetadataValue::Bytes(v) => {
MetadataValue::Bytes(bytes_pool.insert(&v))
}
rule::MetadataValue::Integer(v) => MetadataValue::Integer(v),
rule::MetadataValue::Boolean(v) => MetadataValue::Boolean(v),
},
Expand Down Expand Up @@ -338,8 +348,9 @@ mod tests {
name: "a".to_owned(),
used: false,
});
let mut pool = BytesPool::default();
test_type_traits_non_clonable(Metadata {
name: String::new(),
name: pool.insert_str(""),
value: MetadataValue::Boolean(true),
});
test_type_traits(MetadataValue::Boolean(true));
Expand Down
2 changes: 2 additions & 0 deletions boreal/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ use tlsh2 as _;

pub(crate) mod atoms;
mod bitmaps;
mod bytes_pool;
pub use bytes_pool::{BytesSymbol, StringSymbol};
pub mod compiler;
pub use compiler::rule::{Metadata, MetadataValue};
pub use compiler::Compiler;
Expand Down
19 changes: 19 additions & 0 deletions boreal/src/scanner/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use std::any::TypeId;
use std::collections::HashMap;
use std::sync::Arc;

use crate::bytes_pool::{BytesPool, BytesSymbol, StringSymbol};
use crate::compiler::external_symbol::{ExternalSymbol, ExternalValue};
use crate::compiler::rule::Rule;
use crate::compiler::variable::Variable;
Expand Down Expand Up @@ -107,6 +108,7 @@ impl Scanner {
modules: Vec<Box<dyn Module>>,
external_symbols: Vec<ExternalSymbol>,
namespaces: Vec<Option<String>>,
bytes_pool: BytesPool,
) -> Self {
let ac_scan = ac_scan::AcScan::new(&variables);

Expand All @@ -130,6 +132,7 @@ impl Scanner {
modules,
external_symbols_map,
namespaces,
bytes_pool,
}),
scan_params: ScanParams::default(),
external_symbols_values,
Expand Down Expand Up @@ -340,6 +343,18 @@ impl Scanner {
pub fn scan_params(&self) -> &ScanParams {
&self.scan_params
}

/// Resolve a [`BytesSymbol`] into the byte string it designates.
///
/// The lookup is done in the bytes pool held by this scanner. Presumably the
/// symbol must come from the metadata of a rule compiled into this scanner
/// (to be confirmed against the compiler): a symbol obtained elsewhere would
/// index unrelated data.
#[must_use]
pub fn get_bytes_symbol(&self, symbol: BytesSymbol) -> &[u8] {
    let pool = &self.inner.bytes_pool;
    pool.get(symbol)
}

/// Resolve a [`StringSymbol`] into the string it designates.
///
/// The lookup is done in the bytes pool held by this scanner. Presumably the
/// symbol must come from the metadata of a rule compiled into this scanner
/// (to be confirmed against the compiler): a symbol obtained elsewhere would
/// index unrelated data.
#[must_use]
pub fn get_string_symbol(&self, symbol: StringSymbol) -> &str {
    let pool = &self.inner.bytes_pool;
    pool.get_str(symbol)
}
}

#[derive(Debug)]
Expand Down Expand Up @@ -378,6 +393,9 @@ struct Inner {
///
/// None is used for the default namespace.
namespaces: Vec<Option<String>>,

/// Bytes intern pool.
bytes_pool: BytesPool,
}

impl Inner {
Expand Down Expand Up @@ -1483,6 +1501,7 @@ mod tests {
Vec::new(),
Vec::new(),
Vec::new(),
BytesPool::default(),
));
test_type_traits_non_clonable(ScanResult {
matched_rules: Vec::new(),
Expand Down

0 comments on commit bb1e4c6

Please sign in to comment.