diff --git a/boreal/src/compiler/expression.rs b/boreal/src/compiler/expression.rs index e0bf47a3..c7554879 100644 --- a/boreal/src/compiler/expression.rs +++ b/boreal/src/compiler/expression.rs @@ -10,6 +10,7 @@ use super::rule::RuleCompiler; use super::{module, CompilationError}; use crate::module::Type as ModuleType; use crate::regex::{regex_ast_to_hir, regex_hir_to_string, Regex}; +use crate::BytesSymbol; /// Type of a parsed expression /// @@ -351,8 +352,8 @@ pub enum Expression { /// The value is the index into the external symbols vector stored in the compiled rules. ExternalSymbol(usize), - /// A byte string. - Bytes(Vec), + /// An interned byte string. + Bytes(BytesSymbol), /// A regex. Regex(Regex), @@ -876,7 +877,7 @@ pub(super) fn compile_expression( Ok(Expr { expr, ty, span }) } parser::ExpressionKind::Bytes(s) => Ok(Expr { - expr: Expression::Bytes(s), + expr: Expression::Bytes(compiler.bytes_pool.insert(&s)), ty: Type::Bytes, span, }), diff --git a/boreal/src/compiler/module.rs b/boreal/src/compiler/module.rs index 2fcca1e0..02f5f9dd 100644 --- a/boreal/src/compiler/module.rs +++ b/boreal/src/compiler/module.rs @@ -347,7 +347,7 @@ impl ModuleUse<'_, '_> { // we can directly generate a primitive expression. StaticValue::Integer(v) => Expression::Integer(*v), StaticValue::Float(v) => Expression::Double(*v), - StaticValue::Bytes(v) => Expression::Bytes(v.clone()), + StaticValue::Bytes(v) => Expression::Bytes(self.compiler.bytes_pool.insert(v)), StaticValue::Boolean(v) => Expression::Boolean(*v), StaticValue::Object(_) => return None, diff --git a/boreal/src/compiler/rule.rs b/boreal/src/compiler/rule.rs index 33f20c27..7d9b7224 100644 --- a/boreal/src/compiler/rule.rs +++ b/boreal/src/compiler/rule.rs @@ -8,9 +8,7 @@ use boreal_parser::rule; use super::expression::{compile_bool_expression, Expression, VariableIndex}; use super::external_symbol::ExternalSymbol; use super::{variable, CompilationError, CompilerParams, Namespace}; -use crate::bytes_pool::BytesPoolBuilder; -use crate::bytes_pool::BytesSymbol; -use crate::bytes_pool::StringSymbol; +use crate::bytes_pool::{BytesPoolBuilder, BytesSymbol, StringSymbol}; use crate::module::Type as ModuleType; use crate::statistics; @@ -105,6 +103,9 @@ pub(super) struct RuleCompiler<'a> { /// Warnings emitted while compiling the rule. pub warnings: Vec, + + /// Bytes intern pool. + pub bytes_pool: &'a mut BytesPoolBuilder, } /// Helper struct used to track variables being compiled in a rule. @@ -126,6 +127,7 @@ impl<'a> RuleCompiler<'a> { namespace: &'a Namespace, external_symbols: &'a Vec, params: &'a CompilerParams, + bytes_pool: &'a mut BytesPoolBuilder, ) -> Result { let mut names_set = HashSet::new(); let mut variables = Vec::with_capacity(rule_variables.len()); @@ -153,6 +155,7 @@ impl<'a> RuleCompiler<'a> { params, condition_depth: 0, warnings: Vec::new(), + bytes_pool, }) } @@ -249,7 +252,26 @@ pub(super) fn compile_rule( } } - let mut compiler = RuleCompiler::new(&rule.variables, namespace, external_symbols, params)?; + let metadatas: Vec<_> = rule + .metadatas + .into_iter() + .map(|rule::Metadata { name, value }| Metadata { + name: bytes_pool.insert_str(&name), + value: match value { + rule::MetadataValue::Bytes(v) => MetadataValue::Bytes(bytes_pool.insert(&v)), + rule::MetadataValue::Integer(v) => MetadataValue::Integer(v), + rule::MetadataValue::Boolean(v) => MetadataValue::Boolean(v), + }, + }) + .collect(); + + let mut compiler = RuleCompiler::new( + &rule.variables, + namespace, + external_symbols, + params, + bytes_pool, + )?; let condition = compile_bool_expression(&mut compiler, rule.condition)?; let mut variables = Vec::with_capacity(rule.variables.len()); @@ -275,20 +297,7 @@ pub(super) fn compile_rule( name: rule.name, namespace_index, tags: rule.tags.into_iter().map(|v| v.tag).collect(), - metadatas: rule - .metadatas - .into_iter() - .map(|rule::Metadata { name, value }| Metadata { - name: bytes_pool.insert_str(&name), - value: match value { - rule::MetadataValue::Bytes(v) => { - MetadataValue::Bytes(bytes_pool.insert(&v)) - } - rule::MetadataValue::Integer(v) => MetadataValue::Integer(v), - rule::MetadataValue::Boolean(v) => MetadataValue::Boolean(v), - }, - }) - .collect(), + metadatas, nb_variables: variables.len(), condition, is_private: rule.is_private, @@ -326,6 +335,7 @@ mod tests { params: &CompilerParams::default(), condition_depth: 0, warnings: Vec::new(), + bytes_pool: &mut BytesPoolBuilder::default(), }); let build_rule = || Rule { name: "a".to_owned(), diff --git a/boreal/src/compiler/tests.rs b/boreal/src/compiler/tests.rs index 5bab5f7c..0a6d03c2 100644 --- a/boreal/src/compiler/tests.rs +++ b/boreal/src/compiler/tests.rs @@ -7,6 +7,7 @@ use super::{ AddRuleError, AddRuleErrorKind, AddRuleStatus, AvailableModule, CompilationError, Compiler, CompilerParams, ImportedModule, ModuleLocation, Namespace, }; +use crate::bytes_pool::BytesPoolBuilder; use crate::test_helpers::{test_type_traits, test_type_traits_non_clonable}; use boreal_parser::parse; @@ -30,12 +31,14 @@ fn compile_expr(expression_str: &str, expected_type: Type) { assert!(compiler.define_symbol("sym_bool", true)); assert!(compiler.define_symbol("sym_bytes", "keyboard")); + let mut bytes_pool = BytesPoolBuilder::default(); let ns = Namespace::default(); let mut rule_compiler = RuleCompiler::new( &rule.variables, &ns, &compiler.external_symbols, &compiler.params, + &mut bytes_pool, ) .unwrap(); let res = compile_expression(&mut rule_compiler, rule.condition).unwrap(); diff --git a/boreal/src/compiler/variable.rs b/boreal/src/compiler/variable.rs index dcb23109..605b71e4 100644 --- a/boreal/src/compiler/variable.rs +++ b/boreal/src/compiler/variable.rs @@ -154,6 +154,7 @@ mod tests { use boreal_parser::rule::VariableModifiers; use super::*; + use crate::bytes_pool::BytesPoolBuilder; use crate::compiler::{CompilerParams, Namespace}; use crate::regex::Regex; use crate::test_helpers::test_type_traits_non_clonable; @@ -169,6 +170,7 @@ mod tests { params: &CompilerParams::default(), condition_depth: 0, warnings: Vec::new(), + bytes_pool: &mut BytesPoolBuilder::default(), }; test_type_traits_non_clonable( compile_variable( diff --git a/boreal/src/evaluator/mod.rs b/boreal/src/evaluator/mod.rs index 70418251..0222ec02 100644 --- a/boreal/src/evaluator/mod.rs +++ b/boreal/src/evaluator/mod.rs @@ -35,6 +35,7 @@ //! - `defined` //! //! For all of those, an undefined value is considered to be equivalent to a false boolean value. +use crate::bytes_pool::BytesPool; use crate::compiler::expression::{Expression, ForIterator, ForSelection, VariableIndex}; use crate::compiler::rule::Rule; #[cfg(feature = "object")] @@ -112,6 +113,7 @@ pub(crate) fn evaluate_rule<'scan>( rule: &Rule, var_matches: Option<&'scan [Vec]>, previous_rules_results: &'scan [bool], + bytes_pool: &'scan BytesPool, scan_data: &'scan mut ScanData, ) -> Result { let mut evaluator = Evaluator { @@ -119,6 +121,7 @@ pub(crate) fn evaluate_rule<'scan>( previous_rules_results, currently_selected_variable_index: None, bounded_identifiers_stack: Vec::new(), + bytes_pool, scan_data, }; match evaluator.evaluate_expr(&rule.condition) { @@ -145,6 +148,9 @@ struct Evaluator<'scan, 'rule, 'mem> { // Stack of bounded identifiers to their integer values. bounded_identifiers_stack: Vec, + // Bytes intern pool, used to resolve expressions that stored bytes in the pool. + bytes_pool: &'rule BytesPool, + // Data related only to the scan, independent of the rule. scan_data: &'rule mut ScanData<'scan, 'mem>, } @@ -679,7 +685,7 @@ impl Evaluator<'_, '_, '_> { Expression::Integer(v) => Ok(Value::Integer(*v)), Expression::Double(v) => Ok(Value::Float(*v)), - Expression::Bytes(v) => Ok(Value::Bytes(v.clone())), + Expression::Bytes(v) => Ok(Value::Bytes(self.bytes_pool.get(*v).to_vec())), Expression::Regex(v) => Ok(Value::Regex(v.clone())), Expression::Boolean(v) => Ok(Value::Boolean(*v)), } diff --git a/boreal/src/scanner/mod.rs b/boreal/src/scanner/mod.rs index 2a2818e2..a1b09be1 100644 --- a/boreal/src/scanner/mod.rs +++ b/boreal/src/scanner/mod.rs @@ -709,6 +709,7 @@ impl EvalContext { rule, var_matches.as_deref(), &self.previous_results, + &scanner.bytes_pool, scan_data, )?; @@ -989,13 +990,22 @@ mod tests { let mut previous_results = Vec::new(); let rules = &scanner.inner.rules; for rule in &rules[..(rules.len() - 1)] { - previous_results - .push(evaluate_rule(rule, None, &previous_results, &mut scan_data).unwrap()); + previous_results.push( + evaluate_rule( + rule, + None, + &previous_results, + &scanner.inner.bytes_pool, + &mut scan_data, + ) + .unwrap(), + ); } let last_res = evaluate_rule( &rules[rules.len() - 1], None, &previous_results, + &scanner.inner.bytes_pool, &mut scan_data, );