Skip to content

Commit

Permalink
feat: add bytes from expressions into bytes pool
Browse files: browse the repository at this point in the history
  • Loading branch information
vthib committed Jul 28, 2024
1 parent 1faa205 commit 44028ce
Show file tree
Hide file tree
Showing 7 changed files with 57 additions and 25 deletions.
7 changes: 4 additions & 3 deletions boreal/src/compiler/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use super::rule::RuleCompiler;
use super::{module, CompilationError};
use crate::module::Type as ModuleType;
use crate::regex::{regex_ast_to_hir, regex_hir_to_string, Regex};
use crate::BytesSymbol;

/// Type of a parsed expression
///
Expand Down Expand Up @@ -351,8 +352,8 @@ pub enum Expression {
/// The value is the index into the external symbols vector stored in the compiled rules.
ExternalSymbol(usize),

/// A byte string.
Bytes(Vec<u8>),
/// An interned byte string.
Bytes(BytesSymbol),

/// A regex.
Regex(Regex),
Expand Down Expand Up @@ -876,7 +877,7 @@ pub(super) fn compile_expression(
Ok(Expr { expr, ty, span })
}
parser::ExpressionKind::Bytes(s) => Ok(Expr {
expr: Expression::Bytes(s),
expr: Expression::Bytes(compiler.bytes_pool.insert(&s)),
ty: Type::Bytes,
span,
}),
Expand Down
2 changes: 1 addition & 1 deletion boreal/src/compiler/module.rs
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ impl ModuleUse<'_, '_> {
// we can directly generate a primitive expression.
StaticValue::Integer(v) => Expression::Integer(*v),
StaticValue::Float(v) => Expression::Double(*v),
StaticValue::Bytes(v) => Expression::Bytes(v.clone()),
StaticValue::Bytes(v) => Expression::Bytes(self.compiler.bytes_pool.insert(v)),
StaticValue::Boolean(v) => Expression::Boolean(*v),

StaticValue::Object(_) => return None,
Expand Down
46 changes: 28 additions & 18 deletions boreal/src/compiler/rule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,7 @@ use boreal_parser::rule;
use super::expression::{compile_bool_expression, Expression, VariableIndex};
use super::external_symbol::ExternalSymbol;
use super::{variable, CompilationError, CompilerParams, Namespace};
use crate::bytes_pool::BytesPoolBuilder;
use crate::bytes_pool::BytesSymbol;
use crate::bytes_pool::StringSymbol;
use crate::bytes_pool::{BytesPoolBuilder, BytesSymbol, StringSymbol};
use crate::module::Type as ModuleType;
use crate::statistics;

Expand Down Expand Up @@ -105,6 +103,9 @@ pub(super) struct RuleCompiler<'a> {

/// Warnings emitted while compiling the rule.
pub warnings: Vec<CompilationError>,

/// Bytes intern pool.
pub bytes_pool: &'a mut BytesPoolBuilder,
}

/// Helper struct used to track variables being compiled in a rule.
Expand All @@ -126,6 +127,7 @@ impl<'a> RuleCompiler<'a> {
namespace: &'a Namespace,
external_symbols: &'a Vec<ExternalSymbol>,
params: &'a CompilerParams,
bytes_pool: &'a mut BytesPoolBuilder,
) -> Result<Self, CompilationError> {
let mut names_set = HashSet::new();
let mut variables = Vec::with_capacity(rule_variables.len());
Expand Down Expand Up @@ -153,6 +155,7 @@ impl<'a> RuleCompiler<'a> {
params,
condition_depth: 0,
warnings: Vec::new(),
bytes_pool,
})
}

Expand Down Expand Up @@ -249,7 +252,26 @@ pub(super) fn compile_rule(
}
}

let mut compiler = RuleCompiler::new(&rule.variables, namespace, external_symbols, params)?;
let metadatas: Vec<_> = rule
.metadatas
.into_iter()
.map(|rule::Metadata { name, value }| Metadata {
name: bytes_pool.insert_str(&name),
value: match value {
rule::MetadataValue::Bytes(v) => MetadataValue::Bytes(bytes_pool.insert(&v)),
rule::MetadataValue::Integer(v) => MetadataValue::Integer(v),
rule::MetadataValue::Boolean(v) => MetadataValue::Boolean(v),
},
})
.collect();

let mut compiler = RuleCompiler::new(
&rule.variables,
namespace,
external_symbols,
params,
bytes_pool,
)?;
let condition = compile_bool_expression(&mut compiler, rule.condition)?;

let mut variables = Vec::with_capacity(rule.variables.len());
Expand All @@ -275,20 +297,7 @@ pub(super) fn compile_rule(
name: rule.name,
namespace_index,
tags: rule.tags.into_iter().map(|v| v.tag).collect(),
metadatas: rule
.metadatas
.into_iter()
.map(|rule::Metadata { name, value }| Metadata {
name: bytes_pool.insert_str(&name),
value: match value {
rule::MetadataValue::Bytes(v) => {
MetadataValue::Bytes(bytes_pool.insert(&v))
}
rule::MetadataValue::Integer(v) => MetadataValue::Integer(v),
rule::MetadataValue::Boolean(v) => MetadataValue::Boolean(v),
},
})
.collect(),
metadatas,
nb_variables: variables.len(),
condition,
is_private: rule.is_private,
Expand Down Expand Up @@ -326,6 +335,7 @@ mod tests {
params: &CompilerParams::default(),
condition_depth: 0,
warnings: Vec::new(),
bytes_pool: &mut BytesPoolBuilder::default(),
});
let build_rule = || Rule {
name: "a".to_owned(),
Expand Down
3 changes: 3 additions & 0 deletions boreal/src/compiler/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use super::{
AddRuleError, AddRuleErrorKind, AddRuleStatus, AvailableModule, CompilationError, Compiler,
CompilerParams, ImportedModule, ModuleLocation, Namespace,
};
use crate::bytes_pool::BytesPoolBuilder;
use crate::test_helpers::{test_type_traits, test_type_traits_non_clonable};
use boreal_parser::parse;

Expand All @@ -30,12 +31,14 @@ fn compile_expr(expression_str: &str, expected_type: Type) {
assert!(compiler.define_symbol("sym_bool", true));
assert!(compiler.define_symbol("sym_bytes", "keyboard"));

let mut bytes_pool = BytesPoolBuilder::default();
let ns = Namespace::default();
let mut rule_compiler = RuleCompiler::new(
&rule.variables,
&ns,
&compiler.external_symbols,
&compiler.params,
&mut bytes_pool,
)
.unwrap();
let res = compile_expression(&mut rule_compiler, rule.condition).unwrap();
Expand Down
2 changes: 2 additions & 0 deletions boreal/src/compiler/variable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ mod tests {
use boreal_parser::rule::VariableModifiers;

use super::*;
use crate::bytes_pool::BytesPoolBuilder;
use crate::compiler::{CompilerParams, Namespace};
use crate::regex::Regex;
use crate::test_helpers::test_type_traits_non_clonable;
Expand All @@ -169,6 +170,7 @@ mod tests {
params: &CompilerParams::default(),
condition_depth: 0,
warnings: Vec::new(),
bytes_pool: &mut BytesPoolBuilder::default(),
};
test_type_traits_non_clonable(
compile_variable(
Expand Down
8 changes: 7 additions & 1 deletion boreal/src/evaluator/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
//! - `defined`
//!
//! For all of those, an undefined value is considered to be equivalent to a false boolean value.
use crate::bytes_pool::BytesPool;
use crate::compiler::expression::{Expression, ForIterator, ForSelection, VariableIndex};
use crate::compiler::rule::Rule;
#[cfg(feature = "object")]
Expand Down Expand Up @@ -112,13 +113,15 @@ pub(crate) fn evaluate_rule<'scan>(
rule: &Rule,
var_matches: Option<&'scan [Vec<variable::StringMatch>]>,
previous_rules_results: &'scan [bool],
bytes_pool: &'scan BytesPool,
scan_data: &'scan mut ScanData,
) -> Result<bool, EvalError> {
let mut evaluator = Evaluator {
var_matches: var_matches.map(variable::VarMatches::new),
previous_rules_results,
currently_selected_variable_index: None,
bounded_identifiers_stack: Vec::new(),
bytes_pool,
scan_data,
};
match evaluator.evaluate_expr(&rule.condition) {
Expand All @@ -145,6 +148,9 @@ struct Evaluator<'scan, 'rule, 'mem> {
// Stack of bounded identifiers to their integer values.
bounded_identifiers_stack: Vec<ModuleValue>,

// Bytes intern pool, used to resolve expressions that stored bytes in the pool.
bytes_pool: &'rule BytesPool,

// Data related only to the scan, independent of the rule.
scan_data: &'rule mut ScanData<'scan, 'mem>,
}
Expand Down Expand Up @@ -679,7 +685,7 @@ impl Evaluator<'_, '_, '_> {

Expression::Integer(v) => Ok(Value::Integer(*v)),
Expression::Double(v) => Ok(Value::Float(*v)),
Expression::Bytes(v) => Ok(Value::Bytes(v.clone())),
Expression::Bytes(v) => Ok(Value::Bytes(self.bytes_pool.get(*v).to_vec())),
Expression::Regex(v) => Ok(Value::Regex(v.clone())),
Expression::Boolean(v) => Ok(Value::Boolean(*v)),
}
Expand Down
14 changes: 12 additions & 2 deletions boreal/src/scanner/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -709,6 +709,7 @@ impl EvalContext {
rule,
var_matches.as_deref(),
&self.previous_results,
&scanner.bytes_pool,
scan_data,
)?;

Expand Down Expand Up @@ -989,13 +990,22 @@ mod tests {
let mut previous_results = Vec::new();
let rules = &scanner.inner.rules;
for rule in &rules[..(rules.len() - 1)] {
previous_results
.push(evaluate_rule(rule, None, &previous_results, &mut scan_data).unwrap());
previous_results.push(
evaluate_rule(
rule,
None,
&previous_results,
&scanner.inner.bytes_pool,
&mut scan_data,
)
.unwrap(),
);
}
let last_res = evaluate_rule(
&rules[rules.len() - 1],
None,
&previous_results,
&scanner.inner.bytes_pool,
&mut scan_data,
);

Expand Down

0 comments on commit 44028ce

Please sign in to comment.