Skip to content

Commit

Permalink
Control Flow Graph Vector Encoding
Browse files Browse the repository at this point in the history
- VecGraph (control flow graph)
- VecNode (control flow node)
- Adds support to encode control flow graphs to vectors
- Can later be reduced for vector databases
  • Loading branch information
c3rb3ru5d3d53c committed Dec 26, 2024
1 parent 58e3dd9 commit e1b713c
Show file tree
Hide file tree
Showing 8 changed files with 204 additions and 204 deletions.
15 changes: 15 additions & 0 deletions src/bindings/python/src/controlflow/block.rs
Original file line number Diff line number Diff line change
Expand Up @@ -304,11 +304,26 @@ impl BlockJsonDeserializer {
self.inner.lock().unwrap().edges()
}

/// Returns the set of child block identifiers held by the wrapped deserializer.
///
/// Panics if the inner mutex is poisoned.
#[pyo3(text_signature = "($self)")]
pub fn blocks(&self) -> BTreeSet<u64> {
    let guard = self.inner.lock().unwrap();
    guard.blocks()
}

/// Returns the set produced by the wrapped deserializer's `to()`.
///
/// Panics if the inner mutex is poisoned.
#[pyo3(text_signature = "($self)")]
pub fn to(&self) -> BTreeSet<u64> {
    // NOTE(review): presumably the addresses this block transfers control to;
    // confirm against the inner `BlockJsonDeserializer::to`.
    let guard = self.inner.lock().unwrap();
    guard.to()
}

/// Returns the `conditional()` flag reported by the wrapped deserializer.
///
/// Panics if the inner mutex is poisoned.
#[pyo3(text_signature = "($self)")]
pub fn conditional(&self) -> bool {
    let guard = self.inner.lock().unwrap();
    guard.conditional()
}

/// Returns the entropy reported by the wrapped deserializer, or `None` when
/// it reports no value.
///
/// Panics if the inner mutex is poisoned.
#[pyo3(text_signature = "($self)")]
pub fn entropy(&self) -> Option<f64> {
    let guard = self.inner.lock().unwrap();
    guard.entropy()
}

#[pyo3(text_signature = "($self)")]
pub fn next(&self) -> Option<u64> {
self.inner.lock().unwrap().next()
Expand Down
76 changes: 24 additions & 52 deletions src/bindings/python/src/controlflow/function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,8 @@
// permanent authorization for you to choose that version for the
// Library.

use crate::controlflow::BlockJsonDeserializer;
use binlex::controlflow::BlockJsonDeserializer as InnerBlockJsonDeserializer;
use pyo3::prelude::*;
use pyo3::Py;
use std::collections::BTreeMap;
Expand Down Expand Up @@ -199,6 +201,28 @@ impl FunctionJsonDeserializer {
})
}

/// Returns one `BlockJsonDeserializer` wrapper per block in this function's JSON.
///
/// Every returned wrapper owns an independent clone of the block JSON and of
/// the configuration, each behind its own `Arc<Mutex<_>>`, so changes made
/// through a returned wrapper are not reflected in this function.
///
/// Panics if the inner mutex is poisoned.
#[pyo3(text_signature = "($self)")]
pub fn blocks(&self) -> Vec<BlockJsonDeserializer> {
    // Lock once so the block list and the configuration come from the same
    // snapshot, rather than acquiring the mutex separately for each.
    let guard = self.inner.lock().unwrap();
    guard
        .json
        .blocks
        .iter()
        .map(|block_json| BlockJsonDeserializer {
            inner: Arc::new(Mutex::new(InnerBlockJsonDeserializer {
                json: block_json.clone(),
                config: guard.config.clone(),
            })),
        })
        .collect()
}

/// Returns the `u64 -> u64` function map reported by the wrapped deserializer.
///
/// Panics if the inner mutex is poisoned.
#[pyo3(text_signature = "($self)")]
pub fn functions(&self) -> BTreeMap<u64, u64> {
    let guard = self.inner.lock().unwrap();
    guard.functions()
}

#[pyo3(text_signature = "($self)")]
pub fn size(&self) -> usize {
self.inner.lock().unwrap().size()
Expand Down Expand Up @@ -563,58 +587,6 @@ impl Function {
})
}

// #[pyo3(text_signature = "($self, rhs_functions)")]
// pub fn compare_many(&self, py: Python, rhs_functions: Py<PyList>) -> PyResult<BTreeMap<u64, ChromosomeSimilarity>> {
// self.with_inner_function(py, |function| {
// let mut tasks = Vec::<(u64, Arc<Mutex<InnerGraph>>)>::new();

// let list = rhs_functions.bind(py);

// let items: Vec<Py<PyAny>> = list.iter().map(|item| item.into()).collect();

// for item in items {
// let py_item = item.bind(py);
// if !py_item.is_instance_of::<Function>() {
// return Err(pyo3::exceptions::PyTypeError::new_err(
// "all items in rhs_functions must be instances of Function",
// ));
// }
// let rhs: Option<Py<Function>> = py_item.extract().ok();
// if rhs.is_none() { continue; }
// let rhs_binding_0 = rhs.unwrap();
// let rhs_binding_1 = rhs_binding_0.borrow(py);
// let address = rhs_binding_1.address();
// let rhs_cfg = Arc::clone(&rhs_binding_1.cfg.borrow(py).inner);
// tasks.push((address, rhs_cfg));
// };

// let pool = ThreadPoolBuilder::new()
// .num_threads(function.cfg.config.general.threads)
// .build()
// .map_err(|error| pyo3::exceptions::PyRuntimeError::new_err(format!("{}", error)))?;

// let results: BTreeMap<u64, ChromosomeSimilarity> = pool.install(|| {
// tasks
// .par_iter()
// .filter_map(|(address, inner_cfg)| {
// let c = inner_cfg.lock().unwrap();
// let rhs_function = InnerFunction::new(*address, &c).ok()?;
// let similarity = function.compare(&rhs_function).ok()?;
// similarity.map(|similarity| {
// (
// *address,
// ChromosomeSimilarity {
// inner: Arc::new(Mutex::new(similarity)),
// },
// )
// })
// })
// .collect()
// });
// Ok(results)
// })
// }

#[pyo3(text_signature = "($self)")]
pub fn chromosome_minhash_ratio(&self, py: Python) -> PyResult<f64> {
self.with_inner_function(py, |function| {
Expand Down
2 changes: 2 additions & 0 deletions src/bindings/python/src/types/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ use crate::types::lz4string::lz4string_init;
pub use crate::types::memorymappedfile::MemoryMappedFile;
pub use crate::types::lz4string::LZ4String;
pub use crate::types::vecnode::VecNode;
pub use crate::types::vecnode::VecGraph;

use pyo3::{prelude::*, wrap_pymodule};

Expand All @@ -187,6 +188,7 @@ pub fn types_init(py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_class::<MemoryMappedFile>()?;
m.add_class::<LZ4String>()?;
m.add_class::<VecNode>()?;
m.add_class::<VecGraph>()?;
py.import_bound("sys")?
.getattr("modules")?
.set_item("binlex.types", m)?;
Expand Down
117 changes: 73 additions & 44 deletions src/bindings/python/src/types/vecnode.rs
Original file line number Diff line number Diff line change
@@ -1,96 +1,125 @@
pub use binlex::types::VecNode as InnerVecNode;
pub use binlex::types::VecGraph as InnerVecGraph;
use std::sync::{Arc, Mutex};
use pyo3::prelude::*;

#[pyclass]
pub struct VecNode {
pub inner: Arc<Mutex<InnerVecNode>>,
pub struct VecGraph {
pub inner: Arc<Mutex<InnerVecGraph>>,
}

#[pymethods]
impl VecNode {
impl VecGraph {
#[new]
#[pyo3(text_signature = "(id)")]
pub fn new(id: u64) -> Self {
let inner = InnerVecNode::new(id);
#[pyo3(text_signature = "()")]
pub fn new() -> Self {
Self {
inner: Arc::new(Mutex::new(inner))
inner: Arc::new(Mutex::new(InnerVecGraph::new()))
}
}

#[pyo3(text_signature = "($self, child)")]
pub fn add_child(&self, py: Python, child: Py<VecNode>) {
let inner_child = child.borrow(py).inner.lock().unwrap().clone();
self.inner.lock().unwrap().add_child(inner_child);
/// Inserts a copy of `node`'s current inner state into this graph.
///
/// Panics if either the node's or the graph's mutex is poisoned.
#[pyo3(text_signature = "($self, node)")]
pub fn insert_node(&mut self, py: Python, node: Py<VecNode>) {
    // Clone the node in its own scope so its borrow and lock are released
    // before the graph's lock is taken.
    let snapshot = {
        let borrowed = node.borrow(py);
        let guard = borrowed.inner.lock().unwrap();
        guard.clone()
    };
    self.inner.lock().unwrap().insert_node(snapshot);
}

/// Looks up the node with the given `id`.
///
/// Returns `None` when the graph has no such node. Otherwise returns a new
/// `VecNode` wrapping a clone of the stored node, so changes made through
/// the returned wrapper are not written back to the graph.
///
/// Panics if the inner mutex is poisoned.
#[pyo3(text_signature = "($self, id)")]
pub fn get_node(&self, id: u64) -> Option<VecNode> {
    let graph = self.inner.lock().unwrap();
    graph.get_node(id).map(|found| VecNode {
        inner: Arc::new(Mutex::new(found.clone())),
    })
}

#[pyo3(text_signature = "($self, parent)")]
pub fn add_parent(&self, py: Python, parent: Py<VecNode>) {
let inner_parent = parent.borrow(py).inner.lock().unwrap().clone();
self.inner.lock().unwrap().add_parent(inner_parent);
/// Records a relationship between the nodes identified by `node1_id` and
/// `node2_id` by forwarding both ids to the inner graph.
///
/// Panics if the inner mutex is poisoned.
#[pyo3(text_signature = "($self, node1_id, node2_id)")]
pub fn add_relationship(&mut self, node1_id: u64, node2_id: u64) {
    // NOTE(review): direction and duplicate handling are decided by the inner
    // `VecGraph::add_relationship` — confirm there.
    self.inner.lock().unwrap().add_relationship(node1_id, node2_id)
}

#[pyo3(text_signature = "($self)")]
pub fn children(&self) -> Vec<VecNode> {
let mut result = Vec::<VecNode>::new();
for child in self.inner.lock().unwrap().children() {
let a = VecNode {
inner: Arc::new(Mutex::new(child.clone()))
};
result.push(a);
}
result
pub fn to_vec(&self) -> Vec<f64> {
self.inner.lock().unwrap().to_vec()
}

#[pyo3(text_signature = "($self)")]
pub fn parents(&self) -> Vec<VecNode> {
let mut result = Vec::<VecNode>::new();
for parent in self.inner.lock().unwrap().parents() {
let a = VecNode {
inner: Arc::new(Mutex::new(parent.clone()))
};
result.push(a);
pub fn print(&self) {
self.inner
.lock()
.unwrap()
.print()
}

}

/// Python-visible wrapper around the core `InnerVecNode`.
///
/// The node state lives behind an `Arc<Mutex<_>>`; methods lock it for the
/// duration of each call.
#[pyclass]
pub struct VecNode {
    /// Shared, lock-protected handle to the wrapped core node.
    pub inner: Arc<Mutex<InnerVecNode>>,
}

#[pymethods]
impl VecNode {
#[new]
#[pyo3(text_signature = "(id)")]
pub fn new(id: u64) -> Self {
let inner = InnerVecNode::new(id);
Self {
inner: Arc::new(Mutex::new(inner))
}
result
}

#[pyo3(text_signature = "($self, key, value)")]
pub fn add_property(&mut self, key: String, value: f64) {
/// Returns the identifier reported by the wrapped node.
///
/// Panics if the inner mutex is poisoned.
#[pyo3(text_signature = "($self)")]
pub fn id(&self) -> u64 {
    let guard = self.inner.lock().unwrap();
    guard.id()
}

#[pyo3(text_signature = "($self)")]
pub fn relationships(&self) -> Vec<u64> {
self.inner
.lock()
.unwrap()
.add_property(&key, value)
.relationships()
.clone()
}

#[pyo3(text_signature = "($self, key, values)")]
pub fn add_properties(&mut self, key: String, values: Vec<f64>) {
pub fn add_relationship(&self, id: u64) {
self.inner
.lock()
.unwrap()
.add_properties(&key, values)
.add_relationship(id)
}

#[pyo3(text_signature = "($self)")]
pub fn print(&self) {
#[pyo3(text_signature = "($self, key, value)")]
pub fn add_property(&mut self, key: String, value: f64) {
self.inner
.lock()
.unwrap()
.print()
.add_property(&key, value)
}

#[pyo3(text_signature = "($self)")]
pub fn to_vec(&self) -> Vec<f64> {
#[pyo3(text_signature = "($self, key, values)")]
pub fn add_properties(&mut self, key: String, values: Vec<f64>) {
self.inner
.lock()
.unwrap()
.to_vec()
.add_properties(&key, values)
}
}

#[pymodule]
#[pyo3(name = "vecnode")]
pub fn vecnode_init(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_class::<VecNode>()?;
m.add_class::<VecGraph>()?;
py.import_bound("sys")?
.getattr("modules")?
.set_item("binlex.types.vecnode", m)?;
Expand Down
8 changes: 8 additions & 0 deletions src/controlflow/block.rs
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,8 @@ pub struct BlockJson {
pub bytes: String,
/// A map of function addresses related to this block.
pub functions: BTreeMap<u64, u64>,
/// Blocks this block has as children.
pub blocks: BTreeSet<u64>,
/// The number of instructions in this block.
pub number_of_instructions: usize,
/// Instructions associated with this block.
Expand Down Expand Up @@ -280,6 +282,11 @@ impl BlockJsonDeserializer {
})
}

/// Returns an owned copy of the `blocks` set stored in this block's JSON.
#[allow(dead_code)]
pub fn blocks(&self) -> BTreeSet<u64> {
    BTreeSet::clone(&self.json.blocks)
}

#[allow(dead_code)]
pub fn edges(&self) -> usize {
self.json.edges
Expand Down Expand Up @@ -522,6 +529,7 @@ impl<'block> Block<'block> {
number_of_instructions: self.number_of_instructions(),
instructions: self.instructions_json(),
functions: self.functions(),
blocks: self.blocks(),
entropy: self.entropy(),
sha256: self.sha256(),
minhash: self.minhash(),
Expand Down
36 changes: 16 additions & 20 deletions src/controlflow/function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,18 @@ impl FunctionJsonDeserializer {
self.json.address
}

/// Builds one `BlockJsonDeserializer` per block in this function's JSON.
///
/// Each deserializer receives its own clone of the block JSON and of this
/// function's configuration; the result preserves the iteration order of
/// `self.json.blocks`.
pub fn blocks(&self) -> Vec<BlockJsonDeserializer> {
    self.json
        .blocks
        .iter()
        .map(|block| BlockJsonDeserializer {
            json: block.clone(),
            config: self.config.clone(),
        })
        .collect()
}

#[allow(dead_code)]
pub fn bytes(&self) -> Option<Vec<u8>> {
if self.json.bytes.is_none() { return None; }
Expand Down Expand Up @@ -301,24 +313,8 @@ impl FunctionJsonDeserializer {
}

#[allow(dead_code)]
pub fn blocks(&self) -> Result<Vec<BlockJsonDeserializer>, Error> {
let mut result = Vec::<BlockJsonDeserializer>::new();
for block in &self.json.blocks {
let string = match serde_json::to_string(block) {
Ok(string) => string,
Err(error) => {
return Err(Error::new(ErrorKind::InvalidData, format!("{}", error)));
}
};
let blockjsondeserializer= match BlockJsonDeserializer::new(string, self.config.clone()) {
Ok(blockjsondeserializer) => blockjsondeserializer,
Err(error) => {
return Err(Error::new(ErrorKind::InvalidData, format!("{}", error)));
}
};
result.push(blockjsondeserializer);
}
Ok(result)
pub fn functions(&self) -> BTreeMap<u64, u64> {
self.json.functions.clone()
}

#[allow(dead_code)]
Expand Down Expand Up @@ -380,11 +376,11 @@ impl FunctionJsonDeserializer {
let mut minhashes = Vec::<f64>::new();
let mut tls_values = Vec::<f64>::new();

for lhs_block in self.blocks()? {
for lhs_block in self.blocks() {
let mut best_minhash: Option<f64> = None;
let mut best_tls: Option<f64> = None;

let results = match lhs_block.compare_many(rhs.blocks()?) {
let results = match lhs_block.compare_many(rhs.blocks()) {
Ok(results) => results,
Err(error) => {
return Err(Error::new(ErrorKind::InvalidData, format!("{}", error)));
Expand Down
Loading

0 comments on commit e1b713c

Please sign in to comment.