Skip to content

Commit

Permalink
Python Hashing Bindings
Browse files Browse the repository at this point in the history
- Include Python Hashing Bindings
  • Loading branch information
c3rb3ru5d3d53c committed Dec 16, 2024
1 parent 9976cd1 commit b86deca
Show file tree
Hide file tree
Showing 10 changed files with 4,685 additions and 5 deletions.
4,631 changes: 4,631 additions & 0 deletions scripts/plugins/binlex.py

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions src/bin/blcompare.rs
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ fn compare_json_entries(json_lhs: &JSON, json_rhs: &JSON, non_contiguous_thresho
}

if lhs_contiguous == true && rhs_contiguous == true && lhs_minhash.is_some() && rhs_minhash.is_some() {
minhash_similarity = Some(MinHash32::jaccard_similarity_from_hexdigests(&lhs_minhash.clone().unwrap(), &rhs_minhash.clone().unwrap()));
minhash_similarity = Some(MinHash32::compare_jaccard_similarity(&lhs_minhash.clone().unwrap(), &rhs_minhash.clone().unwrap()));
}

// Handle Non-Contiguous Function Similarity
Expand Down Expand Up @@ -454,7 +454,7 @@ fn calculate_non_contiguous_minhash_similarity(lhs_blocks: &[Value], rhs_blocks:
let mut best_similarity: Option<f64> = None;

for rhs_tlsh in &rhs_minhash_values {
let similarity = MinHash32::jaccard_similarity_from_hexdigests(&lhs_tlsh.clone(), &rhs_tlsh.clone());
let similarity = MinHash32::compare_jaccard_similarity(&lhs_tlsh.clone(), &rhs_tlsh.clone());
best_similarity = match best_similarity {
Some(current_best) => Some(current_best.max(similarity)),
None => Some(similarity),
Expand Down
7 changes: 7 additions & 0 deletions src/bindings/python/src/hashing/minhash.rs
Original file line number Diff line number Diff line change
Expand Up @@ -188,10 +188,17 @@ impl MinHash32 {
seed: seed,
}
}

// Python-visible wrapper: builds the core MinHash32 hasher from the values
// stored on this binding object and returns whatever digest it produces.
// The result is `None` whenever the core hasher yields no digest.
#[pyo3(text_signature = "($self)")]
pub fn hexdigest(&self) -> Option<String> {
    let hasher = InnerMinHash32::new(
        &self.bytes,
        self.num_hashes,
        self.shingle_size,
        self.seed,
    );
    hasher.hexdigest()
}

// Static Python-visible wrapper: forwards two MinHash hexdigest strings to
// the core implementation and hands back its similarity score unchanged.
#[staticmethod]
#[pyo3(text_signature = "(lhs, rhs)")]
pub fn compare_jaccard_similarity(lhs: String, rhs: String) -> f64 {
    let left_digest: &str = lhs.as_str();
    let right_digest: &str = rhs.as_str();
    InnerMinHash32::compare_jaccard_similarity(left_digest, right_digest)
}
}


Expand Down
9 changes: 8 additions & 1 deletion src/bindings/python/src/hashing/tlsh.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@
// Library.

use pyo3::prelude::*;

use pyo3::exceptions::PyValueError;
use binlex::hashing::tlsh::TLSH as InnerTLSH;

#[pyclass]
Expand All @@ -188,6 +188,13 @@ impl TLSH {
InnerTLSH::new(&self.bytes, mininum_byte_size).hexdigest()
}

// Static Python-visible wrapper around the core TLSH comparison.
// On success the core result is returned as-is; on failure the core error
// is rendered to a string and raised in Python as a `ValueError`.
#[staticmethod]
#[pyo3(text_signature = "(lhs, rhs)")]
pub fn compare(lhs: String, rhs: String) -> PyResult<u32> {
    match InnerTLSH::compare(lhs, rhs) {
        Ok(score) => Ok(score),
        Err(failure) => Err(PyValueError::new_err(failure.to_string())),
    }
}

}


Expand Down
2 changes: 2 additions & 0 deletions src/bindings/python/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ use crate::disassemblers::disassemblers_init;
use crate::controlflow::controlflow_init;
use crate::genetics::genitics_init;
use crate::global::global_init;
use crate::hashing::hashing_init;

use pyo3::{prelude::*, wrap_pymodule};

Expand All @@ -196,6 +197,7 @@ fn binlex(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_wrapped(wrap_pymodule!(binary_init))?;
m.add_wrapped(wrap_pymodule!(disassemblers_init))?;
m.add_wrapped(wrap_pymodule!(genitics_init))?;
m.add_wrapped(wrap_pymodule!(hashing_init))?;
m.add_class::<Binary>()?;
m.add_class::<Architecture>()?;
m.add_class::<Config>()?;
Expand Down
11 changes: 11 additions & 0 deletions src/controlflow/block.rs
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,17 @@ pub struct BlockJson {
pub attributes: Option<Value>,
}

impl BlockJson {
    /// Deserializes a `BlockJson` from its JSON text representation.
    ///
    /// # Arguments
    ///
    /// * `json_str` - The JSON document to parse.
    ///
    /// # Errors
    ///
    /// Returns the `serde_json::Error` reported by `serde_json::from_str`
    /// when the input cannot be deserialized into a `BlockJson`.
    pub fn from_json(json_str: &str) -> Result<Self, serde_json::Error> {
        let parsed = serde_json::from_str::<Self>(json_str)?;
        Ok(parsed)
    }
}

/// Represents a control flow block within a graph.
#[derive(Clone)]
pub struct Block <'block>{
Expand Down
11 changes: 11 additions & 0 deletions src/controlflow/function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,17 @@ pub struct FunctionJson {
pub attributes: Option<Value>,
}

impl FunctionJson {
    /// Deserializes a `FunctionJson` from its JSON text representation.
    ///
    /// # Arguments
    ///
    /// * `json_str` - The JSON document to parse.
    ///
    /// # Errors
    ///
    /// Returns the `serde_json::Error` reported by `serde_json::from_str`
    /// when the input cannot be deserialized into a `FunctionJson`.
    pub fn from_json(json_str: &str) -> Result<Self, serde_json::Error> {
        let parsed = serde_json::from_str::<Self>(json_str)?;
        Ok(parsed)
    }
}

/// Represents a control flow function within a graph.
#[derive(Clone)]
pub struct Function <'function>{
Expand Down
11 changes: 11 additions & 0 deletions src/controlflow/instruction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,17 @@ pub struct InstructionJson {
pub attributes: Option<Value>,
}

impl InstructionJson {
    /// Deserializes an `InstructionJson` from its JSON text representation.
    ///
    /// # Arguments
    ///
    /// * `json_str` - The JSON document to parse.
    ///
    /// # Errors
    ///
    /// Returns the `serde_json::Error` reported by `serde_json::from_str`
    /// when the input cannot be deserialized into an `InstructionJson`.
    pub fn from_json(json_str: &str) -> Result<Self, serde_json::Error> {
        let parsed = serde_json::from_str::<Self>(json_str)?;
        Ok(parsed)
    }
}

impl Instruction {
/// Creates a new `Instruction` with the specified address.
///
Expand Down
2 changes: 1 addition & 1 deletion src/genetics/chromosome.rs
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,7 @@ impl Chromosome {
let rhs_minhash = rhs.minhash();
let mut minhash: Option<f64> = None;
if lhs_minhash.is_some() && rhs_minhash.is_some() {
minhash = Some(MinHash32::jaccard_similarity_from_hexdigests(&lhs_minhash.unwrap(), &rhs_minhash.unwrap()));
minhash = Some(MinHash32::compare_jaccard_similarity(&lhs_minhash.unwrap(), &rhs_minhash.unwrap()));
}
let lhs_tlsh = self.tlsh();
let rhs_tlsh = rhs.tlsh();
Expand Down
2 changes: 1 addition & 1 deletion src/hashing/minhash.rs
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ impl <'minhash32> MinHash32 <'minhash32> {
///
/// Returns a `f64` value representing the Jaccard similarity between the two signatures.
/// If the signatures have different lengths or cannot be parsed, it returns `0.0`.
pub fn jaccard_similarity_from_hexdigests(hexdigest1: &str, hexdigest2: &str) -> f64 {
pub fn compare_jaccard_similarity(hexdigest1: &str, hexdigest2: &str) -> f64 {
let hash1 = Self::parse_hexdigest(hexdigest1);
let hash2 = Self::parse_hexdigest(hexdigest2);

Expand Down

0 comments on commit b86deca

Please sign in to comment.