Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Datasets locking/v3 #12216

Draft
wants to merge 2 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions rust/cbindgen.toml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ include = [
"FtpEvent",
"SCSigTableElmt",
"SCTransformTableElmt",
"DataRepType",
]

# A list of items to not include in the generated bindings
Expand Down
105 changes: 105 additions & 0 deletions rust/src/detect/datasets.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
/* Copyright (C) 2024 Open Information Security Foundation
*
* You can copy, redistribute or modify this Program under the terms of
* the GNU General Public License version 2 as published by the Free
* Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/

// Author: Shivani Bhardwaj <[email protected]>

//! This module exposes items from the datasets C code to Rust.

use std::fs::{File, OpenOptions};
use std::io::{self, BufRead};
use std::path::Path;
use std::ffi::{c_char, CStr};
use base64::{Engine, engine::general_purpose::STANDARD};

/// Handle type standing in for the `Dataset` structure that is defined on the C side.
///
/// Rust never constructs or inspects a value of this type; it is only used
/// behind references handed over by C.
#[derive(Clone, Copy)]
pub enum Dataset {}

/// Reputation value attached to a datarep entry.
///
/// Laid out as a C struct with a single `u16` field so it can be shared with
/// the C code through the generated bindings.
#[repr(C)]
#[derive(Debug, PartialEq)]
pub struct DataRepType {
    pub value: u16,
}

// Extern fns operating on the opaque Dataset type above
/// cbindgen:ignore
extern {
pub fn DatasetAdd(set: &Dataset, data: *const u8, len: u32) -> i32;
pub fn DatasetAddwRep(set: &Dataset, data: *const u8, len: u32, rep: *const DataRepType) -> i32;
}

#[no_mangle]
pub unsafe extern "C" fn ProcessDatasets(set: &Dataset, name: *const c_char, fname: *const c_char, fmode: *const c_char) -> i32 {
let file_string = CStr::from_ptr(fname).to_str().unwrap();
let mode = CStr::from_ptr(fmode).to_str().unwrap();
let set_name = CStr::from_ptr(name).to_str().unwrap();
let filename = Path::new(file_string);
// SCLogNotice!("Path: {:?}", filename);
if let Ok(lines) = read_lines(filename, mode) {
for line in lines.flatten() {
Copy link
Member Author

@inashivb inashivb Dec 4, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, something is going off here when the file is opened in append mode.
Edit: It's really bad. There's an infinite loop during the flatten call. Debugging..

// SCLogNotice!("{}", line);
let v: Vec<&str> = line.split(',').collect();
// Ignore empty and invalid lines in dataset/rep file
if v.is_empty() || v.len() > 2 {
continue;
}
if v.len() == 1 {
// Dataset
let mut decoded: Vec<u8> = vec![];
if STANDARD.decode_vec(v[0], &mut decoded).is_err() {
// FatalErrorOnInit STODO
SCLogError!("bad base64 encoding {}", set_name);
return -2;
}
DatasetAdd(&set, decoded.as_ptr(), decoded.len() as u32);
} else {
// Datarep
let mut decoded: Vec<u8> = vec![];
if STANDARD.decode_vec(v[0], &mut decoded).is_err() {
// FatalErrorOnInit STODO
SCLogError!("bad base64 encoding {}", set_name);
return -2;
}
if let Ok(val) = v[1].to_string().parse::<u16>() {
let rep: DataRepType = DataRepType { value: val };
DatasetAddwRep(&set, decoded.as_ptr(), decoded.len() as u32, &rep);
} else {
// FatalErrorOnInit STODO
SCLogError!("Invalid datarep value {}", set_name);
return -2;
}
}
}
} else {
SCLogNotice!("Couldn't open file");
return -1;
}
SCLogNotice!("All OK from rust parser");
0
}

/// Open `filename` and return an iterator over its lines.
///
/// `fmode` follows the `fopen` mode strings passed by the C caller: `"r"` opens
/// the file read-only; any other value is treated like `"a+"`, i.e. the file is
/// created if it does not exist and opened for both reading and appending.
///
/// Returns the error from opening the file if that fails.
fn read_lines<P>(filename: P, fmode: &str) -> io::Result<io::Lines<io::BufReader<File>>>
where
    P: AsRef<Path>,
{
    let file = if fmode == "r" {
        File::open(filename)?
    } else {
        // `append(true)` on its own yields a write-only handle, so every read
        // from it fails; `read(true)` is needed to get `a+` behaviour.
        OpenOptions::new().read(true).append(true).create(true).open(filename)?
    };
    Ok(io::BufReader::new(file).lines())
}
1 change: 1 addition & 0 deletions rust/src/detect/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ pub mod transforms;
pub mod uint;
pub mod uri;
pub mod tojson;
pub mod datasets;

use crate::core::AppProto;
use std::os::raw::{c_int, c_void};
Expand Down
4 changes: 1 addition & 3 deletions src/datasets-reputation.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,7 @@
#ifndef SURICATA_DATASETS_REPUTATION_H
#define SURICATA_DATASETS_REPUTATION_H

typedef struct DataRepType {
uint16_t value;
} DataRepType;
#include "rust-bindings.h"

typedef struct DataRepResultType {
bool found;
Expand Down
75 changes: 7 additions & 68 deletions src/datasets.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,7 @@ SCMutex sets_lock = SCMUTEX_INITIALIZER;
static Dataset *sets = NULL;
static uint32_t set_ids = 0;

static int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
DataRepType *rep);
int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, DataRepType *rep);

static inline void DatasetUnlockData(THashData *d)
{
Expand Down Expand Up @@ -496,80 +495,21 @@ static int DatasetLoadString(Dataset *set)
return 0;

SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);

const char *fopen_mode = "r";
if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
fopen_mode = "a+";
}

FILE *fp = fopen(set->load, fopen_mode);
if (fp == NULL) {
SCLogError("fopen '%s' failed: %s", set->load, strerror(errno));
int retval = ProcessDatasets(set, set->name, set->load, fopen_mode);
if (retval == -2) {
FatalErrorOnInit("dataset %s could not be processed", set->name);
} else if (retval == -1) {
return -1;
}

uint32_t cnt = 0;
char line[1024];
while (fgets(line, (int)sizeof(line), fp) != NULL) {
if (strlen(line) <= 1)
continue;

char *r = strchr(line, ',');
if (r == NULL) {
line[strlen(line) - 1] = '\0';
SCLogDebug("line: '%s'", line);
uint32_t decoded_size = Base64DecodeBufferSize(strlen(line));
// coverity[alloc_strlen : FALSE]
uint8_t decoded[decoded_size];
uint32_t num_decoded =
Base64Decode((const uint8_t *)line, strlen(line), Base64ModeStrict, decoded);
if (num_decoded == 0 && strlen(line) > 0) {
FatalErrorOnInit("bad base64 encoding %s/%s", set->name, set->load);
continue;
}

if (DatasetAdd(set, (const uint8_t *)decoded, num_decoded) < 0) {
FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load);
continue;
}
cnt++;
} else {
line[strlen(line) - 1] = '\0';
SCLogDebug("line: '%s'", line);

*r = '\0';

uint32_t decoded_size = Base64DecodeBufferSize(strlen(line));
uint8_t decoded[decoded_size];
uint32_t num_decoded =
Base64Decode((const uint8_t *)line, strlen(line), Base64ModeStrict, decoded);
if (num_decoded == 0) {
FatalErrorOnInit("bad base64 encoding %s/%s", set->name, set->load);
continue;
}

r++;
SCLogDebug("r '%s'", r);

DataRepType rep = { .value = 0 };
if (ParseRepLine(r, strlen(r), &rep) < 0) {
FatalErrorOnInit("die: bad rep");
continue;
}
SCLogDebug("rep %u", rep.value);

if (DatasetAddwRep(set, (const uint8_t *)decoded, num_decoded, &rep) < 0) {
FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load);
continue;
}
cnt++;

SCLogDebug("line with rep %s, %s", line, r);
}
}
THashConsolidateMemcap(set->hash);

fclose(fp);
SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
return 0;
}

Expand Down Expand Up @@ -1572,8 +1512,7 @@ int DatasetAdd(Dataset *set, const uint8_t *data, const uint32_t data_len)
return -1;
}

static int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
DataRepType *rep)
int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, DataRepType *rep)
{
if (set == NULL)
return -1;
Expand Down
1 change: 1 addition & 0 deletions src/datasets.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#define SURICATA_DATASETS_H

#include "util-thash.h"
#include "rust.h"
#include "datasets-reputation.h"

int DatasetsInit(void);
Expand Down
2 changes: 0 additions & 2 deletions src/detect-dataset.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,6 @@
#define DETECT_DATASET_CMD_ISNOTSET 2
#define DETECT_DATASET_CMD_ISSET 3

int DetectDatasetMatch (ThreadVars *, DetectEngineThreadCtx *, Packet *,
const Signature *, const SigMatchCtx *);
static int DetectDatasetSetup (DetectEngineCtx *, Signature *, const char *);
void DetectDatasetFree (DetectEngineCtx *, void *);

Expand Down
Loading