From f602d079af9275f46cde2e5d578b2e8f9da3fa06 Mon Sep 17 00:00:00 2001 From: Shivani Bhardwaj Date: Fri, 29 Nov 2024 15:40:16 +0530 Subject: [PATCH 1/2] datasets: remove unused fn definition --- src/detect-dataset.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/detect-dataset.c b/src/detect-dataset.c index ae23925f2c11..173a69cb1bf0 100644 --- a/src/detect-dataset.c +++ b/src/detect-dataset.c @@ -47,8 +47,6 @@ #define DETECT_DATASET_CMD_ISNOTSET 2 #define DETECT_DATASET_CMD_ISSET 3 -int DetectDatasetMatch (ThreadVars *, DetectEngineThreadCtx *, Packet *, - const Signature *, const SigMatchCtx *); static int DetectDatasetSetup (DetectEngineCtx *, Signature *, const char *); void DetectDatasetFree (DetectEngineCtx *, void *); From b815e4e4d6018f28772f050fc8e49626705b5998 Mon Sep 17 00:00:00 2001 From: Shivani Bhardwaj Date: Tue, 3 Dec 2024 18:25:18 +0530 Subject: [PATCH 2/2] datasets: move initial file reading to rust In a recent warning reported by scan-build, datasets were found to be using a blocking call in a critical section. datasets.c:187:12: warning: Call to blocking function 'fgets' inside of critical section [unix.BlockInCriticalSection] 187 | while (fgets(line, (int)sizeof(line), fp) != NULL) { | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ datasets.c:292:12: warning: Call to blocking function 'fgets' inside of critical section [unix.BlockInCriticalSection] 292 | while (fgets(line, (int)sizeof(line), fp) != NULL) { | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ datasets.c:368:12: warning: Call to blocking function 'fgets' inside of critical section [unix.BlockInCriticalSection] 368 | while (fgets(line, (int)sizeof(line), fp) != NULL) { | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ datasets.c:442:12: warning: Call to blocking function 'fgets' inside of critical section [unix.BlockInCriticalSection] 442 | while (fgets(line, (int)sizeof(line), fp) != NULL) { | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ datasets.c:512:12: warning: Call to blocking function 'fgets' inside of critical section [unix.BlockInCriticalSection] 512 | while (fgets(line, (int)sizeof(line), fp) != NULL) { | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 5 warnings generated. These calls are blocking in the multi tenant mode where several tenants may be trying to load the same dataset in parallel. In a single tenant mode, this operation is performed as a part of a single thread before the engine startup. In order to evade the warning and simplify the code, the initial file reading is moved to Rust with this commit with a much simpler handling of dataset and datarep. Bug 7398 --- rust/cbindgen.toml | 1 + rust/src/detect/datasets.rs | 105 ++++++++++++++++++++++++++++++++++++ rust/src/detect/mod.rs | 1 + src/datasets-reputation.h | 4 +- src/datasets.c | 75 +++----------------------- src/datasets.h | 1 + 6 files changed, 116 insertions(+), 71 deletions(-) create mode 100644 rust/src/detect/datasets.rs diff --git a/rust/cbindgen.toml b/rust/cbindgen.toml index eac6aa737760..e2730789c808 100644 --- a/rust/cbindgen.toml +++ b/rust/cbindgen.toml @@ -84,6 +84,7 @@ include = [ "FtpEvent", "SCSigTableElmt", "SCTransformTableElmt", + "DataRepType", ] # A list of items to not include in the generated bindings diff --git a/rust/src/detect/datasets.rs b/rust/src/detect/datasets.rs new file mode 100644 index 000000000000..d7c87acfd2b8 --- /dev/null +++ b/rust/src/detect/datasets.rs @@ -0,0 +1,105 @@ +/* Copyright (C) 2024 Open Information Security Foundation + * + * You can copy, redistribute or modify this Program under the terms of + * the GNU General Public License version 2 as published by the Free + * Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +// Author: Shivani Bhardwaj + +//! This module exposes items from the datasets C code to Rust. + +use std::fs::{File, OpenOptions}; +use std::io::{self, BufRead}; +use std::path::Path; +use std::ffi::{c_char, CStr}; +use base64::{Engine, engine::general_purpose::STANDARD}; + +/// Opaque Dataset type defined in C +#[derive(Copy, Clone)] +pub enum Dataset {} + +// Simple C type converted to Rust +#[derive(Debug, PartialEq)] +#[repr(C)] +pub struct DataRepType { + pub value: u16, +} + +// Extern fns operating on the opaque Dataset type above +/// cbindgen:ignore +extern { + pub fn DatasetAdd(set: &Dataset, data: *const u8, len: u32) -> i32; + pub fn DatasetAddwRep(set: &Dataset, data: *const u8, len: u32, rep: *const DataRepType) -> i32; +} + +#[no_mangle] +pub unsafe extern "C" fn ProcessDatasets(set: &Dataset, name: *const c_char, fname: *const c_char, fmode: *const c_char) -> i32 { + let file_string = CStr::from_ptr(fname).to_str().unwrap(); + let mode = CStr::from_ptr(fmode).to_str().unwrap(); + let set_name = CStr::from_ptr(name).to_str().unwrap(); + let filename = Path::new(file_string); + // SCLogNotice!("Path: {:?}", filename); + if let Ok(lines) = read_lines(filename, mode) { + for line in lines.flatten() { +// SCLogNotice!("{}", line); + let v: Vec<&str> = line.split(',').collect(); + // Ignore empty and invalid lines in dataset/rep file + if v.is_empty() || v.len() > 2 { + continue; + } + if v.len() == 1 { + // Dataset + let mut decoded: Vec = vec![]; + if STANDARD.decode_vec(v[0], &mut decoded).is_err() { + // FatalErrorOnInit STODO + SCLogError!("bad base64 encoding {}", set_name); + return -2; + } + DatasetAdd(&set, decoded.as_ptr(), decoded.len() as u32); + } else { + // Datarep + let mut decoded: Vec = vec![]; + if STANDARD.decode_vec(v[0], &mut decoded).is_err() { + // FatalErrorOnInit STODO + SCLogError!("bad base64 encoding {}", set_name); + return -2; + } + if let Ok(val) = v[1].to_string().parse::() { + let rep: DataRepType = DataRepType { value: val }; + DatasetAddwRep(&set, decoded.as_ptr(), decoded.len() as u32, &rep); + } else { + // FatalErrorOnInit STODO + SCLogError!("Invalid datarep value {}", set_name); + return -2; + } + } + } + } else { + SCLogNotice!("Couldn't open file"); + return -1; + } + SCLogNotice!("All OK from rust parser"); + 0 +} + +fn read_lines

(filename: P, fmode: &str) -> io::Result>> +where P: AsRef, { + let file: File; + if fmode == "r" { + file = File::open(filename)?; + } else { + file = OpenOptions::new().append(true).create(true).open(filename)?; + } + Ok(io::BufReader::new(file).lines()) +} diff --git a/rust/src/detect/mod.rs b/rust/src/detect/mod.rs index 1857c22ee2b2..5ade8bf5d9b1 100644 --- a/rust/src/detect/mod.rs +++ b/rust/src/detect/mod.rs @@ -29,6 +29,7 @@ pub mod transforms; pub mod uint; pub mod uri; pub mod tojson; +pub mod datasets; use crate::core::AppProto; use std::os::raw::{c_int, c_void}; diff --git a/src/datasets-reputation.h b/src/datasets-reputation.h index 3483d823cd6e..18ced6803705 100644 --- a/src/datasets-reputation.h +++ b/src/datasets-reputation.h @@ -24,9 +24,7 @@ #ifndef SURICATA_DATASETS_REPUTATION_H #define SURICATA_DATASETS_REPUTATION_H -typedef struct DataRepType { - uint16_t value; -} DataRepType; +#include "rust-bindings.h" typedef struct DataRepResultType { bool found; diff --git a/src/datasets.c b/src/datasets.c index 402c7d34fe99..a64f840f94a3 100644 --- a/src/datasets.c +++ b/src/datasets.c @@ -44,8 +44,7 @@ SCMutex sets_lock = SCMUTEX_INITIALIZER; static Dataset *sets = NULL; static uint32_t set_ids = 0; -static int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, - DataRepType *rep); +int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, DataRepType *rep); static inline void DatasetUnlockData(THashData *d) { @@ -496,80 +495,21 @@ static int DatasetLoadString(Dataset *set) return 0; SCLogConfig("dataset: %s loading from '%s'", set->name, set->load); + const char *fopen_mode = "r"; if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) { fopen_mode = "a+"; } - FILE *fp = fopen(set->load, fopen_mode); - if (fp == NULL) { - SCLogError("fopen '%s' failed: %s", set->load, strerror(errno)); + int retval = ProcessDatasets(set, set->name, set->load, fopen_mode); + if (retval == -2) { + FatalErrorOnInit("dataset %s could not be processed", set->name); + } else if (retval == -1) { return -1; } - uint32_t cnt = 0; - char line[1024]; - while (fgets(line, (int)sizeof(line), fp) != NULL) { - if (strlen(line) <= 1) - continue; - - char *r = strchr(line, ','); - if (r == NULL) { - line[strlen(line) - 1] = '\0'; - SCLogDebug("line: '%s'", line); - uint32_t decoded_size = Base64DecodeBufferSize(strlen(line)); - // coverity[alloc_strlen : FALSE] - uint8_t decoded[decoded_size]; - uint32_t num_decoded = - Base64Decode((const uint8_t *)line, strlen(line), Base64ModeStrict, decoded); - if (num_decoded == 0 && strlen(line) > 0) { - FatalErrorOnInit("bad base64 encoding %s/%s", set->name, set->load); - continue; - } - - if (DatasetAdd(set, (const uint8_t *)decoded, num_decoded) < 0) { - FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load); - continue; - } - cnt++; - } else { - line[strlen(line) - 1] = '\0'; - SCLogDebug("line: '%s'", line); - - *r = '\0'; - - uint32_t decoded_size = Base64DecodeBufferSize(strlen(line)); - uint8_t decoded[decoded_size]; - uint32_t num_decoded = - Base64Decode((const uint8_t *)line, strlen(line), Base64ModeStrict, decoded); - if (num_decoded == 0) { - FatalErrorOnInit("bad base64 encoding %s/%s", set->name, set->load); - continue; - } - - r++; - SCLogDebug("r '%s'", r); - - DataRepType rep = { .value = 0 }; - if (ParseRepLine(r, strlen(r), &rep) < 0) { - FatalErrorOnInit("die: bad rep"); - continue; - } - SCLogDebug("rep %u", rep.value); - - if (DatasetAddwRep(set, (const uint8_t *)decoded, num_decoded, &rep) < 0) { - FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load); - continue; - } - cnt++; - - SCLogDebug("line with rep %s, %s", line, r); - } - } THashConsolidateMemcap(set->hash); - fclose(fp); - SCLogConfig("dataset: %s loaded %u records", set->name, cnt); return 0; } @@ -1572,8 +1512,7 @@ int DatasetAdd(Dataset *set, const uint8_t *data, const uint32_t data_len) return -1; } -static int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, - DataRepType *rep) +int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, DataRepType *rep) { if (set == NULL) return -1; diff --git a/src/datasets.h b/src/datasets.h index 86bfed02b22f..1abfa889baa6 100644 --- a/src/datasets.h +++ b/src/datasets.h @@ -19,6 +19,7 @@ #define SURICATA_DATASETS_H #include "util-thash.h" +#include "rust.h" #include "datasets-reputation.h" int DatasetsInit(void);