From 254f94f7cf1e3ca512f751abbabb4c3e3ce02161 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Sat, 7 Dec 2024 18:43:49 +0100 Subject: [PATCH] network support (#5) * network support * varchar only :( * ureq cleanup * wasm fixes * wasm attempt * try wasm target * Update Cargo.toml * use ehttp --- Cargo.lock | 455 +++++++++++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 2 + src/lib.rs | 257 ++++++++++++++++-------------- 3 files changed, 599 insertions(+), 115 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cb7941c..9652533 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -460,6 +460,26 @@ dependencies = [ "syn 2.0.79", ] +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.79", +] + +[[package]] +name = "document-features" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb6969eaabd2421f8a2775cfd2471a2b634372b4a25d41e3bd647b79912850a0" +dependencies = [ + "litrs", +] + [[package]] name = "duckdb" version = "1.1.1" @@ -492,6 +512,20 @@ dependencies = [ "syn 2.0.79", ] +[[package]] +name = "ehttp" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59a81c221a1e4dad06cb9c9deb19aea1193a5eea084e8cd42d869068132bf876" +dependencies = [ + "document-features", + "js-sys", + "ureq", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + [[package]] name = "equivalent" version = "1.0.1" @@ -548,6 +582,15 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "form_urlencoded" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +dependencies = [ + "percent-encoding", +] + [[package]] name = "funty" version = "2.0.0" @@ -621,6 +664,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + [[package]] name = "iana-time-zone" version = "0.1.61" @@ -644,12 +693,151 @@ dependencies = [ "cc", ] +[[package]] +name = "icu_collections" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" + +[[package]] +name = "icu_normalizer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" + +[[package]] +name = "icu_properties" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" + +[[package]] +name = "icu_provider" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.79", +] + [[package]] name = "ident_case" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" +[[package]] +name = "idna" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + [[package]] name = "indexmap" version = "2.6.0" @@ -786,6 +974,18 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +[[package]] +name = "litemap" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" + +[[package]] +name = "litrs" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ce301924b7887e9d637144fdade93f9dfff9b60981d4ac161db09720d39aa5" + [[package]] name = "log" version = "0.4.22" @@ -920,10 +1120,18 @@ version = "0.1.0" dependencies = [ "duckdb", "duckdb-loadable-macros", + "ehttp", + "hex", "libduckdb-sys", "pcap-parser", ] +[[package]] +name = "percent-encoding" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" + [[package]] name = "pkg-config" version = "0.3.31" @@ -1102,6 +1310,21 @@ dependencies = [ "bytecheck", ] +[[package]] +name = "ring" +version = "0.17.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d" +dependencies = [ + "cc", + "cfg-if", + "getrandom", + "libc", + "spin", + "untrusted", + "windows-sys 0.52.0", +] + [[package]] name = "rkyv" version = "0.7.45" @@ -1169,6 +1392,38 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rustls" +version = "0.23.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "934b404430bb06b3fae2cba809eb45a1ab1aecd64491213d7c3301b88393f8d1" +dependencies = [ + "log", + "once_cell", + "ring", + "rustls-pki-types", + "rustls-webpki", + "subtle", + "zeroize", +] + +[[package]] +name = "rustls-pki-types" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16f1201b3c9a7ee8039bcadc17b7e605e2945b27eee7631788c1bd2b0643674b" + +[[package]] +name = "rustls-webpki" +version = "0.102.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9" +dependencies = [ + "ring", + "rustls-pki-types", + "untrusted", +] + [[package]] name = "rustversion" version = "1.0.17" @@ -1237,6 +1492,18 @@ version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" + +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] name = "static_assertions" version = "1.1.0" @@ -1290,6 +1557,12 @@ dependencies = [ "syn 2.0.79", ] +[[package]] +name = "subtle" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + [[package]] name = "syn" version = "1.0.109" @@ -1324,6 +1597,17 @@ dependencies = [ "syn 2.0.79", ] +[[package]] +name = "synstructure" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.79", +] + [[package]] name = "tap" version = "1.0.1" @@ -1350,6 +1634,16 @@ dependencies = [ "crunchy", ] +[[package]] +name = "tinystr" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" +dependencies = [ + "displaydoc", + "zerovec", +] + [[package]] name = "tinyvec" version = "1.8.0" @@ -1394,6 +1688,51 @@ version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + +[[package]] +name = "ureq" +version = "2.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02d1a66277ed75f640d608235660df48c8e3c19f3b4edb6a263315626cc3c01d" +dependencies = [ + "base64", + "flate2", + "log", + "once_cell", + "rustls", + "rustls-pki-types", + "url", + "webpki-roots", +] + +[[package]] +name = "url" +version = "2.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] + +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + [[package]] name = "uuid" version = "1.10.0" @@ -1444,6 +1783,18 @@ dependencies = [ "wasm-bindgen-shared", ] +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61e9300f63a621e96ed275155c108eb6f843b6a26d053f122ab69724559dc8ed" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + [[package]] name = "wasm-bindgen-macro" version = "0.2.93" @@ -1473,6 +1824,25 @@ version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c62a0a307cb4a311d3a07867860911ca130c3494e8c2719593806c08bc5d0484" +[[package]] +name = "web-sys" +version = "0.3.70" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26fdeaafd9bd129f65e7c031593c24d62186301e0c72c8978fa1678be7d532c0" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "webpki-roots" +version = "0.26.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d642ff16b7e79272ae451b7322067cdc17cadf68c23264be9d94a32319efe7e" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "windows-core" version = "0.52.0" @@ -1573,6 +1943,18 @@ dependencies = [ "memchr", ] +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" + [[package]] name = "wyz" version = "0.5.1" @@ -1593,6 +1975,30 @@ dependencies = [ "rustix", ] +[[package]] +name = "yoke" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.79", + "synstructure", +] + [[package]] name = "zerocopy" version = "0.7.35" @@ -1613,3 +2019,52 @@ dependencies = [ "quote", "syn 2.0.79", ] + +[[package]] +name = "zerofrom" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.79", + "synstructure", +] + +[[package]] +name = "zeroize" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" + +[[package]] +name = "zerovec" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.79", +] diff --git a/Cargo.toml b/Cargo.toml index 691f236..694b610 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,3 +24,5 @@ duckdb = { version = "1.1.1", features = ["vtab-loadable"] } duckdb-loadable-macros = "0.1.3" libduckdb-sys = { version = "1.1.1", features = ["loadable-extension"] } pcap-parser = "0.16.0" +hex = "0.4" +ehttp = "0.5.0" diff --git a/src/lib.rs b/src/lib.rs index 2b48ed7..c32316b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,10 +2,11 @@ extern crate duckdb; extern crate duckdb_loadable_macros; extern crate libduckdb_sys; extern crate pcap_parser; + use std::mem::ManuallyDrop; use duckdb::{ - core::{DataChunkHandle, Inserter, LogicalTypeHandle, LogicalTypeId}, + core::{DataChunkHandle, LogicalTypeHandle, LogicalTypeId, Inserter}, vtab::{BindInfo, Free, FunctionInfo, InitInfo, VTab}, Connection, Result, }; @@ -17,11 +18,14 @@ use std::{ error::Error, ffi::{c_char, CStr, CString}, fs::File, + io::{Read,Cursor}, }; macro_rules! debug_print { ($($arg:tt)*) => { - // eprintln!("[PCAP Debug] {}", format!($($arg)*)); + if std::env::var("DEBUG").is_ok() { + eprintln!("[PCAP Debug] {}", format!($($arg)*)); + } }; } @@ -32,7 +36,7 @@ struct PcapBindData { #[repr(C)] struct PcapInitData { - reader: Option>>, + reader: Option>>>, done: bool, } @@ -50,7 +54,6 @@ struct PcapVTab; impl Free for PcapInitData { fn free(&mut self) { - // Explicitly don't drop the reader to keep file handle alive self.reader = None; } } @@ -67,7 +70,7 @@ impl VTab for PcapVTab { bind.add_result_column("dst_port", LogicalTypeHandle::from(LogicalTypeId::Varchar)); bind.add_result_column("protocol", LogicalTypeHandle::from(LogicalTypeId::Varchar)); bind.add_result_column("length", LogicalTypeHandle::from(LogicalTypeId::Varchar)); - bind.add_result_column("payload", LogicalTypeHandle::from(LogicalTypeId::Varchar)); // Changed from Blob to Varchar + bind.add_result_column("payload", LogicalTypeHandle::from(LogicalTypeId::Varchar)); let filepath = bind.get_parameter(0).to_string(); unsafe { @@ -77,109 +80,139 @@ impl VTab for PcapVTab { } unsafe fn init(info: &InitInfo, data: *mut PcapInitData) -> Result<(), Box> { - let bind_data = info.get_bind_data::(); - let filepath = unsafe { CStr::from_ptr((*bind_data).filepath).to_str()? }; - let file = File::open(filepath)?; - debug_print!("Initializing reader for file: {}", filepath); + let bind_data = info.get_bind_data::(); + let filepath = unsafe { CStr::from_ptr((*bind_data).filepath).to_str()? }; - unsafe { - (*data).reader = Some(ManuallyDrop::new( - LegacyPcapReader::new(65536, file).expect("PcapReader") - )); - (*data).done = false; - } - Ok(()) + debug_print!("Opening file: {}", filepath); + + let reader: Box = if filepath.starts_with("http://") || filepath.starts_with("https://") { + debug_print!("Using HTTP reader for {}", filepath); + + // Create a channel to receive the response + let (tx, rx) = std::sync::mpsc::channel(); + + let request = ehttp::Request::get(filepath); + ehttp::fetch(request, move |result: ehttp::Result| { + tx.send(result).unwrap(); + }); + + // Wait for the response + let response = rx.recv()?.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?; + Box::new(Cursor::new(response.bytes)) + } else { + debug_print!("Using file reader for {}", filepath); + Box::new(File::open(filepath)?) + }; + + unsafe { + (*data).reader = Some(ManuallyDrop::new( + LegacyPcapReader::new(65536, reader).expect("PcapReader") + )); + (*data).done = false; + } + Ok(()) } - // func start unsafe fn func(func: &FunctionInfo, output: &mut DataChunkHandle) -> Result<(), Box> { - let init_data = func.get_init_data::(); - - unsafe { - if (*init_data).done { - output.set_len(0); - return Ok(()); + let init_data = func.get_init_data::(); + + unsafe { + if (*init_data).done { + output.set_len(0); + return Ok(()); + } } + + let reader = unsafe { (*init_data).reader.as_mut() }.unwrap(); + let mut count = 0; + let mut next_result = reader.next(); + + while let Err(PcapError::Incomplete(_)) = next_result { + unsafe { (*init_data).reader.as_mut() }.unwrap().refill()?; + next_result = unsafe { (*init_data).reader.as_mut() }.unwrap().next(); + } + + match next_result { + Ok((offset, block)) => { + let (timestamp, length_str, src_ip, dst_ip, src_port, dst_port, protocol, payload) = match block { + PcapBlockOwned::Legacy(packet) => { + let parsed = Self::parse_packet(&packet.data)?; + let (src_ip, dst_ip, src_port, dst_port, protocol, payload) = parsed; + + let timestamp_micros = packet.ts_sec as i64 * 1_000_000 + packet.ts_usec as i64; + + (timestamp_micros, packet.origlen.to_string(), + src_ip, dst_ip, src_port, dst_port, + protocol, payload) + }, + PcapBlockOwned::LegacyHeader(_) => { + (0, "0".to_string(), "0.0.0.0".to_string(), "0.0.0.0".to_string(), + 0, 0, "UNKNOWN".to_string(), Vec::new()) + }, + _ => { + (0, "0".to_string(), "0.0.0.0".to_string(), "0.0.0.0".to_string(), + 0, 0, "UNKNOWN".to_string(), Vec::new()) + } + }; + + debug_print!("Processing packet: timestamp={}, src={}:{}, dst={}:{}, proto={}, len={}", + timestamp, src_ip, src_port, dst_ip, dst_port, protocol, length_str); + + output.flat_vector(0).insert(count, CString::new(timestamp.to_string())?); + output.flat_vector(1).insert(count, CString::new(src_ip)?); + output.flat_vector(2).insert(count, CString::new(dst_ip)?); + output.flat_vector(3).insert(count, CString::new(src_port.to_string())?); + output.flat_vector(4).insert(count, CString::new(dst_port.to_string())?); + output.flat_vector(5).insert(count, CString::new(protocol)?); + output.flat_vector(6).insert(count, CString::new(length_str)?); + + + let payload_str = if !payload.is_empty() { + if let Ok(utf8_str) = std::str::from_utf8(&payload) { + if utf8_str.chars().all(|c| c.is_ascii_graphic() || c.is_ascii_whitespace()) { + format!("{}", utf8_str) + } else { + let hex_str: Vec = payload.iter() + .take(32) + .map(|b| format!("{:02x}", b)) + .collect(); + format!("{}{}", hex_str.join(" "), + if payload.len() > 32 { " ..." } else { "" }) + } + } else { + let hex_str: Vec = payload.iter() + .take(32) + .map(|b| format!("{:02x}", b)) + .collect(); + format!("{}{}", hex_str.join(" "), + if payload.len() > 32 { " ..." } else { "" }) + } + } else { + "empty".to_string() + }; + output.flat_vector(7).insert(count, CString::new(payload_str)?); + + /* + let hex: String = payload.iter() + .map(|b| format!("{:02x}", b)) + .collect(); + output.flat_vector(7).insert(count, CString::new(hex)?); + */ + + count += 1; + unsafe { (*init_data).reader.as_mut() }.unwrap().consume(offset); + }, + Err(PcapError::Eof) => { + unsafe { (*init_data).done = true; } + output.set_len(count); + return Ok(()); + }, + Err(e) => return Err(Box::new(e)), + } + + output.set_len(count); + Ok(()) } - - let reader = unsafe { (*init_data).reader.as_mut() }.unwrap(); - let mut count = 0; - let mut next_result = reader.next(); - - while let Err(PcapError::Incomplete(_)) = next_result { - unsafe { (*init_data).reader.as_mut() }.unwrap().refill()?; - next_result = unsafe { (*init_data).reader.as_mut() }.unwrap().next(); - } - - match next_result { - Ok((offset, block)) => { - let (ts_sec_str, length_str, src_ip, dst_ip, src_port, dst_port, protocol, payload) = match block { - PcapBlockOwned::Legacy(packet) => { - let parsed = Self::parse_packet(&packet.data)?; - let (src_ip, dst_ip, src_port, dst_port, protocol, payload) = parsed; - - let payload_str = if !payload.is_empty() { - if let Ok(utf8_str) = std::str::from_utf8(&payload) { - if utf8_str.chars().all(|c| c.is_ascii_graphic() || c.is_ascii_whitespace()) { - format!("{}", utf8_str) - } else { - let hex_str: Vec = payload.iter() - .take(32) - .map(|b| format!("{:02x}", b)) - .collect(); - format!("{}{}", hex_str.join(" "), - if payload.len() > 32 { " ..." } else { "" }) - } - } else { - let hex_str: Vec = payload.iter() - .take(32) - .map(|b| format!("{:02x}", b)) - .collect(); - format!("{}{}", hex_str.join(" "), - if payload.len() > 32 { " ..." } else { "" }) - } - } else { - "empty".to_string() - }; - - (packet.ts_sec.to_string(), packet.origlen.to_string(), - src_ip, dst_ip, src_port.to_string(), dst_port.to_string(), - protocol, payload_str) - }, - PcapBlockOwned::LegacyHeader(_) => { - ("0".to_string(), "0".to_string(), "0.0.0.0".to_string(), "0.0.0.0".to_string(), - "0".to_string(), "0".to_string(), "UNKNOWN".to_string(), "empty".to_string()) - }, - _ => { - ("0".to_string(), "0".to_string(), "0.0.0.0".to_string(), "0.0.0.0".to_string(), - "0".to_string(), "0".to_string(), "UNKNOWN".to_string(), "empty".to_string()) - } - }; - - output.flat_vector(0).insert(count, CString::new(ts_sec_str)?); - output.flat_vector(1).insert(count, CString::new(src_ip)?); - output.flat_vector(2).insert(count, CString::new(dst_ip)?); - output.flat_vector(3).insert(count, CString::new(src_port)?); - output.flat_vector(4).insert(count, CString::new(dst_port)?); - output.flat_vector(5).insert(count, CString::new(protocol)?); - output.flat_vector(6).insert(count, CString::new(length_str)?); - output.flat_vector(7).insert(count, CString::new(payload)?); - - count += 1; - unsafe { (*init_data).reader.as_mut() }.unwrap().consume(offset); - }, - Err(PcapError::Eof) => { - unsafe { (*init_data).done = true; } - output.set_len(count); - return Ok(()); - }, - Err(e) => return Err(Box::new(e)), - } - - output.set_len(count); - Ok(()) - } - // func stop fn parameters() -> Option> { Some(vec![LogicalTypeHandle::from(LogicalTypeId::Varchar)]) @@ -197,35 +230,33 @@ impl PcapVTab { debug_print!("Parsing packet of length: {}", data.len()); - if data.len() >= 14 { // Minimum Ethernet frame size + if data.len() >= 14 { let ethertype = u16::from_be_bytes([data[12], data[13]]); debug_print!("Ethertype: 0x{:04x}", ethertype); - if ethertype == 0x0800 && data.len() >= 34 { // IPv4 + if ethertype == 0x0800 && data.len() >= 34 { let ip_header_len = (data[14] & 0x0f) * 4; debug_print!("IP header length: {}", ip_header_len); - // Extract IP addresses src_ip = format!("{}.{}.{}.{}", data[26], data[27], data[28], data[29]); dst_ip = format!("{}.{}.{}.{}", data[30], data[31], data[32], data[33]); - // Get IP protocol let ip_protocol = data[23]; debug_print!("IP Protocol: {}", ip_protocol); let transport_header_start = 14 + ip_header_len as usize; match ip_protocol { - 6 => { // TCP + 6 => { protocol = String::from("TCP"); if data.len() >= transport_header_start + 4 { src_port = u16::from_be_bytes([data[transport_header_start], data[transport_header_start + 1]]); dst_port = u16::from_be_bytes([data[transport_header_start + 2], data[transport_header_start + 3]]); } }, - 17 => { // UDP + 17 => { protocol = String::from("UDP"); if data.len() >= transport_header_start + 4 { src_port = u16::from_be_bytes([data[transport_header_start], data[transport_header_start + 1]]); @@ -235,10 +266,9 @@ impl PcapVTab { _ => protocol = format!("IP({})", ip_protocol), } - // Extract payload let payload_start = transport_header_start + match ip_protocol { - 6 => 20, // TCP header size (without options) - 17 => 8, // UDP header size + 6 => 20, + 17 => 8, _ => 0, }; @@ -246,9 +276,8 @@ impl PcapVTab { payload = data[payload_start..].to_vec(); } - } else if ethertype == 0x86DD { // IPv6 + } else if ethertype == 0x86DD { protocol = String::from("IPv6"); - // TODO: Add IPv6 parsing } } @@ -259,8 +288,6 @@ impl PcapVTab { } } -const EXTENSION_NAME: &str = env!("CARGO_PKG_NAME"); - #[duckdb_entrypoint_c_api(ext_name = "pcap_reader", min_duckdb_version = "v0.0.1")] pub unsafe fn extension_entrypoint(con: Connection) -> Result<(), Box> { con.register_table_function::("pcap_reader")