-
Notifications
You must be signed in to change notification settings - Fork 2
/
main.rs
173 lines (153 loc) · 5.37 KB
/
main.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
use log_surgeon::lexer::Lexer;
use log_surgeon::lexer::{BufferedFileStream, LexerStream};
use log_surgeon::log_parser::LogParser;
use log_surgeon::parser::SchemaConfig;
use clap::{Arg, Command};
use std::fs;
use std::path::Path;
use std::path::PathBuf;
use std::time::{Duration, Instant};
/// Recursively lists every non-directory entry found beneath `path`.
///
/// Directories are descended into as they are encountered, so the entries of
/// a subdirectory appear in the result at the position where that
/// subdirectory was seen. Directories themselves are never included.
///
/// # Errors
///
/// Returns the first `std::io::Error` produced while reading a directory or
/// one of its entries; nothing collected so far is returned in that case.
fn find_files<P: AsRef<Path>>(path: P) -> Result<Vec<PathBuf>, std::io::Error> {
    // Depth-first walk that appends into one shared accumulator.
    fn walk(dir: &Path, found: &mut Vec<PathBuf>) -> Result<(), std::io::Error> {
        for dir_entry in fs::read_dir(dir)? {
            let candidate = dir_entry?.path();
            if candidate.is_dir() {
                walk(&candidate, found)?;
            } else {
                found.push(candidate);
            }
        }
        Ok(())
    }

    let mut found = Vec::new();
    walk(path.as_ref(), &mut found)?;
    Ok(found)
}
/// Benchmarks `LogParser` over every file in `input_log_paths` and prints a
/// summary to stdout.
///
/// For each file the number of log events and tokens is printed. Only the
/// parse loop is timed: reading the file metadata and setting the parser's
/// input happen before the clock starts. After all files are processed, the
/// total size, token count, elapsed time, and throughput figures are printed.
///
/// # Errors
///
/// Propagates any error returned by `LogParser::new`, `set_input_file`, or
/// `parse_next_log_event`.
///
/// # Panics
///
/// Panics if a path is not valid UTF-8 or if its metadata cannot be read.
fn benchmark_log_parser(
    schema_config: std::rc::Rc<SchemaConfig>,
    input_log_paths: Vec<PathBuf>,
) -> log_surgeon::error_handling::Result<()> {
    // The printed "MB"/"GB" labels use binary (1024-based) units.
    const BYTES_PER_MIB: f64 = 1024.0 * 1024.0;
    const BYTES_PER_GIB: f64 = BYTES_PER_MIB * 1024.0;

    let mut log_parser = LogParser::new(schema_config)?;
    let mut total_duration = Duration::ZERO;
    let mut total_size: u64 = 0;
    let mut total_tokens: usize = 0;

    for log_path in input_log_paths {
        let log_path_str = log_path.to_str().expect("log path is not valid UTF-8");
        println!("Parsing file: {}", log_path_str);
        total_size += log_path
            .metadata()
            .expect("Failed to get file metadata")
            .len();
        log_parser.set_input_file(log_path_str)?;

        // Explicitly sized counters: an unannotated literal would default to
        // `i32` and could overflow on very large inputs.
        let mut num_log_events: usize = 0;
        let mut num_tokens: usize = 0;
        let start = Instant::now();
        while let Some(log_event) = log_parser.parse_next_log_event()? {
            num_log_events += 1;
            num_tokens += log_event.get_num_tokens();
        }
        total_duration += start.elapsed();
        total_tokens += num_tokens;
        println!(
            "Num log events: {}; Num tokens: {}",
            num_log_events, num_tokens
        );
    }

    // Use full floating-point seconds rather than truncated milliseconds so
    // short runs keep their precision, and report a rate of 0 instead of
    // inf/NaN when nothing was timed.
    let total_secs = total_duration.as_secs_f64();
    let per_second = |amount: f64| {
        if total_secs > 0.0 {
            amount / total_secs
        } else {
            0.0
        }
    };

    println!("\nBenchmark log parser:");
    println!("Total size: {}GB", total_size as f64 / BYTES_PER_GIB);
    println!("Total number of tokens: {}", total_tokens);
    println!("Total duration: {}s", total_secs);
    println!(
        "Token throughput: {} per second",
        per_second(total_tokens as f64)
    );
    println!(
        "Parsing throughput: {}MB per second",
        per_second(total_size as f64) / BYTES_PER_MIB
    );
    Ok(())
}
/// Benchmarks `Lexer` over every file in `input_log_paths` and prints a
/// summary to stdout.
///
/// For each file the number of tokens produced is printed. Only the
/// tokenization loop is timed: reading the file metadata and opening the
/// buffered input stream happen before the clock starts. After all files are
/// processed, the total size, token count, elapsed time, and throughput
/// figures are printed.
///
/// # Errors
///
/// Propagates any error returned by `Lexer::new`, `BufferedFileStream::new`,
/// or `get_next_token`.
///
/// # Panics
///
/// Panics if a path is not valid UTF-8 or if its metadata cannot be read.
fn benchmark_lexer(
    schema_config: std::rc::Rc<SchemaConfig>,
    input_log_paths: Vec<PathBuf>,
) -> log_surgeon::error_handling::Result<()> {
    // The printed "MB"/"GB" labels use binary (1024-based) units.
    const BYTES_PER_MIB: f64 = 1024.0 * 1024.0;
    const BYTES_PER_GIB: f64 = BYTES_PER_MIB * 1024.0;

    let mut lexer = Lexer::new(schema_config)?;
    let mut total_duration = Duration::ZERO;
    let mut total_size: u64 = 0;
    let mut total_tokens: usize = 0;

    for log_path in input_log_paths {
        let log_path_str = log_path.to_str().expect("log path is not valid UTF-8");
        println!("Parsing file: {}", log_path_str);
        total_size += log_path
            .metadata()
            .expect("Failed to get file metadata")
            .len();
        let buffered_file = Box::new(BufferedFileStream::new(log_path_str)?);
        lexer.set_input_stream(buffered_file);

        let mut num_tokens: usize = 0;
        let start = Instant::now();
        // Tokens are only counted, so there is no need to bind them.
        while lexer.get_next_token()?.is_some() {
            num_tokens += 1;
        }
        total_duration += start.elapsed();
        total_tokens += num_tokens;
        println!("Num tokens: {}", num_tokens);
    }

    // Use full floating-point seconds rather than truncated milliseconds so
    // short runs keep their precision, and report a rate of 0 instead of
    // inf/NaN when nothing was timed.
    let total_secs = total_duration.as_secs_f64();
    let per_second = |amount: f64| {
        if total_secs > 0.0 {
            amount / total_secs
        } else {
            0.0
        }
    };

    println!("\nBenchmark lexer:");
    println!("Total size: {}GB", total_size as f64 / BYTES_PER_GIB);
    println!("Total number of tokens: {}", total_tokens);
    println!("Total duration: {}s", total_secs);
    println!(
        "Token throughput: {} per second",
        per_second(total_tokens as f64)
    );
    println!(
        "Parsing throughput: {}MB per second",
        per_second(total_size as f64) / BYTES_PER_MIB
    );
    Ok(())
}
/// Command-line entry point for the benchmark.
///
/// Takes two required positional arguments, the schema file and the input
/// directory, plus an optional `--lexer` flag. The schema is parsed, every
/// file under the input directory is collected with `find_files`, and then
/// either the lexer benchmark (with `--lexer`) or the log-parser benchmark
/// (default) is run over those files.
///
/// # Errors
///
/// Propagates errors from parsing the schema and from the selected benchmark.
/// If the input directory cannot be listed, an error message is written to
/// stderr and the process exits with status 1.
fn main() -> log_surgeon::error_handling::Result<()> {
    let matches = Command::new("log-surgeon-example")
        .version("1.0")
        .arg(
            Arg::new("schema")
                .help("Path to the schema file")
                .required(true)
                .value_name("SCHEMA_FILE"),
        )
        .arg(
            Arg::new("input")
                .help("Directory to the input files")
                .required(true)
                .value_name("INPUT_DIR"),
        )
        .arg(
            Arg::new("lexer")
                .long("lexer")
                .help("Benchmark lexer; otherwise benchmark parser")
                .action(clap::ArgAction::SetTrue),
        )
        .get_matches();

    // Both arguments are declared `required(true)` above, so clap rejects the
    // command line before these lookups could come back empty.
    let schema_path = matches
        .get_one::<String>("schema")
        .expect("no schema found");
    let input_dir = matches
        .get_one::<String>("input")
        .expect("no input file found");

    let parsed_schema = SchemaConfig::parse_from_file(schema_path.as_str())?;

    // The directory comes from the user, so report a failure to read it
    // instead of panicking.
    let input_log_paths = find_files(Path::new(input_dir.as_str())).unwrap_or_else(|err| {
        eprintln!("Failed to list input files under {}: {}", input_dir, err);
        std::process::exit(1);
    });

    if matches.get_flag("lexer") {
        benchmark_lexer(parsed_schema, input_log_paths)
    } else {
        benchmark_log_parser(parsed_schema, input_log_paths)
    }
}