Skip to content

Commit

Permalink
Implement the grep_file_contents action.
Browse files Browse the repository at this point in the history
  • Loading branch information
panhania authored Sep 27, 2024
1 parent 48d206d commit e8f8876
Show file tree
Hide file tree
Showing 7 changed files with 279 additions and 0 deletions.
1 change: 1 addition & 0 deletions crates/rrg-proto/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ const PROTOS: &'static [&'static str] = &[
"../../proto/rrg/action/get_filesystem_timeline.proto",
"../../proto/rrg/action/get_system_metadata.proto",
"../../proto/rrg/action/get_winreg_value.proto",
"../../proto/rrg/action/grep_file_contents.proto",
"../../proto/rrg/action/list_connections.proto",
"../../proto/rrg/action/list_interfaces.proto",
"../../proto/rrg/action/list_mounts.proto",
Expand Down
2 changes: 2 additions & 0 deletions crates/rrg/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ default = [
"action-get_system_metadata",
"action-get_file_metadata",
"action-get_file_contents",
"action-grep_file_contents",
"action-get_filesystem_timeline",
"action-list_connections",
"action-list_interfaces",
Expand All @@ -23,6 +24,7 @@ default = [
action-get_system_metadata = []
action-get_file_metadata = []
action-get_file_contents = ["dep:sha2"]
action-grep_file_contents = []
action-get_filesystem_timeline = ["dep:flate2", "dep:sha2"]
action-list_connections = []
action-list_interfaces = []
Expand Down
7 changes: 7 additions & 0 deletions crates/rrg/src/action.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ pub mod get_file_metadata;
#[cfg(feature = "action-get_file_contents")]
pub mod get_file_contents;

#[cfg(feature = "action-grep_file_contents")]
pub mod grep_file_contents;

#[cfg(feature = "action-get_filesystem_timeline")]
pub mod get_filesystem_timeline;

Expand Down Expand Up @@ -86,6 +89,10 @@ where
GetFileContents => {
handle(session, request, self::get_file_contents::handle)
}
#[cfg(feature = "action-grep_file_contents")]
GrepFileContents => {
handle(session, request, self::grep_file_contents::handle)
}
#[cfg(feature = "action-get_filesystem_timeline")]
GetFilesystemTimeline => {
handle(session, request, self::get_filesystem_timeline::handle)
Expand Down
225 changes: 225 additions & 0 deletions crates/rrg/src/action/grep_file_contents.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
// Copyright 2024 Google LLC
//
// Use of this source code is governed by an MIT-style license that can be found
// in the LICENSE file or at https://opensource.org/licenses/MIT.

use std::path::PathBuf;

/// Arguments of the `grep_file_contents` action.
///
/// Instances are normally created from the corresponding Protocol Buffers
/// message through the `crate::request::Args` implementation.
pub struct Args {
    /// Path to the file to grep the contents of.
    path: PathBuf,
    /// Regular expression to search for in the file contents.
    ///
    /// The expression is applied to each line of the file separately, so a
    /// single match never spans more than one line.
    regex: regex::Regex,
}

/// Result of the `grep_file_contents` action.
///
/// One item is produced for every match of the regular expression.
pub struct Item {
    /// Byte offset within the file from which the content matched.
    ///
    /// The offset is counted from the beginning of the file and points at the
    /// first byte of the match.
    offset: u64,
    /// Content that matched the specified regular expression.
    content: String,
}

/// Handles invocations of the `grep_file_contents` action.
///
/// The file at `args.path` is read line by line and every match of
/// `args.regex` found within a line is passed to `session.reply` as a separate
/// [`Item`]. Reported offsets are byte offsets counted from the beginning of
/// the file.
///
/// Line terminators (`\n` and `\r\n`) are not part of the text the expression
/// is matched against. This makes the end anchor (`$`) work on every line and
/// keeps terminators out of the reported content.
///
/// # Errors
///
/// Returns an error if the file cannot be opened or read (which includes file
/// content that is not valid UTF-8) or if sending a reply fails.
pub fn handle<S>(session: &mut S, args: Args) -> crate::session::Result<()>
where
    S: crate::session::Session,
{
    use std::io::BufRead as _;

    let file = std::fs::File::open(&args.path)
        .map_err(crate::session::Error::action)?;

    let mut file = std::io::BufReader::new(file);

    // TODO(@panhania): Read to a buffer of predefined size so that we do not
    // allow reading lines of arbitrary length.
    let mut line = String::new();
    // Byte offset of the beginning of the current line within the file.
    let mut offset = 0;

    loop {
        line.clear();
        let len = match file.read_line(&mut line) {
            // No bytes read means that we reached the end of the file.
            Ok(0) => return Ok(()),
            Ok(len) => len,
            Err(error) => return Err(crate::session::Error::action(error)),
        };

        // `read_line` keeps the line terminator in the buffer. If we left it
        // there, `$` would only ever match on an unterminated last line and
        // patterns like `[^a]+` would return the terminator as a part of the
        // matched content.
        let text = match line.strip_suffix('\n') {
            Some(text) => text.strip_suffix('\r').unwrap_or(text),
            None => line.as_str(),
        };

        for matcz in args.regex.find_iter(text) {
            session.reply(Item {
                offset: offset + matcz.start() as u64,
                content: matcz.as_str().to_string(),
            })?;
        }

        // We advance by the number of bytes actually consumed (terminator
        // included) so that offsets stay relative to the file beginning.
        offset += len as u64;
    }
}

impl crate::request::Args for Args {

    type Proto = rrg_proto::grep_file_contents::Args;

    /// Converts the Protocol Buffers message into action arguments.
    ///
    /// Fails with an invalid field error if the `path` field cannot be turned
    /// into a native path or if the `regex` field does not contain a valid
    /// regular expression.
    fn from_proto(mut proto: Self::Proto) -> Result<Args, crate::request::ParseArgsError> {
        use crate::request::ParseArgsError;

        let path = match PathBuf::try_from(proto.take_path()) {
            Ok(path) => path,
            Err(error) => {
                return Err(ParseArgsError::invalid_field("path", error));
            }
        };

        let regex = match regex::Regex::new(proto.regex()) {
            Ok(regex) => regex,
            Err(error) => {
                return Err(ParseArgsError::invalid_field("regex", error));
            }
        };

        Ok(Args { path, regex })
    }
}

impl crate::response::Item for Item {

type Proto = rrg_proto::grep_file_contents::Result;

fn into_proto(self) -> Self::Proto {
let mut proto = Self::Proto::default();
proto.set_offset(self.offset);
proto.set_content(self.content);

proto
}
}

#[cfg(test)]
mod tests {

    use super::*;

    // An empty file has no lines to match against, so even an expression that
    // matches the empty string should yield no results.
    #[test]
    fn handle_empty_file_empty_regex() {
        let tempdir = tempfile::tempdir()
            .unwrap();

        std::fs::write(tempdir.path().join("file"), b"")
            .unwrap();

        let args = Args {
            path: tempdir.path().join("file"),
            regex: regex::Regex::new("").unwrap(),
        };

        let mut session = crate::session::FakeSession::new();
        handle(&mut session, args)
            .unwrap();

        assert_eq!(session.reply_count(), 0);
    }

    #[test]
    fn handle_empty_file_non_empty_regex() {
        let tempdir = tempfile::tempdir()
            .unwrap();

        std::fs::write(tempdir.path().join("file"), b"")
            .unwrap();

        let args = Args {
            path: tempdir.path().join("file"),
            regex: regex::Regex::new("foo").unwrap(),
        };

        let mut session = crate::session::FakeSession::new();
        handle(&mut session, args)
            .unwrap();

        assert_eq!(session.reply_count(), 0);
    }

    #[test]
    fn handle_regex_no_matches() {
        let tempdir = tempfile::tempdir()
            .unwrap();

        std::fs::write(tempdir.path().join("file"), b"foo")
            .unwrap();

        let args = Args {
            path: tempdir.path().join("file"),
            regex: regex::Regex::new("bar").unwrap(),
        };

        let mut session = crate::session::FakeSession::new();
        handle(&mut session, args)
            .unwrap();

        assert_eq!(session.reply_count(), 0);
    }

    #[test]
    fn handle_regex_single_match() {
        let tempdir = tempfile::tempdir()
            .unwrap();

        std::fs::write(tempdir.path().join("file"), b"bar")
            .unwrap();

        let args = Args {
            path: tempdir.path().join("file"),
            regex: regex::Regex::new("bar").unwrap(),
        };

        let mut session = crate::session::FakeSession::new();
        handle(&mut session, args)
            .unwrap();

        assert_eq!(session.reply_count(), 1);

        let item = session.reply::<Item>(0);
        assert_eq!(item.offset, 0);
        assert_eq!(item.content, "bar");
    }

    // Offsets have to be relative to the beginning of the file rather than to
    // the beginning of the line in which the match was found.
    #[test]
    fn handle_regex_multiple_matches_multiple_lines() {
        let tempdir = tempfile::tempdir()
            .unwrap();

        std::fs::write(tempdir.path().join("file"), b"bar\nbas\nbaz\nbar")
            .unwrap();

        let args = Args {
            path: tempdir.path().join("file"),
            regex: regex::Regex::new("ba[rz]").unwrap(),
        };

        let mut session = crate::session::FakeSession::new();
        handle(&mut session, args)
            .unwrap();

        assert_eq!(session.reply_count(), 3);

        let item = session.reply::<Item>(0);
        assert_eq!(item.offset, 0);
        assert_eq!(item.content, "bar");

        let item = session.reply::<Item>(1);
        assert_eq!(item.offset, 8);
        assert_eq!(item.content, "baz");

        let item = session.reply::<Item>(2);
        assert_eq!(item.offset, 12);
        assert_eq!(item.content, "bar");
    }

    #[test]
    fn handle_regex_multiple_matches_single_line() {
        let tempdir = tempfile::tempdir()
            .unwrap();

        std::fs::write(tempdir.path().join("file"), b"bar bas baz bar")
            .unwrap();

        let args = Args {
            path: tempdir.path().join("file"),
            regex: regex::Regex::new("ba[rz]").unwrap(),
        };

        let mut session = crate::session::FakeSession::new();
        handle(&mut session, args)
            .unwrap();

        assert_eq!(session.reply_count(), 3);

        let item = session.reply::<Item>(0);
        assert_eq!(item.offset, 0);
        assert_eq!(item.content, "bar");

        let item = session.reply::<Item>(1);
        assert_eq!(item.offset, 8);
        assert_eq!(item.content, "baz");

        let item = session.reply::<Item>(2);
        assert_eq!(item.offset, 12);
        assert_eq!(item.content, "bar");
    }
}
4 changes: 4 additions & 0 deletions crates/rrg/src/request.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ pub enum Action {
GetFileContents,
/// Get hash of the specified file.
GetFileHash,
/// Grep the specified file for a pattern.
GrepFileContents,
/// List contents of a directory.
ListDirectory,
/// List processes available on the system.
Expand Down Expand Up @@ -57,6 +59,7 @@ impl std::fmt::Display for Action {
Action::GetFileMetadata => write!(fmt, "get_file_metadata"),
Action::GetFileContents => write!(fmt, "get_file_contents"),
Action::GetFileHash => write!(fmt, "get_file_hash"),
Action::GrepFileContents => write!(fmt, "grep_file_contents"),
Action::ListDirectory => write!(fmt, "list_directory"),
Action::ListProcesses => write!(fmt, "list_processes"),
Action::ListConnections => write!(fmt, "list_connections"),
Expand Down Expand Up @@ -105,6 +108,7 @@ impl TryFrom<rrg_proto::rrg::Action> for Action {
GET_FILE_METADATA => Ok(Action::GetFileMetadata),
GET_FILE_CONTENTS => Ok(Action::GetFileContents),
GET_FILE_HASH => Ok(Action::GetFileHash),
GREP_FILE_CONTENTS => Ok(Action::GrepFileContents),
LIST_DIRECTORY => Ok(Action::ListDirectory),
LIST_PROCESSES => Ok(Action::ListProcesses),
LIST_CONNECTIONS => Ok(Action::ListConnections),
Expand Down
2 changes: 2 additions & 0 deletions proto/rrg.proto
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ enum Action {
LIST_WINREG_KEYS = 15;
// Query WMI using WQL (Windows-only).
QUERY_WMI = 16;
// Grep the specified file for a pattern.
GREP_FILE_CONTENTS = 17;

// TODO: Define more actions that should be supported.

Expand Down
38 changes: 38 additions & 0 deletions proto/rrg/action/grep_file_contents.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Copyright 2024 Google LLC
//
// Use of this source code is governed by an MIT-style license that can be found
// in the LICENSE file or at https://opensource.org/licenses/MIT.
syntax = "proto3";

// The package name mirrors the path of this file so that the `Args` and
// `Result` messages defined here get fully-qualified names of their own and do
// not clash with identically named messages of other actions.
package rrg.action.grep_file_contents;

import "rrg/fs.proto";

message Args {
  // Absolute path to the file to grep the contents of.
  //
  // The file content must be valid UTF-8.
  rrg.fs.Path path = 1;

  // Regular expression to search for in the file contents.
  //
  // The specific syntax of the regex language is left unspecified as an
  // implementation detail, but the most common regex features can be expected
  // to be supported.
  string regex = 2;

  // TODO(@panhania): Add support for files that do not necessarily conform to
  // Unicode.

  // TODO(@panhania): Add support for different file encodings.
}

message Result {
  // Byte offset within the file from which the content matched.
  //
  // The offset is counted from the beginning of the file.
  uint64 offset = 1;

  // Content that matched the specified regular expression.
  string content = 2;

  // TODO(@panhania): Add support for capture groups.
}

0 comments on commit e8f8876

Please sign in to comment.