-
Notifications
You must be signed in to change notification settings - Fork 24
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement the `grep_file_contents` action.
- Loading branch information
Showing
7 changed files
with
279 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,225 @@ | ||
// Copyright 2024 Google LLC | ||
// | ||
// Use of this source code is governed by an MIT-style license that can be found | ||
// in the LICENSE file or at https://opensource.org/licenses/MIT. | ||
|
||
use std::path::PathBuf; | ||
|
||
/// Arguments of the `grep_file_contents` action. | ||
pub struct Args { | ||
/// Path to the file to grep the contents of. | ||
path: PathBuf, | ||
/// Regular expression to search for in the file contents. | ||
regex: regex::Regex, | ||
} | ||
|
||
/// Result of the `grep_file_contents` action.
///
/// A single item describes one match of the regular expression.
pub struct Item {
    /// Byte offset, counted from the beginning of the file, at which the
    /// matched content starts.
    offset: u64,
    /// The text that matched the specified regular expression.
    content: String,
}
|
||
/// Handles invocations of the `grep_file_contents` action. | ||
pub fn handle<S>(session: &mut S, args: Args) -> crate::session::Result<()> | ||
where | ||
S: crate::session::Session, | ||
{ | ||
let file = std::fs::File::open(&args.path) | ||
.map_err(crate::session::Error::action)?; | ||
|
||
let mut file = std::io::BufReader::new(file); | ||
|
||
// TODO(@panhania): Read to a buffer of predefined size so that we do not | ||
// allow reading lines of arbitrary length. | ||
let mut line = String::new(); | ||
let mut offset = 0; | ||
|
||
loop { | ||
use std::io::BufRead as _; | ||
|
||
line.clear(); | ||
let len = match file.read_line(&mut line) { | ||
Ok(0) => return Ok(()), | ||
Ok(len) => len, | ||
Err(error) => return Err(crate::session::Error::action(error)), | ||
}; | ||
|
||
for matcz in args.regex.find_iter(&line) { | ||
session.reply(Item { | ||
offset: offset + matcz.start() as u64, | ||
content: matcz.as_str().to_string(), | ||
})?; | ||
} | ||
|
||
offset += len as u64; | ||
} | ||
} | ||
|
||
impl crate::request::Args for Args { | ||
|
||
type Proto = rrg_proto::grep_file_contents::Args; | ||
|
||
fn from_proto(mut proto: Self::Proto) -> Result<Args, crate::request::ParseArgsError> { | ||
use crate::request::ParseArgsError; | ||
|
||
let path = PathBuf::try_from(proto.take_path()) | ||
.map_err(|error| ParseArgsError::invalid_field("path", error))?; | ||
|
||
let regex = regex::Regex::new(proto.regex()) | ||
.map_err(|error| ParseArgsError::invalid_field("regex", error))?; | ||
|
||
Ok(Args { | ||
path, | ||
regex, | ||
}) | ||
} | ||
} | ||
|
||
impl crate::response::Item for Item { | ||
|
||
type Proto = rrg_proto::grep_file_contents::Result; | ||
|
||
fn into_proto(self) -> Self::Proto { | ||
let mut proto = Self::Proto::default(); | ||
proto.set_offset(self.offset); | ||
proto.set_content(self.content); | ||
|
||
proto | ||
} | ||
} | ||
|
||
#[cfg(test)]
mod tests {

    use super::*;

    /// Runs the action against a temporary file with the given content.
    ///
    /// The `content` is written to a fresh file in a temporary directory, the
    /// action is invoked on it with a regex compiled from `pattern` and the
    /// fake session that collected all the replies is returned.
    fn grep(content: &[u8], pattern: &str) -> crate::session::FakeSession {
        let tempdir = tempfile::tempdir()
            .unwrap();

        let path = tempdir.path().join("file");
        std::fs::write(&path, content)
            .unwrap();

        let args = Args {
            path,
            regex: regex::Regex::new(pattern).unwrap(),
        };

        let mut session = crate::session::FakeSession::new();
        handle(&mut session, args)
            .unwrap();

        session
    }

    #[test]
    fn handle_empty_file_empty_regex() {
        // An empty file has no lines to search through, so even the empty
        // regex yields no results.
        let session = grep(b"", "");

        assert_eq!(session.reply_count(), 0);
    }

    #[test]
    fn handle_empty_file_non_empty_regex() {
        let session = grep(b"", "foo");

        assert_eq!(session.reply_count(), 0);
    }

    #[test]
    fn handle_regex_no_matches() {
        let session = grep(b"foo", "bar");

        assert_eq!(session.reply_count(), 0);
    }

    #[test]
    fn handle_regex_single_match() {
        let session = grep(b"bar", "bar");

        assert_eq!(session.reply_count(), 1);

        let item = session.reply::<Item>(0);
        assert_eq!(item.offset, 0);
        assert_eq!(item.content, "bar");
    }

    #[test]
    fn handle_regex_multiple_matches_multiple_lines() {
        let session = grep(b"bar\nbas\nbaz\nbar", "ba[rz]");

        assert_eq!(session.reply_count(), 3);

        let item = session.reply::<Item>(0);
        assert_eq!(item.offset, 0);
        assert_eq!(item.content, "bar");

        // Offsets are relative to the beginning of the file, not of the line.
        let item = session.reply::<Item>(1);
        assert_eq!(item.offset, 8);
        assert_eq!(item.content, "baz");

        let item = session.reply::<Item>(2);
        assert_eq!(item.offset, 12);
        assert_eq!(item.content, "bar");
    }

    #[test]
    fn handle_regex_multiple_matches_single_line() {
        let session = grep(b"bar bas baz bar", "ba[rz]");

        assert_eq!(session.reply_count(), 3);

        let item = session.reply::<Item>(0);
        assert_eq!(item.offset, 0);
        assert_eq!(item.content, "bar");

        let item = session.reply::<Item>(1);
        assert_eq!(item.offset, 8);
        assert_eq!(item.content, "baz");

        let item = session.reply::<Item>(2);
        assert_eq!(item.offset, 12);
        assert_eq!(item.content, "bar");
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
// Copyright 2024 Google LLC | ||
// | ||
// Use of this source code is governed by an MIT-style license that can be found | ||
// in the LICENSE file or at https://opensource.org/licenses/MIT. | ||
syntax = "proto3"; | ||
|
||
package rrg.action.grep_file_contents;
|
||
import "rrg/fs.proto"; | ||
|
||
message Args {
  // Absolute path to the file whose contents should be searched.
  //
  // The contents of the file must be valid UTF-8.
  rrg.fs.Path path = 1;

  // Regular expression to search for in the file contents.
  //
  // The exact syntax of the regex language is left unspecified as an
  // implementation detail, but the most common regex features can be expected
  // to be supported.
  string regex = 2;

  // TODO(@panhania): Add support for files that do not necessarily conform to
  // Unicode.

  // TODO(@panhania): Add support for different file encodings.
}
|
||
message Result {
  // Byte offset within the file at which the matched content starts.
  uint64 offset = 1;

  // Content of the file that matched the specified regular expression.
  string content = 2;

  // TODO(@panhania): Add support for capture groups.
}