Skip to content
This repository has been archived by the owner on May 24, 2022. It is now read-only.

Close #13 save bad rows to a file #16

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,10 @@ struct Opt {
/// quoting.
#[structopt(value_name = "CHAR", long = "quote", default_value = "\"")]
quote: CharSpecifier,

/// Save badly formed rows to a file.
#[structopt(value_name = "PATH", long = "bad-rows-path")]
bad_rows_path: Option<PathBuf>,
}

lazy_static! {
Expand Down Expand Up @@ -182,6 +186,13 @@ fn run() -> Result<()> {
.buffer_capacity(BUFFER_SIZE)
.from_writer(output);

// Create our CSV writer for bad rows if it is requested.
let mut bad_rows_wtr = if let Some(ref path) = opt.bad_rows_path {
Some(csv::WriterBuilder::new().from_path(path)?)
} else {
None
};

// Get our header and, if we were asked, make sure all the column names are unique.
let mut hdr = rdr
.byte_headers()
Expand Down Expand Up @@ -244,6 +255,11 @@ fn run() -> Result<()> {
// Check if we have the right number of columns in this row.
if record.len() != expected_cols {
bad_rows += 1;
if let Some(ref mut wtr_bad) = bad_rows_wtr {
wtr_bad
.write_record(record.into_iter())
.context("cannot write record")?;
};
continue 'next_row;
}

Expand Down Expand Up @@ -303,6 +319,11 @@ fn run() -> Result<()> {
// If the column is NULL but shouldn't be, bail on this row.
if is_required_col && value.is_empty() {
bad_rows += 1;
if let Some(ref mut wtr_bad) = bad_rows_wtr {
wtr_bad
.write_record(record.into_iter())
.context("cannot write record")?;
};
continue 'next_row;
}
}
Expand Down
47 changes: 47 additions & 0 deletions tests/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,26 @@ fn bad_rows() {
assert!(output.stderr_str().contains("102 rows (1 bad)"));
}

/// With `--bad-rows-path`, a row that has too few columns is written to the
/// given file, the well-formed rows still reach standard output, and the
/// summary on standard error reports one bad row.
#[test]
fn bad_rows_saved() {
    // Header followed by 100 identical, well-formed rows: this is exactly what
    // we expect to see echoed on standard output.
    let expected_stdout = format!("a,b,c\n{}", "1,2,3\n".repeat(100));
    // The input is the same data with a single two-column row appended.
    let input = format!("{}1,2\n", expected_stdout);

    let testdir = TestDir::new("scrubcsv", "bad_rows_saved");
    let output = testdir
        .cmd()
        .args(&["--bad-rows-path", "bad.csv"])
        .output_with_stdin(&input)
        .expect_success();

    // Only the short row should have been diverted to the bad-rows file.
    testdir.expect_file_contents("bad.csv", "1,2\n");
    assert_eq!(output.stdout_str(), expected_stdout.as_str());
    assert!(output.stderr_str().contains("102 rows (1 bad)"));
}

#[test]
fn too_many_bad_rows() {
let testdir = TestDir::new("scrubcsv", "too_many_bad_rows");
Expand Down Expand Up @@ -200,3 +220,30 @@ a,b,c
"#
);
}

/// With `--bad-rows-path`, a row rejected by `--drop-row-if-null` is written
/// to the given file.
#[test]
fn drop_row_if_null_saved() {
    // Only the first data row (`1,,`) has an empty value in a required column
    // (`c2`); every other row is fully populated.
    // NOTE(review): the nine valid rows presumably keep the bad-row ratio low
    // enough for the tool not to abort — confirm against the threshold logic.
    let input = r#"c1,c2,c3
1,,
a,b,c
1,2,3
3,2,1
1,4,5
2,2,2
1,1,1
5,5,5
2,2,2
1,1,1
"#;

    let testdir = TestDir::new("scrubcsv", "drop_row_if_null_saved");
    // NOTE(review): unlike the `expect_success()` call in `bad_rows_saved`,
    // this `expect` presumably only checks that the command could be run, not
    // its exit status — confirm whether that is intended.
    let output = testdir
        .cmd()
        .args(&["--drop-row-if-null=c1", "--drop-row-if-null=c2"])
        .args(&["--bad-rows-path", "bad.csv"])
        .output_with_stdin(input)
        .expect("error running scrubcsv");

    // Surface the tool's diagnostics to help debug a failing run.
    eprintln!("{}", output.stderr_str());
    testdir.expect_file_contents("bad.csv", "1,,\n");
}