Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

handle json arrays - extreme rough cut edition #12

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 37 additions & 6 deletions src/convert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ extern crate linked_hash_set;
use linked_hash_set::LinkedHashSet;
use serde_json::{json, Deserializer, Value};
use std::error::Error;
use std::io::{BufRead, Write};
use std::io::{BufRead, Read, Write};
use std::str;

mod unwind_json;
Expand Down Expand Up @@ -64,6 +64,37 @@ pub fn write_json_to_csv(
Ok(())
}

/// Convert a single top-level JSON array of objects into CSV.
///
/// The whole input is read into memory and parsed as one JSON array. The
/// CSV columns are the keys of the *first* array element, so keys that only
/// appear in later elements are not emitted. Each element is then written as
/// one CSV record via `convert_json_record_to_csv_record`.
///
/// An empty array produces no output (not even a header row).
///
/// # Arguments
///
/// * `rdr` - source of the JSON document.
/// * `wtr` - destination for the CSV output.
/// * `delimiter` - field delimiter; only its first byte is used. Defaults
///   to `,` when `None`.
/// * `double_quote` - forwarded to `csv::WriterBuilder::double_quote`.
/// * `fields`, `flatten`, `unwind_on`, `samples` - accepted so the signature
///   matches `write_json_to_csv`, but NOT yet honoured by this function.
///
/// # Errors
///
/// Returns an error if reading fails, the input is not a JSON array, the
/// first element is not a JSON object, the delimiter is an empty string, or
/// writing/flushing the CSV output fails.
pub fn write_json_array_to_csv(
    mut rdr: impl Read,
    wtr: impl Write,
    fields: Option<Vec<&str>>,
    delimiter: Option<String>,
    flatten: bool,
    unwind_on: Option<String>,
    samples: Option<u32>,
    double_quote: bool,
) -> Result<(), Box<dyn Error>> {
    // These options are not implemented for array input yet; bind them
    // explicitly so the omission is visible rather than an unused warning.
    let _ = (fields, flatten, unwind_on, samples);

    // Validate the delimiter up front instead of indexing `[0]`, which
    // would panic on an empty string.
    let delimiter_byte = match delimiter {
        Some(ref d) => *d
            .as_bytes()
            .first()
            .ok_or("delimiter must not be empty")?,
        None => b',',
    };

    let mut data = Vec::new();
    rdr.read_to_end(&mut data)?;
    let rows: Vec<serde_json::Value> = serde_json::from_slice(&data)?;

    // Derive the header from the first element; an empty array has nothing
    // to write, and a non-object first element is a user-input error rather
    // than a reason to panic.
    let headers: Vec<String> = match rows.first() {
        Some(first) => first
            .as_object()
            .ok_or("expected array of objects")?
            .keys()
            .cloned()
            .collect(),
        None => return Ok(()),
    };

    let mut csv_writer = csv::WriterBuilder::new()
        .delimiter(delimiter_byte)
        .double_quote(double_quote)
        .from_writer(wtr);

    csv_writer.write_record(convert_header_to_csv_record(&headers)?)?;

    for item in &rows {
        csv_writer.write_record(convert_json_record_to_csv_record(&headers, item)?)?;
    }

    // Flush explicitly: dropping the writer would flush too, but would
    // silently discard any I/O error.
    csv_writer.flush()?;
    Ok(())
}

/// Handle the flattening and unwinding of a value
/// Note that when unwinding a large array, all the array values
/// are held in memory. This could be improved.
Expand All @@ -85,24 +116,24 @@ fn preprocess(item: Value, flatten: bool, unwind_on: &Option<String>) -> Vec<Val
container
}

pub fn convert_header_to_csv_record(headers: &Vec<&str>) -> Result<Vec<String>, Box<dyn Error>> {
pub fn convert_header_to_csv_record<S: AsRef<str>>(headers: &Vec<S>) -> Result<Vec<String>, Box<dyn Error>> {
let mut record = Vec::new();
for item in headers {
record.push(String::from(item.clone()));
record.push(String::from(item.as_ref()));
}
Ok(record)
}

pub fn convert_json_record_to_csv_record(
headers: &Vec<&str>,
pub fn convert_json_record_to_csv_record<S: AsRef<str>>(
headers: &Vec<S>,
json_map: &Value,
) -> Result<Vec<String>, Box<dyn Error>> {
// iterate over headers
// if header is present in record, add it
// if not, blank string
let mut record = Vec::new();
for item in headers {
let value = json_map.get(&item.to_string());
let value = json_map.get(&item.as_ref().to_string());
let csv_result = match value {
Some(header_item) => match header_item.as_str() {
Some(s) => String::from(s),
Expand Down
19 changes: 16 additions & 3 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@ fn main() -> Result<(), Box<dyn Error>> {
.long("output")
.takes_value(true),
)
.arg(
Arg::with_name("array")
.help("handle as array")
.short("a")
.long("array"),
)
.arg(
Arg::with_name("flatten")
.help("Flatten nested jsons and arrays")
Expand Down Expand Up @@ -91,10 +97,17 @@ fn main() -> Result<(), Box<dyn Error>> {
Some(s) => Some(s.parse::<u32>().unwrap()),
None => Some(1),
};
let is_array = m.is_present("array");

convert::write_json_to_csv(
reader, writer, fields, delimiter, flatten, unwind_on, samples, double_quote
)
if is_array {
convert::write_json_array_to_csv(
reader, writer, fields, delimiter, flatten, unwind_on, samples, double_quote
)
} else {
convert::write_json_to_csv(
reader, writer, fields, delimiter, flatten, unwind_on, samples, double_quote
)
}
}

// From https://github.com/BurntSushi/xsv/blob/master/src/config.rs
Expand Down