-
-
Notifications
You must be signed in to change notification settings - Fork 131
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #373 from maciejhirsz/borrowe-json
chore(book): add JSON-borrowed parser example
- Loading branch information
Showing
5 changed files
with
290 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
# JSON parser with borrowed values | ||
|
||
The previous parser owned its data by allocating strings. This can require quite
a lot of memory, and using borrowed string slices can help us save space, while
possibly also improving performance.
|
||
If you are familiar with Rust's concept of lifetimes,
using `&str` string slices instead of owned `String` | ||
is straightforward: | ||
|
||
```diff | ||
@ 33c29 | ||
- enum Token { | ||
+ enum Token<'source> { | ||
@ 62,63c58,59 | ||
- #[regex(r#""([^"\\]|\\["\\bnfrt]|u[a-fA-F0-9]{4})*""#, |lex| lex.slice().to_owned())] | ||
- String(String), | ||
+ #[regex(r#""([^"\\]|\\["\\bnfrt]|u[a-fA-F0-9]{4})*""#, |lex| lex.slice())] | ||
+ String(&'source str), | ||
@ 70c66 | ||
- enum Value { | ||
+ enum Value<'source> {
@ 78c74 | ||
- String(String), | ||
+ String(&'source str), | ||
@ 80c76 | ||
- Array(Vec<Value>), | ||
+ Array(Vec<Value<'source>>), | ||
@ 82c78 | ||
- Object(HashMap<String, Value>), | ||
+ Object(HashMap<&'source str, Value<'source>>), | ||
@ 88c84 | ||
- fn parse_value<'source>(lexer: &mut Lexer<'source, Token>) -> Result<Value> { | ||
+ fn parse_value<'source>(lexer: &mut Lexer<'source, Token<'source>>) -> Result<Value<'source>> { | ||
@ 113c109 | ||
- fn parse_array<'source>(lexer: &mut Lexer<'source, Token>) -> Result<Value> { | ||
+ fn parse_array<'source>(lexer: &mut Lexer<'source, Token<'source>>) -> Result<Value<'source>> { | ||
@ 167c163 | ||
- fn parse_object<'source>(lexer: &mut Lexer<'source, Token>) -> Result<Value> { | ||
+ fn parse_object<'source>(lexer: &mut Lexer<'source, Token<'source>>) -> Result<Value<'source>> { | ||
``` | ||
|
||
The above code shows the lines you need to change from the previous example | ||
to use borrowed data. | ||
|
||
Finally, we provide you the full code that you should be able to run with[^1]: | ||
```bash | ||
cargo run --example json-borrowed examples/example.json | ||
``` | ||
|
||
[^1]: You first need to clone [this repository](https://github.com/maciejhirsz/logos).
|
||
```rust,no_run,noplayground | ||
{{#include ../../../examples/json_borrowed.rs:all}} | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,228 @@ | ||
//! Variant of JSON parser example, but | ||
//! using borrowed string slices to avoid copies. | ||
//! | ||
//! Usage: | ||
//! cargo run --example json-borrowed <path/to/file> | ||
//! | ||
//! Example: | ||
//! cargo run --example json-borrowed examples/example.json | ||
/* ANCHOR: all */ | ||
use logos::{Lexer, Logos, Span}; | ||
|
||
use std::collections::HashMap; | ||
use std::env; | ||
use std::fs; | ||
|
||
type Error = (String, Span); | ||
|
||
type Result<T> = std::result::Result<T, Error>; | ||
|
||
/* ANCHOR: tokens */ | ||
/// All meaningful JSON tokens. | ||
/// | ||
/// > NOTE: regexes for [`Token::Number`] and [`Token::String`] may not | ||
/// > catch all possible values, especially for strings. If you find | ||
/// > errors, please report them so that we can improve the regex. | ||
#[derive(Debug, Logos)] | ||
#[logos(skip r"[ \t\r\n\f]+")] | ||
enum Token<'source> { | ||
#[token("false", |_| false)] | ||
#[token("true", |_| true)] | ||
Bool(bool), | ||
|
||
#[token("{")] | ||
BraceOpen, | ||
|
||
#[token("}")] | ||
BraceClose, | ||
|
||
#[token("[")] | ||
BracketOpen, | ||
|
||
#[token("]")] | ||
BracketClose, | ||
|
||
#[token(":")] | ||
Colon, | ||
|
||
#[token(",")] | ||
Comma, | ||
|
||
#[token("null")] | ||
Null, | ||
|
||
#[regex(r"-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?", |lex| lex.slice().parse::<f64>().unwrap())] | ||
Number(f64), | ||
|
||
#[regex(r#""([^"\\]|\\["\\bnfrt]|u[a-fA-F0-9]{4})*""#, |lex| lex.slice())] | ||
String(&'source str), | ||
} | ||
/* ANCHOR_END: tokens */ | ||
|
||
/* ANCHOR: values */ | ||
/// A parsed JSON value.
///
/// String data is not copied: [`Value::String`] and the keys of
/// [`Value::Object`] are `&'source str` slices, so a `Value` cannot outlive
/// the text it borrows from.
#[derive(Debug)]
enum Value<'source> {
    /// The `null` literal.
    Null,
    /// Either `true` or `false`.
    Bool(bool),
    /// A number, stored as a floating point value.
    Number(f64),
    /// A quoted string, as a borrowed slice.
    String(&'source str),
    /// An ordered sequence of values.
    Array(Vec<Value<'source>>),
    /// A dictionary mapping borrowed string keys to values.
    Object(HashMap<&'source str, Value<'source>>),
}
/* ANCHOR_END: values */ | ||
|
||
/* ANCHOR: value */ | ||
/// Parse a token stream into a JSON value. | ||
fn parse_value<'source>(lexer: &mut Lexer<'source, Token<'source>>) -> Result<Value<'source>> { | ||
if let Some(token) = lexer.next() { | ||
match token { | ||
Ok(Token::Bool(b)) => Ok(Value::Bool(b)), | ||
Ok(Token::BraceOpen) => parse_object(lexer), | ||
Ok(Token::BracketOpen) => parse_array(lexer), | ||
Ok(Token::Null) => Ok(Value::Null), | ||
Ok(Token::Number(n)) => Ok(Value::Number(n)), | ||
Ok(Token::String(s)) => Ok(Value::String(s)), | ||
_ => Err(( | ||
"unexpected token here (context: value)".to_owned(), | ||
lexer.span(), | ||
)), | ||
} | ||
} else { | ||
Err(("empty values are not allowed".to_owned(), lexer.span())) | ||
} | ||
} | ||
/* ANCHOR_END: value */ | ||
|
||
/* ANCHOR: array */ | ||
/// Parse a token stream into an array and return when | ||
/// a valid terminator is found. | ||
/// | ||
/// > NOTE: we assume '[' was consumed. | ||
fn parse_array<'source>(lexer: &mut Lexer<'source, Token<'source>>) -> Result<Value<'source>> { | ||
let mut array = Vec::new(); | ||
let span = lexer.span(); | ||
let mut awaits_comma = false; | ||
let mut awaits_value = false; | ||
|
||
while let Some(token) = lexer.next() { | ||
match token { | ||
Ok(Token::Bool(b)) if !awaits_comma => { | ||
array.push(Value::Bool(b)); | ||
awaits_value = false; | ||
} | ||
Ok(Token::BraceOpen) if !awaits_comma => { | ||
let object = parse_object(lexer)?; | ||
array.push(object); | ||
awaits_value = false; | ||
} | ||
Ok(Token::BracketOpen) if !awaits_comma => { | ||
let sub_array = parse_array(lexer)?; | ||
array.push(sub_array); | ||
awaits_value = false; | ||
} | ||
Ok(Token::BracketClose) if !awaits_value => return Ok(Value::Array(array)), | ||
Ok(Token::Comma) if awaits_comma => awaits_value = true, | ||
Ok(Token::Null) if !awaits_comma => { | ||
array.push(Value::Null); | ||
awaits_value = false | ||
} | ||
Ok(Token::Number(n)) if !awaits_comma => { | ||
array.push(Value::Number(n)); | ||
awaits_value = false; | ||
} | ||
Ok(Token::String(s)) if !awaits_comma => { | ||
array.push(Value::String(s)); | ||
awaits_value = false; | ||
} | ||
_ => { | ||
return Err(( | ||
"unexpected token here (context: array)".to_owned(), | ||
lexer.span(), | ||
)) | ||
} | ||
} | ||
awaits_comma = !awaits_value; | ||
} | ||
Err(("unmatched opening bracket defined here".to_owned(), span)) | ||
} | ||
/* ANCHOR_END: array */ | ||
|
||
/* ANCHOR: object */ | ||
/// Parse a token stream into an object and return when | ||
/// a valid terminator is found. | ||
/// | ||
/// > NOTE: we assume '{' was consumed. | ||
fn parse_object<'source>(lexer: &mut Lexer<'source, Token<'source>>) -> Result<Value<'source>> { | ||
let mut map = HashMap::new(); | ||
let span = lexer.span(); | ||
let mut awaits_comma = false; | ||
let mut awaits_key = false; | ||
|
||
while let Some(token) = lexer.next() { | ||
match token { | ||
Ok(Token::BraceClose) if !awaits_key => return Ok(Value::Object(map)), | ||
Ok(Token::Comma) if awaits_comma => awaits_key = true, | ||
Ok(Token::String(key)) if !awaits_comma => { | ||
match lexer.next() { | ||
Some(Ok(Token::Colon)) => (), | ||
_ => { | ||
return Err(( | ||
"unexpected token here, expecting ':'".to_owned(), | ||
lexer.span(), | ||
)) | ||
} | ||
} | ||
let value = parse_value(lexer)?; | ||
map.insert(key, value); | ||
awaits_key = false; | ||
} | ||
_ => { | ||
return Err(( | ||
"unexpected token here (context: object)".to_owned(), | ||
lexer.span(), | ||
)) | ||
} | ||
} | ||
awaits_comma = !awaits_key; | ||
} | ||
Err(("unmatched opening brace defined here".to_owned(), span)) | ||
} | ||
/* ANCHOR_END: object */ | ||
|
||
fn main() { | ||
let filename = env::args().nth(1).expect("Expected file argument"); | ||
let src = fs::read_to_string(&filename).expect("Failed to read file"); | ||
|
||
let mut lexer = Token::lexer(src.as_str()); | ||
|
||
match parse_value(&mut lexer) { | ||
Ok(value) => println!("{:#?}", value), | ||
Err((msg, span)) => { | ||
use ariadne::{ColorGenerator, Label, Report, ReportKind, Source}; | ||
|
||
let mut colors = ColorGenerator::new(); | ||
|
||
let a = colors.next(); | ||
|
||
Report::build(ReportKind::Error, &filename, 12) | ||
.with_message(format!("Invalid JSON")) | ||
.with_label( | ||
Label::new((&filename, span)) | ||
.with_message(msg) | ||
.with_color(a), | ||
) | ||
.finish() | ||
.eprint((&filename, Source::from(src))) | ||
.unwrap(); | ||
} | ||
} | ||
} | ||
/* ANCHOR_END: all */ |