Skip to content

Commit

Permalink
feat: Add line info for lexer
Browse files Browse the repository at this point in the history
  • Loading branch information
sbwtw committed Nov 23, 2024
1 parent 5e8efe4 commit 21a04b3
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 11 deletions.
33 changes: 25 additions & 8 deletions lib/src/parser/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,16 @@ pub trait Buffer {
/// Peek at the character `n` positions from the current one
fn peek(&mut self, n: usize) -> Option<char>;
fn current_line(&self) -> usize;
fn current_offset(&self) -> usize;
fn line_offset(&self) -> usize;
fn buffer_offset(&mut self) -> usize;
}

pub struct IterBuffer<'str> {
iter: Box<dyn Iterator<Item = char> + 'str>,
peek_buffer: VecDeque<char>,
current_line: usize,
current_offset: usize,
line_offset: usize,
buffer_offset: usize,
}

impl<'str> IterBuffer<'str> {
Expand All @@ -30,7 +32,8 @@ impl<'str> IterBuffer<'str> {
iter: Box::new(iter),
peek_buffer: VecDeque::with_capacity(1024),
current_line: 0,
current_offset: 0,
line_offset: 0,
buffer_offset: 0,
}
}
}
Expand All @@ -46,13 +49,16 @@ impl Buffer for IterBuffer<'_> {
match c {
None => {}
Some('\r') | Some('\n') => {
self.current_offset = 0;
self.buffer_offset += 1;
self.line_offset = 0;
self.current_line += 1;
let br = c == Some('\n');

// Consume the second character of a two-character line break (`\n\r` or `\r\n`)
match (br, self.peek1()) {
(true, Some('\r')) | (false, Some('\n')) => {
self.buffer_offset += 1;

if !self.peek_buffer.is_empty() {
self.peek_buffer.pop_front()
} else {
Expand All @@ -63,7 +69,8 @@ impl Buffer for IterBuffer<'_> {
}
}
_ => {
self.current_offset += 1;
self.line_offset += 1;
self.buffer_offset += 1;
}
};
}
Expand Down Expand Up @@ -98,8 +105,12 @@ impl Buffer for IterBuffer<'_> {
self.current_line
}

fn current_offset(&self) -> usize {
self.current_offset
fn line_offset(&self) -> usize {
self.line_offset
}

/// Returns the `buffer_offset` counter of this buffer.
///
/// The consuming code in this impl bumps the counter once for every
/// character it takes, including both characters of a two-character line
/// break, so the value is a count of `char`s, not bytes.
///
/// Takes `&mut self` only because the `Buffer` trait declares it that way;
/// nothing is mutated here.
// NOTE(review): the stream-backed buffer reports its reader's stream position
// instead of a character count; confirm both implementations use the same unit.
fn buffer_offset(&mut self) -> usize {
self.buffer_offset
}
}

Expand Down Expand Up @@ -171,7 +182,13 @@ impl<R: Read + Seek> Buffer for StreamBuffer<R> {
self.current_line
}

fn current_offset(&self) -> usize {
/// Returns this buffer's `current_offset` field.
///
/// The trait method is called `line_offset`, but the backing field in this
/// implementation still carries the name `current_offset`.
#[inline]
fn line_offset(&self) -> usize {
self.current_offset
}

/// Returns the current stream position of the underlying reader as `usize`.
///
/// # Panics
///
/// Panics if querying the stream position fails, because the result is
/// unwrapped.
// NOTE(review): the position is narrowed with `as usize`, which silently
// truncates if the value does not fit in `usize` (presumably only relevant on
// 32-bit targets; confirm).
// NOTE(review): if characters are read ahead of what has been consumed, the
// reader position may be past the consumed position; verify against callers.
#[inline]
fn buffer_offset(&mut self) -> usize {
self.reader.stream_position().unwrap() as usize
}
}
63 changes: 60 additions & 3 deletions lib/src/parser/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,14 @@ use smallmap::Map;
use std::fmt::{self, Display, Formatter};
use std::io;

/// Location record tying a text line to an offset in the input buffer.
///
/// The default value (all fields zero) is used by the lexer as the entry for
/// the first line of the input.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct LocInfo {
    /// Marker for this entry; the lexer currently stores the same value as
    /// `text_line` here.
    pub mark: usize,
    /// Line number reported by the buffer when the entry was recorded.
    pub text_line: usize,
    /// Buffer offset reported by the buffer when the entry was recorded.
    pub buffer_offset: usize,
}

pub(crate) type LexerResult = Result<Token, LexicalError>;

bitflags! {
Expand Down Expand Up @@ -134,6 +142,7 @@ pub struct StLexer<'a> {
buffer: Box<dyn Buffer + 'a>,
keywords: Map<StString, TokenKind>,
options: StLexerOptions,
loc_info: Vec<LocInfo>,
}

macro_rules! keywords {
Expand Down Expand Up @@ -235,6 +244,7 @@ impl StLexerBuilder {
buffer: Box::new(IterBuffer::new(input.chars())),
keywords: self.keywords,
options: self.options,
loc_info: vec![LocInfo::default()],
}
}

Expand All @@ -243,6 +253,7 @@ impl StLexerBuilder {
buffer: Box::new(IterBuffer::new(iter)),
keywords: self.keywords,
options: self.options,
loc_info: vec![LocInfo::default()],
}
}

Expand All @@ -251,11 +262,13 @@ impl StLexerBuilder {
buffer: Box::new(StreamBuffer::from_file(file)?),
keywords: self.keywords,
options: self.options,
loc_info: vec![LocInfo::default()],
})
}
}

impl<'input> StLexer<'input> {
#[inline]
pub fn eof(&mut self) -> bool {
loop {
match self.buffer.peek1() {
Expand All @@ -266,6 +279,25 @@ impl<'input> StLexer<'input> {
}
}

/// Look up the buffer offset recorded for text line `line`.
///
/// Walks the recorded location entries in insertion order and yields the
/// `buffer_offset` of the first entry whose `text_line` equals `line`.
/// Returns `None` when no entry matches.
#[inline]
pub fn buffer_offset_by_line(&self, line: usize) -> Option<usize> {
    for info in &self.loc_info {
        if info.text_line == line {
            return Some(info.buffer_offset);
        }
    }

    None
}

/// Append a location entry describing the buffer's current position.
///
/// The entry stores the buffer's current line as both `mark` and
/// `text_line`, together with the buffer's current offset.
fn record_line_location(&mut self) {
    // Query the line first, then the offset, before touching `loc_info`.
    let line = self.buffer.current_line();
    let entry = LocInfo {
        mark: line,
        text_line: line,
        buffer_offset: self.buffer.buffer_offset(),
    };

    self.loc_info.push(entry);
}

// ^123.456
fn parse_number_string(
&mut self,
Expand Down Expand Up @@ -396,7 +428,7 @@ impl<'input> StLexer<'input> {
Some(c) => {
return Some(Err(LexicalError::UnexpectedCharacter(
self.buffer.current_line(),
self.buffer.current_offset(),
self.buffer.line_offset(),
c,
)))
}
Expand Down Expand Up @@ -491,6 +523,7 @@ impl<'input> StLexer<'input> {
tok.length += 1;
}
_ => {
self.record_line_location();
return Ok(tok);
}
}
Expand Down Expand Up @@ -581,7 +614,7 @@ impl<'input> StLexer<'input> {
length: 1,
location: Location {
mark: self.buffer.current_line(),
offset: self.buffer.current_offset(),
offset: self.buffer.line_offset(),
},
..Default::default()
};
Expand Down Expand Up @@ -677,7 +710,7 @@ impl<'input> StLexer<'input> {
self.buffer.consume1();
Some(Err(LexicalError::UnexpectedCharacter(
self.buffer.current_line(),
self.buffer.current_offset(),
self.buffer.line_offset(),
c,
)))
}
Expand Down Expand Up @@ -909,4 +942,28 @@ mod test {
test_literal_parse!("0.5", TokenKind::Literal(LiteralValue::LReal(..)), 3);
// test_literal_parse!("-0.5", TokenKind::Literal(LiteralValue::LReal(..)), 4);
}

#[test]
fn test_line_info() {
    // Each case: (source text, expected buffer offset for lines 0, 1, 2 and 3).
    let cases: [(&str, [Option<usize>; 4]); 2] = [
        ("a\nb\nc", [Some(0), Some(2), Some(4), None]),
        ("a\r\nb\r\nc", [Some(0), Some(3), Some(6), None]),
    ];

    for &(source, expected) in cases.iter() {
        let mut lexer = StLexerBuilder::new().build_str(source);

        // Drain every token so that all line locations get recorded.
        while lexer.next().is_some() {}

        for (line, want) in expected.iter().enumerate() {
            assert_eq!(lexer.buffer_offset_by_line(line), *want);
        }
    }
}
}
4 changes: 4 additions & 0 deletions lsp/src/lsp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@ impl LanguageServer for StcLsp {
),
text_document_sync: Some(TextDocumentSyncCapability::Kind(TextDocumentSyncKind::FULL)),
document_highlight_provider: Some(OneOf::Left(true)),
// Use utf-8 for position encoding
// position_encoding: Some(PositionEncodingKind::UTF8),
..ServerCapabilities::default()
};

Expand Down Expand Up @@ -136,6 +138,8 @@ impl LanguageServer for StcLsp {
}

async fn did_change(&self, params: DidChangeTextDocumentParams) {
dbg!(&params);

for change in params.content_changes.into_iter() {
// Only full-text synchronization is supported
assert!(change.range.is_none());
Expand Down

0 comments on commit 21a04b3

Please sign in to comment.