Skip to content

Commit

Permalink
Initial generation of IR from nu AST.
Browse files Browse the repository at this point in the history
The initial implementation only handles simple math expressions; the focus is
primarily on introducing new structs, agreeing on naming, and adding the output
to insta tests.
  • Loading branch information
doriath committed Jan 2, 2025
1 parent a7d0042 commit aec6924
Show file tree
Hide file tree
Showing 33 changed files with 359 additions and 1 deletion.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ edition = "2021"
[dependencies]
tracy-client = { version = "0.17.3", default-features = false } # for tracy v0.11.1
logos = "0.15"
# NOTE(review): "*" accepts any published nu-protocol release, while nu-parser is
# pinned to "0.99" further down in this manifest. Consider replacing the wildcard
# with a concrete requirement once the minimum version providing the IR API used
# by src/ir_generator.rs is confirmed.
nu-protocol = "*"

[profile.profiling]
inherits = "release"
Expand All @@ -30,7 +31,6 @@ path = "src/lib.rs"
insta = { version = "1.33.0", features = ["glob"] }
tango-bench = "0.6"
nu-parser = "0.99"
nu-protocol = "0.99"

[[bench]]
name = "benchmarks"
Expand Down
193 changes: 193 additions & 0 deletions src/ir_generator.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
use crate::compiler::Compiler;
use crate::errors::{Severity, SourceError};
use crate::parser::{AstNode, NodeId};
use nu_protocol::ast::{Math, Operator};
use nu_protocol::ir::{Instruction, IrBlock, Literal};
use nu_protocol::{RegId, Span};

/// Lowers the nu AST stored in a `Compiler` into IR (Intermediate Representation).
///
/// Typical use: construct with `new`, call `generate`, then inspect `errors`
/// and, if there are none, consume the generator with `block`.
pub struct IrGenerator<'a> {
    /// Shared, read-only view of the compiler; expected to be taken after the
    /// typechecker pass has run.
    compiler: &'a Compiler,
    /// Problems recorded while lowering, in the order they were encountered.
    errors: Vec<SourceError>,

    /// Instructions emitted so far, in emission order.
    instructions: Vec<Instruction>,
    /// Number of registers handed out so far; also the index of the next free one.
    register_count: u32,
    /// Copied into the resulting `IrBlock`; nothing in this generator changes it
    /// from its initial value of zero yet.
    file_count: u32,
}

impl<'a> IrGenerator<'a> {
/// Creates a generator over `compiler` with no instructions, no errors and
/// both counters at zero.
pub fn new(compiler: &'a Compiler) -> Self {
    let errors = Vec::new();
    let instructions = Vec::new();
    Self {
        compiler,
        errors,
        instructions,
        register_count: 0,
        file_count: 0,
    }
}

/// Lowers the compiler's AST into instructions.
///
/// Lowering starts from the last entry of `ast_nodes`. When that node yields
/// a result register, a trailing `Return` of that register is appended. When
/// the AST is empty or lowering yields no register, nothing more is emitted.
/// Afterwards, use `block` and `errors` to get the result.
pub fn generate(&mut self) {
    // `checked_sub` doubles as the emptiness check: no nodes, no last index.
    let Some(last_index) = self.compiler.ast_nodes.len().checked_sub(1) else {
        return;
    };
    if let Some(result) = self.generate_node(NodeId(last_index)) {
        self.instructions.push(Instruction::Return { src: result });
    }
}

/// Consumes the generator and packages the emitted instructions as an `IrBlock`.
///
/// Call `generate` before using this method and ensure there are no errors.
/// `spans` and `ast` are filled with one placeholder entry per instruction.
pub fn block(self) -> IrBlock {
    // Read the length before `self.instructions` is moved into the block.
    let instruction_count = self.instructions.len();
    IrBlock {
        // TODO: properly generate the spans
        spans: (0..instruction_count)
            .map(|_| Span { start: 0, end: 0 })
            .collect(),
        // TODO: figure out what to do with AST, as this parser has different
        // representation of AST than the old parser.
        ast: (0..instruction_count).map(|_| None).collect(),
        instructions: self.instructions,
        data: Default::default(),
        comments: Default::default(),
        register_count: self.register_count,
        file_count: self.file_count,
    }
}

/// Returns errors encountered during IR generation step.
///
/// Call `generate` before using this method.
pub fn errors(&self) -> &Vec<SourceError> {
&self.errors
}

/// Writes the text produced by `display_state` to standard output.
pub fn print(&self) {
    print!("{}", self.display_state());
}

/// Renders the generator state as text.
///
/// The output lists the register and file counters, then one line per
/// instruction, then an error section if any errors were recorded. It is
/// meant for human debugging and is used by snapshot tests, so the exact
/// layout matters.
pub fn display_state(&self) -> String {
    let mut out = String::from("==== IR ====\n");
    out += &format!("register_count: {}\n", self.register_count);
    out += &format!("file_count: {}\n", self.file_count);

    // One "<index>: <instruction>" line per emitted instruction.
    out.extend(
        self.instructions
            .iter()
            .enumerate()
            .map(|(idx, instruction)| format!("{}: {:?}\n", idx, instruction)),
    );

    // The error section, header included, is omitted when there is nothing to report.
    if self.errors.is_empty() {
        return out;
    }
    out += "==== IR ERRORS ====\n";
    for error in &self.errors {
        out += &format!(
            "{:?} (NodeId {}): {}\n",
            error.severity, error.node_id.0, error.message
        );
    }
    out
}

// Hands out the next unused register and bumps the register counter.
fn next_register(&mut self) -> RegId {
    let fresh = RegId::new(self.register_count);
    self.register_count += 1;
    fresh
}

// Returns the source text of `node_id` as an owned string.
// If the bytes are not valid UTF-8, records an error against the node and
// returns `None`.
fn span_to_string(&mut self, node_id: NodeId) -> Option<String> {
    std::str::from_utf8(self.compiler.get_span_contents(node_id))
        .map(str::to_string)
        .map_err(|err| {
            self.error(
                format!("failed to convert a node to string: {err}"),
                node_id,
            )
        })
        .ok()
}

// Parses the source text of `node_id` as an `i64`.
//
// Returns `None` when the text cannot be read (the error is recorded by
// `span_to_string`) or when it does not parse as an `i64`, in which case an
// error naming the integer conversion is recorded against the node.
fn span_to_i64(&mut self, node_id: NodeId) -> Option<i64> {
    let text = self.span_to_string(node_id)?;
    match text.parse::<i64>() {
        Ok(val) => Some(val),
        Err(err) => {
            // The message used to say "to string" (copied from `span_to_string`),
            // which misreported which conversion had failed.
            self.error(
                format!("failed to convert a node to i64: {err}"),
                node_id,
            );
            None
        }
    }
}

// Lowers the node `node_id` and returns the register holding its result.
//
// Supported nodes:
// - `Int`: loads the literal into a fresh register.
// - `Block`: lowers each child in order and yields the last child's register;
//   an empty block yields `None` without recording an error.
// - `BinaryOp`: lowers both operands, then the operator; the result is left
//   in the left operand's register.
// Any other node records an error and yields `None`. A `None` from a child
// stops lowering of the enclosing node immediately.
fn generate_node(&mut self, node_id: NodeId) -> Option<RegId> {
    // Copy the shared reference out so AST borrows are independent of `&mut self`.
    let compiler = self.compiler;
    match &compiler.ast_nodes[node_id.0] {
        AstNode::Int => {
            // The register is reserved before parsing, so a failed parse still
            // counts towards `register_count`.
            let dst = self.next_register();
            let value = self.span_to_i64(node_id)?;
            self.instructions.push(Instruction::LoadLiteral {
                dst,
                lit: Literal::Int(value),
            });
            Some(dst)
        }
        AstNode::Block(block_id) => {
            let mut result = None;
            for &child in &compiler.blocks[block_id.0].nodes {
                result = Some(self.generate_node(child)?);
            }
            result
        }
        AstNode::BinaryOp { lhs, op, rhs } => {
            // Operands are emitted before the operator is validated, so their
            // instructions remain even when the operator is rejected.
            let lhs_dst = self.generate_node(*lhs)?;
            let rhs_reg = self.generate_node(*rhs)?;
            let operator = self.node_to_operator(*op)?;
            self.instructions.push(Instruction::BinaryOp {
                lhs_dst,
                op: operator,
                rhs: rhs_reg,
            });
            Some(lhs_dst)
        }
        unsupported => {
            // NOTE(review): "suported" is a typo, but the snapshot tests pin this
            // exact text; fix it together with the snapshots.
            let message = format!("node {:?} not suported yet", unsupported);
            self.error(message, node_id);
            None
        }
    }
}

// Maps an operator AST node to the corresponding IR operator.
// Only `Plus` and `Multiply` are recognized; any other node records an error
// against `node_id` and yields `None`.
fn node_to_operator(&mut self, node_id: NodeId) -> Option<Operator> {
    let math = match self.compiler.get_node(node_id) {
        AstNode::Plus => Math::Plus,
        AstNode::Multiply => Math::Multiply,
        other => {
            let message = format!("unrecognized operator {:?}", other);
            self.error(message, node_id);
            return None;
        }
    };
    Some(Operator::Math(math))
}

// Records an `Error`-severity diagnostic for `node_id`.
fn error(&mut self, message: impl Into<String>, node_id: NodeId) {
    let message = message.into();
    let entry = SourceError {
        message,
        node_id,
        severity: Severity::Error,
    };
    self.errors.push(entry);
}
}
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
pub mod compiler;
pub mod errors;
pub mod ir_generator;
pub mod lexer;
pub mod parser;
pub mod protocol;
Expand Down
7 changes: 7 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::process::exit;

use new_nu_parser::compiler::Compiler;
use new_nu_parser::ir_generator::IrGenerator;
use new_nu_parser::lexer::lex;
use new_nu_parser::parser::Parser;
use new_nu_parser::resolver::Resolver;
Expand Down Expand Up @@ -74,6 +75,12 @@ fn main() {
typechecker.print();
}

let mut ir_generator = IrGenerator::new(&compiler);
ir_generator.generate();
if do_print {
ir_generator.print();
}

compiler.merge_types(typechecker.to_types());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,10 @@ snapshot_kind: text
19: bool
20: bool
21: bool
==== IR ====
register_count: 2
file_count: 0
0: LoadLiteral { dst: RegId(0), lit: Int(1) }
1: LoadLiteral { dst: RegId(1), lit: Int(1) }
==== IR ERRORS ====
Error (NodeId 1): unrecognized operator Equal
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,8 @@ snapshot_kind: text
Error (NodeId 1): type mismatch: unsupported addition between string and float
Error (NodeId 5): type mismatch: unsupported append between string and float
Error (NodeId 9): type mismatch: unsupported logical operation between bool and string
==== IR ====
register_count: 0
file_count: 0
==== IR ERRORS ====
Error (NodeId 0): node String not suported yet
Original file line number Diff line number Diff line change
Expand Up @@ -96,3 +96,9 @@ snapshot_kind: text
41: list<list<float>>
42: list<list<number>>
43: list<list<number>>
==== IR ====
register_count: 1
file_count: 0
0: LoadLiteral { dst: RegId(0), lit: Int(1) }
==== IR ERRORS ====
Error (NodeId 2): node Float not suported yet
5 changes: 5 additions & 0 deletions src/snapshots/[email protected]
Original file line number Diff line number Diff line change
Expand Up @@ -90,3 +90,8 @@ snapshot_kind: text
36: unknown
37: stream<binary>
38: stream<binary>
==== IR ====
register_count: 0
file_count: 0
==== IR ERRORS ====
Error (NodeId 7): node Call { parts: [NodeId(0), NodeId(1), NodeId(2), NodeId(6)] } not suported yet
5 changes: 5 additions & 0 deletions src/snapshots/[email protected]
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,8 @@ snapshot_kind: text
21: closure
22: stream<binary>
23: stream<binary>
==== IR ====
register_count: 0
file_count: 0
==== IR ERRORS ====
Error (NodeId 19): node Let { variable_name: NodeId(0), ty: None, initializer: NodeId(18), is_mutable: false } not suported yet
5 changes: 5 additions & 0 deletions src/snapshots/[email protected]
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,8 @@ snapshot_kind: text
22: list<any>
23: ()
24: ()
==== IR ====
register_count: 0
file_count: 0
==== IR ERRORS ====
Error (NodeId 23): node Def { name: NodeId(0), params: NodeId(17), return_ty: None, block: NodeId(22) } not suported yet
5 changes: 5 additions & 0 deletions src/snapshots/[email protected]
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,8 @@ snapshot_kind: text
29: ()
30: ()
31: ()
==== IR ====
register_count: 0
file_count: 0
==== IR ERRORS ====
Error (NodeId 2): node Let { variable_name: NodeId(0), ty: None, initializer: NodeId(1), is_mutable: true } not suported yet
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,8 @@ snapshot_kind: text
==== TYPE ERRORS ====
Error (NodeId 12): unsupported ast node 'Break' in typechecker
Error (NodeId 19): unsupported ast node 'Continue' in typechecker
==== IR ====
register_count: 0
file_count: 0
==== IR ERRORS ====
Error (NodeId 2): node Let { variable_name: NodeId(0), ty: None, initializer: NodeId(1), is_mutable: true } not suported yet
5 changes: 5 additions & 0 deletions src/snapshots/new_nu_parser__test__node_output@if_.nu.snap
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,8 @@ snapshot_kind: text
17: int
18: int
19: int
==== IR ====
register_count: 0
file_count: 0
==== IR ERRORS ====
Error (NodeId 2): node Let { variable_name: NodeId(0), ty: None, initializer: NodeId(1), is_mutable: false } not suported yet
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,8 @@ snapshot_kind: text
6: error
==== TYPE ERRORS ====
Error (NodeId 0): The condition for if branch is not a boolean
==== IR ====
register_count: 0
file_count: 0
==== IR ERRORS ====
Error (NodeId 5): node If { condition: NodeId(0), then_block: NodeId(2), else_block: Some(NodeId(4)) } not suported yet
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,8 @@ snapshot_kind: text
==== TYPE ERRORS ====
Error (NodeId 7): list must have only one type parameter (to allow selection of types, use oneof<int, string> -- WIP)
Error (NodeId 17): list must have one type parameter
==== IR ====
register_count: 0
file_count: 0
==== IR ERRORS ====
Error (NodeId 13): node Def { name: NodeId(0), params: NodeId(10), return_ty: None, block: NodeId(12) } not suported yet
5 changes: 5 additions & 0 deletions src/snapshots/new_nu_parser__test__node_output@let_.nu.snap
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,8 @@ snapshot_kind: text
2: ()
3: int
4: int
==== IR ====
register_count: 0
file_count: 0
==== IR ERRORS ====
Error (NodeId 2): node Let { variable_name: NodeId(0), ty: None, initializer: NodeId(1), is_mutable: false } not suported yet
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,8 @@ snapshot_kind: text
==== TYPE ERRORS ====
Error (NodeId 13): initializer does not match declared type
Error (NodeId 26): initializer does not match declared type
==== IR ====
register_count: 0
file_count: 0
==== IR ERRORS ====
Error (NodeId 4): node Let { variable_name: NodeId(0), ty: Some(NodeId(2)), initializer: NodeId(3), is_mutable: false } not suported yet
5 changes: 5 additions & 0 deletions src/snapshots/[email protected]
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,8 @@ snapshot_kind: text
18: string
19: list<any>
20: list<any>
==== IR ====
register_count: 0
file_count: 0
==== IR ERRORS ====
Error (NodeId 3): node List([NodeId(0), NodeId(1), NodeId(2)]) not suported yet
5 changes: 5 additions & 0 deletions src/snapshots/[email protected]
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,8 @@ snapshot_kind: text
4: string
5: list<any>
6: list<any>
==== IR ====
register_count: 0
file_count: 0
==== IR ERRORS ====
Error (NodeId 5): node List([NodeId(0), NodeId(1), NodeId(2), NodeId(3), NodeId(4)]) not suported yet
5 changes: 5 additions & 0 deletions src/snapshots/[email protected]
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,8 @@ snapshot_kind: text
16: unknown
==== TYPE ERRORS ====
Error (NodeId 15): unsupported ast node 'Loop { block: NodeId(14) }' in typechecker
==== IR ====
register_count: 0
file_count: 0
==== IR ERRORS ====
Error (NodeId 2): node Let { variable_name: NodeId(0), ty: None, initializer: NodeId(1), is_mutable: true } not suported yet
7 changes: 7 additions & 0 deletions src/snapshots/[email protected]
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,10 @@ snapshot_kind: text
2: int
3: int
4: int
==== IR ====
register_count: 2
file_count: 0
0: LoadLiteral { dst: RegId(0), lit: Int(3) }
1: LoadLiteral { dst: RegId(1), lit: Int(4) }
2: BinaryOp { lhs_dst: RegId(0), op: Math(Plus), rhs: RegId(1) }
3: Return { src: RegId(0) }
Loading

0 comments on commit aec6924

Please sign in to comment.