Skip to content

Commit

Permalink
Optimize parser a little (#18)
Browse files Browse the repository at this point in the history
This PR adds the following:
* Optimizes the parser by caching the next lexed token
* Adds support for [tracy](https://github.com/wolfpld/tracy) (sampling
profiling doesn't work for some reason, only instrumentation)
* Adds a new, more complicated benchmark in four sizes: 1x, 10x, 100x,
and 1000x. (Typechecking doesn't fully work in that benchmark yet, so
its timings are not meaningful.)

Note that while the speedup is significant, the parser is still too
slow. This is likely due to the lexer being too intertwined with the
parser and too much control flow / indirection in the hot loop. The next
PR will try to extract and optimize the lexer, also hopefully
simplifying the parser logic.

Benchmarks:
```
                                                         main      this PR
def_parse                                          [   3.8 us ...   1.4 us ]     -63.30%*
def_resolve                                        [ 925.3 ns ... 919.2 ns ]      -0.66%*
def_typecheck                                      [ 780.7 ns ... 791.9 ns ]      +1.44%
def_resolve_merge                                  [   1.1 us ...   1.1 us ]      +1.18%*
def_typecheck_merge                                [ 831.7 ns ... 808.0 ns ]      -2.85%*
def_compile                                        [   5.2 us ...   2.8 us ]     -46.44%*
def_nu_old                                         [   3.4 us ...   3.4 us ]      +2.43%*
if_parse                                           [   5.0 us ...   1.8 us ]     -63.52%*
if_resolve                                         [ 469.7 ns ... 457.5 ns ]      -2.58%*
if_typecheck                                       [ 404.4 ns ... 414.4 ns ]      +2.47%*
if_resolve_merge                                   [ 580.7 ns ... 594.2 ns ]      +2.31%*
if_typecheck_merge                                 [ 429.4 ns ... 432.4 ns ]      +0.69%*
if_compile                                         [   5.7 us ...   2.4 us ]     -57.44%*
if_nu_old                                          [   5.2 us ...   5.3 us ]      +1.48%*
combined_parse                                     [  25.2 us ...   8.3 us ]     -66.96%*
combined_resolve                                   [   4.4 us ...   4.3 us ]      -2.46%*
combined_typecheck                                 [   3.9 us ...   3.9 us ]      +0.22%
combined_resolve_merge                             [   5.0 us ...   4.8 us ]      -4.29%*
combined_typecheck_merge                           [   4.0 us ...   4.0 us ]      +0.44%
combined_compile                                   [  32.3 us ...  14.8 us ]     -54.11%*
combined_nu_old                                    [   4.2 us ...   4.3 us ]      +1.93%*
combined10_parse                                   [ 250.8 us ...  82.4 us ]     -67.16%*
combined10_resolve                                 [  51.5 us ...  48.6 us ]      -5.68%*
combined10_typecheck                               [  43.2 us ...  43.8 us ]      +1.41%*
combined10_resolve_merge                           [  51.0 us ...  49.9 us ]      -2.00%*
combined10_typecheck_merge                         [  44.4 us ...  43.2 us ]      -2.50%
combined10_compile                                 [ 330.1 us ... 151.7 us ]     -54.04%*
combined10_nu_old                                  [  39.0 us ...  39.8 us ]      +1.96%*
combined100_parse                                  [   2.5 ms ... 748.1 us ]     -69.91%*
combined100_resolve                                [ 480.6 us ... 476.4 us ]      -0.88%
combined100_typecheck                              [ 435.4 us ... 451.7 us ]      +3.73%
combined100_resolve_merge                          [ 506.0 us ... 520.9 us ]      +2.93%
combined100_typecheck_merge                        [ 456.8 us ... 451.6 us ]      -1.15%
combined100_compile                                [   3.2 ms ...   1.4 ms ]     -54.94%*
combined100_nu_old                                 [ 383.4 us ... 392.0 us ]      +2.24%*
combined1000_parse                                 [  23.9 ms ...   6.5 ms ]     -72.61%*
combined1000_resolve                               [   4.9 ms ...   4.7 ms ]      -3.39%
combined1000_typecheck                             [   4.6 ms ...   4.6 ms ]      -0.20%
combined1000_resolve_merge                         [   5.6 ms ...   5.4 ms ]      -3.75%
combined1000_typecheck_merge                       [   5.6 ms ...   5.8 ms ]      +4.97%
combined1000_compile                               [  31.6 ms ...  13.7 ms ]     -56.69%*
combined1000_nu_old                                [   3.9 ms ...   3.9 ms ]      +0.56%
nu_old_empty                                       [ 332.2 ns ... 330.2 ns ]      -0.60%
```
  • Loading branch information
kubouch authored Oct 31, 2024
1 parent 8a6aa37 commit efa0c9e
Show file tree
Hide file tree
Showing 10 changed files with 30,411 additions and 78 deletions.
285 changes: 267 additions & 18 deletions Cargo.lock

Large diffs are not rendered by default.

18 changes: 16 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,20 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
tracy-client = { version = "0.17.3", default-features = false } # for tracy v0.11.1

[profile.profiling]
inherits = "release"
debug = true

[features]
# By default, profiling is disabled. Enable it with the "tracy" feature.
tracy = [
"tracy-client/enable",
"tracy-client/flush-on-exit",
"tracy-client/sampling",
"tracy-client/code-transfer",
]

[lib]
name = "new_nu_parser"
Expand All @@ -14,8 +28,8 @@ path = "src/lib.rs"
[dev-dependencies]
insta = { version = "1.33.0", features = ["glob"] }
tango-bench = "0.6"
nu-parser = "0.98"
nu-protocol = "0.98"
nu-parser = "0.99"
nu-protocol = "0.99"

[[bench]]
name = "benchmarks"
Expand Down
9 changes: 8 additions & 1 deletion benches/benchmarks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,14 @@ use new_nu_parser::resolver::Resolver;
use new_nu_parser::typechecker::Typechecker;

/// Files in benches/nu/ we want to benchmark (without .nu suffix)
const BENCHMARKS: &[&str] = &["def", "if"];
const BENCHMARKS: &[&str] = &[
"def",
"if",
"combined",
"combined10",
"combined100",
"combined1000",
];

enum Stage {
Parse,
Expand Down
26 changes: 26 additions & 0 deletions benches/nu/combined.nu
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
def foo [ x: bool, y: int, z: list<list<int>> ] {
def bar [ y: int ] {
$y * 10 * (($y * 10 + $y * 10) - ($y * 10 * 10))
}

def baz [ y: int ] {
$y * 20 * (($y * 20 + $y * 20) - ($y * 20 * 20))
}

let res = if $x {
bar $y
} else {
baz $y
}

mut out = [ $y ]

for a in $z {
for b in $a {
$out = $out ++ $b
}
}

$out = $out ++ $res
$out
}
269 changes: 269 additions & 0 deletions benches/nu/combined10.nu
Original file line number Diff line number Diff line change
@@ -0,0 +1,269 @@
def foo1 [ x: bool, y: int, z: list<list<int>> ] {
def bar [ y: int ] {
$y * 10 * (($y * 10 + $y * 10) - ($y * 10 * 10))
}

def baz [ y: int ] {
$y * 20 * (($y * 20 + $y * 20) - ($y * 20 * 20))
}

let res = if $x {
bar $y
} else {
baz $y
}

mut out = [ $y ]

for a in $z {
for b in $a {
$out = $out ++ $b
}
}

$out = $out ++ $res
$out
}

def foo2 [ x: bool, y: int, z: list<list<int>> ] {
def bar [ y: int ] {
$y * 10 * (($y * 10 + $y * 10) - ($y * 10 * 10))
}

def baz [ y: int ] {
$y * 20 * (($y * 20 + $y * 20) - ($y * 20 * 20))
}

let res = if $x {
bar $y
} else {
baz $y
}

mut out = [ $y ]

for a in $z {
for b in $a {
$out = $out ++ $b
}
}

$out = $out ++ $res
$out
}

def foo3 [ x: bool, y: int, z: list<list<int>> ] {
def bar [ y: int ] {
$y * 10 * (($y * 10 + $y * 10) - ($y * 10 * 10))
}

def baz [ y: int ] {
$y * 20 * (($y * 20 + $y * 20) - ($y * 20 * 20))
}

let res = if $x {
bar $y
} else {
baz $y
}

mut out = [ $y ]

for a in $z {
for b in $a {
$out = $out ++ $b
}
}

$out = $out ++ $res
$out
}

def foo4 [ x: bool, y: int, z: list<list<int>> ] {
def bar [ y: int ] {
$y * 10 * (($y * 10 + $y * 10) - ($y * 10 * 10))
}

def baz [ y: int ] {
$y * 20 * (($y * 20 + $y * 20) - ($y * 20 * 20))
}

let res = if $x {
bar $y
} else {
baz $y
}

mut out = [ $y ]

for a in $z {
for b in $a {
$out = $out ++ $b
}
}

$out = $out ++ $res
$out
}

def foo5 [ x: bool, y: int, z: list<list<int>> ] {
def bar [ y: int ] {
$y * 10 * (($y * 10 + $y * 10) - ($y * 10 * 10))
}

def baz [ y: int ] {
$y * 20 * (($y * 20 + $y * 20) - ($y * 20 * 20))
}

let res = if $x {
bar $y
} else {
baz $y
}

mut out = [ $y ]

for a in $z {
for b in $a {
$out = $out ++ $b
}
}

$out = $out ++ $res
$out
}

def foo6 [ x: bool, y: int, z: list<list<int>> ] {
def bar [ y: int ] {
$y * 10 * (($y * 10 + $y * 10) - ($y * 10 * 10))
}

def baz [ y: int ] {
$y * 20 * (($y * 20 + $y * 20) - ($y * 20 * 20))
}

let res = if $x {
bar $y
} else {
baz $y
}

mut out = [ $y ]

for a in $z {
for b in $a {
$out = $out ++ $b
}
}

$out = $out ++ $res
$out
}

def foo7 [ x: bool, y: int, z: list<list<int>> ] {
def bar [ y: int ] {
$y * 10 * (($y * 10 + $y * 10) - ($y * 10 * 10))
}

def baz [ y: int ] {
$y * 20 * (($y * 20 + $y * 20) - ($y * 20 * 20))
}

let res = if $x {
bar $y
} else {
baz $y
}

mut out = [ $y ]

for a in $z {
for b in $a {
$out = $out ++ $b
}
}

$out = $out ++ $res
$out
}

def foo8 [ x: bool, y: int, z: list<list<int>> ] {
def bar [ y: int ] {
$y * 10 * (($y * 10 + $y * 10) - ($y * 10 * 10))
}

def baz [ y: int ] {
$y * 20 * (($y * 20 + $y * 20) - ($y * 20 * 20))
}

let res = if $x {
bar $y
} else {
baz $y
}

mut out = [ $y ]

for a in $z {
for b in $a {
$out = $out ++ $b
}
}

$out = $out ++ $res
$out
}

def foo9 [ x: bool, y: int, z: list<list<int>> ] {
def bar [ y: int ] {
$y * 10 * (($y * 10 + $y * 10) - ($y * 10 * 10))
}

def baz [ y: int ] {
$y * 20 * (($y * 20 + $y * 20) - ($y * 20 * 20))
}

let res = if $x {
bar $y
} else {
baz $y
}

mut out = [ $y ]

for a in $z {
for b in $a {
$out = $out ++ $b
}
}

$out = $out ++ $res
$out
}

def foo10 [ x: bool, y: int, z: list<list<int>> ] {
def bar [ y: int ] {
$y * 10 * (($y * 10 + $y * 10) - ($y * 10 * 10))
}

def baz [ y: int ] {
$y * 20 * (($y * 20 + $y * 20) - ($y * 20 * 20))
}

let res = if $x {
bar $y
} else {
baz $y
}

mut out = [ $y ]

for a in $z {
for b in $a {
$out = $out ++ $b
}
}

$out = $out ++ $res
$out
}
Loading

0 comments on commit efa0c9e

Please sign in to comment.