Optimize parser a little (#18)

This PR adds the following:

* Optimizes the parser by caching the next lexed token
* Adds support for [tracy](https://github.com/wolfpld/tracy) (sampling profiling doesn't work for some reason, only instrumentation)
* Adds a new, more complicated benchmark in four sizes: 1x, 10x, 100x, 1000x. (Typechecking doesn't work 100% in that benchmark yet, so its timing isn't meaningful.)

Note that while the speedup is significant, the parser is still too slow. This is likely due to the lexer being too intertwined with the parser and too much control flow / indirection in the hot loop. The next PR will try to extract and optimize the lexer, hopefully also simplifying the parser logic.

Benchmarks:

```
                               main          this PR
def_parse                    [   3.8 us ...   1.4 us ]  -63.30%*
def_resolve                  [ 925.3 ns ... 919.2 ns ]   -0.66%*
def_typecheck                [ 780.7 ns ... 791.9 ns ]   +1.44%
def_resolve_merge            [   1.1 us ...   1.1 us ]   +1.18%*
def_typecheck_merge          [ 831.7 ns ... 808.0 ns ]   -2.85%*
def_compile                  [   5.2 us ...   2.8 us ]  -46.44%*
def_nu_old                   [   3.4 us ...   3.4 us ]   +2.43%*
if_parse                     [   5.0 us ...   1.8 us ]  -63.52%*
if_resolve                   [ 469.7 ns ... 457.5 ns ]   -2.58%*
if_typecheck                 [ 404.4 ns ... 414.4 ns ]   +2.47%*
if_resolve_merge             [ 580.7 ns ... 594.2 ns ]   +2.31%*
if_typecheck_merge           [ 429.4 ns ... 432.4 ns ]   +0.69%*
if_compile                   [   5.7 us ...   2.4 us ]  -57.44%*
if_nu_old                    [   5.2 us ...   5.3 us ]   +1.48%*
combined_parse               [  25.2 us ...   8.3 us ]  -66.96%*
combined_resolve             [   4.4 us ...   4.3 us ]   -2.46%*
combined_typecheck           [   3.9 us ...   3.9 us ]   +0.22%
combined_resolve_merge       [   5.0 us ...   4.8 us ]   -4.29%*
combined_typecheck_merge     [   4.0 us ...   4.0 us ]   +0.44%
combined_compile             [  32.3 us ...  14.8 us ]  -54.11%*
combined_nu_old              [   4.2 us ...   4.3 us ]   +1.93%*
combined10_parse             [ 250.8 us ...  82.4 us ]  -67.16%*
combined10_resolve           [  51.5 us ...  48.6 us ]   -5.68%*
combined10_typecheck         [  43.2 us ...  43.8 us ]   +1.41%*
combined10_resolve_merge     [  51.0 us ...  49.9 us ]   -2.00%*
combined10_typecheck_merge   [  44.4 us ...  43.2 us ]   -2.50%
combined10_compile           [ 330.1 us ... 151.7 us ]  -54.04%*
combined10_nu_old            [  39.0 us ...  39.8 us ]   +1.96%*
combined100_parse            [   2.5 ms ... 748.1 us ]  -69.91%*
combined100_resolve          [ 480.6 us ... 476.4 us ]   -0.88%
combined100_typecheck        [ 435.4 us ... 451.7 us ]   +3.73%
combined100_resolve_merge    [ 506.0 us ... 520.9 us ]   +2.93%
combined100_typecheck_merge  [ 456.8 us ... 451.6 us ]   -1.15%
combined100_compile          [   3.2 ms ...   1.4 ms ]  -54.94%*
combined100_nu_old           [ 383.4 us ... 392.0 us ]   +2.24%*
combined1000_parse           [  23.9 ms ...   6.5 ms ]  -72.61%*
combined1000_resolve         [   4.9 ms ...   4.7 ms ]   -3.39%
combined1000_typecheck       [   4.6 ms ...   4.6 ms ]   -0.20%
combined1000_resolve_merge   [   5.6 ms ...   5.4 ms ]   -3.75%
combined1000_typecheck_merge [   5.6 ms ...   5.8 ms ]   +4.97%
combined1000_compile         [  31.6 ms ...  13.7 ms ]  -56.69%*
combined1000_nu_old          [   3.9 ms ...   3.9 ms ]   +0.56%
nu_old_empty                 [ 332.2 ns ... 330.2 ns ]   -0.60%
```
nushell · Oct 31, 2024 · efa0c9e · efa0c9e
1 parent 8a6aa37
commit efa0c9e
Show file tree

Hide file tree

Showing 10 changed files with 30,411 additions and 78 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -6,6 +6,20 @@ edition = "2021"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
+tracy-client = { version = "0.17.3", default-features = false } # for tracy v0.11.1
+
+[profile.profiling]
+inherits = "release"
+debug = true
+
+[features]
+# By default, profiling is disabled. Enable it with the "tracy" feature.
+tracy = [
+    "tracy-client/enable",
+    "tracy-client/flush-on-exit",
+    "tracy-client/sampling",
+    "tracy-client/code-transfer",
+]
 
 [lib]
 name = "new_nu_parser"
@@ -14,8 +28,8 @@ path = "src/lib.rs"
 [dev-dependencies]
 insta = { version = "1.33.0", features = ["glob"] }
 tango-bench = "0.6"
-nu-parser = "0.98"
-nu-protocol = "0.98"
+nu-parser = "0.99"
+nu-protocol = "0.99"
 
 [[bench]]
 name = "benchmarks"

diff --git a/benches/benchmarks.rs b/benches/benchmarks.rs
@@ -9,7 +9,14 @@ use new_nu_parser::resolver::Resolver;
 use new_nu_parser::typechecker::Typechecker;
 
 /// Files in benches/nu/ we want to benchmark (without .nu suffix)
-const BENCHMARKS: &[&str] = &["def", "if"];
+const BENCHMARKS: &[&str] = &[
+    "def",
+    "if",
+    "combined",
+    "combined10",
+    "combined100",
+    "combined1000",
+];
 
 enum Stage {
     Parse,

diff --git a/benches/nu/combined.nu b/benches/nu/combined.nu
@@ -0,0 +1,26 @@
+def foo [ x: bool, y: int, z: list<list<int>> ] {
+    def bar [ y: int ] {
+        $y * 10 * (($y * 10 + $y * 10) - ($y * 10 * 10))
+    }
+
+    def baz [ y: int ] {
+        $y * 20 * (($y * 20 + $y * 20) - ($y * 20 * 20))
+    }
+
+    let res = if $x {
+        bar $y
+    } else {
+        baz $y
+    }
+
+    mut out = [ $y ]
+
+    for a in $z {
+        for b in $a {
+            $out = $out ++ $b
+        }
+    }
+
+    $out = $out ++ $res
+    $out
+}
diff --git a/benches/nu/combined10.nu b/benches/nu/combined10.nu
@@ -0,0 +1,269 @@
+def foo1 [ x: bool, y: int, z: list<list<int>> ] {
+    def bar [ y: int ] {
+        $y * 10 * (($y * 10 + $y * 10) - ($y * 10 * 10))
+    }
+
+    def baz [ y: int ] {
+        $y * 20 * (($y * 20 + $y * 20) - ($y * 20 * 20))
+    }
+
+    let res = if $x {
+        bar $y
+    } else {
+        baz $y
+    }
+
+    mut out = [ $y ]
+
+    for a in $z {
+        for b in $a {
+            $out = $out ++ $b
+        }
+    }
+
+    $out = $out ++ $res
+    $out
+}
+
+def foo2 [ x: bool, y: int, z: list<list<int>> ] {
+    def bar [ y: int ] {
+        $y * 10 * (($y * 10 + $y * 10) - ($y * 10 * 10))
+    }
+
+    def baz [ y: int ] {
+        $y * 20 * (($y * 20 + $y * 20) - ($y * 20 * 20))
+    }
+
+    let res = if $x {
+        bar $y
+    } else {
+        baz $y
+    }
+
+    mut out = [ $y ]
+
+    for a in $z {
+        for b in $a {
+            $out = $out ++ $b
+        }
+    }
+
+    $out = $out ++ $res
+    $out
+}
+
+def foo3 [ x: bool, y: int, z: list<list<int>> ] {
+    def bar [ y: int ] {
+        $y * 10 * (($y * 10 + $y * 10) - ($y * 10 * 10))
+    }
+
+    def baz [ y: int ] {
+        $y * 20 * (($y * 20 + $y * 20) - ($y * 20 * 20))
+    }
+
+    let res = if $x {
+        bar $y
+    } else {
+        baz $y
+    }
+
+    mut out = [ $y ]
+
+    for a in $z {
+        for b in $a {
+            $out = $out ++ $b
+        }
+    }
+
+    $out = $out ++ $res
+    $out
+}
+
+def foo4 [ x: bool, y: int, z: list<list<int>> ] {
+    def bar [ y: int ] {
+        $y * 10 * (($y * 10 + $y * 10) - ($y * 10 * 10))
+    }
+
+    def baz [ y: int ] {
+        $y * 20 * (($y * 20 + $y * 20) - ($y * 20 * 20))
+    }
+
+    let res = if $x {
+        bar $y
+    } else {
+        baz $y
+    }
+
+    mut out = [ $y ]
+
+    for a in $z {
+        for b in $a {
+            $out = $out ++ $b
+        }
+    }
+
+    $out = $out ++ $res
+    $out
+}
+
+def foo5 [ x: bool, y: int, z: list<list<int>> ] {
+    def bar [ y: int ] {
+        $y * 10 * (($y * 10 + $y * 10) - ($y * 10 * 10))
+    }
+
+    def baz [ y: int ] {
+        $y * 20 * (($y * 20 + $y * 20) - ($y * 20 * 20))
+    }
+
+    let res = if $x {
+        bar $y
+    } else {
+        baz $y
+    }
+
+    mut out = [ $y ]
+
+    for a in $z {
+        for b in $a {
+            $out = $out ++ $b
+        }
+    }
+
+    $out = $out ++ $res
+    $out
+}
+
+def foo6 [ x: bool, y: int, z: list<list<int>> ] {
+    def bar [ y: int ] {
+        $y * 10 * (($y * 10 + $y * 10) - ($y * 10 * 10))
+    }
+
+    def baz [ y: int ] {
+        $y * 20 * (($y * 20 + $y * 20) - ($y * 20 * 20))
+    }
+
+    let res = if $x {
+        bar $y
+    } else {
+        baz $y
+    }
+
+    mut out = [ $y ]
+
+    for a in $z {
+        for b in $a {
+            $out = $out ++ $b
+        }
+    }
+
+    $out = $out ++ $res
+    $out
+}
+
+def foo7 [ x: bool, y: int, z: list<list<int>> ] {
+    def bar [ y: int ] {
+        $y * 10 * (($y * 10 + $y * 10) - ($y * 10 * 10))
+    }
+
+    def baz [ y: int ] {
+        $y * 20 * (($y * 20 + $y * 20) - ($y * 20 * 20))
+    }
+
+    let res = if $x {
+        bar $y
+    } else {
+        baz $y
+    }
+
+    mut out = [ $y ]
+
+    for a in $z {
+        for b in $a {
+            $out = $out ++ $b
+        }
+    }
+
+    $out = $out ++ $res
+    $out
+}
+
+def foo8 [ x: bool, y: int, z: list<list<int>> ] {
+    def bar [ y: int ] {
+        $y * 10 * (($y * 10 + $y * 10) - ($y * 10 * 10))
+    }
+
+    def baz [ y: int ] {
+        $y * 20 * (($y * 20 + $y * 20) - ($y * 20 * 20))
+    }
+
+    let res = if $x {
+        bar $y
+    } else {
+        baz $y
+    }
+
+    mut out = [ $y ]
+
+    for a in $z {
+        for b in $a {
+            $out = $out ++ $b
+        }
+    }
+
+    $out = $out ++ $res
+    $out
+}
+
+def foo9 [ x: bool, y: int, z: list<list<int>> ] {
+    def bar [ y: int ] {
+        $y * 10 * (($y * 10 + $y * 10) - ($y * 10 * 10))
+    }
+
+    def baz [ y: int ] {
+        $y * 20 * (($y * 20 + $y * 20) - ($y * 20 * 20))
+    }
+
+    let res = if $x {
+        bar $y
+    } else {
+        baz $y
+    }
+
+    mut out = [ $y ]
+
+    for a in $z {
+        for b in $a {
+            $out = $out ++ $b
+        }
+    }
+
+    $out = $out ++ $res
+    $out
+}
+
+def foo10 [ x: bool, y: int, z: list<list<int>> ] {
+    def bar [ y: int ] {
+        $y * 10 * (($y * 10 + $y * 10) - ($y * 10 * 10))
+    }
+
+    def baz [ y: int ] {
+        $y * 20 * (($y * 20 + $y * 20) - ($y * 20 * 20))
+    }
+
+    let res = if $x {
+        bar $y
+    } else {
+        baz $y
+    }
+
+    mut out = [ $y ]
+
+    for a in $z {
+        for b in $a {
+            $out = $out ++ $b
+        }
+    }
+
+    $out = $out ++ $res
+    $out
+}