evmar · evmar · Jan 25, 2024 · Jan 22, 2024 · Jan 22, 2024 · Jan 22, 2024
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -18,6 +18,7 @@ description = "a ninja compatible build system"
 anyhow = "1.0"
 argh = "0.1.10"
 libc = "0.2"
+rustc-hash = "1.1.0"
 
 [target.'cfg(windows)'.dependencies.windows-sys]
 version = "0.48"

diff --git a/benches/parse.rs b/benches/parse.rs
@@ -1,5 +1,4 @@
 use criterion::{criterion_group, criterion_main, Criterion};
-use n2::parse::Loader;
 use std::{io::Write, path::PathBuf, str::FromStr};
 
 pub fn bench_canon(c: &mut Criterion) {

diff --git a/src/eval.rs b/src/eval.rs
@@ -1,9 +1,11 @@
 //! Represents parsed Ninja strings with embedded variable references, e.g.
 //! `c++ $in -o $out`, and mechanisms for expanding those into plain strings.
 
+use rustc_hash::FxHashMap;
+
 use crate::smallmap::SmallMap;
 use std::borrow::Borrow;
-use std::{borrow::Cow, collections::HashMap};
+use std::borrow::Cow;
 
 /// An environment providing a mapping of variable name to variable value.
 /// This represents one "frame" of evaluation context, a given EvalString may
@@ -46,13 +48,31 @@ impl<T: AsRef<str>> EvalString<T> {
         }
     }
 
+    fn calc_evaluated_length(&self, envs: &[&dyn Env]) -> usize {
+        self.0
+            .iter()
+            .map(|part| match part {
+                EvalPart::Literal(s) => s.as_ref().len(),
+                EvalPart::VarRef(v) => {
+                    for (i, env) in envs.iter().enumerate() {
+                        if let Some(v) = env.get_var(v.as_ref()) {
+                            return v.calc_evaluated_length(&envs[i + 1..]);
+                        }
+                    }
+                    0
+                }
+            })
+            .sum()
+    }
+
     /// evalulate turns the EvalString into a regular String, looking up the
     /// values of variable references in the provided Envs. It will look up
     /// its variables in the earliest Env that has them, and then those lookups
     /// will be recursively expanded starting from the env after the one that
     /// had the first successful lookup.
     pub fn evaluate(&self, envs: &[&dyn Env]) -> String {
         let mut result = String::new();
+        result.reserve(self.calc_evaluated_length(envs));
         self.evaluate_inner(&mut result, envs);
         result
     }
@@ -102,7 +122,7 @@ impl EvalString<&str> {
 
 /// A single scope's worth of variable definitions.
 #[derive(Debug, Default)]
-pub struct Vars<'text>(HashMap<&'text str, String>);
+pub struct Vars<'text>(FxHashMap<&'text str, String>);
 
 impl<'text> Vars<'text> {
     pub fn insert(&mut self, key: &'text str, val: String) {

diff --git a/src/graph.rs b/src/graph.rs
@@ -1,10 +1,12 @@
 //! The build graph, a graph between files and commands.
 
+use rustc_hash::FxHashMap;
+
 use crate::{
     densemap::{self, DenseMap},
     hash::BuildHash,
 };
-use std::collections::HashMap;
+use std::collections::{hash_map::Entry, HashMap};
 use std::path::{Path, PathBuf};
 use std::time::SystemTime;
 
@@ -258,7 +260,7 @@ pub struct Graph {
 #[derive(Default)]
 pub struct GraphFiles {
     pub by_id: DenseMap<FileId, File>,
-    by_name: HashMap<String, FileId>,
+    by_name: FxHashMap<String, FileId>,
 }
 
 impl Graph {
@@ -310,21 +312,26 @@ impl GraphFiles {
     }
 
     /// Look up a file by its name, adding it if not already present.
-    /// Name must have been canonicalized already.
-    /// This function has a funny API to avoid copies; pass a String if you have
-    /// one already, otherwise a &str is acceptable.
-    pub fn id_from_canonical<S: AsRef<str> + Into<String>>(&mut self, file: S) -> FileId {
-        self.lookup(file.as_ref()).unwrap_or_else(|| {
-            // TODO: so many string copies :<
-            let file = file.into();
-            let id = self.by_id.push(File {
-                name: file.clone(),
-                input: None,
-                dependents: Vec::new(),
-            });
-            self.by_name.insert(file, id);
-            id
-        })
+    /// Name must have been canonicalized already. Only accepting an owned
+    /// string allows us to avoid a string copy and a hashmap lookup when we
+    /// need to create a new id, but would also be possible to create a version
+    /// of this function that accepts string references that is more optimized
+    /// for the case where the entry already exists. But so far, all of our
+    /// usages of this function have an owned string easily accessible anyways.
+    pub fn id_from_canonical(&mut self, file: String) -> FileId {
+        // TODO: so many string copies :<
+        match self.by_name.entry(file) {
+            Entry::Occupied(o) => *o.get(),
+            Entry::Vacant(v) => {
+                let id = self.by_id.push(File {
+                    name: v.key().clone(),
+                    input: None,
+                    dependents: Vec::new(),
+                });
+                v.insert(id);
+                id
+            }
+        }
     }
 
     pub fn all_ids(&self) -> impl Iterator<Item = FileId> {

diff --git a/src/load.rs b/src/load.rs
@@ -56,17 +56,6 @@ pub struct Loader {
     builddir: Option<String>,
 }
 
-impl parse::Loader for Loader {
-    type Path = FileId;
-    fn path(&mut self, path: &mut str) -> Self::Path {
-        // Perf: this is called while parsing build.ninja files.  We go to
-        // some effort to avoid allocating in the common case of a path that
-        // refers to a file that is already known.
-        let len = canon_path_fast(path);
-        self.graph.files.id_from_canonical(&path[..len])
-    }
-}
-
 impl Loader {
     pub fn new() -> Self {
         let mut loader = Loader::default();
@@ -76,10 +65,19 @@ impl Loader {
         loader
     }
 
+    /// Convert a path string to a FileId.  For performance reasons
+    /// this requires an owned 'path' param.
+    fn path(&mut self, mut path: String) -> FileId {
+        // Perf: this is called while parsing build.ninja files.  We go to
+        // some effort to avoid allocating in the common case of a path that
+        // refers to a file that is already known.
+        let len = canon_path_fast(&mut path);
+        path.truncate(len);
+        self.graph.files.id_from_canonical(path)
+    }
+
     fn evaluate_path(&mut self, path: EvalString<&str>, envs: &[&dyn eval::Env]) -> FileId {
-        use parse::Loader;
-        let mut evaluated = path.evaluate(envs);
-        self.path(&mut evaluated)
+        self.path(path.evaluate(envs))
     }
 
     fn evaluate_paths(

diff --git a/src/parse.rs b/src/parse.rs
@@ -57,18 +57,6 @@ pub struct Parser<'text> {
     eval_buf: Vec<EvalPart<&'text str>>,
 }
 
-/// Loader maps path strings (as found in build.ninja files) into an arbitrary
-/// "Path" type.  This allows us to canonicalize and convert path strings to
-/// more efficient integer identifiers while we parse, rather than needing to
-/// buffer up many intermediate strings; in fact, parsing uses a single buffer
-/// for all of these.
-pub trait Loader {
-    type Path;
-    /// Convert a path string to a Self::Path type.  For performance reasons
-    /// this may mutate the 'path' param.
-    fn path(&mut self, path: &mut str) -> Self::Path;
-}
-
 impl<'text> Parser<'text> {
     pub fn new(buf: &'text [u8]) -> Parser<'text> {
         Parser {

diff --git a/src/work.rs b/src/work.rs
@@ -788,7 +788,7 @@ build b: phony c
 build c: phony a
 ";
         let mut graph = crate::load::parse("build.ninja", file.as_bytes().to_vec())?;
-        let a_id = graph.files.id_from_canonical("a");
+        let a_id = graph.files.id_from_canonical("a".to_owned());
         let mut states = BuildStates::new(graph.builds.next_id(), SmallMap::default());
         let mut stack = Vec::new();
         match states.want_file(&graph, &mut stack, a_id) {