From eea18ba56da028363c947f6c4d2ae09178baa43a Mon Sep 17 00:00:00 2001 From: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com> Date: Sat, 27 Apr 2024 13:50:26 +0200 Subject: [PATCH] Cache file names in fill_todo Background: While working with cargo, I've noticed that it takes ~30s to cargo clean -p with large enough target directory (~200GB). With a profiler, it turned out that most of the time was spent retrieving paths for removal in https://github.com/rust-lang/cargo/blob/eee4ea2f5a5fa1ae184a44675315548ec932a15c/src/cargo/ops/cargo_clean.rs#L319 (and not actually removing the files). Change description: In call to .sort_by, we repetitively parse the paths to obtain file names for comparison. This commit caches file names in PathWrapper object, akin to #135 that did so for dir info. For my use case, a cargo build using that branch takes ~14s to clean files instead of previous 30s (I've measured against main branch of this directory, to account for changes made since 0.3.1). Still not ideal, but hey, we're shaving 50% of time off for a bit heavier memory use. --- src/lib.rs | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 8428a9a..a21e7c6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -72,6 +72,7 @@ doctest!("../README.md"); use std::cmp; use std::cmp::Ordering; use std::error::Error; +use std::ffi::OsString; use std::fmt; use std::fs; use std::fs::DirEntry; @@ -329,10 +330,11 @@ impl fmt::Display for GlobError { struct PathWrapper { path: PathBuf, is_directory: bool, + file_name: Option, } impl PathWrapper { - fn from_dir_entry(path: PathBuf, e: DirEntry) -> Self { + fn from_dir_entry(path: PathBuf, file_name: OsString, e: DirEntry) -> Self { let is_directory = e .file_type() .ok() @@ -347,11 +349,20 @@ impl PathWrapper { }) .or_else(|| fs::metadata(&path).map(|m| m.is_dir()).ok()) .unwrap_or(false); - Self { path, is_directory } + Self { + path, + is_directory, + file_name: Some(file_name), + } } fn from_path(path: PathBuf) -> Self { let is_directory = fs::metadata(&path).map(|m| m.is_dir()).unwrap_or(false); - Self { path, is_directory } + let file_name = path.file_name().map(ToOwned::to_owned); + Self { + path, + is_directory, + file_name, + } } fn into_path(self) -> PathBuf { @@ -930,12 +941,16 @@ fn fill_todo( let dirs = fs::read_dir(path).and_then(|d| { d.map(|e| { e.map(|e| { - let path = if curdir { - PathBuf::from(e.path().file_name().unwrap()) + let (path, file_name) = if curdir { + let path = e.path(); + let file_name = path.file_name().unwrap(); + (PathBuf::from(file_name), file_name.to_owned()) } else { - e.path() + let path = e.path(); + let file_name = path.file_name().unwrap().to_owned(); + (path, file_name) }; - PathWrapper::from_dir_entry(path, e) + PathWrapper::from_dir_entry(path, file_name, e) }) }) .collect::, _>>() @@ -946,7 +961,7 @@ fn fill_todo( children .retain(|x| !x.file_name().unwrap().to_str().unwrap().starts_with('.')); } - children.sort_by(|p1, p2| p2.file_name().cmp(&p1.file_name())); + children.sort_by(|p1, p2| p2.file_name.cmp(&p1.file_name)); todo.extend(children.into_iter().map(|x| Ok((x, idx)))); // Matching the special directory entries . and .. that