Skip to content

Commit

Permalink
Rust: extract files on a per-project basis
Browse files Browse the repository at this point in the history
This way we have only one "project" database in memory at a time. This
should avoid running out of memory when analyzing large mono-repos.
  • Loading branch information
aibaars committed Oct 8, 2024
1 parent ecf7861 commit a86d676
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 63 deletions.
70 changes: 59 additions & 11 deletions rust/extractor/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
use std::{
collections::HashMap,
path::{Path, PathBuf},
};

use anyhow::Context;
use archive::Archiver;
use ra_ap_ide_db::line_index::{LineCol, LineIndex};
use ra_ap_project_model::ProjectManifest;
use rust_analyzer::RustAnalyzer;
mod archive;
mod config;
pub mod generated;
Expand All @@ -9,10 +17,13 @@ pub mod trap;

fn extract(
rust_analyzer: &mut rust_analyzer::RustAnalyzer,
archiver: &Archiver,
traps: &trap::TrapFileProvider,
file: std::path::PathBuf,
) -> anyhow::Result<()> {
let (ast, input, parse_errors, file_id, semi) = rust_analyzer.parse(&file);
file: &std::path::Path,
) -> () {
archiver.archive(&file);

let (ast, input, parse_errors, file_id, semi) = rust_analyzer.parse(file);
let line_index = LineIndex::new(input.as_ref());
let display_path = file.to_string_lossy();
let mut trap = traps.create("source", &file);
Expand Down Expand Up @@ -40,26 +51,63 @@ fn extract(
);
}
translator.emit_source_file(ast);
translator.trap.commit()?;
Ok(())
translator.trap.commit().unwrap_or_else(|err| {
log::error!(
"Failed to write trap file for: {}: {}",
display_path,
err.to_string()
)
});
}
fn main() -> anyhow::Result<()> {
let cfg = config::Config::extract().context("failed to load configuration")?;
stderrlog::new()
.module(module_path!())
.verbosity(2 + cfg.verbose as usize)
.init()?;
let mut rust_analyzer = rust_analyzer::RustAnalyzer::new(&cfg)?;

let traps = trap::TrapFileProvider::new(&cfg).context("failed to set up trap files")?;
let archiver = archive::Archiver {
root: cfg.source_archive_dir,
};
for file in cfg.inputs {
let file = std::path::absolute(&file).unwrap_or(file);
let file = std::fs::canonicalize(&file).unwrap_or(file);
archiver.archive(&file);
extract(&mut rust_analyzer, &traps, file)?;
let files: Vec<PathBuf> = cfg
.inputs
.iter()
.map(|file| {
let file = std::path::absolute(&file).unwrap_or(file.to_path_buf());
std::fs::canonicalize(&file).unwrap_or(file)
})
.collect();
let manifests = rust_analyzer::find_project_manifests(&files)?;
let mut map: HashMap<&Path, (&ProjectManifest, Vec<&Path>)> = manifests
.iter()
.map(|x| (x.manifest_path().parent().as_ref(), (x, Vec::new())))
.collect();
let mut other_files = Vec::new();

'outer: for file in &files {
let mut p = file.as_path();
while let Some(parent) = p.parent() {
p = parent;
if let Some((_, files)) = map.get_mut(parent) {
files.push(file);
continue 'outer;
}
}
other_files.push(file);
}
// Extract the files of each discovered project using an analyzer loaded for
// that project only. `rust_analyzer` is a loop-body local, so it is dropped at
// the end of every iteration before the next project's analyzer is created.
for (manifest, files) in map.values() {
    // A manifest with no associated input files has nothing to extract, so
    // skip it without loading its workspace. This must be `continue`, not
    // `break`: `HashMap` iteration order is arbitrary, and breaking here would
    // silently drop every project that happens to be visited afterwards.
    if files.is_empty() {
        continue;
    }
    let mut rust_analyzer = RustAnalyzer::new(manifest, &cfg.scratch_dir);
    for file in files {
        extract(&mut rust_analyzer, &archiver, &traps, file);
    }
}
let mut rust_analyzer = RustAnalyzer::WithoutDatabase();
for file in other_files {
extract(&mut rust_analyzer, &archiver, &traps, file);
}

Ok(())
Expand Down
89 changes: 37 additions & 52 deletions rust/extractor/src/rust_analyzer.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
use crate::config::Config;
use anyhow::Context;
use itertools::Itertools;
use log::info;
use ra_ap_base_db::SourceDatabase;
Expand All @@ -9,6 +7,7 @@ use ra_ap_ide_db::RootDatabase;
use ra_ap_load_cargo::{load_workspace_at, LoadCargoConfig, ProcMacroServerChoice};
use ra_ap_paths::Utf8PathBuf;
use ra_ap_project_model::CargoConfig;
use ra_ap_project_model::ProjectManifest;
use ra_ap_project_model::RustLibSource;
use ra_ap_span::Edition;
use ra_ap_span::EditionedFileId;
Expand All @@ -20,19 +19,18 @@ use ra_ap_vfs::AbsPathBuf;
use ra_ap_vfs::Vfs;
use ra_ap_vfs::VfsPath;
use std::borrow::Cow;
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use triomphe::Arc;
pub struct RustAnalyzer {
workspace: HashMap<PathBuf, (Vfs, RootDatabase)>,
pub enum RustAnalyzer {
WithDatabase { db: RootDatabase, vfs: Vfs },
WithoutDatabase(),
}

impl RustAnalyzer {
pub fn new(cfg: &Config) -> anyhow::Result<RustAnalyzer> {
let mut workspace = HashMap::new();
pub fn new(project: &ProjectManifest, scratch_dir: &Path) -> Self {
let config = CargoConfig {
sysroot: Some(RustLibSource::Discover),
target_dir: ra_ap_paths::Utf8PathBuf::from_path_buf(cfg.scratch_dir.to_path_buf())
target_dir: ra_ap_paths::Utf8PathBuf::from_path_buf(scratch_dir.to_path_buf())
.map(|x| x.join("target"))
.ok(),
..Default::default()
Expand All @@ -43,25 +41,19 @@ impl RustAnalyzer {
with_proc_macro_server: ProcMacroServerChoice::Sysroot,
prefill_caches: false,
};
let projects = find_project_manifests(&cfg.inputs).context("loading inputs")?;
for project in projects {
let manifest = project.manifest_path();
let manifest = project.manifest_path();

match load_workspace_at(manifest.as_ref(), &config, &load_config, &progress) {
Ok((db, vfs, _macro_server)) => {
let path: &Path = manifest.parent().as_ref();
workspace.insert(path.to_path_buf(), (vfs, db));
}
Err(err) => {
log::error!("failed to load workspace for {}: {}", manifest, err);
}
match load_workspace_at(manifest.as_ref(), &config, &load_config, &progress) {
Ok((db, vfs, _macro_server)) => RustAnalyzer::WithDatabase { db, vfs },
Err(err) => {
log::error!("failed to load workspace for {}: {}", manifest, err);
RustAnalyzer::WithoutDatabase()
}
}
Ok(RustAnalyzer { workspace })
}
pub fn parse(
&mut self,
path: &PathBuf,
path: &Path,
) -> (
SourceFile,
Arc<str>,
Expand All @@ -82,37 +74,30 @@ impl RustAnalyzer {
};
let (input, err) = from_utf8_lossy(&input);

let mut p = path.as_path();
while let Some(parent) = p.parent() {
p = parent;
if self.workspace.contains_key(parent) {
let (vfs, db) = self.workspace.get_mut(parent).unwrap();
if let Some(file_id) = Utf8PathBuf::from_path_buf(path.to_path_buf())
.ok()
.and_then(|x| AbsPathBuf::try_from(x).ok())
.map(VfsPath::from)
.and_then(|x| vfs.file_id(&x))
{
db.set_file_text(file_id, &input);
let semi = Semantics::new(db);
if let RustAnalyzer::WithDatabase { vfs, db } = self {
if let Some(file_id) = Utf8PathBuf::from_path_buf(path.to_path_buf())
.ok()
.and_then(|x| AbsPathBuf::try_from(x).ok())
.map(VfsPath::from)
.and_then(|x| vfs.file_id(&x))
{
db.set_file_text(file_id, &input);
let semi = Semantics::new(db);

let file_id = EditionedFileId::current_edition(file_id);
let source_file = semi.parse(file_id);
errors.extend(
db.parse_errors(file_id)
.into_iter()
.flat_map(|x| x.to_vec()),
);
return (
source_file,
input.as_ref().into(),
errors,
Some(file_id),
Some(semi),
);
} else {
break;
}
let file_id = EditionedFileId::current_edition(file_id);
let source_file = semi.parse(file_id);
errors.extend(
db.parse_errors(file_id)
.into_iter()
.flat_map(|x| x.to_vec()),
);
return (
source_file,
input.as_ref().into(),
errors,
Some(file_id),
Some(semi),
);
}
}
let parse = ra_ap_syntax::ast::SourceFile::parse(&input, Edition::CURRENT);
Expand All @@ -122,7 +107,7 @@ impl RustAnalyzer {
}
}

fn find_project_manifests(
pub fn find_project_manifests(
files: &[PathBuf],
) -> anyhow::Result<Vec<ra_ap_project_model::ProjectManifest>> {
let current = std::env::current_dir()?;
Expand Down

0 comments on commit a86d676

Please sign in to comment.