From 2987743e446b02d037c2dc2f642afab31155d291 Mon Sep 17 00:00:00 2001 From: Paolo Tranquilli <redsun82@gihub.com> Date: Wed, 6 Nov 2024 15:22:28 +0100 Subject: [PATCH 1/3] Rust: exclude uncompiled files from semantics and surface semanticless reason --- rust/extractor/src/main.rs | 146 ++++++++++++++++----------- rust/extractor/src/rust_analyzer.rs | 95 +++++++++-------- rust/extractor/src/translate/base.rs | 13 +-- 3 files changed, 147 insertions(+), 107 deletions(-) diff --git a/rust/extractor/src/main.rs b/rust/extractor/src/main.rs index 294c4734209a..43f94cbfd225 100644 --- a/rust/extractor/src/main.rs +++ b/rust/extractor/src/main.rs @@ -1,14 +1,18 @@ +use crate::rust_analyzer::path_to_file_id; use anyhow::Context; use archive::Archiver; use log::info; use ra_ap_hir::Semantics; use ra_ap_ide_db::line_index::{LineCol, LineIndex}; +use ra_ap_ide_db::RootDatabase; use ra_ap_project_model::ProjectManifest; +use ra_ap_vfs::Vfs; use rust_analyzer::{ParseResult, RustAnalyzer}; use std::{ collections::HashMap, path::{Path, PathBuf}, }; + mod archive; mod config; pub mod generated; @@ -17,54 +21,71 @@ mod rust_analyzer; mod translate; pub mod trap; -fn extract( - rust_analyzer: &rust_analyzer::RustAnalyzer, - archiver: &Archiver, - traps: &trap::TrapFileProvider, - file: &std::path::Path, -) { - archiver.archive(file); +struct Extractor<'a> { + archiver: &'a Archiver, + traps: &'a trap::TrapFileProvider, +} - let ParseResult { - ast, - text, - errors, - file_id, - } = rust_analyzer.parse(file); - let line_index = LineIndex::new(text.as_ref()); - let display_path = file.to_string_lossy(); - let mut trap = traps.create("source", file); - let label = trap.emit_file(file); - let mut translator = translate::Translator::new( - trap, - display_path.as_ref(), - label, - line_index, - file_id, - file_id.and(rust_analyzer.semantics()), - ); +impl Extractor<'_> { + fn extract(&self, rust_analyzer: &rust_analyzer::RustAnalyzer, file: &std::path::Path) { + self.archiver.archive(file); - for err in errors { - translator.emit_parse_error(&ast, &err); - } - let no_location = (LineCol { line: 0, col: 0 }, LineCol { line: 0, col: 0 }); - if translator.semantics.is_none() { - translator.emit_diagnostic( - trap::DiagnosticSeverity::Warning, - "semantics".to_owned(), - "semantic analyzer unavailable".to_owned(), - "semantic analyzer unavailable: macro expansion, call graph, and type inference will be skipped.".to_owned(), - no_location, + let ParseResult { + ast, + text, + errors, + semantics_info, + } = rust_analyzer.parse(file); + let line_index = LineIndex::new(text.as_ref()); + let display_path = file.to_string_lossy(); + let mut trap = self.traps.create("source", file); + let label = trap.emit_file(file); + let mut translator = translate::Translator::new( + trap, + display_path.as_ref(), + label, + line_index, + semantics_info.as_ref().ok(), ); + + for err in errors { + translator.emit_parse_error(&ast, &err); + } + let no_location = (LineCol { line: 0, col: 0 }, LineCol { line: 0, col: 0 }); + if let Err(reason) = semantics_info { + let message = format!("semantic analyzer unavailable ({reason})"); + let full_message = format!( + "{message}: macro expansion, call graph, and type inference will be skipped." + ); + translator.emit_diagnostic( + trap::DiagnosticSeverity::Warning, + "semantics".to_owned(), + message, + full_message, + no_location, + ); + } + translator.emit_source_file(ast); + translator.trap.commit().unwrap_or_else(|err| { + log::error!( + "Failed to write trap file for: {}: {}", + display_path, + err.to_string() + ) + }); + } + + pub fn extract_with_semantics( + &self, + file: &Path, + semantics: &Semantics<'_, RootDatabase>, + vfs: &Vfs, + ) { + self.extract(&RustAnalyzer::new(vfs, semantics), file); + } + pub fn extract_without_semantics(&self, file: &Path, reason: &str) { + self.extract(&RustAnalyzer::WithoutSemantics { reason }, file); } - translator.emit_source_file(ast); - translator.trap.commit().unwrap_or_else(|err| { - log::error!( - "Failed to write trap file for: {}: {}", - display_path, - err.to_string() - ) - }); } fn main() -> anyhow::Result<()> { @@ -82,6 +103,10 @@ fn main() -> anyhow::Result<()> { let archiver = archive::Archiver { root: cfg.source_archive_dir.clone(), }; + let extractor = Extractor { + archiver: &archiver, + traps: &traps, + }; let files: Vec<PathBuf> = cfg .inputs .iter() @@ -95,38 +120,39 @@ fn main() -> anyhow::Result<()> { .iter() .map(|x| (x.manifest_path().parent().as_ref(), (x, Vec::new()))) .collect(); - let mut other_files = Vec::new(); 'outer: for file in &files { - let mut p = file.as_path(); - while let Some(parent) = p.parent() { - p = parent; - if let Some((_, files)) = map.get_mut(parent) { + for ancestor in file.as_path().ancestors() { + if let Some((_, files)) = map.get_mut(ancestor) { files.push(file); continue 'outer; } } - other_files.push(file); + extractor.extract_without_semantics(file, "no manifest found"); } - for (manifest, files) in map.values() { - if files.is_empty() { - break; - } + for (manifest, files) in map.values().filter(|(_, files)| !files.is_empty()) { if let Some((ref db, ref vfs)) = RustAnalyzer::load_workspace(manifest, &cfg.scratch_dir) { let semantics = Semantics::new(db); - let rust_analyzer = RustAnalyzer::new(vfs, semantics); for file in files { - extract(&rust_analyzer, &archiver, &traps, file); + let Some(id) = path_to_file_id(file, vfs) else { + extractor.extract_without_semantics( + file, + "not included in files loaded from manifest", + ); + continue; + }; + if semantics.file_to_module_def(id).is_none() { + extractor.extract_without_semantics(file, "not included as a module"); + continue; + } + extractor.extract_with_semantics(file, &semantics, vfs); } } else { for file in files { - extract(&RustAnalyzer::WithoutSemantics, &archiver, &traps, file); + extractor.extract_without_semantics(file, "unable to load manifest"); } } } - for file in other_files { - extract(&RustAnalyzer::WithoutSemantics, &archiver, &traps, file); - } Ok(()) } diff --git a/rust/extractor/src/rust_analyzer.rs b/rust/extractor/src/rust_analyzer.rs index 652f1619919c..39419a12a43c 100644 --- a/rust/extractor/src/rust_analyzer.rs +++ b/rust/extractor/src/rust_analyzer.rs @@ -14,24 +14,32 @@ use ra_ap_span::TextRange; use ra_ap_span::TextSize; use ra_ap_syntax::SourceFile; use ra_ap_syntax::SyntaxError; -use ra_ap_vfs::AbsPathBuf; use ra_ap_vfs::Vfs; use ra_ap_vfs::VfsPath; +use ra_ap_vfs::{AbsPathBuf, FileId}; use std::borrow::Cow; use std::path::{Path, PathBuf}; use triomphe::Arc; pub enum RustAnalyzer<'a> { WithSemantics { vfs: &'a Vfs, - semantics: Semantics<'a, RootDatabase>, + semantics: &'a Semantics<'a, RootDatabase>, }, - WithoutSemantics, + WithoutSemantics { + reason: &'a str, + }, +} + +pub struct FileSemanticInformation<'a> { + pub file_id: EditionedFileId, + pub semantics: &'a Semantics<'a, RootDatabase>, } -pub struct ParseResult { + +pub struct ParseResult<'a> { pub ast: SourceFile, pub text: Arc<str>, pub errors: Vec<SyntaxError>, - pub file_id: Option<EditionedFileId>, + pub semantics_info: Result<FileSemanticInformation<'a>, &'a str>, } impl<'a> RustAnalyzer<'a> { pub fn load_workspace( @@ -61,47 +69,44 @@ impl<'a> RustAnalyzer<'a> { } } } - pub fn new(vfs: &'a Vfs, semantics: Semantics<'a, RootDatabase>) -> Self { + pub fn new(vfs: &'a Vfs, semantics: &'a Semantics<'a, RootDatabase>) -> Self { RustAnalyzer::WithSemantics { vfs, semantics } } - pub fn semantics(&'a self) -> Option<&'a Semantics<'a, RootDatabase>> { - match self { - RustAnalyzer::WithSemantics { vfs: _, semantics } => Some(semantics), - RustAnalyzer::WithoutSemantics => None, - } - } pub fn parse(&self, path: &Path) -> ParseResult { - if let RustAnalyzer::WithSemantics { vfs, semantics } = self { - if let Some(file_id) = Utf8PathBuf::from_path_buf(path.to_path_buf()) - .ok() - .and_then(|x| AbsPathBuf::try_from(x).ok()) - .map(VfsPath::from) - .and_then(|x| vfs.file_id(&x)) - { - if let Ok(input) = std::panic::catch_unwind(|| semantics.db.file_text(file_id)) { - let file_id = EditionedFileId::current_edition(file_id); - let source_file = semantics.parse(file_id); - let errors = semantics - .db - .parse_errors(file_id) - .into_iter() - .flat_map(|x| x.to_vec()) - .collect(); + let mut no_semantics_reason = ""; + match self { + RustAnalyzer::WithSemantics { vfs, semantics } => { + if let Some(file_id) = path_to_file_id(path, vfs) { + if let Ok(input) = std::panic::catch_unwind(|| semantics.db.file_text(file_id)) + { + let file_id = EditionedFileId::current_edition(file_id); + let source_file = semantics.parse(file_id); + let errors = semantics + .db + .parse_errors(file_id) + .into_iter() + .flat_map(|x| x.to_vec()) + .collect(); - return ParseResult { - ast: source_file, - text: input, - errors, - file_id: Some(file_id), - }; - } else { - log::debug!( - "No text available for file_id '{:?}', falling back to loading file '{}' from disk.", - file_id, - path.to_string_lossy() - ) + return ParseResult { + ast: source_file, + text: input, + errors, + semantics_info: Ok(FileSemanticInformation { file_id, semantics }), + }; + } else { + debug!( + "No text available for file_id '{:?}', falling back to loading file '{}' from disk.", + file_id, + path.to_string_lossy() + ); + no_semantics_reason = "file not found in project"; + } } } + RustAnalyzer::WithoutSemantics { reason } => { + no_semantics_reason = reason; + } } let mut errors = Vec::new(); let input = match std::fs::read(path) { @@ -123,7 +128,7 @@ impl<'a> RustAnalyzer<'a> { ast: parse.tree(), text: input.as_ref().into(), errors, - file_id: None, + semantics_info: Err(no_semantics_reason), } } } @@ -187,3 +192,11 @@ fn from_utf8_lossy(v: &[u8]) -> (Cow<'_, str>, Option<SyntaxError>) { (Cow::Owned(res), Some(error)) } + +pub(crate) fn path_to_file_id(path: &Path, vfs: &Vfs) -> Option<FileId> { + Utf8PathBuf::from_path_buf(path.to_path_buf()) + .ok() + .and_then(|x| AbsPathBuf::try_from(x).ok()) + .map(VfsPath::from) + .and_then(|x| vfs.file_id(&x)) +} diff --git a/rust/extractor/src/translate/base.rs b/rust/extractor/src/translate/base.rs index 7233faccf854..c1aadadf0e79 100644 --- a/rust/extractor/src/translate/base.rs +++ b/rust/extractor/src/translate/base.rs @@ -1,6 +1,7 @@ use super::mappings::{AddressableAst, AddressableHir}; use crate::generated::MacroCall; use crate::generated::{self}; +use crate::rust_analyzer::FileSemanticInformation; use crate::trap::{DiagnosticSeverity, TrapFile, TrapId}; use crate::trap::{Label, TrapClass}; use codeql_extractor::trap::{self}; @@ -64,16 +65,15 @@ impl<'a> Translator<'a> { path: &'a str, label: trap::Label, line_index: LineIndex, - file_id: Option<EditionedFileId>, - semantics: Option<&'a Semantics<'a, RootDatabase>>, + semantic_info: Option<&FileSemanticInformation<'a>>, ) -> Translator<'a> { Translator { trap, path, label, line_index, - file_id, - semantics, + file_id: semantic_info.map(|i| i.file_id), + semantics: semantic_info.map(|i| i.semantics), } } fn location(&self, range: TextRange) -> (LineCol, LineCol) { @@ -160,7 +160,7 @@ impl<'a> Translator<'a> { self.path, start.line + 1, start.col + 1, - &message + &full_message ); if severity > DiagnosticSeverity::Debug { let location = self.trap.emit_location_label(self.label, start, end); @@ -284,7 +284,8 @@ impl<'a> Translator<'a> { range.unwrap_or_else(|| TextRange::empty(TextSize::from(0))), )); } - } else { + } else if self.semantics.is_some() { + // let's not spam warnings if we don't have semantics, we already emitted one let range = self.text_range_for_node(mcall); self.emit_parse_error( mcall, From 200715773f3c92d187ffe16b08d7b74c7eb5c639 Mon Sep 17 00:00:00 2001 From: Paolo Tranquilli <redsun82@gihub.com> Date: Wed, 6 Nov 2024 17:19:06 +0100 Subject: [PATCH 2/3] Rust: fix `no_semantics_reason` --- rust/extractor/src/rust_analyzer.rs | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/rust/extractor/src/rust_analyzer.rs b/rust/extractor/src/rust_analyzer.rs index 39419a12a43c..2c92861ce54f 100644 --- a/rust/extractor/src/rust_analyzer.rs +++ b/rust/extractor/src/rust_analyzer.rs @@ -73,7 +73,7 @@ impl<'a> RustAnalyzer<'a> { RustAnalyzer::WithSemantics { vfs, semantics } } pub fn parse(&self, path: &Path) -> ParseResult { - let mut no_semantics_reason = ""; + let no_semantics_reason; match self { RustAnalyzer::WithSemantics { vfs, semantics } => { if let Some(file_id) = path_to_file_id(path, vfs) { @@ -94,15 +94,14 @@ impl<'a> RustAnalyzer<'a> { errors, semantics_info: Ok(FileSemanticInformation { file_id, semantics }), }; - } else { - debug!( - "No text available for file_id '{:?}', falling back to loading file '{}' from disk.", - file_id, - path.to_string_lossy() - ); - no_semantics_reason = "file not found in project"; } + debug!( + "No text available for file_id '{:?}', falling back to loading file '{}' from disk.", + file_id, + path.to_string_lossy() + ); } + no_semantics_reason = "file not found in project"; } RustAnalyzer::WithoutSemantics { reason } => { no_semantics_reason = reason; From 64d522e4479dc1b9945453c42e88cb5d4d468b7d Mon Sep 17 00:00:00 2001 From: Paolo Tranquilli <redsun82@gihub.com> Date: Thu, 7 Nov 2024 09:39:44 +0100 Subject: [PATCH 3/3] Rust: address review --- rust/extractor/src/rust_analyzer.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/rust/extractor/src/rust_analyzer.rs b/rust/extractor/src/rust_analyzer.rs index 2c92861ce54f..9f1a8f70ec67 100644 --- a/rust/extractor/src/rust_analyzer.rs +++ b/rust/extractor/src/rust_analyzer.rs @@ -41,6 +41,7 @@ pub struct ParseResult<'a> { pub errors: Vec<SyntaxError>, pub semantics_info: Result<FileSemanticInformation<'a>, &'a str>, } + impl<'a> RustAnalyzer<'a> { pub fn load_workspace( project: &ProjectManifest, @@ -100,8 +101,10 @@ impl<'a> RustAnalyzer<'a> { file_id, path.to_string_lossy() ); + no_semantics_reason = "no text available for the file in the project"; + } else { + no_semantics_reason = "file not found in project"; } - no_semantics_reason = "file not found in project"; } RustAnalyzer::WithoutSemantics { reason } => { no_semantics_reason = reason;