From 2987743e446b02d037c2dc2f642afab31155d291 Mon Sep 17 00:00:00 2001
From: Paolo Tranquilli <redsun82@gihub.com>
Date: Wed, 6 Nov 2024 15:22:28 +0100
Subject: [PATCH 1/3] Rust: exclude uncompiled files from semantics and surface
 the reason semantics are unavailable

---
 rust/extractor/src/main.rs           | 146 ++++++++++++++++-----------
 rust/extractor/src/rust_analyzer.rs  |  95 +++++++++--------
 rust/extractor/src/translate/base.rs |  13 +--
 3 files changed, 147 insertions(+), 107 deletions(-)

diff --git a/rust/extractor/src/main.rs b/rust/extractor/src/main.rs
index 294c4734209a..43f94cbfd225 100644
--- a/rust/extractor/src/main.rs
+++ b/rust/extractor/src/main.rs
@@ -1,14 +1,18 @@
+use crate::rust_analyzer::path_to_file_id;
 use anyhow::Context;
 use archive::Archiver;
 use log::info;
 use ra_ap_hir::Semantics;
 use ra_ap_ide_db::line_index::{LineCol, LineIndex};
+use ra_ap_ide_db::RootDatabase;
 use ra_ap_project_model::ProjectManifest;
+use ra_ap_vfs::Vfs;
 use rust_analyzer::{ParseResult, RustAnalyzer};
 use std::{
     collections::HashMap,
     path::{Path, PathBuf},
 };
+
 mod archive;
 mod config;
 pub mod generated;
@@ -17,54 +21,71 @@ mod rust_analyzer;
 mod translate;
 pub mod trap;
 
-fn extract(
-    rust_analyzer: &rust_analyzer::RustAnalyzer,
-    archiver: &Archiver,
-    traps: &trap::TrapFileProvider,
-    file: &std::path::Path,
-) {
-    archiver.archive(file);
+struct Extractor<'a> {
+    archiver: &'a Archiver,
+    traps: &'a trap::TrapFileProvider,
+}
 
-    let ParseResult {
-        ast,
-        text,
-        errors,
-        file_id,
-    } = rust_analyzer.parse(file);
-    let line_index = LineIndex::new(text.as_ref());
-    let display_path = file.to_string_lossy();
-    let mut trap = traps.create("source", file);
-    let label = trap.emit_file(file);
-    let mut translator = translate::Translator::new(
-        trap,
-        display_path.as_ref(),
-        label,
-        line_index,
-        file_id,
-        file_id.and(rust_analyzer.semantics()),
-    );
+impl Extractor<'_> {
+    fn extract(&self, rust_analyzer: &rust_analyzer::RustAnalyzer, file: &std::path::Path) {
+        self.archiver.archive(file);
 
-    for err in errors {
-        translator.emit_parse_error(&ast, &err);
-    }
-    let no_location = (LineCol { line: 0, col: 0 }, LineCol { line: 0, col: 0 });
-    if translator.semantics.is_none() {
-        translator.emit_diagnostic(
-            trap::DiagnosticSeverity::Warning,
-            "semantics".to_owned(),
-            "semantic analyzer unavailable".to_owned(),
-            "semantic analyzer unavailable: macro expansion, call graph, and type inference will be skipped.".to_owned(),
-            no_location,
+        let ParseResult {
+            ast,
+            text,
+            errors,
+            semantics_info,
+        } = rust_analyzer.parse(file);
+        let line_index = LineIndex::new(text.as_ref());
+        let display_path = file.to_string_lossy();
+        let mut trap = self.traps.create("source", file);
+        let label = trap.emit_file(file);
+        let mut translator = translate::Translator::new(
+            trap,
+            display_path.as_ref(),
+            label,
+            line_index,
+            semantics_info.as_ref().ok(),
         );
+
+        for err in errors {
+            translator.emit_parse_error(&ast, &err);
+        }
+        let no_location = (LineCol { line: 0, col: 0 }, LineCol { line: 0, col: 0 });
+        if let Err(reason) = semantics_info {
+            let message = format!("semantic analyzer unavailable ({reason})");
+            let full_message = format!(
+                "{message}: macro expansion, call graph, and type inference will be skipped."
+            );
+            translator.emit_diagnostic(
+                trap::DiagnosticSeverity::Warning,
+                "semantics".to_owned(),
+                message,
+                full_message,
+                no_location,
+            );
+        }
+        translator.emit_source_file(ast);
+        translator.trap.commit().unwrap_or_else(|err| {
+            log::error!(
+                "Failed to write trap file for: {}: {}",
+                display_path,
+                err.to_string()
+            )
+        });
+    }
+
+    pub fn extract_with_semantics(
+        &self,
+        file: &Path,
+        semantics: &Semantics<'_, RootDatabase>,
+        vfs: &Vfs,
+    ) {
+        self.extract(&RustAnalyzer::new(vfs, semantics), file);
+    }
+    pub fn extract_without_semantics(&self, file: &Path, reason: &str) {
+        self.extract(&RustAnalyzer::WithoutSemantics { reason }, file);
     }
-    translator.emit_source_file(ast);
-    translator.trap.commit().unwrap_or_else(|err| {
-        log::error!(
-            "Failed to write trap file for: {}: {}",
-            display_path,
-            err.to_string()
-        )
-    });
 }
 
 fn main() -> anyhow::Result<()> {
@@ -82,6 +103,10 @@ fn main() -> anyhow::Result<()> {
     let archiver = archive::Archiver {
         root: cfg.source_archive_dir.clone(),
     };
+    let extractor = Extractor {
+        archiver: &archiver,
+        traps: &traps,
+    };
     let files: Vec<PathBuf> = cfg
         .inputs
         .iter()
@@ -95,38 +120,39 @@ fn main() -> anyhow::Result<()> {
         .iter()
         .map(|x| (x.manifest_path().parent().as_ref(), (x, Vec::new())))
         .collect();
-    let mut other_files = Vec::new();
 
     'outer: for file in &files {
-        let mut p = file.as_path();
-        while let Some(parent) = p.parent() {
-            p = parent;
-            if let Some((_, files)) = map.get_mut(parent) {
+        for ancestor in file.as_path().ancestors() {
+            if let Some((_, files)) = map.get_mut(ancestor) {
                 files.push(file);
                 continue 'outer;
             }
         }
-        other_files.push(file);
+        extractor.extract_without_semantics(file, "no manifest found");
     }
-    for (manifest, files) in map.values() {
-        if files.is_empty() {
-            break;
-        }
+    for (manifest, files) in map.values().filter(|(_, files)| !files.is_empty()) {
         if let Some((ref db, ref vfs)) = RustAnalyzer::load_workspace(manifest, &cfg.scratch_dir) {
             let semantics = Semantics::new(db);
-            let rust_analyzer = RustAnalyzer::new(vfs, semantics);
             for file in files {
-                extract(&rust_analyzer, &archiver, &traps, file);
+                let Some(id) = path_to_file_id(file, vfs) else {
+                    extractor.extract_without_semantics(
+                        file,
+                        "not included in files loaded from manifest",
+                    );
+                    continue;
+                };
+                if semantics.file_to_module_def(id).is_none() {
+                    extractor.extract_without_semantics(file, "not included as a module");
+                    continue;
+                }
+                extractor.extract_with_semantics(file, &semantics, vfs);
             }
         } else {
             for file in files {
-                extract(&RustAnalyzer::WithoutSemantics, &archiver, &traps, file);
+                extractor.extract_without_semantics(file, "unable to load manifest");
             }
         }
     }
-    for file in other_files {
-        extract(&RustAnalyzer::WithoutSemantics, &archiver, &traps, file);
-    }
 
     Ok(())
 }
diff --git a/rust/extractor/src/rust_analyzer.rs b/rust/extractor/src/rust_analyzer.rs
index 652f1619919c..39419a12a43c 100644
--- a/rust/extractor/src/rust_analyzer.rs
+++ b/rust/extractor/src/rust_analyzer.rs
@@ -14,24 +14,32 @@ use ra_ap_span::TextRange;
 use ra_ap_span::TextSize;
 use ra_ap_syntax::SourceFile;
 use ra_ap_syntax::SyntaxError;
-use ra_ap_vfs::AbsPathBuf;
 use ra_ap_vfs::Vfs;
 use ra_ap_vfs::VfsPath;
+use ra_ap_vfs::{AbsPathBuf, FileId};
 use std::borrow::Cow;
 use std::path::{Path, PathBuf};
 use triomphe::Arc;
 pub enum RustAnalyzer<'a> {
     WithSemantics {
         vfs: &'a Vfs,
-        semantics: Semantics<'a, RootDatabase>,
+        semantics: &'a Semantics<'a, RootDatabase>,
     },
-    WithoutSemantics,
+    WithoutSemantics {
+        reason: &'a str,
+    },
+}
+
+pub struct FileSemanticInformation<'a> {
+    pub file_id: EditionedFileId,
+    pub semantics: &'a Semantics<'a, RootDatabase>,
 }
-pub struct ParseResult {
+
+pub struct ParseResult<'a> {
     pub ast: SourceFile,
     pub text: Arc<str>,
     pub errors: Vec<SyntaxError>,
-    pub file_id: Option<EditionedFileId>,
+    pub semantics_info: Result<FileSemanticInformation<'a>, &'a str>,
 }
 impl<'a> RustAnalyzer<'a> {
     pub fn load_workspace(
@@ -61,47 +69,44 @@ impl<'a> RustAnalyzer<'a> {
             }
         }
     }
-    pub fn new(vfs: &'a Vfs, semantics: Semantics<'a, RootDatabase>) -> Self {
+    pub fn new(vfs: &'a Vfs, semantics: &'a Semantics<'a, RootDatabase>) -> Self {
         RustAnalyzer::WithSemantics { vfs, semantics }
     }
-    pub fn semantics(&'a self) -> Option<&'a Semantics<'a, RootDatabase>> {
-        match self {
-            RustAnalyzer::WithSemantics { vfs: _, semantics } => Some(semantics),
-            RustAnalyzer::WithoutSemantics => None,
-        }
-    }
     pub fn parse(&self, path: &Path) -> ParseResult {
-        if let RustAnalyzer::WithSemantics { vfs, semantics } = self {
-            if let Some(file_id) = Utf8PathBuf::from_path_buf(path.to_path_buf())
-                .ok()
-                .and_then(|x| AbsPathBuf::try_from(x).ok())
-                .map(VfsPath::from)
-                .and_then(|x| vfs.file_id(&x))
-            {
-                if let Ok(input) = std::panic::catch_unwind(|| semantics.db.file_text(file_id)) {
-                    let file_id = EditionedFileId::current_edition(file_id);
-                    let source_file = semantics.parse(file_id);
-                    let errors = semantics
-                        .db
-                        .parse_errors(file_id)
-                        .into_iter()
-                        .flat_map(|x| x.to_vec())
-                        .collect();
+        let mut no_semantics_reason = "";
+        match self {
+            RustAnalyzer::WithSemantics { vfs, semantics } => {
+                if let Some(file_id) = path_to_file_id(path, vfs) {
+                    if let Ok(input) = std::panic::catch_unwind(|| semantics.db.file_text(file_id))
+                    {
+                        let file_id = EditionedFileId::current_edition(file_id);
+                        let source_file = semantics.parse(file_id);
+                        let errors = semantics
+                            .db
+                            .parse_errors(file_id)
+                            .into_iter()
+                            .flat_map(|x| x.to_vec())
+                            .collect();
 
-                    return ParseResult {
-                        ast: source_file,
-                        text: input,
-                        errors,
-                        file_id: Some(file_id),
-                    };
-                } else {
-                    log::debug!(
-                        "No text available for file_id '{:?}', falling back to loading file '{}' from disk.",
-                        file_id,
-                        path.to_string_lossy()
-                    )
+                        return ParseResult {
+                            ast: source_file,
+                            text: input,
+                            errors,
+                            semantics_info: Ok(FileSemanticInformation { file_id, semantics }),
+                        };
+                    } else {
+                        debug!(
+                            "No text available for file_id '{:?}', falling back to loading file '{}' from disk.",
+                            file_id,
+                            path.to_string_lossy()
+                        );
+                        no_semantics_reason = "file not found in project";
+                    }
                 }
             }
+            RustAnalyzer::WithoutSemantics { reason } => {
+                no_semantics_reason = reason;
+            }
         }
         let mut errors = Vec::new();
         let input = match std::fs::read(path) {
@@ -123,7 +128,7 @@ impl<'a> RustAnalyzer<'a> {
             ast: parse.tree(),
             text: input.as_ref().into(),
             errors,
-            file_id: None,
+            semantics_info: Err(no_semantics_reason),
         }
     }
 }
@@ -187,3 +192,11 @@ fn from_utf8_lossy(v: &[u8]) -> (Cow<'_, str>, Option<SyntaxError>) {
 
     (Cow::Owned(res), Some(error))
 }
+
+/// Resolves `path` to the [`FileId`] that `vfs` holds for it, going through UTF-8 and absolute
+/// path conversions; yields `None` if either conversion fails or `vfs` has no id for the path.
+pub(crate) fn path_to_file_id(path: &Path, vfs: &Vfs) -> Option<FileId> {
+    let utf8_path = Utf8PathBuf::from_path_buf(path.to_path_buf()).ok()?;
+    let vfs_path = VfsPath::from(AbsPathBuf::try_from(utf8_path).ok()?);
+    vfs.file_id(&vfs_path)
+}
diff --git a/rust/extractor/src/translate/base.rs b/rust/extractor/src/translate/base.rs
index 7233faccf854..c1aadadf0e79 100644
--- a/rust/extractor/src/translate/base.rs
+++ b/rust/extractor/src/translate/base.rs
@@ -1,6 +1,7 @@
 use super::mappings::{AddressableAst, AddressableHir};
 use crate::generated::MacroCall;
 use crate::generated::{self};
+use crate::rust_analyzer::FileSemanticInformation;
 use crate::trap::{DiagnosticSeverity, TrapFile, TrapId};
 use crate::trap::{Label, TrapClass};
 use codeql_extractor::trap::{self};
@@ -64,16 +65,15 @@ impl<'a> Translator<'a> {
         path: &'a str,
         label: trap::Label,
         line_index: LineIndex,
-        file_id: Option<EditionedFileId>,
-        semantics: Option<&'a Semantics<'a, RootDatabase>>,
+        semantic_info: Option<&FileSemanticInformation<'a>>,
     ) -> Translator<'a> {
         Translator {
             trap,
             path,
             label,
             line_index,
-            file_id,
-            semantics,
+            file_id: semantic_info.map(|i| i.file_id),
+            semantics: semantic_info.map(|i| i.semantics),
         }
     }
     fn location(&self, range: TextRange) -> (LineCol, LineCol) {
@@ -160,7 +160,7 @@ impl<'a> Translator<'a> {
             self.path,
             start.line + 1,
             start.col + 1,
-            &message
+            &full_message
         );
         if severity > DiagnosticSeverity::Debug {
             let location = self.trap.emit_location_label(self.label, start, end);
@@ -284,7 +284,8 @@ impl<'a> Translator<'a> {
                         range.unwrap_or_else(|| TextRange::empty(TextSize::from(0))),
                     ));
             }
-        } else {
+        } else if self.semantics.is_some() {
+            // Avoid a warning per macro call when semantics are missing: one was already emitted.
             let range = self.text_range_for_node(mcall);
             self.emit_parse_error(
                 mcall,

From 200715773f3c92d187ffe16b08d7b74c7eb5c639 Mon Sep 17 00:00:00 2001
From: Paolo Tranquilli <redsun82@gihub.com>
Date: Wed, 6 Nov 2024 17:19:06 +0100
Subject: [PATCH 2/3] Rust: fix `no_semantics_reason`

---
 rust/extractor/src/rust_analyzer.rs | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/rust/extractor/src/rust_analyzer.rs b/rust/extractor/src/rust_analyzer.rs
index 39419a12a43c..2c92861ce54f 100644
--- a/rust/extractor/src/rust_analyzer.rs
+++ b/rust/extractor/src/rust_analyzer.rs
@@ -73,7 +73,7 @@ impl<'a> RustAnalyzer<'a> {
         RustAnalyzer::WithSemantics { vfs, semantics }
     }
     pub fn parse(&self, path: &Path) -> ParseResult {
-        let mut no_semantics_reason = "";
+        let no_semantics_reason;
         match self {
             RustAnalyzer::WithSemantics { vfs, semantics } => {
                 if let Some(file_id) = path_to_file_id(path, vfs) {
@@ -94,15 +94,14 @@ impl<'a> RustAnalyzer<'a> {
                             errors,
                             semantics_info: Ok(FileSemanticInformation { file_id, semantics }),
                         };
-                    } else {
-                        debug!(
-                            "No text available for file_id '{:?}', falling back to loading file '{}' from disk.",
-                            file_id,
-                            path.to_string_lossy()
-                        );
-                        no_semantics_reason = "file not found in project";
                     }
+                    debug!(
+                        "No text available for file_id '{:?}', falling back to loading file '{}' from disk.",
+                        file_id,
+                        path.to_string_lossy()
+                    );
                 }
+                no_semantics_reason = "file not found in project";
             }
             RustAnalyzer::WithoutSemantics { reason } => {
                 no_semantics_reason = reason;

From 64d522e4479dc1b9945453c42e88cb5d4d468b7d Mon Sep 17 00:00:00 2001
From: Paolo Tranquilli <redsun82@gihub.com>
Date: Thu, 7 Nov 2024 09:39:44 +0100
Subject: [PATCH 3/3] Rust: address review

---
 rust/extractor/src/rust_analyzer.rs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/rust/extractor/src/rust_analyzer.rs b/rust/extractor/src/rust_analyzer.rs
index 2c92861ce54f..9f1a8f70ec67 100644
--- a/rust/extractor/src/rust_analyzer.rs
+++ b/rust/extractor/src/rust_analyzer.rs
@@ -41,6 +41,7 @@ pub struct ParseResult<'a> {
     pub errors: Vec<SyntaxError>,
     pub semantics_info: Result<FileSemanticInformation<'a>, &'a str>,
 }
+
 impl<'a> RustAnalyzer<'a> {
     pub fn load_workspace(
         project: &ProjectManifest,
@@ -100,8 +101,10 @@ impl<'a> RustAnalyzer<'a> {
                         file_id,
                         path.to_string_lossy()
                     );
+                    no_semantics_reason = "no text available for the file in the project";
+                } else {
+                    no_semantics_reason = "file not found in project";
                 }
-                no_semantics_reason = "file not found in project";
             }
             RustAnalyzer::WithoutSemantics { reason } => {
                 no_semantics_reason = reason;