diff --git a/Cargo.lock b/Cargo.lock index 1d5b8824c84a..b0af4387f2ca 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -381,6 +381,7 @@ version = "0.1.0" dependencies = [ "anyhow", "argfile", + "chrono", "clap", "codeql-extractor", "figment", @@ -404,6 +405,7 @@ dependencies = [ "ra_ap_vfs", "rust-extractor-macros", "serde", + "serde_json", "serde_with", "stderrlog", "triomphe", @@ -2034,9 +2036,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.132" +version = "1.0.133" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03" +checksum = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377" dependencies = [ "itoa", "memchr", diff --git a/MODULE.bazel b/MODULE.bazel index 13c801520b04..311d09088323 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -68,7 +68,7 @@ use_repo(py_deps, "vendor__anyhow-1.0.44", "vendor__cc-1.0.70", "vendor__clap-2. # deps for ruby+rust # keep in sync by running `misc/bazel/3rdparty/update_cargo_deps.sh` tree_sitter_extractors_deps = use_extension("//misc/bazel/3rdparty:tree_sitter_extractors_extension.bzl", "r") -use_repo(tree_sitter_extractors_deps, "vendor__anyhow-1.0.93", "vendor__argfile-0.2.1", "vendor__chrono-0.4.38", "vendor__clap-4.5.20", "vendor__encoding-0.2.33", "vendor__figment-0.10.19", "vendor__flate2-1.0.34", "vendor__glob-0.3.1", "vendor__globset-0.4.15", "vendor__itertools-0.10.5", "vendor__itertools-0.13.0", "vendor__lazy_static-1.5.0", "vendor__log-0.4.22", "vendor__num-traits-0.2.19", "vendor__num_cpus-1.16.0", "vendor__proc-macro2-1.0.89", "vendor__quote-1.0.37", "vendor__ra_ap_base_db-0.0.232", "vendor__ra_ap_cfg-0.0.232", "vendor__ra_ap_hir-0.0.232", "vendor__ra_ap_hir_def-0.0.232", "vendor__ra_ap_hir_expand-0.0.232", "vendor__ra_ap_ide_db-0.0.232", "vendor__ra_ap_intern-0.0.232", "vendor__ra_ap_load-cargo-0.0.232", "vendor__ra_ap_parser-0.0.232", "vendor__ra_ap_paths-0.0.232", 
"vendor__ra_ap_project_model-0.0.232", "vendor__ra_ap_span-0.0.232", "vendor__ra_ap_syntax-0.0.232", "vendor__ra_ap_vfs-0.0.232", "vendor__rand-0.8.5", "vendor__rayon-1.10.0", "vendor__regex-1.11.1", "vendor__serde-1.0.214", "vendor__serde_json-1.0.132", "vendor__serde_with-3.11.0", "vendor__stderrlog-0.6.0", "vendor__syn-2.0.87", "vendor__tracing-0.1.40", "vendor__tracing-subscriber-0.3.18", "vendor__tree-sitter-0.24.4", "vendor__tree-sitter-embedded-template-0.23.2", "vendor__tree-sitter-json-0.24.8", "vendor__tree-sitter-ql-0.23.1", "vendor__tree-sitter-ruby-0.23.1", "vendor__triomphe-0.1.14", "vendor__ungrammar-1.16.1") +use_repo(tree_sitter_extractors_deps, "vendor__anyhow-1.0.93", "vendor__argfile-0.2.1", "vendor__chrono-0.4.38", "vendor__clap-4.5.20", "vendor__encoding-0.2.33", "vendor__figment-0.10.19", "vendor__flate2-1.0.34", "vendor__glob-0.3.1", "vendor__globset-0.4.15", "vendor__itertools-0.10.5", "vendor__itertools-0.13.0", "vendor__lazy_static-1.5.0", "vendor__log-0.4.22", "vendor__num-traits-0.2.19", "vendor__num_cpus-1.16.0", "vendor__proc-macro2-1.0.89", "vendor__quote-1.0.37", "vendor__ra_ap_base_db-0.0.232", "vendor__ra_ap_cfg-0.0.232", "vendor__ra_ap_hir-0.0.232", "vendor__ra_ap_hir_def-0.0.232", "vendor__ra_ap_hir_expand-0.0.232", "vendor__ra_ap_ide_db-0.0.232", "vendor__ra_ap_intern-0.0.232", "vendor__ra_ap_load-cargo-0.0.232", "vendor__ra_ap_parser-0.0.232", "vendor__ra_ap_paths-0.0.232", "vendor__ra_ap_project_model-0.0.232", "vendor__ra_ap_span-0.0.232", "vendor__ra_ap_syntax-0.0.232", "vendor__ra_ap_vfs-0.0.232", "vendor__rand-0.8.5", "vendor__rayon-1.10.0", "vendor__regex-1.11.1", "vendor__serde-1.0.214", "vendor__serde_json-1.0.133", "vendor__serde_with-3.11.0", "vendor__stderrlog-0.6.0", "vendor__syn-2.0.87", "vendor__tracing-0.1.40", "vendor__tracing-subscriber-0.3.18", "vendor__tree-sitter-0.24.4", "vendor__tree-sitter-embedded-template-0.23.2", "vendor__tree-sitter-json-0.24.8", "vendor__tree-sitter-ql-0.23.1", 
"vendor__tree-sitter-ruby-0.23.1", "vendor__triomphe-0.1.14", "vendor__ungrammar-1.16.1") dotnet = use_extension("@rules_dotnet//dotnet:extensions.bzl", "dotnet") dotnet.toolchain(dotnet_version = "9.0.100") diff --git a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.bazel b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.bazel index 844d385f8a41..6208d66c5d93 100644 --- a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.bazel +++ b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.bazel @@ -243,7 +243,7 @@ alias( alias( name = "serde_json", - actual = "@vendor__serde_json-1.0.132//:serde_json", + actual = "@vendor__serde_json-1.0.133//:serde_json", tags = ["manual"], ) diff --git a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.cargo_metadata-0.18.1.bazel b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.cargo_metadata-0.18.1.bazel index e31c61cfac73..9ad331aa5f74 100644 --- a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.cargo_metadata-0.18.1.bazel +++ b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.cargo_metadata-0.18.1.bazel @@ -86,7 +86,7 @@ rust_library( "@vendor__cargo-platform-0.1.8//:cargo_platform", "@vendor__semver-1.0.23//:semver", "@vendor__serde-1.0.214//:serde", - "@vendor__serde_json-1.0.132//:serde_json", + "@vendor__serde_json-1.0.133//:serde_json", "@vendor__thiserror-1.0.69//:thiserror", ], ) diff --git a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.ra_ap_proc_macro_api-0.0.232.bazel b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.ra_ap_proc_macro_api-0.0.232.bazel index d1f5d480d719..fa3b71f570cd 100644 --- a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.ra_ap_proc_macro_api-0.0.232.bazel +++ b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.ra_ap_proc_macro_api-0.0.232.bazel @@ -96,7 +96,7 @@ rust_library( "@vendor__ra_ap_tt-0.0.232//:ra_ap_tt", "@vendor__rustc-hash-1.1.0//:rustc_hash", "@vendor__serde-1.0.214//:serde", - 
"@vendor__serde_json-1.0.132//:serde_json", + "@vendor__serde_json-1.0.133//:serde_json", "@vendor__tracing-0.1.40//:tracing", ], ) diff --git a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.ra_ap_project_model-0.0.232.bazel b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.ra_ap_project_model-0.0.232.bazel index 6f6f3bb303a4..5608e6dd6b0d 100644 --- a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.ra_ap_project_model-0.0.232.bazel +++ b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.ra_ap_project_model-0.0.232.bazel @@ -102,7 +102,7 @@ rust_library( "@vendor__rustc-hash-1.1.0//:rustc_hash", "@vendor__semver-1.0.23//:semver", "@vendor__serde-1.0.214//:serde", - "@vendor__serde_json-1.0.132//:serde_json", + "@vendor__serde_json-1.0.133//:serde_json", "@vendor__tracing-0.1.40//:tracing", "@vendor__triomphe-0.1.14//:triomphe", ], diff --git a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.serde_json-1.0.132.bazel b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.serde_json-1.0.133.bazel similarity index 97% rename from misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.serde_json-1.0.132.bazel rename to misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.serde_json-1.0.133.bazel index ecbae32eeafa..710772e4f37b 100644 --- a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.serde_json-1.0.132.bazel +++ b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.serde_json-1.0.133.bazel @@ -83,13 +83,13 @@ rust_library( "@rules_rust//rust/platform:x86_64-unknown-none": [], "//conditions:default": ["@platforms//:incompatible"], }), - version = "1.0.132", + version = "1.0.133", deps = [ "@vendor__itoa-1.0.11//:itoa", "@vendor__memchr-2.7.4//:memchr", "@vendor__ryu-1.0.18//:ryu", "@vendor__serde-1.0.214//:serde", - "@vendor__serde_json-1.0.132//:build_script_build", + "@vendor__serde_json-1.0.133//:build_script_build", ], ) @@ -143,7 +143,7 @@ cargo_build_script( "noclippy", "norustfmt", ], - version = "1.0.132", + version = 
"1.0.133", visibility = ["//visibility:private"], ) diff --git a/misc/bazel/3rdparty/tree_sitter_extractors_deps/defs.bzl b/misc/bazel/3rdparty/tree_sitter_extractors_deps/defs.bzl index 1d0b825c2356..927736b56b21 100644 --- a/misc/bazel/3rdparty/tree_sitter_extractors_deps/defs.bzl +++ b/misc/bazel/3rdparty/tree_sitter_extractors_deps/defs.bzl @@ -320,6 +320,7 @@ _NORMAL_DEPENDENCIES = { _COMMON_CONDITION: { "anyhow": Label("@vendor__anyhow-1.0.93//:anyhow"), "argfile": Label("@vendor__argfile-0.2.1//:argfile"), + "chrono": Label("@vendor__chrono-0.4.38//:chrono"), "clap": Label("@vendor__clap-4.5.20//:clap"), "figment": Label("@vendor__figment-0.10.19//:figment"), "glob": Label("@vendor__glob-0.3.1//:glob"), @@ -341,6 +342,7 @@ _NORMAL_DEPENDENCIES = { "ra_ap_syntax": Label("@vendor__ra_ap_syntax-0.0.232//:ra_ap_syntax"), "ra_ap_vfs": Label("@vendor__ra_ap_vfs-0.0.232//:ra_ap_vfs"), "serde": Label("@vendor__serde-1.0.214//:serde"), + "serde_json": Label("@vendor__serde_json-1.0.133//:serde_json"), "serde_with": Label("@vendor__serde_with-3.11.0//:serde_with"), "stderrlog": Label("@vendor__stderrlog-0.6.0//:stderrlog"), "triomphe": Label("@vendor__triomphe-0.1.14//:triomphe"), @@ -363,7 +365,7 @@ _NORMAL_DEPENDENCIES = { "rayon": Label("@vendor__rayon-1.10.0//:rayon"), "regex": Label("@vendor__regex-1.11.1//:regex"), "serde": Label("@vendor__serde-1.0.214//:serde"), - "serde_json": Label("@vendor__serde_json-1.0.132//:serde_json"), + "serde_json": Label("@vendor__serde_json-1.0.133//:serde_json"), "tracing": Label("@vendor__tracing-0.1.40//:tracing"), "tracing-subscriber": Label("@vendor__tracing-subscriber-0.3.18//:tracing_subscriber"), "tree-sitter": Label("@vendor__tree-sitter-0.24.4//:tree_sitter"), @@ -2508,12 +2510,12 @@ def crate_repositories(): maybe( http_archive, - name = "vendor__serde_json-1.0.132", - sha256 = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03", + name = "vendor__serde_json-1.0.133", + sha256 = 
"c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377", type = "tar.gz", - urls = ["https://static.crates.io/crates/serde_json/1.0.132/download"], - strip_prefix = "serde_json-1.0.132", - build_file = Label("//misc/bazel/3rdparty/tree_sitter_extractors_deps:BUILD.serde_json-1.0.132.bazel"), + urls = ["https://static.crates.io/crates/serde_json/1.0.133/download"], + strip_prefix = "serde_json-1.0.133", + build_file = Label("//misc/bazel/3rdparty/tree_sitter_extractors_deps:BUILD.serde_json-1.0.133.bazel"), ) maybe( @@ -3341,7 +3343,7 @@ def crate_repositories(): struct(repo = "vendor__rayon-1.10.0", is_dev_dep = False), struct(repo = "vendor__regex-1.11.1", is_dev_dep = False), struct(repo = "vendor__serde-1.0.214", is_dev_dep = False), - struct(repo = "vendor__serde_json-1.0.132", is_dev_dep = False), + struct(repo = "vendor__serde_json-1.0.133", is_dev_dep = False), struct(repo = "vendor__serde_with-3.11.0", is_dev_dep = False), struct(repo = "vendor__stderrlog-0.6.0", is_dev_dep = False), struct(repo = "vendor__syn-2.0.87", is_dev_dep = False), diff --git a/rust/extractor/Cargo.toml b/rust/extractor/Cargo.toml index 8b58898d3cf3..591df3480cd3 100644 --- a/rust/extractor/Cargo.toml +++ b/rust/extractor/Cargo.toml @@ -33,3 +33,5 @@ codeql-extractor = { path = "../../shared/tree-sitter-extractor" } rust-extractor-macros = { path = "macros" } itertools = "0.13.0" glob = "0.3.1" +chrono = { version = "0.4.38", features = ["serde"] } +serde_json = "1.0.133" diff --git a/rust/extractor/src/config.rs b/rust/extractor/src/config.rs index 70c390b99491..0e92e82a58cb 100644 --- a/rust/extractor/src/config.rs +++ b/rust/extractor/src/config.rs @@ -45,6 +45,7 @@ pub struct Config { pub scratch_dir: PathBuf, pub trap_dir: PathBuf, pub source_archive_dir: PathBuf, + pub diagnostic_dir: PathBuf, pub cargo_target_dir: Option, pub cargo_target: Option, pub cargo_features: Vec, diff --git a/rust/extractor/src/diagnostics.rs b/rust/extractor/src/diagnostics.rs new file 
mode 100644 index 000000000000..21b3619a207d --- /dev/null +++ b/rust/extractor/src/diagnostics.rs @@ -0,0 +1,258 @@ +use crate::config::Config; +use anyhow::Context; +use chrono::{DateTime, Utc}; +use log::{debug, info}; +use ra_ap_project_model::ProjectManifest; +use serde::Serialize; +use std::fmt::Display; +use std::fs::File; +use std::path::{Path, PathBuf}; +use std::time::Instant; + +#[derive(Default, Debug, Clone, Copy, Serialize)] +#[serde(rename_all = "camelCase")] +#[allow(dead_code)] +enum Severity { + #[default] + Note, + Warning, + Error, +} + +#[derive(Default, Debug, Clone, Copy, Serialize)] +#[serde(rename_all = "camelCase")] +struct Visibility { + status_page: bool, + cli_summary_table: bool, + telemetry: bool, +} + +#[derive(Debug, Clone, Serialize)] +#[serde(rename_all = "camelCase")] +#[allow(dead_code)] +enum Message { + TextMessage(String), + MarkdownMessage(String), +} + +impl Default for Message { + fn default() -> Self { + Message::TextMessage("".to_string()) + } +} + +#[derive(Default, Debug, Clone, Serialize)] +#[serde(rename_all = "camelCase")] +struct Source { + id: String, + name: String, + extractor_name: String, +} + +#[derive(Default, Debug, Clone, Serialize)] +#[serde(rename_all = "camelCase")] +struct Location { + file: PathBuf, + start_line: u32, + start_column: u32, + end_line: u32, + end_column: u32, +} + +#[derive(Default, Debug, Clone, Serialize)] +pub struct Diagnostics { + source: Source, + visibility: Visibility, + severity: Severity, + #[serde(flatten)] + message: Message, + timestamp: DateTime, + #[serde(skip_serializing_if = "Option::is_none")] + location: Option, + attributes: T, +} + +#[derive(Debug, Clone, Serialize)] +#[serde(rename_all = "camelCase")] +enum ExtractionStepTarget { + LoadManifest(PathBuf), + FetchFile(PathBuf), + Parse(PathBuf), + Extract(PathBuf), +} + +#[derive(Debug, Clone, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct ExtractionStep { + #[serde(flatten)] + target: 
ExtractionStepTarget, + ms: u128, +} + +impl ExtractionStep { + fn new(start: Instant, target: ExtractionStepTarget) -> Self { + let ret = ExtractionStep { + target, + ms: start.elapsed().as_millis(), + }; + debug!("{ret:?}"); + ret + } + + pub fn load_manifest(start: Instant, target: &ProjectManifest) -> Self { + Self::new( + start, + ExtractionStepTarget::LoadManifest(PathBuf::from(target.manifest_path())), + ) + } + + pub fn parse(start: Instant, target: &Path) -> Self { + Self::new(start, ExtractionStepTarget::Parse(PathBuf::from(target))) + } + + pub fn extract(start: Instant, target: &Path) -> Self { + Self::new(start, ExtractionStepTarget::Extract(PathBuf::from(target))) + } + + pub fn fetch_file(start: Instant, target: &Path) -> Self { + Self::new( + start, + ExtractionStepTarget::FetchFile(PathBuf::from(target)), + ) + } +} + +#[derive(Debug, Default, Clone, Serialize)] +#[serde(rename_all = "camelCase")] +struct HumanReadableDuration { + ms: u128, + pretty: String, +} + +impl HumanReadableDuration { + pub fn new(ms: u128) -> Self { + let seconds = ms / 1000; + let minutes = seconds / 60; + let hours = minutes / 60; + let pretty = format!( + "{hours}:{minutes:02}:{seconds:02}.{milliseconds:03}", + minutes = minutes % 60, + seconds = seconds % 60, + milliseconds = ms % 1000, + ); + Self { ms, pretty } + } +} + +impl From for HumanReadableDuration { + fn from(val: u128) -> Self { + HumanReadableDuration::new(val) + } +} + +impl Display for HumanReadableDuration { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{}ms ({})", self.ms, self.pretty) + } +} + +#[derive(Debug, Default, Clone, Serialize)] +#[serde(rename_all = "camelCase")] +struct ExtractionSummary { + number_of_manifests: usize, + number_of_files: usize, + total_load_duration: HumanReadableDuration, + total_fetch_file_duration: HumanReadableDuration, + total_parse_duration: HumanReadableDuration, + total_extract_duration: HumanReadableDuration, + total_duration: 
HumanReadableDuration, +} + +#[derive(Debug, Default, Clone, Serialize)] +#[serde(rename_all = "camelCase")] +struct ExtractionAttributes { + steps: Vec, + summary: ExtractionSummary, +} + +type ExtractionDiagnostics = Diagnostics; + +fn summary(start: Instant, steps: &[ExtractionStep]) -> ExtractionSummary { + let mut number_of_manifests = 0; + let mut number_of_files = 0; + let mut total_load_duration = 0; + let mut total_parse_duration = 0; + let mut total_extract_duration = 0; + let mut total_fetch_file_duration: u128 = 0; + for step in steps { + match &step.target { + ExtractionStepTarget::LoadManifest(_) => { + number_of_manifests += 1; + total_load_duration += step.ms; + } + ExtractionStepTarget::FetchFile(_) => { + number_of_files += 1; + total_fetch_file_duration += step.ms; + } + ExtractionStepTarget::Parse(_) => { + total_parse_duration += step.ms; + } + ExtractionStepTarget::Extract(_) => { + total_extract_duration += step.ms; + } + } + } + let ret = ExtractionSummary { + number_of_manifests, + number_of_files, + total_load_duration: total_load_duration.into(), + total_fetch_file_duration: total_fetch_file_duration.into(), + total_parse_duration: total_parse_duration.into(), + total_extract_duration: total_extract_duration.into(), + total_duration: start.elapsed().as_millis().into(), + }; + info!("total loading duration: {}", ret.total_load_duration); + info!( + "total file fetching duration: {}", + ret.total_fetch_file_duration + ); + info!("total parsing duration: {}", ret.total_parse_duration); + info!("total extracting duration: {}", ret.total_extract_duration); + info!("total duration: {}", ret.total_duration); + ret +} + +pub fn emit_extraction_diagnostics( + start: Instant, + config: &Config, + steps: Vec, +) -> anyhow::Result<()> { + let summary = summary(start, &steps); + let diagnostics = ExtractionDiagnostics { + source: Source { + id: "rust/extractor/telemetry".to_owned(), + name: "telemetry".to_string(), + extractor_name: 
"rust".to_string(), + }, + visibility: Visibility { + telemetry: true, + ..Default::default() + }, + timestamp: Utc::now(), + attributes: ExtractionAttributes { steps, summary }, + ..Default::default() + }; + + std::fs::create_dir_all(&config.diagnostic_dir).with_context(|| { + format!( + "creating diagnostics directory {}", + config.diagnostic_dir.display() + ) + })?; + let target = config.diagnostic_dir.join("extraction.jsonc"); + let mut output = File::create(&target) + .with_context(|| format!("creating diagnostics file {}", target.display()))?; + serde_json::to_writer_pretty(&mut output, &diagnostics) + .with_context(|| format!("writing to diagnostics file {}", target.display()))?; + Ok(()) +} diff --git a/rust/extractor/src/main.rs b/rust/extractor/src/main.rs index ecbdc965f8e7..f00e94e9ba4e 100644 --- a/rust/extractor/src/main.rs +++ b/rust/extractor/src/main.rs @@ -1,3 +1,4 @@ +use crate::diagnostics::{emit_extraction_diagnostics, ExtractionStep}; use crate::rust_analyzer::path_to_file_id; use anyhow::Context; use archive::Archiver; @@ -5,9 +6,10 @@ use log::info; use ra_ap_hir::Semantics; use ra_ap_ide_db::line_index::{LineCol, LineIndex}; use ra_ap_ide_db::RootDatabase; -use ra_ap_project_model::ProjectManifest; +use ra_ap_project_model::{CargoConfig, ProjectManifest}; use ra_ap_vfs::Vfs; use rust_analyzer::{ParseResult, RustAnalyzer}; +use std::time::Instant; use std::{ collections::HashMap, path::{Path, PathBuf}, @@ -15,6 +17,7 @@ use std::{ mod archive; mod config; +mod diagnostics; pub mod generated; mod qltest; mod rust_analyzer; @@ -24,18 +27,31 @@ pub mod trap; struct Extractor<'a> { archiver: &'a Archiver, traps: &'a trap::TrapFileProvider, + steps: Vec, } -impl Extractor<'_> { - fn extract(&self, rust_analyzer: &rust_analyzer::RustAnalyzer, file: &std::path::Path) { +impl<'a> Extractor<'a> { + pub fn new(archiver: &'a Archiver, traps: &'a trap::TrapFileProvider) -> Self { + Self { + archiver, + traps, + steps: Vec::new(), + } + } + + fn 
extract(&mut self, rust_analyzer: &rust_analyzer::RustAnalyzer, file: &std::path::Path) { self.archiver.archive(file); + let before_parse = Instant::now(); let ParseResult { ast, text, errors, semantics_info, } = rust_analyzer.parse(file); + self.steps.push(ExtractionStep::parse(before_parse, file)); + + let before_extract = Instant::now(); let line_index = LineIndex::new(text.as_ref()); let display_path = file.to_string_lossy(); let mut trap = self.traps.create("source", file); @@ -73,22 +89,63 @@ impl Extractor<'_> { err.to_string() ) }); + self.steps + .push(ExtractionStep::extract(before_extract, file)); } pub fn extract_with_semantics( - &self, + &mut self, file: &Path, semantics: &Semantics<'_, RootDatabase>, vfs: &Vfs, ) { self.extract(&RustAnalyzer::new(vfs, semantics), file); } - pub fn extract_without_semantics(&self, file: &Path, reason: &str) { + + pub fn extract_without_semantics(&mut self, file: &Path, reason: &str) { self.extract(&RustAnalyzer::WithoutSemantics { reason }, file); } + + pub fn load_manifest( + &mut self, + project: &ProjectManifest, + config: &CargoConfig, + ) -> Option<(RootDatabase, Vfs)> { + let before = Instant::now(); + let ret = RustAnalyzer::load_workspace(project, config); + self.steps + .push(ExtractionStep::load_manifest(before, project)); + ret + } + + pub fn fetch_file( + &mut self, + file: &Path, + semantics: &Semantics<'_, RootDatabase>, + vfs: &Vfs, + ) -> Result<(), String> { + let before = Instant::now(); + let Some(id) = path_to_file_id(file, vfs) else { + return Err("not included in files loaded from manifest".to_string()); + }; + if semantics.file_to_module_def(id).is_none() { + return Err("not included as a module".to_string()); + } + self.steps.push(ExtractionStep::fetch_file(before, file)); + Ok(()) + } + + pub fn emit_extraction_diagnostics( + self, + start: Instant, + cfg: &config::Config, + ) -> anyhow::Result<()> { + emit_extraction_diagnostics(start, cfg, self.steps) + } } fn main() -> anyhow::Result<()> { 
+ let start = Instant::now(); let mut cfg = config::Config::extract().context("failed to load configuration")?; stderrlog::new() .module(module_path!()) @@ -103,10 +160,7 @@ fn main() -> anyhow::Result<()> { let archiver = archive::Archiver { root: cfg.source_archive_dir.clone(), }; - let extractor = Extractor { - archiver: &archiver, - traps: &traps, - }; + let mut extractor = Extractor::new(&archiver, &traps); let files: Vec = cfg .inputs .iter() @@ -132,21 +186,13 @@ fn main() -> anyhow::Result<()> { } let cargo_config = cfg.to_cargo_config(); for (manifest, files) in map.values().filter(|(_, files)| !files.is_empty()) { - if let Some((ref db, ref vfs)) = RustAnalyzer::load_workspace(manifest, &cargo_config) { + if let Some((ref db, ref vfs)) = extractor.load_manifest(manifest, &cargo_config) { let semantics = Semantics::new(db); for file in files { - let Some(id) = path_to_file_id(file, vfs) else { - extractor.extract_without_semantics( - file, - "not included in files loaded from manifest", - ); - continue; + match extractor.fetch_file(file, &semantics, vfs) { + Ok(()) => extractor.extract_with_semantics(file, &semantics, vfs), + Err(reason) => extractor.extract_without_semantics(file, &reason), }; - if semantics.file_to_module_def(id).is_none() { - extractor.extract_without_semantics(file, "not included as a module"); - continue; - } - extractor.extract_with_semantics(file, &semantics, vfs); } } else { for file in files { @@ -155,5 +201,5 @@ fn main() -> anyhow::Result<()> { } } - Ok(()) + extractor.emit_extraction_diagnostics(start, &cfg) } diff --git a/rust/ql/integration-tests/conftest.py b/rust/ql/integration-tests/conftest.py index 08b17f106f80..9967339f2d2d 100644 --- a/rust/ql/integration-tests/conftest.py +++ b/rust/ql/integration-tests/conftest.py @@ -13,3 +13,12 @@ def select(self, name: str): @pytest.fixture def manifests(cwd): return _Manifests(cwd) + +@pytest.fixture +def rust_check_diagnostics(check_diagnostics): + check_diagnostics.replacements 
+= [ + (r'"ms"\s*:\s*[0-9]+', '"ms": "REDACTED"'), + (r'"pretty"\s*:\s*"[0-9]+:[0-9]{2}:[0-9]{2}.[0-9]{3}"', '"pretty": "REDACTED"'), + (r'Cargo.toml|rust-project.json', ""), + ] + return check_diagnostics diff --git a/rust/ql/integration-tests/hello-project/diagnostics.expected b/rust/ql/integration-tests/hello-project/diagnostics.expected new file mode 100644 index 000000000000..165fd2639901 --- /dev/null +++ b/rust/ql/integration-tests/hello-project/diagnostics.expected @@ -0,0 +1,101 @@ +{ + "attributes": { + "steps": [ + { + "loadManifest": "/", + "ms": "REDACTED" + }, + { + "fetchFile": "/src/directory_module/mod.rs", + "ms": "REDACTED" + }, + { + "ms": "REDACTED", + "parse": "/src/directory_module/mod.rs" + }, + { + "extract": "/src/directory_module/mod.rs", + "ms": "REDACTED" + }, + { + "fetchFile": "/src/directory_module/nested_module.rs", + "ms": "REDACTED" + }, + { + "ms": "REDACTED", + "parse": "/src/directory_module/nested_module.rs" + }, + { + "extract": "/src/directory_module/nested_module.rs", + "ms": "REDACTED" + }, + { + "ms": "REDACTED", + "parse": "/src/directory_module/not_loaded.rs" + }, + { + "extract": "/src/directory_module/not_loaded.rs", + "ms": "REDACTED" + }, + { + "fetchFile": "/src/file_module.rs", + "ms": "REDACTED" + }, + { + "ms": "REDACTED", + "parse": "/src/file_module.rs" + }, + { + "extract": "/src/file_module.rs", + "ms": "REDACTED" + }, + { + "fetchFile": "/src/main.rs", + "ms": "REDACTED" + }, + { + "ms": "REDACTED", + "parse": "/src/main.rs" + }, + { + "extract": "/src/main.rs", + "ms": "REDACTED" + } + ], + "summary": { + "numberOfFiles": 4, + "numberOfManifests": 1, + "totalDuration": { + "ms": "REDACTED", + "pretty": "REDACTED" + }, + "totalExtractDuration": { + "ms": "REDACTED", + "pretty": "REDACTED" + }, + "totalFetchFileDuration": { + "ms": "REDACTED", + "pretty": "REDACTED" + }, + "totalLoadDuration": { + "ms": "REDACTED", + "pretty": "REDACTED" + }, + "totalParseDuration": { + "ms": "REDACTED", + "pretty": 
"REDACTED" + } + } + }, + "severity": "note", + "source": { + "extractorName": "rust", + "id": "rust/extractor/telemetry", + "name": "telemetry" + }, + "visibility": { + "cliSummaryTable": false, + "statusPage": false, + "telemetry": true + } +} diff --git a/rust/ql/integration-tests/hello-project/test_project.py b/rust/ql/integration-tests/hello-project/test_project.py index d03c4f67e265..2cbac0ffdb8a 100644 --- a/rust/ql/integration-tests/hello-project/test_project.py +++ b/rust/ql/integration-tests/hello-project/test_project.py @@ -1,7 +1,7 @@ -def test_cargo(codeql, rust, manifests, check_source_archive): +def test_cargo(codeql, rust, manifests, check_source_archive, rust_check_diagnostics): manifests.select("Cargo.toml") codeql.database.create() -def test_rust_project(codeql, rust, manifests, check_source_archive): +def test_rust_project(codeql, rust, manifests, check_source_archive, rust_check_diagnostics): manifests.select("rust-project.json") codeql.database.create() diff --git a/rust/ql/integration-tests/hello-workspace/diagnostics.cargo.expected b/rust/ql/integration-tests/hello-workspace/diagnostics.cargo.expected new file mode 100644 index 000000000000..42e7859a4e82 --- /dev/null +++ b/rust/ql/integration-tests/hello-workspace/diagnostics.cargo.expected @@ -0,0 +1,97 @@ +{ + "attributes": { + "steps": [ + { + "loadManifest": "/lib/", + "ms": "REDACTED" + }, + { + "fetchFile": "/lib/src/a_module/mod.rs", + "ms": "REDACTED" + }, + { + "ms": "REDACTED", + "parse": "/lib/src/a_module/mod.rs" + }, + { + "extract": "/lib/src/a_module/mod.rs", + "ms": "REDACTED" + }, + { + "fetchFile": "/lib/src/lib.rs", + "ms": "REDACTED" + }, + { + "ms": "REDACTED", + "parse": "/lib/src/lib.rs" + }, + { + "extract": "/lib/src/lib.rs", + "ms": "REDACTED" + }, + { + "loadManifest": "/exe/", + "ms": "REDACTED" + }, + { + "fetchFile": "/exe/src/a_module.rs", + "ms": "REDACTED" + }, + { + "ms": "REDACTED", + "parse": "/exe/src/a_module.rs" + }, + { + "extract": 
"/exe/src/a_module.rs", + "ms": "REDACTED" + }, + { + "fetchFile": "/exe/src/main.rs", + "ms": "REDACTED" + }, + { + "ms": "REDACTED", + "parse": "/exe/src/main.rs" + }, + { + "extract": "/exe/src/main.rs", + "ms": "REDACTED" + } + ], + "summary": { + "numberOfFiles": 4, + "numberOfManifests": 2, + "totalDuration": { + "ms": "REDACTED", + "pretty": "REDACTED" + }, + "totalExtractDuration": { + "ms": "REDACTED", + "pretty": "REDACTED" + }, + "totalFetchFileDuration": { + "ms": "REDACTED", + "pretty": "REDACTED" + }, + "totalLoadDuration": { + "ms": "REDACTED", + "pretty": "REDACTED" + }, + "totalParseDuration": { + "ms": "REDACTED", + "pretty": "REDACTED" + } + } + }, + "severity": "note", + "source": { + "extractorName": "rust", + "id": "rust/extractor/telemetry", + "name": "telemetry" + }, + "visibility": { + "cliSummaryTable": false, + "statusPage": false, + "telemetry": true + } +} diff --git a/rust/ql/integration-tests/hello-workspace/diagnostics.rust-project.expected b/rust/ql/integration-tests/hello-workspace/diagnostics.rust-project.expected new file mode 100644 index 000000000000..7df7b52cdee0 --- /dev/null +++ b/rust/ql/integration-tests/hello-workspace/diagnostics.rust-project.expected @@ -0,0 +1,93 @@ +{ + "attributes": { + "steps": [ + { + "loadManifest": "/", + "ms": "REDACTED" + }, + { + "fetchFile": "/exe/src/a_module.rs", + "ms": "REDACTED" + }, + { + "ms": "REDACTED", + "parse": "/exe/src/a_module.rs" + }, + { + "extract": "/exe/src/a_module.rs", + "ms": "REDACTED" + }, + { + "fetchFile": "/exe/src/main.rs", + "ms": "REDACTED" + }, + { + "ms": "REDACTED", + "parse": "/exe/src/main.rs" + }, + { + "extract": "/exe/src/main.rs", + "ms": "REDACTED" + }, + { + "fetchFile": "/lib/src/a_module/mod.rs", + "ms": "REDACTED" + }, + { + "ms": "REDACTED", + "parse": "/lib/src/a_module/mod.rs" + }, + { + "extract": "/lib/src/a_module/mod.rs", + "ms": "REDACTED" + }, + { + "fetchFile": "/lib/src/lib.rs", + "ms": "REDACTED" + }, + { + "ms": "REDACTED", + "parse": 
"/lib/src/lib.rs" + }, + { + "extract": "/lib/src/lib.rs", + "ms": "REDACTED" + } + ], + "summary": { + "numberOfFiles": 4, + "numberOfManifests": 1, + "totalDuration": { + "ms": "REDACTED", + "pretty": "REDACTED" + }, + "totalExtractDuration": { + "ms": "REDACTED", + "pretty": "REDACTED" + }, + "totalFetchFileDuration": { + "ms": "REDACTED", + "pretty": "REDACTED" + }, + "totalLoadDuration": { + "ms": "REDACTED", + "pretty": "REDACTED" + }, + "totalParseDuration": { + "ms": "REDACTED", + "pretty": "REDACTED" + } + } + }, + "severity": "note", + "source": { + "extractorName": "rust", + "id": "rust/extractor/telemetry", + "name": "telemetry" + }, + "visibility": { + "cliSummaryTable": false, + "statusPage": false, + "telemetry": true + } +} diff --git a/rust/ql/integration-tests/hello-workspace/test_workspace.py b/rust/ql/integration-tests/hello-workspace/test_workspace.py index f3503e7cefae..5c95031466f8 100644 --- a/rust/ql/integration-tests/hello-workspace/test_workspace.py +++ b/rust/ql/integration-tests/hello-workspace/test_workspace.py @@ -3,10 +3,12 @@ # currently the DB-check fails on actions because of loading files multiple times and assiging multiple locations # see https://github.com/github/codeql-team/issues/3365 @pytest.mark.ql_test("DB-CHECK", xfail="maybe") -def test_cargo(codeql, rust, manifests, check_source_archive): +def test_cargo(codeql, rust, manifests, check_source_archive, rust_check_diagnostics): + rust_check_diagnostics.expected_suffix = ".cargo.expected" manifests.select("Cargo.toml") codeql.database.create() -def test_rust_project(codeql, rust, manifests, check_source_archive): +def test_rust_project(codeql, rust, manifests, check_source_archive, rust_check_diagnostics): + rust_check_diagnostics.expected_suffix = ".rust-project.expected" manifests.select("rust-project.json") codeql.database.create()