From 24eb65692f616cb1b3f1f8f0b5bad6bd10071ceb Mon Sep 17 00:00:00 2001 From: Paolo Tranquilli Date: Tue, 26 Nov 2024 15:57:18 +0100 Subject: [PATCH] Rust: add some performance diagnostics This outputs some duration counts for various parts of the extraction process in the database in the form of telemetry diagnostics. The diagnostics format was preferred to putting things in the relational database as that will scale better to code scanning and is more flexible regarding the data we can put into it without passing through the dbscheme. Also, although it's not the case yet, it will be possible to output diagnostics even if creation of the database fails. --- Cargo.lock | 6 +- MODULE.bazel | 2 +- .../tree_sitter_extractors_deps/BUILD.bazel | 2 +- .../BUILD.cargo_metadata-0.18.1.bazel | 2 +- .../BUILD.ra_ap_proc_macro_api-0.0.232.bazel | 2 +- .../BUILD.ra_ap_project_model-0.0.232.bazel | 2 +- ...2.bazel => BUILD.serde_json-1.0.133.bazel} | 6 +- .../tree_sitter_extractors_deps/defs.bzl | 16 +- rust/extractor/Cargo.toml | 2 + rust/extractor/src/config.rs | 1 + rust/extractor/src/diagnostics.rs | 258 ++++++++++++++++++ rust/extractor/src/main.rs | 90 ++++-- rust/ql/integration-tests/conftest.py | 9 + .../hello-project/diagnostics.expected | 101 +++++++ .../hello-project/test_project.py | 4 +- .../diagnostics.cargo.expected | 97 +++++++ .../diagnostics.rust-project.expected | 93 +++++++ .../hello-workspace/test_workspace.py | 6 +- 18 files changed, 656 insertions(+), 43 deletions(-) rename misc/bazel/3rdparty/tree_sitter_extractors_deps/{BUILD.serde_json-1.0.132.bazel => BUILD.serde_json-1.0.133.bazel} (97%) create mode 100644 rust/extractor/src/diagnostics.rs create mode 100644 rust/ql/integration-tests/hello-project/diagnostics.expected create mode 100644 rust/ql/integration-tests/hello-workspace/diagnostics.cargo.expected create mode 100644 rust/ql/integration-tests/hello-workspace/diagnostics.rust-project.expected diff --git a/Cargo.lock b/Cargo.lock index 
1d5b8824c84a..b0af4387f2ca 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -381,6 +381,7 @@ version = "0.1.0" dependencies = [ "anyhow", "argfile", + "chrono", "clap", "codeql-extractor", "figment", @@ -404,6 +405,7 @@ dependencies = [ "ra_ap_vfs", "rust-extractor-macros", "serde", + "serde_json", "serde_with", "stderrlog", "triomphe", @@ -2034,9 +2036,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.132" +version = "1.0.133" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03" +checksum = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377" dependencies = [ "itoa", "memchr", diff --git a/MODULE.bazel b/MODULE.bazel index 13c801520b04..311d09088323 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -68,7 +68,7 @@ use_repo(py_deps, "vendor__anyhow-1.0.44", "vendor__cc-1.0.70", "vendor__clap-2. # deps for ruby+rust # keep in sync by running `misc/bazel/3rdparty/update_cargo_deps.sh` tree_sitter_extractors_deps = use_extension("//misc/bazel/3rdparty:tree_sitter_extractors_extension.bzl", "r") -use_repo(tree_sitter_extractors_deps, "vendor__anyhow-1.0.93", "vendor__argfile-0.2.1", "vendor__chrono-0.4.38", "vendor__clap-4.5.20", "vendor__encoding-0.2.33", "vendor__figment-0.10.19", "vendor__flate2-1.0.34", "vendor__glob-0.3.1", "vendor__globset-0.4.15", "vendor__itertools-0.10.5", "vendor__itertools-0.13.0", "vendor__lazy_static-1.5.0", "vendor__log-0.4.22", "vendor__num-traits-0.2.19", "vendor__num_cpus-1.16.0", "vendor__proc-macro2-1.0.89", "vendor__quote-1.0.37", "vendor__ra_ap_base_db-0.0.232", "vendor__ra_ap_cfg-0.0.232", "vendor__ra_ap_hir-0.0.232", "vendor__ra_ap_hir_def-0.0.232", "vendor__ra_ap_hir_expand-0.0.232", "vendor__ra_ap_ide_db-0.0.232", "vendor__ra_ap_intern-0.0.232", "vendor__ra_ap_load-cargo-0.0.232", "vendor__ra_ap_parser-0.0.232", "vendor__ra_ap_paths-0.0.232", "vendor__ra_ap_project_model-0.0.232", "vendor__ra_ap_span-0.0.232", 
"vendor__ra_ap_syntax-0.0.232", "vendor__ra_ap_vfs-0.0.232", "vendor__rand-0.8.5", "vendor__rayon-1.10.0", "vendor__regex-1.11.1", "vendor__serde-1.0.214", "vendor__serde_json-1.0.132", "vendor__serde_with-3.11.0", "vendor__stderrlog-0.6.0", "vendor__syn-2.0.87", "vendor__tracing-0.1.40", "vendor__tracing-subscriber-0.3.18", "vendor__tree-sitter-0.24.4", "vendor__tree-sitter-embedded-template-0.23.2", "vendor__tree-sitter-json-0.24.8", "vendor__tree-sitter-ql-0.23.1", "vendor__tree-sitter-ruby-0.23.1", "vendor__triomphe-0.1.14", "vendor__ungrammar-1.16.1") +use_repo(tree_sitter_extractors_deps, "vendor__anyhow-1.0.93", "vendor__argfile-0.2.1", "vendor__chrono-0.4.38", "vendor__clap-4.5.20", "vendor__encoding-0.2.33", "vendor__figment-0.10.19", "vendor__flate2-1.0.34", "vendor__glob-0.3.1", "vendor__globset-0.4.15", "vendor__itertools-0.10.5", "vendor__itertools-0.13.0", "vendor__lazy_static-1.5.0", "vendor__log-0.4.22", "vendor__num-traits-0.2.19", "vendor__num_cpus-1.16.0", "vendor__proc-macro2-1.0.89", "vendor__quote-1.0.37", "vendor__ra_ap_base_db-0.0.232", "vendor__ra_ap_cfg-0.0.232", "vendor__ra_ap_hir-0.0.232", "vendor__ra_ap_hir_def-0.0.232", "vendor__ra_ap_hir_expand-0.0.232", "vendor__ra_ap_ide_db-0.0.232", "vendor__ra_ap_intern-0.0.232", "vendor__ra_ap_load-cargo-0.0.232", "vendor__ra_ap_parser-0.0.232", "vendor__ra_ap_paths-0.0.232", "vendor__ra_ap_project_model-0.0.232", "vendor__ra_ap_span-0.0.232", "vendor__ra_ap_syntax-0.0.232", "vendor__ra_ap_vfs-0.0.232", "vendor__rand-0.8.5", "vendor__rayon-1.10.0", "vendor__regex-1.11.1", "vendor__serde-1.0.214", "vendor__serde_json-1.0.133", "vendor__serde_with-3.11.0", "vendor__stderrlog-0.6.0", "vendor__syn-2.0.87", "vendor__tracing-0.1.40", "vendor__tracing-subscriber-0.3.18", "vendor__tree-sitter-0.24.4", "vendor__tree-sitter-embedded-template-0.23.2", "vendor__tree-sitter-json-0.24.8", "vendor__tree-sitter-ql-0.23.1", "vendor__tree-sitter-ruby-0.23.1", "vendor__triomphe-0.1.14", "vendor__ungrammar-1.16.1") 
dotnet = use_extension("@rules_dotnet//dotnet:extensions.bzl", "dotnet") dotnet.toolchain(dotnet_version = "9.0.100") diff --git a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.bazel b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.bazel index 844d385f8a41..6208d66c5d93 100644 --- a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.bazel +++ b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.bazel @@ -243,7 +243,7 @@ alias( alias( name = "serde_json", - actual = "@vendor__serde_json-1.0.132//:serde_json", + actual = "@vendor__serde_json-1.0.133//:serde_json", tags = ["manual"], ) diff --git a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.cargo_metadata-0.18.1.bazel b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.cargo_metadata-0.18.1.bazel index e31c61cfac73..9ad331aa5f74 100644 --- a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.cargo_metadata-0.18.1.bazel +++ b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.cargo_metadata-0.18.1.bazel @@ -86,7 +86,7 @@ rust_library( "@vendor__cargo-platform-0.1.8//:cargo_platform", "@vendor__semver-1.0.23//:semver", "@vendor__serde-1.0.214//:serde", - "@vendor__serde_json-1.0.132//:serde_json", + "@vendor__serde_json-1.0.133//:serde_json", "@vendor__thiserror-1.0.69//:thiserror", ], ) diff --git a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.ra_ap_proc_macro_api-0.0.232.bazel b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.ra_ap_proc_macro_api-0.0.232.bazel index d1f5d480d719..fa3b71f570cd 100644 --- a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.ra_ap_proc_macro_api-0.0.232.bazel +++ b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.ra_ap_proc_macro_api-0.0.232.bazel @@ -96,7 +96,7 @@ rust_library( "@vendor__ra_ap_tt-0.0.232//:ra_ap_tt", "@vendor__rustc-hash-1.1.0//:rustc_hash", "@vendor__serde-1.0.214//:serde", - "@vendor__serde_json-1.0.132//:serde_json", + "@vendor__serde_json-1.0.133//:serde_json", "@vendor__tracing-0.1.40//:tracing", ], ) 
diff --git a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.ra_ap_project_model-0.0.232.bazel b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.ra_ap_project_model-0.0.232.bazel index 6f6f3bb303a4..5608e6dd6b0d 100644 --- a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.ra_ap_project_model-0.0.232.bazel +++ b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.ra_ap_project_model-0.0.232.bazel @@ -102,7 +102,7 @@ rust_library( "@vendor__rustc-hash-1.1.0//:rustc_hash", "@vendor__semver-1.0.23//:semver", "@vendor__serde-1.0.214//:serde", - "@vendor__serde_json-1.0.132//:serde_json", + "@vendor__serde_json-1.0.133//:serde_json", "@vendor__tracing-0.1.40//:tracing", "@vendor__triomphe-0.1.14//:triomphe", ], diff --git a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.serde_json-1.0.132.bazel b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.serde_json-1.0.133.bazel similarity index 97% rename from misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.serde_json-1.0.132.bazel rename to misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.serde_json-1.0.133.bazel index ecbae32eeafa..710772e4f37b 100644 --- a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.serde_json-1.0.132.bazel +++ b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.serde_json-1.0.133.bazel @@ -83,13 +83,13 @@ rust_library( "@rules_rust//rust/platform:x86_64-unknown-none": [], "//conditions:default": ["@platforms//:incompatible"], }), - version = "1.0.132", + version = "1.0.133", deps = [ "@vendor__itoa-1.0.11//:itoa", "@vendor__memchr-2.7.4//:memchr", "@vendor__ryu-1.0.18//:ryu", "@vendor__serde-1.0.214//:serde", - "@vendor__serde_json-1.0.132//:build_script_build", + "@vendor__serde_json-1.0.133//:build_script_build", ], ) @@ -143,7 +143,7 @@ cargo_build_script( "noclippy", "norustfmt", ], - version = "1.0.132", + version = "1.0.133", visibility = ["//visibility:private"], ) diff --git a/misc/bazel/3rdparty/tree_sitter_extractors_deps/defs.bzl 
b/misc/bazel/3rdparty/tree_sitter_extractors_deps/defs.bzl index 1d0b825c2356..927736b56b21 100644 --- a/misc/bazel/3rdparty/tree_sitter_extractors_deps/defs.bzl +++ b/misc/bazel/3rdparty/tree_sitter_extractors_deps/defs.bzl @@ -320,6 +320,7 @@ _NORMAL_DEPENDENCIES = { _COMMON_CONDITION: { "anyhow": Label("@vendor__anyhow-1.0.93//:anyhow"), "argfile": Label("@vendor__argfile-0.2.1//:argfile"), + "chrono": Label("@vendor__chrono-0.4.38//:chrono"), "clap": Label("@vendor__clap-4.5.20//:clap"), "figment": Label("@vendor__figment-0.10.19//:figment"), "glob": Label("@vendor__glob-0.3.1//:glob"), @@ -341,6 +342,7 @@ _NORMAL_DEPENDENCIES = { "ra_ap_syntax": Label("@vendor__ra_ap_syntax-0.0.232//:ra_ap_syntax"), "ra_ap_vfs": Label("@vendor__ra_ap_vfs-0.0.232//:ra_ap_vfs"), "serde": Label("@vendor__serde-1.0.214//:serde"), + "serde_json": Label("@vendor__serde_json-1.0.133//:serde_json"), "serde_with": Label("@vendor__serde_with-3.11.0//:serde_with"), "stderrlog": Label("@vendor__stderrlog-0.6.0//:stderrlog"), "triomphe": Label("@vendor__triomphe-0.1.14//:triomphe"), @@ -363,7 +365,7 @@ _NORMAL_DEPENDENCIES = { "rayon": Label("@vendor__rayon-1.10.0//:rayon"), "regex": Label("@vendor__regex-1.11.1//:regex"), "serde": Label("@vendor__serde-1.0.214//:serde"), - "serde_json": Label("@vendor__serde_json-1.0.132//:serde_json"), + "serde_json": Label("@vendor__serde_json-1.0.133//:serde_json"), "tracing": Label("@vendor__tracing-0.1.40//:tracing"), "tracing-subscriber": Label("@vendor__tracing-subscriber-0.3.18//:tracing_subscriber"), "tree-sitter": Label("@vendor__tree-sitter-0.24.4//:tree_sitter"), @@ -2508,12 +2510,12 @@ def crate_repositories(): maybe( http_archive, - name = "vendor__serde_json-1.0.132", - sha256 = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03", + name = "vendor__serde_json-1.0.133", + sha256 = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377", type = "tar.gz", - urls = 
["https://static.crates.io/crates/serde_json/1.0.132/download"], - strip_prefix = "serde_json-1.0.132", - build_file = Label("//misc/bazel/3rdparty/tree_sitter_extractors_deps:BUILD.serde_json-1.0.132.bazel"), + urls = ["https://static.crates.io/crates/serde_json/1.0.133/download"], + strip_prefix = "serde_json-1.0.133", + build_file = Label("//misc/bazel/3rdparty/tree_sitter_extractors_deps:BUILD.serde_json-1.0.133.bazel"), ) maybe( @@ -3341,7 +3343,7 @@ def crate_repositories(): struct(repo = "vendor__rayon-1.10.0", is_dev_dep = False), struct(repo = "vendor__regex-1.11.1", is_dev_dep = False), struct(repo = "vendor__serde-1.0.214", is_dev_dep = False), - struct(repo = "vendor__serde_json-1.0.132", is_dev_dep = False), + struct(repo = "vendor__serde_json-1.0.133", is_dev_dep = False), struct(repo = "vendor__serde_with-3.11.0", is_dev_dep = False), struct(repo = "vendor__stderrlog-0.6.0", is_dev_dep = False), struct(repo = "vendor__syn-2.0.87", is_dev_dep = False), diff --git a/rust/extractor/Cargo.toml b/rust/extractor/Cargo.toml index 8b58898d3cf3..591df3480cd3 100644 --- a/rust/extractor/Cargo.toml +++ b/rust/extractor/Cargo.toml @@ -33,3 +33,5 @@ codeql-extractor = { path = "../../shared/tree-sitter-extractor" } rust-extractor-macros = { path = "macros" } itertools = "0.13.0" glob = "0.3.1" +chrono = { version = "0.4.38", features = ["serde"] } +serde_json = "1.0.133" diff --git a/rust/extractor/src/config.rs b/rust/extractor/src/config.rs index 70c390b99491..0e92e82a58cb 100644 --- a/rust/extractor/src/config.rs +++ b/rust/extractor/src/config.rs @@ -45,6 +45,7 @@ pub struct Config { pub scratch_dir: PathBuf, pub trap_dir: PathBuf, pub source_archive_dir: PathBuf, + pub diagnostic_dir: PathBuf, pub cargo_target_dir: Option, pub cargo_target: Option, pub cargo_features: Vec, diff --git a/rust/extractor/src/diagnostics.rs b/rust/extractor/src/diagnostics.rs new file mode 100644 index 000000000000..21b3619a207d --- /dev/null +++ 
b/rust/extractor/src/diagnostics.rs @@ -0,0 +1,258 @@ +use crate::config::Config; +use anyhow::Context; +use chrono::{DateTime, Utc}; +use log::{debug, info}; +use ra_ap_project_model::ProjectManifest; +use serde::Serialize; +use std::fmt::Display; +use std::fs::File; +use std::path::{Path, PathBuf}; +use std::time::Instant; + +#[derive(Default, Debug, Clone, Copy, Serialize)] +#[serde(rename_all = "camelCase")] +#[allow(dead_code)] +enum Severity { + #[default] + Note, + Warning, + Error, +} + +#[derive(Default, Debug, Clone, Copy, Serialize)] +#[serde(rename_all = "camelCase")] +struct Visibility { + status_page: bool, + cli_summary_table: bool, + telemetry: bool, +} + +#[derive(Debug, Clone, Serialize)] +#[serde(rename_all = "camelCase")] +#[allow(dead_code)] +enum Message { + TextMessage(String), + MarkdownMessage(String), +} + +impl Default for Message { + fn default() -> Self { + Message::TextMessage("".to_string()) + } +} + +#[derive(Default, Debug, Clone, Serialize)] +#[serde(rename_all = "camelCase")] +struct Source { + id: String, + name: String, + extractor_name: String, +} + +#[derive(Default, Debug, Clone, Serialize)] +#[serde(rename_all = "camelCase")] +struct Location { + file: PathBuf, + start_line: u32, + start_column: u32, + end_line: u32, + end_column: u32, +} + +#[derive(Default, Debug, Clone, Serialize)] +pub struct Diagnostics { + source: Source, + visibility: Visibility, + severity: Severity, + #[serde(flatten)] + message: Message, + timestamp: DateTime, + #[serde(skip_serializing_if = "Option::is_none")] + location: Option, + attributes: T, +} + +#[derive(Debug, Clone, Serialize)] +#[serde(rename_all = "camelCase")] +enum ExtractionStepTarget { + LoadManifest(PathBuf), + FetchFile(PathBuf), + Parse(PathBuf), + Extract(PathBuf), +} + +#[derive(Debug, Clone, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct ExtractionStep { + #[serde(flatten)] + target: ExtractionStepTarget, + ms: u128, +} + +impl ExtractionStep { + fn new(start: 
Instant, target: ExtractionStepTarget) -> Self { + let ret = ExtractionStep { + target, + ms: start.elapsed().as_millis(), + }; + debug!("{ret:?}"); + ret + } + + pub fn load_manifest(start: Instant, target: &ProjectManifest) -> Self { + Self::new( + start, + ExtractionStepTarget::LoadManifest(PathBuf::from(target.manifest_path())), + ) + } + + pub fn parse(start: Instant, target: &Path) -> Self { + Self::new(start, ExtractionStepTarget::Parse(PathBuf::from(target))) + } + + pub fn extract(start: Instant, target: &Path) -> Self { + Self::new(start, ExtractionStepTarget::Extract(PathBuf::from(target))) + } + + pub fn fetch_file(start: Instant, target: &Path) -> Self { + Self::new( + start, + ExtractionStepTarget::FetchFile(PathBuf::from(target)), + ) + } +} + +#[derive(Debug, Default, Clone, Serialize)] +#[serde(rename_all = "camelCase")] +struct HumanReadableDuration { + ms: u128, + pretty: String, +} + +impl HumanReadableDuration { + pub fn new(ms: u128) -> Self { + let seconds = ms / 1000; + let minutes = seconds / 60; + let hours = minutes / 60; + let pretty = format!( + "{hours}:{minutes:02}:{seconds:02}.{milliseconds:03}", + minutes = minutes % 60, + seconds = seconds % 60, + milliseconds = ms % 1000, + ); + Self { ms, pretty } + } +} + +impl From for HumanReadableDuration { + fn from(val: u128) -> Self { + HumanReadableDuration::new(val) + } +} + +impl Display for HumanReadableDuration { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{}ms ({})", self.ms, self.pretty) + } +} + +#[derive(Debug, Default, Clone, Serialize)] +#[serde(rename_all = "camelCase")] +struct ExtractionSummary { + number_of_manifests: usize, + number_of_files: usize, + total_load_duration: HumanReadableDuration, + total_fetch_file_duration: HumanReadableDuration, + total_parse_duration: HumanReadableDuration, + total_extract_duration: HumanReadableDuration, + total_duration: HumanReadableDuration, +} + +#[derive(Debug, Default, Clone, Serialize)] 
+#[serde(rename_all = "camelCase")] +struct ExtractionAttributes { + steps: Vec, + summary: ExtractionSummary, +} + +type ExtractionDiagnostics = Diagnostics; + +fn summary(start: Instant, steps: &[ExtractionStep]) -> ExtractionSummary { + let mut number_of_manifests = 0; + let mut number_of_files = 0; + let mut total_load_duration = 0; + let mut total_parse_duration = 0; + let mut total_extract_duration = 0; + let mut total_fetch_file_duration: u128 = 0; + for step in steps { + match &step.target { + ExtractionStepTarget::LoadManifest(_) => { + number_of_manifests += 1; + total_load_duration += step.ms; + } + ExtractionStepTarget::FetchFile(_) => { + number_of_files += 1; + total_fetch_file_duration += step.ms; + } + ExtractionStepTarget::Parse(_) => { + total_parse_duration += step.ms; + } + ExtractionStepTarget::Extract(_) => { + total_extract_duration += step.ms; + } + } + } + let ret = ExtractionSummary { + number_of_manifests, + number_of_files, + total_load_duration: total_load_duration.into(), + total_fetch_file_duration: total_fetch_file_duration.into(), + total_parse_duration: total_parse_duration.into(), + total_extract_duration: total_extract_duration.into(), + total_duration: start.elapsed().as_millis().into(), + }; + info!("total loadimg duration: {}", ret.total_load_duration); + info!( + "total file fetching duration: {}", + ret.total_fetch_file_duration + ); + info!("total parsing duration: {}", ret.total_parse_duration); + info!("total extracting duration: {}", ret.total_extract_duration); + info!("total duration: {}", ret.total_duration); + ret +} + +pub fn emit_extraction_diagnostics( + start: Instant, + config: &Config, + steps: Vec, +) -> anyhow::Result<()> { + let summary = summary(start, &steps); + let diagnostics = ExtractionDiagnostics { + source: Source { + id: "rust/extractor/telemetry".to_owned(), + name: "telemetry".to_string(), + extractor_name: "rust".to_string(), + }, + visibility: Visibility { + telemetry: true, + 
..Default::default() + }, + timestamp: Utc::now(), + attributes: ExtractionAttributes { steps, summary }, + ..Default::default() + }; + + std::fs::create_dir_all(&config.diagnostic_dir).with_context(|| { + format!( + "creating diagnostics directory {}", + config.diagnostic_dir.display() + ) + })?; + let target = config.diagnostic_dir.join("extraction.jsonc"); + let mut output = File::create(&target) + .with_context(|| format!("creating diagnostics file {}", target.display()))?; + serde_json::to_writer_pretty(&mut output, &diagnostics) + .with_context(|| format!("writing to diagnostics file {}", target.display()))?; + Ok(()) +} diff --git a/rust/extractor/src/main.rs b/rust/extractor/src/main.rs index ecbdc965f8e7..f00e94e9ba4e 100644 --- a/rust/extractor/src/main.rs +++ b/rust/extractor/src/main.rs @@ -1,3 +1,4 @@ +use crate::diagnostics::{emit_extraction_diagnostics, ExtractionStep}; use crate::rust_analyzer::path_to_file_id; use anyhow::Context; use archive::Archiver; @@ -5,9 +6,10 @@ use log::info; use ra_ap_hir::Semantics; use ra_ap_ide_db::line_index::{LineCol, LineIndex}; use ra_ap_ide_db::RootDatabase; -use ra_ap_project_model::ProjectManifest; +use ra_ap_project_model::{CargoConfig, ProjectManifest}; use ra_ap_vfs::Vfs; use rust_analyzer::{ParseResult, RustAnalyzer}; +use std::time::Instant; use std::{ collections::HashMap, path::{Path, PathBuf}, @@ -15,6 +17,7 @@ use std::{ mod archive; mod config; +mod diagnostics; pub mod generated; mod qltest; mod rust_analyzer; @@ -24,18 +27,31 @@ pub mod trap; struct Extractor<'a> { archiver: &'a Archiver, traps: &'a trap::TrapFileProvider, + steps: Vec, } -impl Extractor<'_> { - fn extract(&self, rust_analyzer: &rust_analyzer::RustAnalyzer, file: &std::path::Path) { +impl<'a> Extractor<'a> { + pub fn new(archiver: &'a Archiver, traps: &'a trap::TrapFileProvider) -> Self { + Self { + archiver, + traps, + steps: Vec::new(), + } + } + + fn extract(&mut self, rust_analyzer: &rust_analyzer::RustAnalyzer, file: 
&std::path::Path) { self.archiver.archive(file); + let before_parse = Instant::now(); let ParseResult { ast, text, errors, semantics_info, } = rust_analyzer.parse(file); + self.steps.push(ExtractionStep::parse(before_parse, file)); + + let before_extract = Instant::now(); let line_index = LineIndex::new(text.as_ref()); let display_path = file.to_string_lossy(); let mut trap = self.traps.create("source", file); @@ -73,22 +89,63 @@ impl Extractor<'_> { err.to_string() ) }); + self.steps + .push(ExtractionStep::extract(before_extract, file)); } pub fn extract_with_semantics( - &self, + &mut self, file: &Path, semantics: &Semantics<'_, RootDatabase>, vfs: &Vfs, ) { self.extract(&RustAnalyzer::new(vfs, semantics), file); } - pub fn extract_without_semantics(&self, file: &Path, reason: &str) { + + pub fn extract_without_semantics(&mut self, file: &Path, reason: &str) { self.extract(&RustAnalyzer::WithoutSemantics { reason }, file); } + + pub fn load_manifest( + &mut self, + project: &ProjectManifest, + config: &CargoConfig, + ) -> Option<(RootDatabase, Vfs)> { + let before = Instant::now(); + let ret = RustAnalyzer::load_workspace(project, config); + self.steps + .push(ExtractionStep::load_manifest(before, project)); + ret + } + + pub fn fetch_file( + &mut self, + file: &Path, + semantics: &Semantics<'_, RootDatabase>, + vfs: &Vfs, + ) -> Result<(), String> { + let before = Instant::now(); + let Some(id) = path_to_file_id(file, vfs) else { + return Err("not included in files loaded from manifest".to_string()); + }; + if semantics.file_to_module_def(id).is_none() { + return Err("not included as a module".to_string()); + } + self.steps.push(ExtractionStep::fetch_file(before, file)); + Ok(()) + } + + pub fn emit_extraction_diagnostics( + self, + start: Instant, + cfg: &config::Config, + ) -> anyhow::Result<()> { + emit_extraction_diagnostics(start, cfg, self.steps) + } } fn main() -> anyhow::Result<()> { + let start = Instant::now(); let mut cfg = 
config::Config::extract().context("failed to load configuration")?; stderrlog::new() .module(module_path!()) @@ -103,10 +160,7 @@ fn main() -> anyhow::Result<()> { let archiver = archive::Archiver { root: cfg.source_archive_dir.clone(), }; - let extractor = Extractor { - archiver: &archiver, - traps: &traps, - }; + let mut extractor = Extractor::new(&archiver, &traps); let files: Vec = cfg .inputs .iter() @@ -132,21 +186,13 @@ fn main() -> anyhow::Result<()> { } let cargo_config = cfg.to_cargo_config(); for (manifest, files) in map.values().filter(|(_, files)| !files.is_empty()) { - if let Some((ref db, ref vfs)) = RustAnalyzer::load_workspace(manifest, &cargo_config) { + if let Some((ref db, ref vfs)) = extractor.load_manifest(manifest, &cargo_config) { let semantics = Semantics::new(db); for file in files { - let Some(id) = path_to_file_id(file, vfs) else { - extractor.extract_without_semantics( - file, - "not included in files loaded from manifest", - ); - continue; + match extractor.fetch_file(file, &semantics, vfs) { + Ok(()) => extractor.extract_with_semantics(file, &semantics, vfs), + Err(reason) => extractor.extract_without_semantics(file, &reason), }; - if semantics.file_to_module_def(id).is_none() { - extractor.extract_without_semantics(file, "not included as a module"); - continue; - } - extractor.extract_with_semantics(file, &semantics, vfs); } } else { for file in files { @@ -155,5 +201,5 @@ fn main() -> anyhow::Result<()> { } } - Ok(()) + extractor.emit_extraction_diagnostics(start, &cfg) } diff --git a/rust/ql/integration-tests/conftest.py b/rust/ql/integration-tests/conftest.py index 08b17f106f80..9967339f2d2d 100644 --- a/rust/ql/integration-tests/conftest.py +++ b/rust/ql/integration-tests/conftest.py @@ -13,3 +13,12 @@ def select(self, name: str): @pytest.fixture def manifests(cwd): return _Manifests(cwd) + +@pytest.fixture +def rust_check_diagnostics(check_diagnostics): + check_diagnostics.replacements += [ + (r'"ms"\s*:\s*[0-9]+', '"ms": 
"REDACTED"'), + (r'"pretty"\s*:\s*"[0-9]+:[0-9]{2}:[0-9]{2}.[0-9]{3}"', '"pretty": "REDACTED"'), + (r'Cargo.toml|rust-project.json', ""), + ] + return check_diagnostics diff --git a/rust/ql/integration-tests/hello-project/diagnostics.expected b/rust/ql/integration-tests/hello-project/diagnostics.expected new file mode 100644 index 000000000000..165fd2639901 --- /dev/null +++ b/rust/ql/integration-tests/hello-project/diagnostics.expected @@ -0,0 +1,101 @@ +{ + "attributes": { + "steps": [ + { + "loadManifest": "/", + "ms": "REDACTED" + }, + { + "fetchFile": "/src/directory_module/mod.rs", + "ms": "REDACTED" + }, + { + "ms": "REDACTED", + "parse": "/src/directory_module/mod.rs" + }, + { + "extract": "/src/directory_module/mod.rs", + "ms": "REDACTED" + }, + { + "fetchFile": "/src/directory_module/nested_module.rs", + "ms": "REDACTED" + }, + { + "ms": "REDACTED", + "parse": "/src/directory_module/nested_module.rs" + }, + { + "extract": "/src/directory_module/nested_module.rs", + "ms": "REDACTED" + }, + { + "ms": "REDACTED", + "parse": "/src/directory_module/not_loaded.rs" + }, + { + "extract": "/src/directory_module/not_loaded.rs", + "ms": "REDACTED" + }, + { + "fetchFile": "/src/file_module.rs", + "ms": "REDACTED" + }, + { + "ms": "REDACTED", + "parse": "/src/file_module.rs" + }, + { + "extract": "/src/file_module.rs", + "ms": "REDACTED" + }, + { + "fetchFile": "/src/main.rs", + "ms": "REDACTED" + }, + { + "ms": "REDACTED", + "parse": "/src/main.rs" + }, + { + "extract": "/src/main.rs", + "ms": "REDACTED" + } + ], + "summary": { + "numberOfFiles": 4, + "numberOfManifests": 1, + "totalDuration": { + "ms": "REDACTED", + "pretty": "REDACTED" + }, + "totalExtractDuration": { + "ms": "REDACTED", + "pretty": "REDACTED" + }, + "totalFetchFileDuration": { + "ms": "REDACTED", + "pretty": "REDACTED" + }, + "totalLoadDuration": { + "ms": "REDACTED", + "pretty": "REDACTED" + }, + "totalParseDuration": { + "ms": "REDACTED", + "pretty": "REDACTED" + } + } + }, + "severity": "note", 
+ "source": { + "extractorName": "rust", + "id": "rust/extractor/telemetry", + "name": "telemetry" + }, + "visibility": { + "cliSummaryTable": false, + "statusPage": false, + "telemetry": true + } +} diff --git a/rust/ql/integration-tests/hello-project/test_project.py b/rust/ql/integration-tests/hello-project/test_project.py index d03c4f67e265..2cbac0ffdb8a 100644 --- a/rust/ql/integration-tests/hello-project/test_project.py +++ b/rust/ql/integration-tests/hello-project/test_project.py @@ -1,7 +1,7 @@ -def test_cargo(codeql, rust, manifests, check_source_archive): +def test_cargo(codeql, rust, manifests, check_source_archive, rust_check_diagnostics): manifests.select("Cargo.toml") codeql.database.create() -def test_rust_project(codeql, rust, manifests, check_source_archive): +def test_rust_project(codeql, rust, manifests, check_source_archive, rust_check_diagnostics): manifests.select("rust-project.json") codeql.database.create() diff --git a/rust/ql/integration-tests/hello-workspace/diagnostics.cargo.expected b/rust/ql/integration-tests/hello-workspace/diagnostics.cargo.expected new file mode 100644 index 000000000000..42e7859a4e82 --- /dev/null +++ b/rust/ql/integration-tests/hello-workspace/diagnostics.cargo.expected @@ -0,0 +1,97 @@ +{ + "attributes": { + "steps": [ + { + "loadManifest": "/lib/", + "ms": "REDACTED" + }, + { + "fetchFile": "/lib/src/a_module/mod.rs", + "ms": "REDACTED" + }, + { + "ms": "REDACTED", + "parse": "/lib/src/a_module/mod.rs" + }, + { + "extract": "/lib/src/a_module/mod.rs", + "ms": "REDACTED" + }, + { + "fetchFile": "/lib/src/lib.rs", + "ms": "REDACTED" + }, + { + "ms": "REDACTED", + "parse": "/lib/src/lib.rs" + }, + { + "extract": "/lib/src/lib.rs", + "ms": "REDACTED" + }, + { + "loadManifest": "/exe/", + "ms": "REDACTED" + }, + { + "fetchFile": "/exe/src/a_module.rs", + "ms": "REDACTED" + }, + { + "ms": "REDACTED", + "parse": "/exe/src/a_module.rs" + }, + { + "extract": "/exe/src/a_module.rs", + "ms": "REDACTED" + }, + { + 
"fetchFile": "/exe/src/main.rs", + "ms": "REDACTED" + }, + { + "ms": "REDACTED", + "parse": "/exe/src/main.rs" + }, + { + "extract": "/exe/src/main.rs", + "ms": "REDACTED" + } + ], + "summary": { + "numberOfFiles": 4, + "numberOfManifests": 2, + "totalDuration": { + "ms": "REDACTED", + "pretty": "REDACTED" + }, + "totalExtractDuration": { + "ms": "REDACTED", + "pretty": "REDACTED" + }, + "totalFetchFileDuration": { + "ms": "REDACTED", + "pretty": "REDACTED" + }, + "totalLoadDuration": { + "ms": "REDACTED", + "pretty": "REDACTED" + }, + "totalParseDuration": { + "ms": "REDACTED", + "pretty": "REDACTED" + } + } + }, + "severity": "note", + "source": { + "extractorName": "rust", + "id": "rust/extractor/telemetry", + "name": "telemetry" + }, + "visibility": { + "cliSummaryTable": false, + "statusPage": false, + "telemetry": true + } +} diff --git a/rust/ql/integration-tests/hello-workspace/diagnostics.rust-project.expected b/rust/ql/integration-tests/hello-workspace/diagnostics.rust-project.expected new file mode 100644 index 000000000000..7df7b52cdee0 --- /dev/null +++ b/rust/ql/integration-tests/hello-workspace/diagnostics.rust-project.expected @@ -0,0 +1,93 @@ +{ + "attributes": { + "steps": [ + { + "loadManifest": "/", + "ms": "REDACTED" + }, + { + "fetchFile": "/exe/src/a_module.rs", + "ms": "REDACTED" + }, + { + "ms": "REDACTED", + "parse": "/exe/src/a_module.rs" + }, + { + "extract": "/exe/src/a_module.rs", + "ms": "REDACTED" + }, + { + "fetchFile": "/exe/src/main.rs", + "ms": "REDACTED" + }, + { + "ms": "REDACTED", + "parse": "/exe/src/main.rs" + }, + { + "extract": "/exe/src/main.rs", + "ms": "REDACTED" + }, + { + "fetchFile": "/lib/src/a_module/mod.rs", + "ms": "REDACTED" + }, + { + "ms": "REDACTED", + "parse": "/lib/src/a_module/mod.rs" + }, + { + "extract": "/lib/src/a_module/mod.rs", + "ms": "REDACTED" + }, + { + "fetchFile": "/lib/src/lib.rs", + "ms": "REDACTED" + }, + { + "ms": "REDACTED", + "parse": "/lib/src/lib.rs" + }, + { + "extract": 
"/lib/src/lib.rs", + "ms": "REDACTED" + } + ], + "summary": { + "numberOfFiles": 4, + "numberOfManifests": 1, + "totalDuration": { + "ms": "REDACTED", + "pretty": "REDACTED" + }, + "totalExtractDuration": { + "ms": "REDACTED", + "pretty": "REDACTED" + }, + "totalFetchFileDuration": { + "ms": "REDACTED", + "pretty": "REDACTED" + }, + "totalLoadDuration": { + "ms": "REDACTED", + "pretty": "REDACTED" + }, + "totalParseDuration": { + "ms": "REDACTED", + "pretty": "REDACTED" + } + } + }, + "severity": "note", + "source": { + "extractorName": "rust", + "id": "rust/extractor/telemetry", + "name": "telemetry" + }, + "visibility": { + "cliSummaryTable": false, + "statusPage": false, + "telemetry": true + } +} diff --git a/rust/ql/integration-tests/hello-workspace/test_workspace.py b/rust/ql/integration-tests/hello-workspace/test_workspace.py index f3503e7cefae..5c95031466f8 100644 --- a/rust/ql/integration-tests/hello-workspace/test_workspace.py +++ b/rust/ql/integration-tests/hello-workspace/test_workspace.py @@ -3,10 +3,12 @@ # currently the DB-check fails on actions because of loading files multiple times and assiging multiple locations # see https://github.com/github/codeql-team/issues/3365 @pytest.mark.ql_test("DB-CHECK", xfail="maybe") -def test_cargo(codeql, rust, manifests, check_source_archive): +def test_cargo(codeql, rust, manifests, check_source_archive, rust_check_diagnostics): + rust_check_diagnostics.expected_suffix = ".cargo.expected" manifests.select("Cargo.toml") codeql.database.create() -def test_rust_project(codeql, rust, manifests, check_source_archive): +def test_rust_project(codeql, rust, manifests, check_source_archive, rust_check_diagnostics): + rust_check_diagnostics.expected_suffix = ".rust-project.expected" manifests.select("rust-project.json") codeql.database.create()