diff --git a/csharp/extractor/Semmle.Extraction.CSharp/Entities/Compilations/Compilation.cs b/csharp/extractor/Semmle.Extraction.CSharp/Entities/Compilations/Compilation.cs index 8685c9d7a7dc..ecb5d51d44d5 100644 --- a/csharp/extractor/Semmle.Extraction.CSharp/Entities/Compilations/Compilation.cs +++ b/csharp/extractor/Semmle.Extraction.CSharp/Entities/Compilations/Compilation.cs @@ -3,6 +3,7 @@ using System.IO; using System.Linq; using Microsoft.CodeAnalysis; +using Semmle.Extraction.Entities; using Semmle.Util; namespace Semmle.Extraction.CSharp.Entities @@ -89,13 +90,21 @@ public void PopulatePerformance(PerformanceMetrics p) trapFile.compilation_finished(this, (float)p.Total.Cpu.TotalSeconds, (float)p.Total.Elapsed.TotalSeconds); } + public void PopulateAggregatedMessages() + { + ExtractionMessage.groupedMessageCounts.ForEach(pair => + { + Context.TrapWriter.Writer.compilation_info(this, $"Extractor message count for group '{pair.Key}'", pair.Value.ToString()); + }); + } + public override void WriteId(EscapingTextWriter trapFile) { trapFile.Write(hashCode); trapFile.Write(";compilation"); } - public override Location ReportingLocation => throw new NotImplementedException(); + public override Microsoft.CodeAnalysis.Location ReportingLocation => throw new NotImplementedException(); public override bool NeedsPopulation => Context.IsAssemblyScope; diff --git a/csharp/extractor/Semmle.Extraction.CSharp/Extractor/Analyser.cs b/csharp/extractor/Semmle.Extraction.CSharp/Extractor/Analyser.cs index 0b523e69b1aa..b839d2c976a3 100644 --- a/csharp/extractor/Semmle.Extraction.CSharp/Extractor/Analyser.cs +++ b/csharp/extractor/Semmle.Extraction.CSharp/Extractor/Analyser.cs @@ -250,6 +250,8 @@ private void DoAnalyseCompilation() public void LogPerformance(Entities.PerformanceMetrics p) => compilationEntity.PopulatePerformance(p); + public void ExtractAggregatedMessages() => compilationEntity.PopulateAggregatedMessages(); + #nullable restore warnings /// diff --git 
a/csharp/extractor/Semmle.Extraction.CSharp/Extractor/Extractor.cs b/csharp/extractor/Semmle.Extraction.CSharp/Extractor/Extractor.cs index ae3875f5028f..d87f6fd24c0c 100644 --- a/csharp/extractor/Semmle.Extraction.CSharp/Extractor/Extractor.cs +++ b/csharp/extractor/Semmle.Extraction.CSharp/Extractor/Extractor.cs @@ -458,6 +458,7 @@ public static ExitCode Analyse(Stopwatch stopwatch, Analyser analyser, CommonOpt sw.Restart(); analyser.PerformExtraction(options.Threads); + analyser.ExtractAggregatedMessages(); sw.Stop(); var cpuTime2 = currentProcess.TotalProcessorTime; var userTime2 = currentProcess.UserProcessorTime; diff --git a/csharp/extractor/Semmle.Extraction/BUILD.bazel b/csharp/extractor/Semmle.Extraction/BUILD.bazel index de3a6c2d96ae..83dfb8235e88 100644 --- a/csharp/extractor/Semmle.Extraction/BUILD.bazel +++ b/csharp/extractor/Semmle.Extraction/BUILD.bazel @@ -26,6 +26,7 @@ codeql_csharp_library( ], "//conditions:default": [], }), + internals_visible_to = ["Semmle.Extraction.CSharp"], visibility = ["//csharp:__subpackages__"], deps = [ "//csharp/extractor/Semmle.Util", diff --git a/csharp/extractor/Semmle.Extraction/Entities/ExtractionMessage.cs b/csharp/extractor/Semmle.Extraction/Entities/ExtractionMessage.cs index f417a170c10b..514ce433c0ac 100644 --- a/csharp/extractor/Semmle.Extraction/Entities/ExtractionMessage.cs +++ b/csharp/extractor/Semmle.Extraction/Entities/ExtractionMessage.cs @@ -1,4 +1,5 @@ -using System.IO; +using System.Collections.Concurrent; +using System.IO; using System.Threading; using Semmle.Util; @@ -7,6 +8,8 @@ namespace Semmle.Extraction.Entities internal class ExtractionMessage : FreshEntity { private static readonly int limit = EnvironmentVariables.TryGetExtractorNumberOption("MESSAGE_LIMIT") ?? 
10000; + + internal static readonly ConcurrentDictionary<string, int> groupedMessageCounts = []; private static int messageCount = 0; private readonly Message msg; @@ -25,6 +28,10 @@ private ExtractionMessage(Context cx, Message msg, bool bypassLimit) : base(cx) protected override void Populate(TextWriter trapFile) { + // For the time being we're counting the number of messages per severity, we could introduce other groupings in the future + var key = msg.Severity.ToString(); + groupedMessageCounts.AddOrUpdate(key, 1, (_, c) => c + 1); + if (!bypassLimit) { var val = Interlocked.Increment(ref messageCount); diff --git a/csharp/extractor/Semmle.Extraction/Semmle.Extraction.csproj b/csharp/extractor/Semmle.Extraction/Semmle.Extraction.csproj index b4625b160173..2173a50f2ad3 100644 --- a/csharp/extractor/Semmle.Extraction/Semmle.Extraction.csproj +++ b/csharp/extractor/Semmle.Extraction/Semmle.Extraction.csproj @@ -5,6 +5,8 @@ + + diff --git a/csharp/ql/integration-tests/all-platforms/standalone/Diag.expected b/csharp/ql/integration-tests/all-platforms/standalone/Diag.expected index 6d84e27e5cea..74314f2c0c93 100644 --- a/csharp/ql/integration-tests/all-platforms/standalone/Diag.expected +++ b/csharp/ql/integration-tests/all-platforms/standalone/Diag.expected @@ -7,3 +7,5 @@ extractorMessagesLeachedLimit compilationInfo | Compiler diagnostic count for CS0103 | 3.0 | | Compiler diagnostic count for CS8019 | 7.0 | +| Extractor message count for group 'Error' | 8.0 | +| Extractor message count for group 'Warning' | 1.0 | diff --git a/csharp/ql/integration-tests/all-platforms/standalone/Diag.ql b/csharp/ql/integration-tests/all-platforms/standalone/Diag.ql index 8b1fbae6b2fd..e2fa743471ca 100644 --- a/csharp/ql/integration-tests/all-platforms/standalone/Diag.ql +++ b/csharp/ql/integration-tests/all-platforms/standalone/Diag.ql @@ -11,7 +11,8 @@ query predicate extractorMessagesLeachedLimit(ExtractorMessage msg) { query predicate compilationInfo(string key, float value) { 
exists(Compilation c, string infoValue | - infoValue = c.getInfo(key) and key.matches("Compiler diagnostic count for%") + infoValue = c.getInfo(key) and + key.matches(["Compiler diagnostic count for%", "Extractor message count for group%"]) | value = infoValue.toFloat() ) diff --git a/csharp/ql/integration-tests/all-platforms/standalone_winforms/CompilationInfo.ql b/csharp/ql/integration-tests/all-platforms/standalone_winforms/CompilationInfo.ql index a96c2fd99a69..078e352be4d9 100644 --- a/csharp/ql/integration-tests/all-platforms/standalone_winforms/CompilationInfo.ql +++ b/csharp/ql/integration-tests/all-platforms/standalone_winforms/CompilationInfo.ql @@ -4,7 +4,7 @@ import semmle.code.csharp.commons.Diagnostics query predicate compilationInfo(string key, float value) { key != "Resolved references" and key != "Resolved assembly conflicts" and - not key.matches("Compiler diagnostic count for%") and + not key.matches(["Compiler diagnostic count for%", "Extractor message count for group%"]) and exists(Compilation c, string infoKey, string infoValue | infoValue = c.getInfo(infoKey) | key = infoKey and value = infoValue.toFloat() diff --git a/csharp/ql/src/Telemetry/ExtractorInformation.ql b/csharp/ql/src/Telemetry/ExtractorInformation.ql index c2d80f7c7687..6fdaf9ca22d5 100644 --- a/csharp/ql/src/Telemetry/ExtractorInformation.ql +++ b/csharp/ql/src/Telemetry/ExtractorInformation.ql @@ -12,6 +12,7 @@ import DatabaseQuality predicate compilationInfo(string key, float value) { not key.matches("Compiler diagnostic count for%") and + not key.matches("Extractor message count for group%") and exists(Compilation c, string infoKey, string infoValue | infoValue = c.getInfo(infoKey) | key = infoKey and value = infoValue.toFloat() @@ -22,6 +23,16 @@ predicate compilationInfo(string key, float value) { ) } +predicate compilerDiagnostics(string key, int value) { + key.matches("Compiler diagnostic count for%") and + strictsum(Compilation c | | c.getInfo(key).toInt()) = value +} 
+ +predicate extractorMessages(string key, int value) { + key.matches("Extractor message count for group%") and + strictsum(Compilation c | | c.getInfo(key).toInt()) = value +} + predicate fileCount(string key, int value) { key = "Number of files" and value = strictcount(File f) @@ -140,6 +151,8 @@ from string key, float value where ( compilationInfo(key, value) or + compilerDiagnostics(key, value) or + extractorMessages(key, value) or fileCount(key, value) or fileCountByExtension(key, value) or totalNumberOfLines(key, value) or