diff --git a/csharp/extractor/Semmle.Extraction.CSharp/Entities/Compilations/Compilation.cs b/csharp/extractor/Semmle.Extraction.CSharp/Entities/Compilations/Compilation.cs
index a194323c9a61..0b575df2b696 100644
--- a/csharp/extractor/Semmle.Extraction.CSharp/Entities/Compilations/Compilation.cs
+++ b/csharp/extractor/Semmle.Extraction.CSharp/Entities/Compilations/Compilation.cs
@@ -1,4 +1,5 @@
 using System;
+using System.Collections.Concurrent;
 using System.IO;
 using System.Linq;
 using Microsoft.CodeAnalysis;
@@ -8,6 +9,11 @@ namespace Semmle.Extraction.CSharp.Entities
 {
     internal class Compilation : CachedEntity
     {
+        /// <summary>
+        /// Number of compiler diagnostics seen so far for this compilation, keyed by diagnostic ID.
+        /// </summary>
+        internal readonly ConcurrentDictionary<string, int> messageCounts = new();
+
         private static (string Cwd, string[] Args) settings;
         private static int hashCode;
 
@@ -78,10 +84,11 @@ public override void Populate(TextWriter trapFile)
                 .ForEach((file, index) => trapFile.compilation_referencing_files(this, index, file));
 
             // Diagnostics
-            Context.Compilation
-                .GetDiagnostics()
-                .Select(d => new Diagnostic(Context, d))
-                .ForEach((diag, index) => trapFile.diagnostic_for(diag, this, 0, index));
+            var diags = Context.Compilation.GetDiagnostics();
+            diags.ForEach((diag, index) => new CompilerDiagnostic(Context, diag, this, index));
+
+            var diagCounts = diags.GroupBy(diag => diag.Id).ToDictionary(group => group.Key, group => group.Count());
+            diagCounts.ForEach(pair => trapFile.compilation_info(this, $"Compiler diagnostic count for {pair.Key}", pair.Value.ToString()));
         }
 
         public void PopulatePerformance(PerformanceMetrics p)
diff --git a/csharp/extractor/Semmle.Extraction.CSharp/Entities/Compilations/CompilerDiagnostic.cs b/csharp/extractor/Semmle.Extraction.CSharp/Entities/Compilations/CompilerDiagnostic.cs
new file mode 100644
index 000000000000..c1227f2ffc0b
--- /dev/null
+++ b/csharp/extractor/Semmle.Extraction.CSharp/Entities/Compilations/CompilerDiagnostic.cs
@@ -0,0 +1,47 @@
+using System.IO;
+using Semmle.Util;
+
+namespace Semmle.Extraction.CSharp.Entities
+{
+    /// <summary>
+    /// A compiler diagnostic belonging to a compilation. The number of diagnostics written
+    /// per diagnostic ID is capped by the <c>COMPILER_DIAGNOSTIC_LIMIT</c> extractor option (default 1000).
+    /// </summary>
+    internal class CompilerDiagnostic : FreshEntity
+    {
+        private static readonly int limit = EnvironmentVariables.TryGetExtractorNumberOption<int>("COMPILER_DIAGNOSTIC_LIMIT") ?? 1000;
+
+        private readonly Microsoft.CodeAnalysis.Diagnostic diagnostic;
+        private readonly Compilation compilation;
+        private readonly int index;
+
+        public CompilerDiagnostic(Context cx, Microsoft.CodeAnalysis.Diagnostic diag, Compilation compilation, int index) : base(cx)
+        {
+            diagnostic = diag;
+            this.compilation = compilation;
+            this.index = index;
+            TryPopulate();
+        }
+
+        protected override void Populate(TextWriter trapFile)
+        {
+            // Diagnostics are counted per ID within the compilation; those beyond the limit are not written.
+            var key = diagnostic.Id;
+            var messageCount = compilation.messageCounts.AddOrUpdate(key, 1, (_, c) => c + 1);
+            if (messageCount > limit)
+            {
+                if (messageCount == limit + 1)
+                {
+                    Context.Extractor.Logger.LogWarning($"Stopped logging {key} compiler diagnostics for the current compilation after reaching {limit}");
+                }
+
+                return;
+            }
+
+            trapFile.diagnostics(this, (int)diagnostic.Severity, key, diagnostic.Descriptor.Title.ToString(),
+                diagnostic.GetMessage(), Context.CreateLocation(diagnostic.Location));
+
+            trapFile.diagnostic_for(this, compilation, 0, index);
+        }
+    }
+}
diff --git a/csharp/extractor/Semmle.Extraction.CSharp/Entities/Compilations/Diagnostic.cs b/csharp/extractor/Semmle.Extraction.CSharp/Entities/Compilations/Diagnostic.cs
deleted file mode 100644
index a53ee5797f27..000000000000
--- a/csharp/extractor/Semmle.Extraction.CSharp/Entities/Compilations/Diagnostic.cs
+++ /dev/null
@@ -1,21 +0,0 @@
-using System.IO;
-
-namespace Semmle.Extraction.CSharp.Entities
-{
-    internal class Diagnostic : FreshEntity
-    {
-        private readonly Microsoft.CodeAnalysis.Diagnostic diagnostic;
-
-        public Diagnostic(Context cx, Microsoft.CodeAnalysis.Diagnostic diag) : base(cx)
-        {
-            diagnostic = diag;
-            TryPopulate();
-        }
-
-        protected override void Populate(TextWriter trapFile)
-        {
-            trapFile.diagnostics(this, (int)diagnostic.Severity, diagnostic.Id, diagnostic.Descriptor.Title.ToString(),
-                diagnostic.GetMessage(), Context.CreateLocation(diagnostic.Location));
-        }
-    }
-}
diff --git a/csharp/extractor/Semmle.Extraction.CSharp/Tuples.cs b/csharp/extractor/Semmle.Extraction.CSharp/Tuples.cs
index 71ed85cb201c..9d4f913ff9c3 100644
--- a/csharp/extractor/Semmle.Extraction.CSharp/Tuples.cs
+++ b/csharp/extractor/Semmle.Extraction.CSharp/Tuples.cs
@@ -122,10 +122,10 @@ internal static void destructor_location(this TextWriter trapFile, Destructor de
         internal static void destructors(this TextWriter trapFile, Destructor destructor, string name, Type containingType, Destructor original) =>
             trapFile.WriteTuple("destructors", destructor, name, containingType, original);
 
-        internal static void diagnostic_for(this TextWriter trapFile, Diagnostic diag, Compilation comp, int fileNo, int index) =>
+        internal static void diagnostic_for(this TextWriter trapFile, CompilerDiagnostic diag, Compilation comp, int fileNo, int index) =>
             trapFile.WriteTuple("diagnostic_for", diag, comp, fileNo, index);
 
-        internal static void diagnostics(this TextWriter trapFile, Diagnostic diag, int severity, string errorTag, string errorMessage, string fullErrorMessage, Location location) =>
+        internal static void diagnostics(this TextWriter trapFile, CompilerDiagnostic diag, int severity, string errorTag, string errorMessage, string fullErrorMessage, Location location) =>
             trapFile.WriteTuple("diagnostics", diag, severity, errorTag, errorMessage, fullErrorMessage, location);
 
         internal static void dynamic_member_name(this TextWriter trapFile, Expression e, string name) =>
diff --git a/csharp/extractor/Semmle.Extraction/Entities/ExtractionError.cs b/csharp/extractor/Semmle.Extraction/Entities/ExtractionError.cs
deleted file mode 100644
index 99f175377909..000000000000
--- a/csharp/extractor/Semmle.Extraction/Entities/ExtractionError.cs
+++ /dev/null
@@ -1,21 +0,0 @@
-using System.IO;
-
-namespace Semmle.Extraction.Entities
-{
-    internal class ExtractionMessage : FreshEntity
-    {
-        private readonly Message msg;
-
-        public ExtractionMessage(Context cx, Message msg) : base(cx)
-        {
-            this.msg = msg;
-            TryPopulate();
-        }
-
-        protected override void Populate(TextWriter trapFile)
-        {
-            trapFile.extractor_messages(this, msg.Severity, "C# extractor", msg.Text, msg.EntityText ?? string.Empty,
-                msg.Location ?? Context.CreateLocation(), msg.StackTrace ?? string.Empty);
-        }
-    }
-}
diff --git a/csharp/extractor/Semmle.Extraction/Entities/ExtractionMessage.cs b/csharp/extractor/Semmle.Extraction/Entities/ExtractionMessage.cs
new file mode 100644
index 000000000000..bc6ea5aa27dc
--- /dev/null
+++ b/csharp/extractor/Semmle.Extraction/Entities/ExtractionMessage.cs
@@ -0,0 +1,42 @@
+using System.IO;
+using System.Threading;
+using Semmle.Util;
+
+namespace Semmle.Extraction.Entities
+{
+    /// <summary>
+    /// A message produced by the extractor itself. The number of messages written is capped by the
+    /// <c>MESSAGE_LIMIT</c> extractor option (default 10000); the counter is static, so it is shared by all instances.
+    /// </summary>
+    internal class ExtractionMessage : FreshEntity
+    {
+        private static readonly int limit = EnvironmentVariables.TryGetExtractorNumberOption<int>("MESSAGE_LIMIT") ?? 10000;
+        private static int messageCount = 0;
+
+        private readonly Message msg;
+
+        public ExtractionMessage(Context cx, Message msg) : base(cx)
+        {
+            this.msg = msg;
+            TryPopulate();
+        }
+
+        protected override void Populate(TextWriter trapFile)
+        {
+            // Use the value returned by Interlocked.Increment instead of re-reading the shared counter,
+            // so that exactly one caller observes `limit + 1` and logs the warning.
+            var count = Interlocked.Increment(ref messageCount);
+            if (count > limit)
+            {
+                if (count == limit + 1)
+                {
+                    Context.Extractor.Logger.LogWarning($"Stopped logging extractor messages after reaching {limit}");
+                }
+                return;
+            }
+
+            trapFile.extractor_messages(this, msg.Severity, "C# extractor", msg.Text, msg.EntityText ?? string.Empty,
+                msg.Location ?? Context.CreateLocation(), msg.StackTrace ?? string.Empty);
+        }
+    }
+}
diff --git a/csharp/extractor/Semmle.Util/EnvironmentVariables.cs b/csharp/extractor/Semmle.Util/EnvironmentVariables.cs
index 9dcccf6d8785..c96aa16357c3 100644
--- a/csharp/extractor/Semmle.Util/EnvironmentVariables.cs
+++ b/csharp/extractor/Semmle.Util/EnvironmentVariables.cs
@@ -1,4 +1,6 @@
 using System;
+using System.Globalization;
+using System.Numerics;
 
 namespace Semmle.Util
 {
@@ -7,6 +9,20 @@ public class EnvironmentVariables
         public static string? GetExtractorOption(string name) =>
             Environment.GetEnvironmentVariable($"CODEQL_EXTRACTOR_CSHARP_OPTION_{name.ToUpper()}");
 
+        /// <summary>
+        /// Reads the extractor option <paramref name="name"/> and parses it as a number using the invariant culture.
+        /// </summary>
+        /// <returns>The parsed value, or <c>null</c> if the option could not be parsed as a number.</returns>
+        public static T? TryGetExtractorNumberOption<T>(string name) where T : struct, INumberBase<T>
+        {
+            var value = GetExtractorOption(name);
+            if (T.TryParse(value, NumberStyles.Number, CultureInfo.InvariantCulture, out var result))
+            {
+                return result;
+            }
+            return null;
+        }
+
         public static int GetDefaultNumberOfThreads()
         {
             if (!int.TryParse(Environment.GetEnvironmentVariable("CODEQL_THREADS"), out var threads) || threads == -1)
diff --git a/csharp/ql/integration-tests/all-platforms/standalone/Diag.expected b/csharp/ql/integration-tests/all-platforms/standalone/Diag.expected
new file mode 100644
index 000000000000..b48630869ee8
--- /dev/null
+++ b/csharp/ql/integration-tests/all-platforms/standalone/Diag.expected
@@ -0,0 +1,7 @@
+extractorMessages
+| 5 |
+compilerDiagnostics
+| 4 |
+compilationInfo
+| Compiler diagnostic count for CS0103 | 3.0 |
+| Compiler diagnostic count for CS8019 | 7.0 |
diff --git a/csharp/ql/integration-tests/all-platforms/standalone/Diag.ql b/csharp/ql/integration-tests/all-platforms/standalone/Diag.ql
new file mode 100644
index 000000000000..e391b345b20b
--- /dev/null
+++ b/csharp/ql/integration-tests/all-platforms/standalone/Diag.ql
@@ -0,0 +1,14 @@
+import csharp
+import semmle.code.csharp.commons.Diagnostics
+
+query predicate extractorMessages(int c) { c = count(ExtractorMessage msg) }
+
+query predicate compilerDiagnostics(int c) { c = count(Diagnostic diag) }
+
+query predicate compilationInfo(string key, float value) {
+  exists(Compilation c, string infoValue |
+    infoValue = c.getInfo(key) and key.matches("Compiler diagnostic count for%")
+  |
+    value = infoValue.toFloat()
+  )
+}
diff --git a/csharp/ql/integration-tests/all-platforms/standalone/Program.cs b/csharp/ql/integration-tests/all-platforms/standalone/Program.cs
index 47eee48cc791..371ed98c7247 100644
--- a/csharp/ql/integration-tests/all-platforms/standalone/Program.cs
+++ b/csharp/ql/integration-tests/all-platforms/standalone/Program.cs
@@ -1 +1,2 @@
-var dummy = "dummy";
\ No newline at end of file
+var dummy = "dummy";
+dummy = M() + M() + M();
\ No newline at end of file
diff --git a/csharp/ql/integration-tests/all-platforms/standalone/test.py b/csharp/ql/integration-tests/all-platforms/standalone/test.py
index a17966e148a9..cd9d65c57410 100644
--- a/csharp/ql/integration-tests/all-platforms/standalone/test.py
+++ b/csharp/ql/integration-tests/all-platforms/standalone/test.py
@@ -1,3 +1,6 @@
+import os
 from create_database_utils import *
 
+os.environ['CODEQL_EXTRACTOR_CSHARP_OPTION_COMPILER_DIAGNOSTIC_LIMIT'] = '2'
+os.environ['CODEQL_EXTRACTOR_CSHARP_OPTION_MESSAGE_LIMIT'] = '5'
 run_codeql_database_create([], lang="csharp", extra_args=["--build-mode=none"])
diff --git a/csharp/ql/integration-tests/posix-only/standalone_dependencies_nuget_config_error/CompilationInfo.ql b/csharp/ql/integration-tests/posix-only/standalone_dependencies_nuget_config_error/CompilationInfo.ql
index 87a9e20f0273..073ffe3b224d 100644
--- a/csharp/ql/integration-tests/posix-only/standalone_dependencies_nuget_config_error/CompilationInfo.ql
+++ b/csharp/ql/integration-tests/posix-only/standalone_dependencies_nuget_config_error/CompilationInfo.ql
@@ -3,6 +3,7 @@ import semmle.code.csharp.commons.Diagnostics
 
 query predicate compilationInfo(string key, float value) {
   key != "Resolved references" and
+  not key.matches("Compiler diagnostic count for%") and
   exists(Compilation c, string infoKey, string infoValue | infoValue = c.getInfo(infoKey) |
     key = infoKey and
     value = infoValue.toFloat()
diff --git a/csharp/ql/src/Telemetry/ExtractorInformation.ql b/csharp/ql/src/Telemetry/ExtractorInformation.ql
index d09e1c9d5d3f..08efbd7b6ec8 100644
--- a/csharp/ql/src/Telemetry/ExtractorInformation.ql
+++ b/csharp/ql/src/Telemetry/ExtractorInformation.ql
@@ -10,6 +10,7 @@ import csharp
 import semmle.code.csharp.commons.Diagnostics
 
 predicate compilationInfo(string key, float value) {
+  not key.matches("Compiler diagnostic count for%") and
   exists(Compilation c, string infoKey, string infoValue | infoValue = c.getInfo(infoKey) |
     key = infoKey and
     value = infoValue.toFloat()