From f0c90aa04509747ad52e991127de186c6acd06e2 Mon Sep 17 00:00:00 2001 From: Carter Kozak Date: Wed, 7 Dec 2022 13:20:31 -0500 Subject: [PATCH] Add metrics for cpu shares and detected processor count (#1619) Add metrics for cpu shares and detected processor count --- changelog/@unreleased/pr-1619.v2.yml | 5 +++ .../tritium/metrics/jvm/JvmMetrics.java | 13 +++++++ .../src/main/metrics/metrics.yml | 11 ++++++ .../tritium/metrics/jvm/JvmMetricsTest.java | 34 +++++++++++++++++++ versions.lock | 4 +-- versions.props | 2 +- 6 files changed, 66 insertions(+), 3 deletions(-) create mode 100644 changelog/@unreleased/pr-1619.v2.yml diff --git a/changelog/@unreleased/pr-1619.v2.yml b/changelog/@unreleased/pr-1619.v2.yml new file mode 100644 index 000000000..7a7d73a2a --- /dev/null +++ b/changelog/@unreleased/pr-1619.v2.yml @@ -0,0 +1,5 @@ +type: improvement +improvement: + description: Add metrics for cpu shares and detected processor count + links: + - https://github.com/palantir/tritium/pull/1619 diff --git a/tritium-metrics-jvm/src/main/java/com/palantir/tritium/metrics/jvm/JvmMetrics.java b/tritium-metrics-jvm/src/main/java/com/palantir/tritium/metrics/jvm/JvmMetrics.java index e35a6d423..245743579 100644 --- a/tritium-metrics-jvm/src/main/java/com/palantir/tritium/metrics/jvm/JvmMetrics.java +++ b/tritium-metrics-jvm/src/main/java/com/palantir/tritium/metrics/jvm/JvmMetrics.java @@ -22,6 +22,8 @@ import com.codahale.metrics.jvm.ThreadDeadlockDetector; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Suppliers; +import com.palantir.jvm.diagnostics.CpuSharesAccessor; +import com.palantir.jvm.diagnostics.JvmDiagnostics; import com.palantir.logsafe.Preconditions; import com.palantir.logsafe.logger.SafeLogger; import com.palantir.logsafe.logger.SafeLoggerFactory; @@ -37,6 +39,7 @@ import java.lang.management.ThreadMXBean; import java.util.EnumMap; import java.util.Map; +import java.util.Optional; import java.util.concurrent.TimeUnit; 
import java.util.function.BiFunction; import java.util.function.Function; @@ -70,6 +73,8 @@ public static void register(TaggedMetricRegistry registry) { registerClassLoading(metrics); registerJvmMemory(registry); registerThreads(metrics); + metrics.processors(Runtime.getRuntime()::availableProcessors); + registerCpuShares(registry, JvmDiagnostics.cpuShares()); } private static void registerAttributes(InternalJvmMetrics metrics) { @@ -113,6 +118,14 @@ private static void registerThreads(InternalJvmMetrics metrics) { metrics.threadsTerminatedCount(() -> threadsByStateSupplier.get().getOrDefault(Thread.State.TERMINATED, 0)); } + @VisibleForTesting + static void registerCpuShares(TaggedMetricRegistry registry, Optional maybeCpuSharesAccessor) { + maybeCpuSharesAccessor.ifPresentOrElse( + cpuSharesAccessor -> ContainerMetrics.of(registry).cpuShares((Gauge) + () -> cpuSharesAccessor.getCpuShares().orElse(-1L)), + () -> log.info("CPU Shares information is not supported, cpu share metrics will not be reported")); + } + @SuppressWarnings("UnnecessaryLambda") // Avoid allocations in the threads-by-state loop private static final BiFunction incrementThreadState = (_state, input) -> { int existingValue = input == null ? 0 : input; diff --git a/tritium-metrics-jvm/src/main/metrics/metrics.yml b/tritium-metrics-jvm/src/main/metrics/metrics.yml index 84432bc94..26129fe3f 100644 --- a/tritium-metrics-jvm/src/main/metrics/metrics.yml +++ b/tritium-metrics-jvm/src/main/metrics/metrics.yml @@ -11,6 +11,12 @@ namespaces: load.norm.1: type: gauge docs: Gauge based on `os.load.1` normalized for the number of available processors. This is designed to provide a hint about the system load, values are positive and may be greater than one. + container: + docs: Container Metrics + metrics: + cpu.shares: + type: gauge + docs: Gauge based on the detected CPU shares, if supported by the platform. 
Note that `-1` is reported if CPU shares are not being used, and the metric will not be available if cpu shares are unsupported on this system. process: docs: JVM Process Metrics metrics: @@ -84,6 +90,11 @@ namespaces: threads.terminated.count: type: gauge docs: Number of live threads in the `TERMINATED` state. + processors: + type: gauge + docs: Gauge describing the number of CPUs _detected_ by the JVM. + Note that this may not reflect the number of processors that should be used due to changes in the JDK along + the lines of [JDK-8281181](https://bugs.openjdk.org/browse/JDK-8281181). jvm.memory: docs: Java virtual machine memory usage metrics. metrics: diff --git a/tritium-metrics-jvm/src/test/java/com/palantir/tritium/metrics/jvm/JvmMetricsTest.java b/tritium-metrics-jvm/src/test/java/com/palantir/tritium/metrics/jvm/JvmMetricsTest.java index e6f45f4dc..cbd6fd263 100644 --- a/tritium-metrics-jvm/src/test/java/com/palantir/tritium/metrics/jvm/JvmMetricsTest.java +++ b/tritium-metrics-jvm/src/test/java/com/palantir/tritium/metrics/jvm/JvmMetricsTest.java @@ -35,6 +35,8 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Optional; +import java.util.OptionalLong; import javax.management.ObjectName; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; @@ -75,6 +77,7 @@ final class JvmMetricsTest { "jvm.memory.non-heap.committed", "jvm.memory.non-heap.max", "jvm.memory.heap.max", + "jvm.processors", "jvm.safepoint.time", "jvm.threads.timed-waiting.count", "jvm.threads.waiting.count", @@ -191,6 +194,37 @@ void testUnavailableJvmMemoryMetrics() { })); } + @Test + void testProcessorsMetric() { + TaggedMetricRegistry metrics = new DefaultTaggedMetricRegistry(); + JvmMetrics.register(metrics); + assertThat(metrics.gauge(InternalJvmMetrics.processorsMetricName()).map(Gauge::getValue)) + .hasValue(Runtime.getRuntime().availableProcessors()); + } + + @Test + void testCpuSharesUnavailable() { + TaggedMetricRegistry
metrics = new DefaultTaggedMetricRegistry(); + JvmMetrics.registerCpuShares(metrics, Optional.empty()); + assertThat(metrics.getMetrics()).isEmpty(); + } + + @Test + void testCpuSharesAvailableButNotUsed() { + TaggedMetricRegistry metrics = new DefaultTaggedMetricRegistry(); + JvmMetrics.registerCpuShares(metrics, Optional.of(OptionalLong::empty)); + assertThat(metrics.gauge(ContainerMetrics.cpuSharesMetricName())) + .hasValueSatisfying(gauge -> assertThat(gauge.getValue()).isEqualTo(-1L)); + } + + @Test + void testCpuShares() { + TaggedMetricRegistry metrics = new DefaultTaggedMetricRegistry(); + JvmMetrics.registerCpuShares(metrics, Optional.of(() -> OptionalLong.of(200L))); + assertThat(metrics.gauge(ContainerMetrics.cpuSharesMetricName())) + .hasValueSatisfying(gauge -> assertThat(gauge.getValue()).isEqualTo(200L)); + } + @SuppressWarnings("JdkObsolete") private static T find(TaggedMetricRegistry metrics, MetricName baseName, Class type) { return metrics.getMetrics().entrySet().stream() diff --git a/versions.lock b/versions.lock index ea29a0274..7521780e7 100644 --- a/versions.lock +++ b/versions.lock @@ -8,7 +8,7 @@ com.google.guava:guava:31.1-jre (5 constraints: 6b423e36) com.google.guava:listenablefuture:9999.0-empty-to-avoid-conflict-with-guava (1 constraints: bd17c918) com.google.j2objc:j2objc-annotations:1.3 (1 constraints: b809eda0) com.palantir.goethe:goethe:0.9.0 (1 constraints: 0b050636) -com.palantir.jvm.diagnostics:jvm-diagnostics:0.2.0 (1 constraints: 0405f135) +com.palantir.jvm.diagnostics:jvm-diagnostics:0.3.0 (1 constraints: 0505f435) com.palantir.safe-logging:logger:3.2.0 (2 constraints: d611ff0a) com.palantir.safe-logging:logger-slf4j:3.2.0 (1 constraints: 000e5942) com.palantir.safe-logging:logger-spi:3.2.0 (2 constraints: 0f1e997a) @@ -24,7 +24,7 @@ org.checkerframework:checker-qual:3.27.0 (3 constraints: da242163) org.hdrhistogram:HdrHistogram:2.1.12 (1 constraints: 3805313b) org.jetbrains:annotations:23.0.0 (2 constraints: 0f20e4ff) 
org.mpierce.metrics.reservoir:hdrhistogram-metrics-reservoir:1.1.3 (1 constraints: 0705f635) -org.slf4j:slf4j-api:1.7.36 (7 constraints: d25d37db) +org.slf4j:slf4j-api:1.7.36 (7 constraints: d55da1dd) [Test dependencies] com.google.auto:auto-common:1.2.1 (1 constraints: e711f5e8) diff --git a/versions.props b/versions.props index a770ead3e..aed8171d8 100644 --- a/versions.props +++ b/versions.props @@ -7,7 +7,7 @@ com.google.code.findbugs:* = 3.0.2 com.google.guava:* = 31.1-jre com.google.testing.compile:* = 0.19 com.palantir.goethe:* = 0.9.0 -com.palantir.jvm.diagnostics:* = 0.2.0 +com.palantir.jvm.diagnostics:* = 0.3.0 com.palantir.safe-logging:* = 3.2.0 com.palantir.tracing:* = 6.15.0 com.squareup:javapoet = 1.13.0