From e7e8698b3854d192a0815e24fe8d147eff3c04d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20B=C3=B8gh=20K=C3=B6ster?= Date: Mon, 20 May 2024 16:09:51 +0200 Subject: [PATCH] Adds a instrumented HttpShardHandlerFactory - Reports metrics for dead shards and http destination request queues - Makes heavy use of reflection (and is thus not ready for upstream yet) --- gradle/solr/solr-forbidden-apis.gradle | 3 + .../InstrumentedHtttpShardHandlerFactory.java | 232 ++++++++++++++++++ 2 files changed, 235 insertions(+) create mode 100644 solr/core/src/java/org/apache/solr/handler/component/InstrumentedHtttpShardHandlerFactory.java diff --git a/gradle/solr/solr-forbidden-apis.gradle b/gradle/solr/solr-forbidden-apis.gradle index bb4dc601ee8..497d33d4ca3 100644 --- a/gradle/solr/solr-forbidden-apis.gradle +++ b/gradle/solr/solr-forbidden-apis.gradle @@ -22,5 +22,8 @@ configure(project(":solr:core")) { tasks.matching { it.name == "forbiddenApisMain" || it.name == "forbiddenApisTest" }.all { exclude "org/apache/solr/internal/**" exclude "org/apache/hadoop/**" + + // uses reflection for now, needs revision + exclude "org/apache/solr/handler/component/InstrumentedHtttpShardHandlerFactory*" } } \ No newline at end of file diff --git a/solr/core/src/java/org/apache/solr/handler/component/InstrumentedHtttpShardHandlerFactory.java b/solr/core/src/java/org/apache/solr/handler/component/InstrumentedHtttpShardHandlerFactory.java new file mode 100644 index 00000000000..73a59aef8d9 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/handler/component/InstrumentedHtttpShardHandlerFactory.java @@ -0,0 +1,232 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.handler.component; + +import com.codahale.metrics.Gauge; +import java.lang.invoke.MethodHandles; +import java.lang.reflect.Field; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.HashMap; +import java.util.Map; +import java.util.Queue; +import java.util.concurrent.ExecutorService; +import org.apache.solr.client.solrj.impl.Http2SolrClient; +import org.apache.solr.client.solrj.impl.LBHttp2SolrClient; +import org.apache.solr.client.solrj.impl.LBSolrClient; +import org.apache.solr.common.util.ExecutorUtil; +import org.apache.solr.common.util.SolrNamedThreadFactory; +import org.apache.solr.core.SolrInfoBean; +import org.apache.solr.metrics.SolrMetricsContext; +import org.eclipse.jetty.client.HttpClient; +import org.eclipse.jetty.client.HttpDestination; +import org.eclipse.jetty.client.HttpExchange; +import org.eclipse.jetty.client.api.Destination; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A shard handler factory that instruments the internal jetty http2 client to emit size metrics per + * http destincation queue. This helps in identifying troublesome nodes in your Solr cloud setup. + * + *

It furthermore registers metrics for alive and zombie members of the shard ensemble reflecting + * the current nodes view. + * + *

This code uses reflection to break up closed internal APIs. + */ +public class InstrumentedHtttpShardHandlerFactory extends HttpShardHandlerFactory { + + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + private ExecutorService scheduler; + + // store gauges here + private final Map destinationGauges = new HashMap<>(); + + @Override + public void initializeMetrics(SolrMetricsContext parentContext, String scope) { + super.initializeMetrics(parentContext, scope); + + // a scheduler to check available HttpDestinations + this.scheduler = + ExecutorUtil.newMDCAwareSingleThreadExecutor( + new SolrNamedThreadFactory("httpshardhandler-destination-checker-")); + + registerDeadServerMetricGauges(); + registerHttpDestinationGauges(defaultClient); + } + + void registerDeadServerMetricGauges() { + try { + // break up loadbalancer in super class + final Field loadbalancer = HttpShardHandlerFactory.class.getDeclaredField("loadbalancer"); + loadbalancer.setAccessible(true); + + // break up zombie servers in LBHttp2SolrClient + final Field zombieServers = LBSolrClient.class.getDeclaredField("zombieServers"); + zombieServers.setAccessible(true); + + // break up zombie servers in LBHttp2SolrClient + final Field aliveServers = LBSolrClient.class.getDeclaredField("aliveServers"); + aliveServers.setAccessible(true); + + // get instance + final LBHttp2SolrClient loadbalancerSolrClient = (LBHttp2SolrClient) loadbalancer.get(this); + @SuppressWarnings("unchecked") + final Map zombies = + (Map) zombieServers.get(loadbalancerSolrClient); + @SuppressWarnings("unchecked") + final Map alives = + (Map) aliveServers.get(loadbalancerSolrClient); + + // register the gauges + getSolrMetricsContext() + .gauge( + () -> zombies.size(), + true, + "count", + SolrInfoBean.Category.QUERY.name(), + "httpShardHandler", + "zombieServers"); + getSolrMetricsContext() + .gauge( + () -> alives.size(), + true, + "count", + SolrInfoBean.Category.QUERY.name(), + "httpShardHandler", + "aliveServers"); + + } catch (NoSuchFieldException + | SecurityException + | IllegalArgumentException + | IllegalAccessException e) { + if (log.isWarnEnabled()) { + log.warn("Could not attach gauge to zombie server list", e); + } + } + } + + void registerHttpDestinationGauges(Http2SolrClient solrClient) { + try { + // extract internal Jetty HttpClient + Method getHttpClient = Http2SolrClient.class.getDeclaredMethod("getHttpClient"); + getHttpClient.setAccessible(true); + final HttpClient httpClient = (HttpClient) getHttpClient.invoke(solrClient); + + // submit a thread to the single scheduler to poll + // for new destinations every second + scheduler.submit( + () -> { + while (!scheduler.isShutdown()) { + // wait a second + try { + Thread.sleep(1000); + } catch (InterruptedException e) { + if (log.isInfoEnabled()) { + log.info("Got interrupted while checking for new destinations to monitor ..."); + } + } + + // check for new destinations + try { + checkAvailableHttpDestinations(httpClient); + } catch (Exception e) { + if (log.isWarnEnabled()) { + log.warn("Could not check available destinations", e); + } + } + } + }); + } catch (NoSuchMethodException + | SecurityException + | IllegalAccessException + | IllegalArgumentException + | InvocationTargetException e) { + if (log.isWarnEnabled()) { + log.warn("Could not attach gauge to HttpDestination queue", e); + } + } + } + + void checkAvailableHttpDestinations(HttpClient httpClient) { + for (Destination destination : httpClient.getDestinations()) { + + // build a finite destination name + final String destinationName = + String.format( + "%s://%s:%s", destination.getScheme(), destination.getHost(), destination.getPort()); + + // new destination appeared. Create Gauge + if (!destinationGauges.containsKey(destinationName) && getSolrMetricsContext() != null) { + try { + final HttpDestinationGauge destinationGauge = + new HttpDestinationGauge((HttpDestination) destination); + getSolrMetricsContext() + .gauge( + destinationGauge, + true, + "count", + SolrInfoBean.Category.QUERY.name(), + "httpShardHandler", + "shardRequestQueue", + destinationName); + destinationGauges.put(destinationName, destinationGauge); + } catch (NoSuchFieldException | SecurityException e) { + if (log.isWarnEnabled()) { + log.warn("Could not inspect HttpDestination queue size", e); + } + } + } + } + } + + @Override + public void close() { + scheduler.shutdownNow(); + destinationGauges.clear(); + } + + public static class HttpDestinationGauge implements Gauge { + + private final Field exchanges; + private final HttpDestination destination; + + public HttpDestinationGauge(HttpDestination destination) + throws NoSuchFieldException, SecurityException { + this.destination = destination; + + // break up implementation + this.exchanges = HttpDestination.class.getDeclaredField("exchanges"); + this.exchanges.setAccessible(true); + } + + @SuppressWarnings("unchecked") + @Override + public Integer getValue() { + try { + return ((Queue) exchanges.get(destination)).size(); + } catch (IllegalArgumentException | IllegalAccessException e) { + if (log.isInfoEnabled()) { + log.info("Could not pull http request queue size", e); + } + + return 0; + } + } + } +}