From 8510637aad3f44fbeba200a8e6565c588a0a98e8 Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Wed, 18 Oct 2023 15:26:43 -0700 Subject: [PATCH] Fixed issue where multiple mock nodes on one JVM (in IT tests) would cause initialization errors in the cache manager --- .../indices/EhcacheDiskCachingTier.java | 48 ++++++++++++++----- .../indices/IndicesRequestCache.java | 2 +- .../opensearch/indices/IndicesService.java | 4 ++ 3 files changed, 42 insertions(+), 12 deletions(-) diff --git a/server/src/main/java/org/opensearch/indices/EhcacheDiskCachingTier.java b/server/src/main/java/org/opensearch/indices/EhcacheDiskCachingTier.java index 2469fd5934ea0..01fe6d491a58a 100644 --- a/server/src/main/java/org/opensearch/indices/EhcacheDiskCachingTier.java +++ b/server/src/main/java/org/opensearch/indices/EhcacheDiskCachingTier.java @@ -21,18 +21,35 @@ import org.opensearch.common.cache.RemovalListener; import org.ehcache.Cache; import org.opensearch.common.cache.RemovalNotification; +import org.opensearch.common.io.PathUtils; import org.opensearch.common.metrics.CounterMetric; import org.opensearch.core.common.bytes.BytesReference; import org.opensearch.core.common.io.stream.BytesStreamInput; import java.io.IOException; import java.util.Collections; +import java.util.HashMap; public class EhcacheDiskCachingTier implements DiskCachingTier, RemovalListener { - public static PersistentCacheManager cacheManager; + public static HashMap cacheManagers = new HashMap<>(); + // Because of the way test cases are set up, each node may try to instantiate several disk caching tiers. + // Only one of them will be used, but there will be initialization errors when multiple cache managers try to + // use the same file path and create/get caches with the same alias. We resolve this with a static reference + // to a cache manager, which is populated if it is null and reused if it is not. + // (See https://stackoverflow.com/questions/53756412/ehcache-org-ehcache-statetransitionexception-persistence-directory-already-lo) + // To deal with IT cases, we need to create a manager per node, as otherwise nodes will try to reuse the same manager, + // so we get the correct cache manager by looking up the node ID in this map. + // I don't think any of this can happen in production, because nodes shouldn't share a JVM, + // and they should only instantiate their services once? But it's best to resolve it anyway. + + private PersistentCacheManager cacheManager; // This is the manager this tier will actually use private Cache cache; - public final static String DISK_CACHE_FP = "disk_cache_tier"; // Placeholder. this should probably be defined somewhere else, since we need to change security.policy based on its value + public final static String BASE_DISK_CACHE_FP = "disk_cache_tier"; + // Placeholder. this should probably be defined somewhere else, since we need to change security.policy based on its value + // To accomodate test setups, where multiple nodes may exist on the same filesystem, we add the node ID to the end of this + // These will be subfolders of BASE_DISK_CACHE_FP + private final String diskCacheFP; // the one to use for this node private RemovalListener removalListener; private ExponentiallyWeightedMovingAverage getTimeMillisEWMA; private static final double GET_TIME_EWMA_ALPHA = 0.3; // This is the value used elsewhere in OpenSearch @@ -42,6 +59,7 @@ public class EhcacheDiskCachingTier implements DiskCachingTier().setOnHeapCachingTier( openSearchOnHeapCache ).setOnDiskCachingTier(diskCachingTier).setTieredCacheEventListener(this).build(); diff --git a/server/src/main/java/org/opensearch/indices/IndicesService.java b/server/src/main/java/org/opensearch/indices/IndicesService.java index f5e71327b6e7b..4be7061deabf3 100644 --- a/server/src/main/java/org/opensearch/indices/IndicesService.java +++ b/server/src/main/java/org/opensearch/indices/IndicesService.java @@ -1939,6 +1939,10 @@ public boolean allPendingDanglingIndicesWritten() { || (danglingIndicesToWrite.isEmpty() && danglingIndicesThreadPoolExecutor.getActiveCount() == 0); } + public String getNodeId() { + return nodeEnv.nodeId(); + } + /** * Validates the cluster default index refresh interval. *