diff --git a/.ci/bwcVersions b/.ci/bwcVersions index f3a9aa2787a80..e490c6ae6b4ea 100644 --- a/.ci/bwcVersions +++ b/.ci/bwcVersions @@ -32,3 +32,4 @@ BWC_VERSION: - "2.13.0" - "2.13.1" - "2.14.0" + - "2.15.0" diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 908a032bf833e..6281fa0af3e36 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -17,6 +17,7 @@ Resolves #[Issue number to be closed when this PR is merged] - [ ] All tests pass - [ ] New functionality has been documented. - [ ] New functionality has javadoc added +- [ ] API changes companion pull request [created](https://github.com/opensearch-project/opensearch-api-specification/blob/main/DEVELOPER_GUIDE.md). - [ ] Failing checks are inspected and point to the corresponding known issue(s) (See: [Troubleshooting Failing Builds](../blob/main/CONTRIBUTING.md#troubleshooting-failing-builds)) - [ ] Commits are signed per the DCO using --signoff - [ ] Commit changes are listed out in CHANGELOG.md file (See: [Changelog](../blob/main/CONTRIBUTING.md#changelog)) diff --git a/.github/workflows/assemble.yml b/.github/workflows/assemble.yml index 382105364c048..d18170e9ea6b7 100644 --- a/.github/workflows/assemble.yml +++ b/.github/workflows/assemble.yml @@ -8,7 +8,7 @@ jobs: strategy: matrix: java: [ 11, 17, 21 ] - os: [ubuntu-latest, windows-latest, macos-latest] + os: [ubuntu-latest, windows-latest, macos-13] steps: - uses: actions/checkout@v4 - name: Set up JDK ${{ matrix.java }} @@ -18,10 +18,7 @@ jobs: distribution: temurin - name: Setup docker (missing on MacOS) if: runner.os == 'macos' - run: | - brew install docker - colima start - sudo ln -sf $HOME/.colima/default/docker.sock /var/run/docker.sock + uses: douglascamata/setup-docker-macos-action@main - name: Run Gradle (assemble) run: | ./gradlew assemble --parallel --no-build-cache -PDISABLE_BUILD_CACHE diff --git a/.github/workflows/links.yml b/.github/workflows/links.yml index 
1c83821e22804..8f628fcd78148 100644 --- a/.github/workflows/links.yml +++ b/.github/workflows/links.yml @@ -13,7 +13,7 @@ jobs: - uses: actions/checkout@v4 - name: lychee Link Checker id: lychee - uses: lycheeverse/lychee-action@v1.9.3 + uses: lycheeverse/lychee-action@v1.10.0 with: args: --accept=200,403,429 --exclude-mail **/*.html **/*.md **/*.txt **/*.json --exclude-file .lychee.excludes fail: true diff --git a/.github/workflows/precommit.yml b/.github/workflows/precommit.yml index 800aacec98516..95ca49ac9cb43 100644 --- a/.github/workflows/precommit.yml +++ b/.github/workflows/precommit.yml @@ -8,7 +8,7 @@ jobs: strategy: matrix: java: [ 11, 17, 21 ] - os: [ubuntu-latest, windows-latest, macos-latest] + os: [ubuntu-latest, windows-latest, macos-13] steps: - uses: actions/checkout@v4 - name: Set up JDK ${{ matrix.java }} diff --git a/CHANGELOG.md b/CHANGELOG.md index 6d8af16db72a7..b24cc5347e768 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,46 +5,17 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ## [Unreleased 2.x] ### Added -- Constant Keyword Field ([#12285](https://github.com/opensearch-project/OpenSearch/pull/12285)) -- Convert ingest processor supports ip type ([#12818](https://github.com/opensearch-project/OpenSearch/pull/12818)) -- Add a counter to node stat api to track shard going from idle to non-idle ([#12768](https://github.com/opensearch-project/OpenSearch/pull/12768)) -- Allow setting KEYSTORE_PASSWORD through env variable ([#12865](https://github.com/opensearch-project/OpenSearch/pull/12865)) -- [Concurrent Segment Search] Perform buildAggregation concurrently and support Composite Aggregations ([#12697](https://github.com/opensearch-project/OpenSearch/pull/12697)) -- [Concurrent Segment Search] Disable concurrent segment search for system indices and throttled requests ([#12954](https://github.com/opensearch-project/OpenSearch/pull/12954)) -- [Tiered Caching] Make took time caching policy setting dynamic 
([#13063](https://github.com/opensearch-project/OpenSearch/pull/13063)) -- Derived fields support to derive field values at query time without indexing ([#12569](https://github.com/opensearch-project/OpenSearch/pull/12569)) -- Detect breaking changes on pull requests ([#9044](https://github.com/opensearch-project/OpenSearch/pull/9044)) -- Add cluster primary balance contraint for rebalancing with buffer ([#12656](https://github.com/opensearch-project/OpenSearch/pull/12656)) -- [Remote Store] Make translog transfer timeout configurable ([#12704](https://github.com/opensearch-project/OpenSearch/pull/12704)) -- Reject Resize index requests (i.e, split, shrink and clone), While DocRep to SegRep migration is in progress.([#12686](https://github.com/opensearch-project/OpenSearch/pull/12686)) -- Add support for more than one protocol for transport ([#12967](https://github.com/opensearch-project/OpenSearch/pull/12967)) -- [Tiered Caching] Add dimension-based stats to ICache implementations. ([#12531](https://github.com/opensearch-project/OpenSearch/pull/12531)) -- Add changes for overriding remote store and replication settings during snapshot restore. ([#11868](https://github.com/opensearch-project/OpenSearch/pull/11868)) -- Add an individual setting of rate limiter for segment replication ([#12959](https://github.com/opensearch-project/OpenSearch/pull/12959)) -- [Streaming Indexing] Ensure support of the new transport by security plugin ([#13174](https://github.com/opensearch-project/OpenSearch/pull/13174)) -- Add cluster setting to dynamically configure the buckets for filter rewrite optimization. 
([#13179](https://github.com/opensearch-project/OpenSearch/pull/13179)) +- Add useCompoundFile index setting ([#13478](https://github.com/opensearch-project/OpenSearch/pull/13478)) ### Dependencies -- Bump `org.apache.commons:commons-configuration2` from 2.10.0 to 2.10.1 ([#12896](https://github.com/opensearch-project/OpenSearch/pull/12896)) -- Bump `asm` from 9.6 to 9.7 ([#12908](https://github.com/opensearch-project/OpenSearch/pull/12908)) -- Bump `net.minidev:json-smart` from 2.5.0 to 2.5.1 ([#12893](https://github.com/opensearch-project/OpenSearch/pull/12893), [#13117](https://github.com/opensearch-project/OpenSearch/pull/13117)) -- Bump `netty` from 4.1.107.Final to 4.1.109.Final ([#12924](https://github.com/opensearch-project/OpenSearch/pull/12924), [#13233](https://github.com/opensearch-project/OpenSearch/pull/13233)) -- Bump `commons-io:commons-io` from 2.15.1 to 2.16.0 ([#12996](https://github.com/opensearch-project/OpenSearch/pull/12996), [#12998](https://github.com/opensearch-project/OpenSearch/pull/12998), [#12999](https://github.com/opensearch-project/OpenSearch/pull/12999)) -- Bump `org.apache.commons:commons-compress` from 1.24.0 to 1.26.1 ([#12627](https://github.com/opensearch-project/OpenSearch/pull/12627)) -- Bump `org.apache.commons:commonscodec` from 1.15 to 1.16.1 ([#12627](https://github.com/opensearch-project/OpenSearch/pull/12627)) -- Bump `org.apache.commons:commonslang` from 3.13.0 to 3.14.0 ([#12627](https://github.com/opensearch-project/OpenSearch/pull/12627)) -- Bump Apache Tika from 2.6.0 to 2.9.2 ([#12627](https://github.com/opensearch-project/OpenSearch/pull/12627)) -- Bump `com.gradle.enterprise` from 3.16.2 to 3.17.1 ([#13116](https://github.com/opensearch-project/OpenSearch/pull/13116), [#13191](https://github.com/opensearch-project/OpenSearch/pull/13191)) -- Bump `gradle/wrapper-validation-action` from 2 to 3 ([#13192](https://github.com/opensearch-project/OpenSearch/pull/13192)) -- Bump joda from 2.12.2 to 2.12.7 
([#13193](https://github.com/opensearch-project/OpenSearch/pull/13193)) -- Bump bouncycastle from 1.77 to 1.78 ([#13243](https://github.com/opensearch-project/OpenSearch/pull/13243)) -- Update google dependencies in repository-gcs and discovery-gce ([#13213](https://github.com/opensearch-project/OpenSearch/pull/13213)) +- Bump `com.github.spullara.mustache.java:compiler` from 0.9.10 to 0.9.13 ([#13329](https://github.com/opensearch-project/OpenSearch/pull/13329), [#13559](https://github.com/opensearch-project/OpenSearch/pull/13559)) +- Bump `org.gradle.test-retry` from 1.5.8 to 1.5.9 ([#13442](https://github.com/opensearch-project/OpenSearch/pull/13442)) +- Bump `org.apache.commons:commons-text` from 1.11.0 to 1.12.0 ([#13557](https://github.com/opensearch-project/OpenSearch/pull/13557)) +- Bump `org.hdrhistogram:HdrHistogram` from 2.1.12 to 2.2.1 ([#13556](https://github.com/opensearch-project/OpenSearch/pull/13556)) ### Changed -- [BWC and API enforcement] Enforcing the presence of API annotations at build time ([#12872](https://github.com/opensearch-project/OpenSearch/pull/12872)) -- Improve built-in secure transports support ([#12907](https://github.com/opensearch-project/OpenSearch/pull/12907)) -- Update links to documentation in rest-api-spec ([#13043](https://github.com/opensearch-project/OpenSearch/pull/13043)) -- Refactoring globMatch using simpleMatchWithNormalizedStrings from Regex ([#13104](https://github.com/opensearch-project/OpenSearch/pull/13104)) +- Add ability for Boolean and date field queries to run when only doc_values are enabled ([#11650](https://github.com/opensearch-project/OpenSearch/pull/11650)) +- Refactor implementations of query phase searcher, allow QueryCollectorContext to have zero collectors ([#13481](https://github.com/opensearch-project/OpenSearch/pull/13481)) ### Deprecated @@ -52,14 +23,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Remove handling of index.mapper.dynamic in 
AutoCreateIndex([#13067](https://github.com/opensearch-project/OpenSearch/pull/13067)) ### Fixed -- Fix bulk API ignores ingest pipeline for upsert ([#12883](https://github.com/opensearch-project/OpenSearch/pull/12883)) -- Fix issue with feature flags where default value may not be honored ([#12849](https://github.com/opensearch-project/OpenSearch/pull/12849)) -- Fix UOE While building Exists query for nested search_as_you_type field ([#12048](https://github.com/opensearch-project/OpenSearch/pull/12048)) -- Client with Java 8 runtime and Apache HttpClient 5 Transport fails with java.lang.NoSuchMethodError: java.nio.ByteBuffer.flip()Ljava/nio/ByteBuffer ([#13100](https://github.com/opensearch-project/opensearch-java/pull/13100)) -- Enabled mockTelemetryPlugin for IT and fixed OOM issues ([#13054](https://github.com/opensearch-project/OpenSearch/pull/13054)) -- Fix implement mark() and markSupported() in class FilterStreamInput ([#13098](https://github.com/opensearch-project/OpenSearch/pull/13098)) -- Fix snapshot _status API to return correct status for partial snapshots ([#12812](https://github.com/opensearch-project/OpenSearch/pull/12812)) -- Ignore BaseRestHandler unconsumed content check as it's always consumed. 
([#13290](https://github.com/opensearch-project/OpenSearch/pull/13290)) +- Fix negative RequestStats metric issue ([#13553](https://github.com/opensearch-project/OpenSearch/pull/13553)) +- Fix get field mapping API returns 404 error in mixed cluster with multiple versions ([#13624](https://github.com/opensearch-project/OpenSearch/pull/13624)) ### Security diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index bce6ca0d49294..0ec0abe535dd0 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -9,6 +9,7 @@ - [Changelog](#changelog) - [Review Process](#review-process) - [Tips for Success](#tips) +- [Troubleshooting Failing Builds](#troubleshooting-failing-builds) # Contributing to OpenSearch @@ -180,3 +181,13 @@ We have a lot of mechanisms to help expedite towards an accepted PR. Here are so In general, adding more guardrails to your changes increases the likelihood of swift PR acceptance. We can always relax these guard rails in smaller followup PRs. Reverting a GA feature is much more difficult. Check out the [DEVELOPER_GUIDE](./DEVELOPER_GUIDE.md#submitting-changes) for more useful tips. +## Troubleshooting Failing Builds + +The OpenSearch testing framework offers many capabilities but exhibits significant complexity (it does a lot of randomization internally to cover as many edge cases and variations as possible). Unfortunately, this poses a challenge by making it harder to discover important issues/bugs in a straightforward way and may lead to so-called flaky tests - the tests which flip randomly from success to failure without any code changes.
+ +If your pull request reports a failing test(s) on one of the checks, please: +- look if there is an existing [issue](https://github.com/opensearch-project/OpenSearch/issues) reported for the test in question +- if not, please make sure this is not caused by your changes, run the failing test(s) locally for some time +- if you are sure the failure is not related, please open a new [bug](https://github.com/opensearch-project/OpenSearch/issues/new?assignees=&labels=bug%2C+untriaged&projects=&template=bug_template.md&title=%5BBUG%5D) with `flaky-test` label +- add a comment referencing the issue(s) or bug report(s) to your pull request explaining the failing build(s) +- as a bonus point, try to contribute by fixing the flaky test(s) diff --git a/DEVELOPER_GUIDE.md b/DEVELOPER_GUIDE.md index f0851fc58d444..92ef71b92da7e 100644 --- a/DEVELOPER_GUIDE.md +++ b/DEVELOPER_GUIDE.md @@ -57,6 +57,7 @@ - [Developer API](#developer-api) - [User API](#user-api) - [Experimental Development](#experimental-development) + - [API Compatibility Checks](#api-compatibility-checks) - [Backports](#backports) - [LineLint](#linelint) - [Lucene Snapshots](#lucene-snapshots) @@ -607,6 +608,20 @@ a LTS feature but with additional guard rails and communication mechanisms to si release, or be removed altogether. Any Developer or User APIs implemented along with the experimental feature should be marked with `@ExperimentalApi` (or documented as `@opensearch.experimental`) annotation to signal the implementation is not subject to LTS and does not follow backwards compatibility guidelines. 
+#### API Compatibility Checks + +The compatibility checks for public APIs are performed using [japicmp](https://siom79.github.io/japicmp/) and are available as separate Gradle tasks (those are run on demand at the moment): + +``` +./gradlew japicmp +``` + +By default, the API compatibility checks are run against the latest released version of OpenSearch; however, the target version to compare against can be provided using a system property during the build, e.g.: + +``` +./gradlew japicmp -Djapicmp.compare.version=2.14.0-SNAPSHOT +``` + ### Backports The Github workflow in [`backport.yml`](.github/workflows/backport.yml) creates backport PRs automatically when the original PR with an appropriate label `backport ` is merged to main with the backport workflow run successfully on the PR. For example, if a PR on main needs to be backported to `1.x` branch, add a label `backport 1.x` to the PR and make sure the backport workflow runs on the PR along with other checks. Once this PR is merged to main, the workflow will create a backport PR to the `1.x` branch. diff --git a/README.md b/README.md index 748f8a366ecc8..aff18018960d5 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ This project has adopted the [Amazon Open Source Code of Conduct](CODE_OF_CONDUCT.md). For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq), or contact [opensource-codeofconduct@amazon.com](mailto:opensource-codeofconduct@amazon.com) with any additional questions or comments. ## Security -If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/) or directly via email to aws-security@amazon.com. Please do **not** create a public GitHub issue. +If you discover a potential security issue in this project we ask that you notify OpenSearch Security directly via email to security@opensearch.org.
Please do **not** create a public GitHub issue. ## License diff --git a/SECURITY.md b/SECURITY.md index b86292104335f..be4ac7463864a 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -1,3 +1,3 @@ ## Reporting a Vulnerability -If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/) or directly via email to aws-security@amazon.com. Please do **not** create a public GitHub issue. +If you discover a potential security issue in this project we ask that you notify OpenSearch Security directly via email to security@opensearch.org. Please do **not** create a public GitHub issue. diff --git a/build.gradle b/build.gradle index 2aac4a1e893e9..e92f396e006f5 100644 --- a/build.gradle +++ b/build.gradle @@ -55,7 +55,7 @@ plugins { id 'opensearch.docker-support' id 'opensearch.global-build-info' id "com.diffplug.spotless" version "6.25.0" apply false - id "org.gradle.test-retry" version "1.5.8" apply false + id "org.gradle.test-retry" version "1.5.9" apply false id "test-report-aggregation" id 'jacoco-report-aggregation' } diff --git a/buildSrc/src/main/java/org/opensearch/gradle/test/DistroTestPlugin.java b/buildSrc/src/main/java/org/opensearch/gradle/test/DistroTestPlugin.java index bc44f81a81aff..b2b3e3003e572 100644 --- a/buildSrc/src/main/java/org/opensearch/gradle/test/DistroTestPlugin.java +++ b/buildSrc/src/main/java/org/opensearch/gradle/test/DistroTestPlugin.java @@ -77,9 +77,9 @@ import java.util.stream.Stream; public class DistroTestPlugin implements Plugin { - private static final String SYSTEM_JDK_VERSION = "21.0.2+13"; + private static final String SYSTEM_JDK_VERSION = "21.0.3+9"; private static final String SYSTEM_JDK_VENDOR = "adoptium"; - private static final String GRADLE_JDK_VERSION = "21.0.2+13"; + private static final String GRADLE_JDK_VERSION = "21.0.3+9"; private static final String GRADLE_JDK_VENDOR = "adoptium"; // all 
distributions used by distro tests. this is temporary until tests are per distribution diff --git a/buildSrc/version.properties b/buildSrc/version.properties index 6c6138ac9b7f6..d0aaea546803b 100644 --- a/buildSrc/version.properties +++ b/buildSrc/version.properties @@ -2,7 +2,7 @@ opensearch = 3.0.0 lucene = 9.11.0-snapshot-fb97840 bundled_jdk_vendor = adoptium -bundled_jdk = 21.0.2+13 +bundled_jdk = 21.0.3+9 # optional dependencies spatial4j = 0.7 diff --git a/distribution/packages/build.gradle b/distribution/packages/build.gradle index 4e85d19986e43..fbd13f03af814 100644 --- a/distribution/packages/build.gradle +++ b/distribution/packages/build.gradle @@ -63,7 +63,7 @@ import java.util.regex.Pattern */ plugins { - id "com.netflix.nebula.ospackage-base" version "11.8.1" + id "com.netflix.nebula.ospackage-base" version "11.9.0" } void addProcessFilesTask(String type, boolean jdk) { diff --git a/distribution/tools/plugin-cli/build.gradle b/distribution/tools/plugin-cli/build.gradle index 446dbaad8466e..3083ad4375460 100644 --- a/distribution/tools/plugin-cli/build.gradle +++ b/distribution/tools/plugin-cli/build.gradle @@ -38,7 +38,7 @@ dependencies { compileOnly project(":server") compileOnly project(":libs:opensearch-cli") api "org.bouncycastle:bcpg-fips:1.0.7.1" - api "org.bouncycastle:bc-fips:1.0.2.4" + api "org.bouncycastle:bc-fips:1.0.2.5" testImplementation project(":test:framework") testImplementation 'com.google.jimfs:jimfs:1.3.0' testRuntimeOnly("com.google.guava:guava:${versions.guava}") { diff --git a/distribution/tools/plugin-cli/licenses/bc-fips-1.0.2.4.jar.sha1 b/distribution/tools/plugin-cli/licenses/bc-fips-1.0.2.4.jar.sha1 deleted file mode 100644 index da37449f80d7e..0000000000000 --- a/distribution/tools/plugin-cli/licenses/bc-fips-1.0.2.4.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -9008d04fc13da6455e6a792935b93b629757335d \ No newline at end of file diff --git a/distribution/tools/plugin-cli/licenses/bc-fips-1.0.2.5.jar.sha1 
b/distribution/tools/plugin-cli/licenses/bc-fips-1.0.2.5.jar.sha1 new file mode 100644 index 0000000000000..1b44c77dd4ee1 --- /dev/null +++ b/distribution/tools/plugin-cli/licenses/bc-fips-1.0.2.5.jar.sha1 @@ -0,0 +1 @@ +704e65f7e4fe679e5ab2aa8a840f27f8ced4c522 \ No newline at end of file diff --git a/libs/core/src/main/java/org/opensearch/Version.java b/libs/core/src/main/java/org/opensearch/Version.java index f312c484a4842..823eb110eaa10 100644 --- a/libs/core/src/main/java/org/opensearch/Version.java +++ b/libs/core/src/main/java/org/opensearch/Version.java @@ -103,6 +103,7 @@ public class Version implements Comparable, ToXContentFragment { public static final Version V_2_13_0 = new Version(2130099, org.apache.lucene.util.Version.LUCENE_9_10_0); public static final Version V_2_13_1 = new Version(2130199, org.apache.lucene.util.Version.LUCENE_9_10_0); public static final Version V_2_14_0 = new Version(2140099, org.apache.lucene.util.Version.LUCENE_9_10_0); + public static final Version V_2_15_0 = new Version(2150099, org.apache.lucene.util.Version.LUCENE_9_10_0); public static final Version V_3_0_0 = new Version(3000099, org.apache.lucene.util.Version.LUCENE_9_11_0); public static final Version CURRENT = V_3_0_0; diff --git a/libs/core/src/main/java/org/opensearch/core/common/io/stream/StreamInput.java b/libs/core/src/main/java/org/opensearch/core/common/io/stream/StreamInput.java index ea23b3d81a775..f4c52cb8a6506 100644 --- a/libs/core/src/main/java/org/opensearch/core/common/io/stream/StreamInput.java +++ b/libs/core/src/main/java/org/opensearch/core/common/io/stream/StreamInput.java @@ -80,6 +80,7 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import java.util.Comparator; import java.util.Date; import java.util.EnumSet; import java.util.HashMap; @@ -90,6 +91,8 @@ import java.util.Locale; import java.util.Map; import java.util.Set; +import java.util.SortedMap; +import java.util.TreeMap; import 
java.util.concurrent.TimeUnit; import java.util.function.IntFunction; @@ -642,12 +645,47 @@ public Map readMap(Writeable.Reader keyReader, Writeable.Reader< return Collections.emptyMap(); } Map map = new HashMap<>(size); + readIntoMap(keyReader, valueReader, map, size); + return map; + } + + /** + * Read a serialized map into a SortedMap using the default ordering for the keys. If the result is empty it might be immutable. + */ + public , V> SortedMap readOrderedMap(Writeable.Reader keyReader, Writeable.Reader valueReader) + throws IOException { + return readOrderedMap(keyReader, valueReader, null); + } + + /** + * Read a serialized map into a SortedMap, specifying a Comparator for the keys. If the result is empty it might be immutable. + */ + public , V> SortedMap readOrderedMap( + Writeable.Reader keyReader, + Writeable.Reader valueReader, + @Nullable Comparator keyComparator + ) throws IOException { + int size = readArraySize(); + if (size == 0) { + return Collections.emptySortedMap(); + } + SortedMap sortedMap; + if (keyComparator == null) { + sortedMap = new TreeMap<>(); + } else { + sortedMap = new TreeMap<>(keyComparator); + } + readIntoMap(keyReader, valueReader, sortedMap, size); + return sortedMap; + } + + private void readIntoMap(Writeable.Reader keyReader, Writeable.Reader valueReader, Map map, int size) + throws IOException { for (int i = 0; i < size; i++) { K key = keyReader.read(this); V value = valueReader.read(this); map.put(key, value); } - return map; } /** diff --git a/modules/cache-common/src/internalClusterTest/java/org.opensearch.cache.common.tier/TieredSpilloverCacheIT.java b/modules/cache-common/src/internalClusterTest/java/org.opensearch.cache.common.tier/TieredSpilloverCacheIT.java index 977a66c53b7e8..bfc184cff0566 100644 --- a/modules/cache-common/src/internalClusterTest/java/org.opensearch.cache.common.tier/TieredSpilloverCacheIT.java +++ 
b/modules/cache-common/src/internalClusterTest/java/org.opensearch.cache.common.tier/TieredSpilloverCacheIT.java @@ -425,6 +425,121 @@ public void testWithExplicitCacheClear() throws Exception { }, 1, TimeUnit.SECONDS); } + public void testWithDynamicDiskCacheSetting() throws Exception { + int onHeapCacheSizeInBytes = 10; // Keep it low so that all items are cached onto disk. + internalCluster().startNode( + Settings.builder() + .put(defaultSettings(onHeapCacheSizeInBytes + "b")) + .put(INDICES_CACHE_CLEAN_INTERVAL_SETTING.getKey(), new TimeValue(1)) + .build() + ); + Client client = client(); + assertAcked( + client.admin() + .indices() + .prepareCreate("index") + .setMapping("k", "type=keyword") + .setSettings( + Settings.builder() + .put(IndicesRequestCache.INDEX_CACHE_REQUEST_ENABLED_SETTING.getKey(), true) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put("index.refresh_interval", -1) + ) + .get() + ); + // Update took time policy to zero so that all entries are eligible to be cached on disk. 
+ ClusterUpdateSettingsRequest updateSettingsRequest = new ClusterUpdateSettingsRequest().transientSettings( + Settings.builder() + .put( + TieredSpilloverCacheSettings.TOOK_TIME_POLICY_CONCRETE_SETTINGS_MAP.get(CacheType.INDICES_REQUEST_CACHE).getKey(), + new TimeValue(0, TimeUnit.MILLISECONDS) + ) + .build() + ); + assertAcked(internalCluster().client().admin().cluster().updateSettings(updateSettingsRequest).get()); + int numberOfIndexedItems = randomIntBetween(5, 10); + for (int iterator = 0; iterator < numberOfIndexedItems; iterator++) { + indexRandom(true, client.prepareIndex("index").setSource("k" + iterator, "hello" + iterator)); + } + ensureSearchable("index"); + refreshAndWaitForReplication(); + // Force merge the index to ensure there can be no background merges during the subsequent searches that would invalidate the cache + ForceMergeResponse forceMergeResponse = client.admin().indices().prepareForceMerge("index").setFlush(true).get(); + OpenSearchAssertions.assertAllSuccessful(forceMergeResponse); + long perQuerySizeInCacheInBytes = -1; + // Step 1: Hit some queries. We will see misses and queries will be cached(onto disk cache) for subsequent + // requests. 
+ for (int iterator = 0; iterator < numberOfIndexedItems; iterator++) { + SearchResponse resp = client.prepareSearch("index") + .setRequestCache(true) + .setQuery(QueryBuilders.termQuery("k" + iterator, "hello" + iterator)) + .get(); + if (perQuerySizeInCacheInBytes == -1) { + RequestCacheStats requestCacheStats = getRequestCacheStats(client, "index"); + perQuerySizeInCacheInBytes = requestCacheStats.getMemorySizeInBytes(); + } + assertSearchResponse(resp); + } + + RequestCacheStats requestCacheStats = getRequestCacheStats(client, "index"); + assertEquals(numberOfIndexedItems * perQuerySizeInCacheInBytes, requestCacheStats.getMemorySizeInBytes()); + assertEquals(numberOfIndexedItems, requestCacheStats.getMissCount()); + assertEquals(0, requestCacheStats.getHitCount()); + assertEquals(0, requestCacheStats.getEvictions()); + + // Step 2: Hit same queries again. We will see hits now. + for (int iterator = 0; iterator < numberOfIndexedItems; iterator++) { + SearchResponse resp = client.prepareSearch("index") + .setRequestCache(true) + .setQuery(QueryBuilders.termQuery("k" + iterator, "hello" + iterator)) + .get(); + assertSearchResponse(resp); + } + requestCacheStats = getRequestCacheStats(client, "index"); + assertEquals(numberOfIndexedItems * perQuerySizeInCacheInBytes, requestCacheStats.getMemorySizeInBytes()); + assertEquals(numberOfIndexedItems, requestCacheStats.getMissCount()); + assertEquals(numberOfIndexedItems, requestCacheStats.getHitCount()); + assertEquals(0, requestCacheStats.getEvictions()); + long lastKnownHitCount = requestCacheStats.getHitCount(); + long lastKnownMissCount = requestCacheStats.getMissCount(); + + // Step 3: Turn off disk cache now. And hit same queries again. We should not see hits now as all queries + // were cached onto disk cache. 
+ updateSettingsRequest = new ClusterUpdateSettingsRequest().transientSettings( + Settings.builder() + .put(TieredSpilloverCacheSettings.DISK_CACHE_ENABLED_SETTING_MAP.get(CacheType.INDICES_REQUEST_CACHE).getKey(), false) + .build() + ); + assertAcked(internalCluster().client().admin().cluster().updateSettings(updateSettingsRequest).get()); + + for (int iterator = 0; iterator < numberOfIndexedItems; iterator++) { + SearchResponse resp = client.prepareSearch("index") + .setRequestCache(true) + .setQuery(QueryBuilders.termQuery("k" + iterator, "hello" + iterator)) + .get(); + assertSearchResponse(resp); + } + requestCacheStats = getRequestCacheStats(client, "index"); + assertEquals(numberOfIndexedItems * perQuerySizeInCacheInBytes, requestCacheStats.getMemorySizeInBytes()); // + // Still shows disk cache entries as explicit clear or invalidation is required to clean up disk cache. + assertEquals(lastKnownMissCount + numberOfIndexedItems, requestCacheStats.getMissCount()); + assertEquals(0, lastKnownHitCount - requestCacheStats.getHitCount()); // No new hits being seen. + lastKnownMissCount = requestCacheStats.getMissCount(); + lastKnownHitCount = requestCacheStats.getHitCount(); + + // Step 4: Invalidate entries via refresh. + // Explicit refresh would invalidate cache entries. 
+ refreshAndWaitForReplication(); + assertBusy(() -> { + // Explicit refresh should clear up cache entries + assertTrue(getRequestCacheStats(client, "index").getMemorySizeInBytes() == 0); + }, 1, TimeUnit.SECONDS); + requestCacheStats = getRequestCacheStats(client, "index"); + assertEquals(0, lastKnownMissCount - requestCacheStats.getMissCount()); + assertEquals(0, lastKnownHitCount - requestCacheStats.getHitCount()); + } + private RequestCacheStats getRequestCacheStats(Client client, String indexName) { return client.admin().indices().prepareStats(indexName).setRequestCache(true).get().getTotal().getRequestCache(); } @@ -435,7 +550,7 @@ public MockDiskCachePlugin() {} @Override public Map getCacheFactoryMap() { - return Map.of(MockDiskCache.MockDiskCacheFactory.NAME, new MockDiskCache.MockDiskCacheFactory(0, 1000)); + return Map.of(MockDiskCache.MockDiskCacheFactory.NAME, new MockDiskCache.MockDiskCacheFactory(0, 1000, false)); } @Override diff --git a/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCache.java b/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCache.java index ae3d9f1dbcf62..9942651ccdd67 100644 --- a/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCache.java +++ b/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCache.java @@ -20,6 +20,7 @@ import org.opensearch.common.cache.policy.CachedQueryResult; import org.opensearch.common.cache.stats.ImmutableCacheStatsHolder; import org.opensearch.common.cache.store.config.CacheConfig; +import org.opensearch.common.collect.Tuple; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; @@ -27,16 +28,23 @@ import java.io.IOException; import java.util.ArrayList; -import java.util.Arrays; +import java.util.Collections; import java.util.Iterator; +import java.util.LinkedHashMap; import 
java.util.List; import java.util.Map; import java.util.NoSuchElementException; import java.util.Objects; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.function.Function; import java.util.function.Predicate; +import java.util.function.ToLongBiFunction; + +import static org.opensearch.cache.common.tier.TieredSpilloverCacheSettings.DISK_CACHE_ENABLED_SETTING_MAP; +import static org.opensearch.cache.common.tier.TieredSpilloverCacheStatsHolder.TIER_DIMENSION_VALUE_DISK; +import static org.opensearch.cache.common.tier.TieredSpilloverCacheStatsHolder.TIER_DIMENSION_VALUE_ON_HEAP; /** * This cache spillover the evicted items from heap tier to disk tier. All the new items are first cached on heap @@ -57,9 +65,17 @@ public class TieredSpilloverCache implements ICache { private final ICache diskCache; private final ICache onHeapCache; - // The listener for removals from the spillover cache as a whole - // TODO: In TSC stats PR, each tier will have its own separate removal listener. + // Removal listeners for the individual tiers + private final RemovalListener, V> onDiskRemovalListener; + private final RemovalListener, V> onHeapRemovalListener; + + // Removal listener from the spillover cache as a whole private final RemovalListener, V> removalListener; + + // In future we want to just read the stats from the individual tiers' statsHolder objects, but this isn't + // possible right now because of the way computeIfAbsent is implemented. + private final TieredSpilloverCacheStatsHolder statsHolder; + private ToLongBiFunction, V> weigher; private final List dimensionNames; ReadWriteLock readWriteLock = new ReentrantReadWriteLock(); ReleasableLock readLock = new ReleasableLock(readWriteLock.readLock()); @@ -67,28 +83,22 @@ public class TieredSpilloverCache implements ICache { /** * Maintains caching tiers in ascending order of cache latency. 
*/ - private final List> cacheList; + private final Map, TierInfo> caches; private final List> policies; TieredSpilloverCache(Builder builder) { Objects.requireNonNull(builder.onHeapCacheFactory, "onHeap cache builder can't be null"); Objects.requireNonNull(builder.diskCacheFactory, "disk cache builder can't be null"); + Objects.requireNonNull(builder.cacheConfig, "cache config can't be null"); + Objects.requireNonNull(builder.cacheConfig.getClusterSettings(), "cluster settings can't be null"); this.removalListener = Objects.requireNonNull(builder.removalListener, "Removal listener can't be null"); + this.onHeapRemovalListener = new HeapTierRemovalListener(this); + this.onDiskRemovalListener = new DiskTierRemovalListener(this); + this.weigher = Objects.requireNonNull(builder.cacheConfig.getWeigher(), "Weigher can't be null"); + this.onHeapCache = builder.onHeapCacheFactory.create( - new CacheConfig.Builder().setRemovalListener(new RemovalListener, V>() { - @Override - public void onRemoval(RemovalNotification, V> notification) { - try (ReleasableLock ignore = writeLock.acquire()) { - if (SPILLOVER_REMOVAL_REASONS.contains(notification.getRemovalReason()) - && evaluatePolicies(notification.getValue())) { - diskCache.put(notification.getKey(), notification.getValue()); - } else { - removalListener.onRemoval(notification); - } - } - } - }) + new CacheConfig.Builder().setRemovalListener(onHeapRemovalListener) .setKeyType(builder.cacheConfig.getKeyType()) .setValueType(builder.cacheConfig.getValueType()) .setSettings(builder.cacheConfig.getSettings()) @@ -97,15 +107,36 @@ && evaluatePolicies(notification.getValue())) { .setMaxSizeInBytes(builder.cacheConfig.getMaxSizeInBytes()) .setExpireAfterAccess(builder.cacheConfig.getExpireAfterAccess()) .setClusterSettings(builder.cacheConfig.getClusterSettings()) + .setStatsTrackingEnabled(false) .build(), builder.cacheType, builder.cacheFactories ); - this.diskCache = builder.diskCacheFactory.create(builder.cacheConfig, 
builder.cacheType, builder.cacheFactories); - this.cacheList = Arrays.asList(onHeapCache, diskCache); + this.diskCache = builder.diskCacheFactory.create( + new CacheConfig.Builder().setRemovalListener(onDiskRemovalListener) + .setKeyType(builder.cacheConfig.getKeyType()) + .setValueType(builder.cacheConfig.getValueType()) + .setSettings(builder.cacheConfig.getSettings()) + .setWeigher(builder.cacheConfig.getWeigher()) + .setDimensionNames(builder.cacheConfig.getDimensionNames()) + .setStatsTrackingEnabled(false) + .build(), + builder.cacheType, + builder.cacheFactories + ); + Boolean isDiskCacheEnabled = DISK_CACHE_ENABLED_SETTING_MAP.get(builder.cacheType).get(builder.cacheConfig.getSettings()); + LinkedHashMap, TierInfo> cacheListMap = new LinkedHashMap<>(); + cacheListMap.put(onHeapCache, new TierInfo(true, TIER_DIMENSION_VALUE_ON_HEAP)); + cacheListMap.put(diskCache, new TierInfo(isDiskCacheEnabled, TIER_DIMENSION_VALUE_DISK)); + this.caches = Collections.synchronizedMap(cacheListMap); + this.dimensionNames = builder.cacheConfig.getDimensionNames(); + // Pass "tier" as the innermost dimension name, in addition to whatever dimensions are specified for the cache as a whole + this.statsHolder = new TieredSpilloverCacheStatsHolder(dimensionNames, isDiskCacheEnabled); this.policies = builder.policies; // Will never be null; builder initializes it to an empty list + builder.cacheConfig.getClusterSettings() + .addSettingsUpdateConsumer(DISK_CACHE_ENABLED_SETTING_MAP.get(builder.cacheType), this::enableDisableDiskCache); } // Package private for testing @@ -118,23 +149,42 @@ ICache getDiskCache() { return diskCache; } + // Package private for testing. + void enableDisableDiskCache(Boolean isDiskCacheEnabled) { + // When disk cache is disabled, we are not clearing up the disk cache entries yet as that should be part of + // separate cache/clear API. 
+ this.caches.put(diskCache, new TierInfo(isDiskCacheEnabled, TIER_DIMENSION_VALUE_DISK)); + this.statsHolder.setDiskCacheEnabled(isDiskCacheEnabled); + } + @Override public V get(ICacheKey key) { - return getValueFromTieredCache().apply(key); + Tuple cacheValueTuple = getValueFromTieredCache(true).apply(key); + if (cacheValueTuple == null) { + return null; + } + return cacheValueTuple.v1(); } @Override public void put(ICacheKey key, V value) { try (ReleasableLock ignore = writeLock.acquire()) { onHeapCache.put(key, value); + updateStatsOnPut(TIER_DIMENSION_VALUE_ON_HEAP, key, value); } } @Override public V computeIfAbsent(ICacheKey key, LoadAwareCacheLoader, V> loader) throws Exception { - - V cacheValue = getValueFromTieredCache().apply(key); - if (cacheValue == null) { + // Don't capture stats in the initial getValueFromTieredCache(). If we have concurrent requests for the same key, + // and it only has to be loaded one time, we should report one miss and the rest hits. But, if we do stats in + // getValueFromTieredCache(), + // we will see all misses. Instead, handle stats in computeIfAbsent(). + Tuple cacheValueTuple = getValueFromTieredCache(false).apply(key); + List heapDimensionValues = statsHolder.getDimensionsWithTierValue(key.dimensions, TIER_DIMENSION_VALUE_ON_HEAP); + List diskDimensionValues = statsHolder.getDimensionsWithTierValue(key.dimensions, TIER_DIMENSION_VALUE_DISK); + + if (cacheValueTuple == null) { // Add the value to the onHeap cache. We are calling computeIfAbsent which does another get inside. // This is needed as there can be many requests for the same key at the same time and we only want to load // the value once. @@ -142,9 +192,32 @@ public V computeIfAbsent(ICacheKey key, LoadAwareCacheLoader, V> try (ReleasableLock ignore = writeLock.acquire()) { value = onHeapCache.computeIfAbsent(key, loader); } + // Handle stats + if (loader.isLoaded()) { + // The value was just computed and added to the cache by this thread. 
Register a miss for the heap cache, and the disk cache + // if present + updateStatsOnPut(TIER_DIMENSION_VALUE_ON_HEAP, key, value); + statsHolder.incrementMisses(heapDimensionValues); + if (caches.get(diskCache).isEnabled()) { + statsHolder.incrementMisses(diskDimensionValues); + } + } else { + // Another thread requesting this key already loaded the value. Register a hit for the heap cache + statsHolder.incrementHits(heapDimensionValues); + } return value; + } else { + // Handle stats for an initial hit from getValueFromTieredCache() + if (cacheValueTuple.v2().equals(TIER_DIMENSION_VALUE_ON_HEAP)) { + // A hit for the heap tier + statsHolder.incrementHits(heapDimensionValues); + } else if (cacheValueTuple.v2().equals(TIER_DIMENSION_VALUE_DISK)) { + // Miss for the heap tier, hit for the disk tier + statsHolder.incrementMisses(heapDimensionValues); + statsHolder.incrementHits(diskDimensionValues); + } } - return cacheValue; + return cacheValueTuple.v1(); } @Override @@ -152,9 +225,16 @@ public void invalidate(ICacheKey key) { // We are trying to invalidate the key from all caches though it would be present in only of them. // Doing this as we don't know where it is located. We could do a get from both and check that, but what will // also trigger a hit/miss listener event, so ignoring it for now. - try (ReleasableLock ignore = writeLock.acquire()) { - for (ICache cache : cacheList) { - cache.invalidate(key); + // We don't update stats here, as this is handled by the removal listeners for the tiers. 
+ for (Map.Entry, TierInfo> cacheEntry : caches.entrySet()) { + if (key.getDropStatsForDimensions()) { + List dimensionValues = statsHolder.getDimensionsWithTierValue(key.dimensions, cacheEntry.getValue().tierName); + statsHolder.removeDimensions(dimensionValues); + } + if (key.key != null) { + try (ReleasableLock ignore = writeLock.acquire()) { + cacheEntry.getKey().invalidate(key); + } } } } @@ -162,10 +242,11 @@ public void invalidate(ICacheKey key) { @Override public void invalidateAll() { try (ReleasableLock ignore = writeLock.acquire()) { - for (ICache cache : cacheList) { - cache.invalidateAll(); + for (Map.Entry, TierInfo> cacheEntry : caches.entrySet()) { + cacheEntry.getKey().invalidateAll(); } } + statsHolder.reset(); } /** @@ -175,57 +256,110 @@ public void invalidateAll() { @SuppressWarnings({ "unchecked" }) @Override public Iterable> keys() { - Iterable>[] iterables = (Iterable>[]) new Iterable[] { onHeapCache.keys(), diskCache.keys() }; - return new ConcatenatedIterables>(iterables); + List>> iterableList = new ArrayList<>(); + for (Map.Entry, TierInfo> cacheEntry : caches.entrySet()) { + iterableList.add(cacheEntry.getKey().keys()); + } + Iterable>[] iterables = (Iterable>[]) iterableList.toArray(new Iterable[0]); + return new ConcatenatedIterables<>(iterables); } @Override public long count() { - long count = 0; - for (ICache cache : cacheList) { - count += cache.count(); - } - return count; + // Count for all the tiers irrespective of whether they are enabled or not. As eventually + // this will turn to zero once cache is cleared up either via invalidation or manually. 
+ return statsHolder.count(); } @Override public void refresh() { try (ReleasableLock ignore = writeLock.acquire()) { - for (ICache cache : cacheList) { - cache.refresh(); + for (Map.Entry, TierInfo> cacheEntry : caches.entrySet()) { + cacheEntry.getKey().refresh(); } } } @Override public void close() throws IOException { - for (ICache cache : cacheList) { - cache.close(); + for (Map.Entry, TierInfo> cacheEntry : caches.entrySet()) { + // Close all the caches here irrespective of whether they are enabled or not. + cacheEntry.getKey().close(); } } @Override - public ImmutableCacheStatsHolder stats() { - return null; // TODO: in TSC stats PR + public ImmutableCacheStatsHolder stats(String[] levels) { + return statsHolder.getImmutableCacheStatsHolder(levels); } - private Function, V> getValueFromTieredCache() { + /** + * Get a value from the tiered cache, and the name of the tier it was found in. + * @param captureStats Whether to record hits/misses for this call of the function + * @return A tuple of the value and the name of the tier it was found in. 
+ */ + private Function, Tuple> getValueFromTieredCache(boolean captureStats) { return key -> { try (ReleasableLock ignore = readLock.acquire()) { - for (ICache cache : cacheList) { - V value = cache.get(key); - if (value != null) { - // update hit stats - return value; - } else { - // update miss stats + for (Map.Entry, TierInfo> cacheEntry : caches.entrySet()) { + if (cacheEntry.getValue().isEnabled()) { + V value = cacheEntry.getKey().get(key); + // Get the tier value corresponding to this cache + String tierValue = cacheEntry.getValue().tierName; + List dimensionValues = statsHolder.getDimensionsWithTierValue(key.dimensions, tierValue); + if (value != null) { + if (captureStats) { + statsHolder.incrementHits(dimensionValues); + } + return new Tuple<>(value, tierValue); + } else if (captureStats) { + statsHolder.incrementMisses(dimensionValues); + } } } + return null; } - return null; }; } + void handleRemovalFromHeapTier(RemovalNotification, V> notification) { + ICacheKey key = notification.getKey(); + boolean wasEvicted = SPILLOVER_REMOVAL_REASONS.contains(notification.getRemovalReason()); + if (caches.get(diskCache).isEnabled() && wasEvicted && evaluatePolicies(notification.getValue())) { + try (ReleasableLock ignore = writeLock.acquire()) { + diskCache.put(key, notification.getValue()); // spill over to the disk tier and increment its stats + } + updateStatsOnPut(TIER_DIMENSION_VALUE_DISK, key, notification.getValue()); + } else { + // If the value is not going to the disk cache, send this notification to the TSC's removal listener + // as the value is leaving the TSC entirely + removalListener.onRemoval(notification); + } + updateStatsOnRemoval(TIER_DIMENSION_VALUE_ON_HEAP, wasEvicted, key, notification.getValue()); + } + + void handleRemovalFromDiskTier(RemovalNotification, V> notification) { + // Values removed from the disk tier leave the TSC entirely + removalListener.onRemoval(notification); + boolean wasEvicted = 
SPILLOVER_REMOVAL_REASONS.contains(notification.getRemovalReason()); + updateStatsOnRemoval(TIER_DIMENSION_VALUE_DISK, wasEvicted, notification.getKey(), notification.getValue()); + } + + void updateStatsOnRemoval(String removedFromTierValue, boolean wasEvicted, ICacheKey key, V value) { + List dimensionValues = statsHolder.getDimensionsWithTierValue(key.dimensions, removedFromTierValue); + if (wasEvicted) { + statsHolder.incrementEvictions(dimensionValues); + } + statsHolder.decrementItems(dimensionValues); + statsHolder.decrementSizeInBytes(dimensionValues, weigher.applyAsLong(key, value)); + } + + void updateStatsOnPut(String destinationTierValue, ICacheKey key, V value) { + List dimensionValues = statsHolder.getDimensionsWithTierValue(key.dimensions, destinationTierValue); + statsHolder.incrementItems(dimensionValues); + statsHolder.incrementSizeInBytes(dimensionValues, weigher.applyAsLong(key, value)); + } + boolean evaluatePolicies(V value) { for (Predicate policy : policies) { if (!policy.test(value)) { @@ -235,6 +369,38 @@ boolean evaluatePolicies(V value) { return true; } + /** + * A class which receives removal events from the heap tier. + */ + private class HeapTierRemovalListener implements RemovalListener, V> { + private final TieredSpilloverCache tsc; + + HeapTierRemovalListener(TieredSpilloverCache tsc) { + this.tsc = tsc; + } + + @Override + public void onRemoval(RemovalNotification, V> notification) { + tsc.handleRemovalFromHeapTier(notification); + } + } + + /** + * A class which receives removal events from the disk tier. 
+ */ + private class DiskTierRemovalListener implements RemovalListener, V> { + private final TieredSpilloverCache tsc; + + DiskTierRemovalListener(TieredSpilloverCache tsc) { + this.tsc = tsc; + } + + @Override + public void onRemoval(RemovalNotification, V> notification) { + tsc.handleRemovalFromDiskTier(notification); + } + } + /** * ConcatenatedIterables which combines cache iterables and supports remove() functionality as well if underlying * iterator supports it. @@ -296,6 +462,20 @@ public void remove() { } } + private class TierInfo { + AtomicBoolean isEnabled; + final String tierName; + + TierInfo(boolean isEnabled, String tierName) { + this.isEnabled = new AtomicBoolean(isEnabled); + this.tierName = tierName; + } + + boolean isEnabled() { + return isEnabled.get(); + } + } + /** * Factory to create TieredSpilloverCache objects. */ diff --git a/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCachePlugin.java b/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCachePlugin.java index dfd40199d859e..1c10e51630460 100644 --- a/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCachePlugin.java +++ b/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCachePlugin.java @@ -11,6 +11,8 @@ import org.opensearch.common.cache.CacheType; import org.opensearch.common.cache.ICache; import org.opensearch.common.settings.Setting; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.plugins.CachePlugin; import org.opensearch.plugins.Plugin; @@ -18,6 +20,7 @@ import java.util.List; import java.util.Map; +import static org.opensearch.cache.common.tier.TieredSpilloverCacheSettings.DISK_CACHE_ENABLED_SETTING_MAP; import static org.opensearch.cache.common.tier.TieredSpilloverCacheSettings.TOOK_TIME_POLICY_CONCRETE_SETTINGS_MAP; /** @@ -30,10 +33,15 @@ public class 
TieredSpilloverCachePlugin extends Plugin implements CachePlugin { */ public static final String TIERED_CACHE_SPILLOVER_PLUGIN_NAME = "tieredSpilloverCachePlugin"; + private final Settings settings; + /** * Default constructor + * @param settings settings */ - public TieredSpilloverCachePlugin() {} + public TieredSpilloverCachePlugin(Settings settings) { + this.settings = settings; + } @Override public Map getCacheFactoryMap() { @@ -54,6 +62,9 @@ public List> getSettings() { TieredSpilloverCacheSettings.TIERED_SPILLOVER_DISK_STORE_NAME.getConcreteSettingForNamespace(cacheType.getSettingPrefix()) ); settingList.add(TOOK_TIME_POLICY_CONCRETE_SETTINGS_MAP.get(cacheType)); + if (FeatureFlags.PLUGGABLE_CACHE_SETTING.get(settings)) { + settingList.add(DISK_CACHE_ENABLED_SETTING_MAP.get(cacheType)); + } } return settingList; } diff --git a/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCacheSettings.java b/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCacheSettings.java index b89e8c517a351..e8e441d6bd3a6 100644 --- a/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCacheSettings.java +++ b/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCacheSettings.java @@ -42,6 +42,14 @@ public class TieredSpilloverCacheSettings { (key) -> Setting.simpleString(key, "", NodeScope) ); + /** + * Setting to disable/enable disk cache dynamically. + */ + public static final Setting.AffixSetting TIERED_SPILLOVER_DISK_CACHE_SETTING = Setting.suffixKeySetting( + TieredSpilloverCache.TieredSpilloverCacheFactory.TIERED_SPILLOVER_CACHE_NAME + ".disk.store.enabled", + (key) -> Setting.boolSetting(key, true, NodeScope, Setting.Property.Dynamic) + ); + /** * Setting defining the minimum took time for a query to be allowed into the disk cache. 
*/ @@ -63,17 +71,29 @@ public class TieredSpilloverCacheSettings { public static final Map> TOOK_TIME_POLICY_CONCRETE_SETTINGS_MAP; /** - * Fetches concrete took time policy settings. + * Stores disk cache enabled settings for various cache types as these are dynamic so that can be registered and + * retrieved accordingly. + */ + public static final Map> DISK_CACHE_ENABLED_SETTING_MAP; + + /** + * Fetches concrete took time policy and disk cache settings. */ static { Map> concreteTookTimePolicySettingMap = new HashMap<>(); + Map> diskCacheSettingMap = new HashMap<>(); for (CacheType cacheType : CacheType.values()) { concreteTookTimePolicySettingMap.put( cacheType, TIERED_SPILLOVER_DISK_TOOK_TIME_THRESHOLD.getConcreteSettingForNamespace(cacheType.getSettingPrefix()) ); + diskCacheSettingMap.put( + cacheType, + TIERED_SPILLOVER_DISK_CACHE_SETTING.getConcreteSettingForNamespace(cacheType.getSettingPrefix()) + ); } TOOK_TIME_POLICY_CONCRETE_SETTINGS_MAP = concreteTookTimePolicySettingMap; + DISK_CACHE_ENABLED_SETTING_MAP = diskCacheSettingMap; } /** diff --git a/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCacheStatsHolder.java b/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCacheStatsHolder.java new file mode 100644 index 0000000000000..d17059e8dee94 --- /dev/null +++ b/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCacheStatsHolder.java @@ -0,0 +1,161 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cache.common.tier; + +import org.opensearch.common.cache.stats.DefaultCacheStatsHolder; + +import java.util.ArrayList; +import java.util.List; +import java.util.function.Consumer; + +/** + * A tier-aware version of DefaultCacheStatsHolder. 
Overrides the incrementer functions, as we can't just add the on-heap + * and disk stats to get a total for the cache as a whole. If the disk tier is present, the total hits, size, and entries + * should be the sum of both tiers' values, but the total misses and evictions should be the disk tier's values. + * When the disk tier isn't present, on-heap misses and evictions should contribute to the total. + * + * For example, if the heap tier has 5 misses and the disk tier has 4, the total cache has had 4 misses, not 9. + * The same goes for evictions. Other stats values add normally. + * + * This means for misses and evictions, if we are incrementing for the on-heap tier and the disk tier is present, + * we have to increment only the leaf nodes corresponding to the on-heap tier itself, and not its ancestors, + * which correspond to totals including both tiers. If the disk tier is not present, we do increment the ancestor nodes. + */ +public class TieredSpilloverCacheStatsHolder extends DefaultCacheStatsHolder { + + /** Whether the disk cache is currently enabled. */ + private boolean diskCacheEnabled; + + // Common values used for tier dimension + + /** The name for the tier dimension. */ + public static final String TIER_DIMENSION_NAME = "tier"; + + /** Dimension value for on-heap cache, like OpenSearchOnHeapCache.*/ + public static final String TIER_DIMENSION_VALUE_ON_HEAP = "on_heap"; + + /** Dimension value for on-disk cache, like EhcacheDiskCache. */ + public static final String TIER_DIMENSION_VALUE_DISK = "disk"; + + /** + * Constructor for the stats holder. 
+ * @param originalDimensionNames the original dimension names, not including TIER_DIMENSION_NAME + * @param diskCacheEnabled whether the disk tier starts out enabled + */ + public TieredSpilloverCacheStatsHolder(List originalDimensionNames, boolean diskCacheEnabled) { + super( + getDimensionNamesWithTier(originalDimensionNames), + TieredSpilloverCache.TieredSpilloverCacheFactory.TIERED_SPILLOVER_CACHE_NAME + ); + this.diskCacheEnabled = diskCacheEnabled; + } + + private static List getDimensionNamesWithTier(List dimensionNames) { + List dimensionNamesWithTier = new ArrayList<>(dimensionNames); + dimensionNamesWithTier.add(TIER_DIMENSION_NAME); + return dimensionNamesWithTier; + } + + /** + * Add tierValue to the end of a copy of the initial dimension values, so they can appropriately be used in this stats holder. + */ + List getDimensionsWithTierValue(List initialDimensions, String tierValue) { + List result = new ArrayList<>(initialDimensions); + result.add(tierValue); + return result; + } + + private String validateTierDimensionValue(List dimensionValues) { + String tierDimensionValue = dimensionValues.get(dimensionValues.size() - 1); + assert tierDimensionValue.equals(TIER_DIMENSION_VALUE_ON_HEAP) || tierDimensionValue.equals(TIER_DIMENSION_VALUE_DISK) + : "Invalid tier dimension value"; + return tierDimensionValue; + } + + @Override + public void incrementHits(List dimensionValues) { + validateTierDimensionValue(dimensionValues); + // Hits from either tier should be included in the total values. + super.incrementHits(dimensionValues); + } + + @Override + public void incrementMisses(List dimensionValues) { + final String tierValue = validateTierDimensionValue(dimensionValues); + + // If the disk tier is present, only misses from the disk tier should be included in total values. 
+ Consumer missIncrementer = (node) -> { + if (tierValue.equals(TIER_DIMENSION_VALUE_ON_HEAP) && diskCacheEnabled) { + // If on-heap tier, increment only the leaf node corresponding to the on heap values; not the total values in its parent + // nodes + if (node.isAtLowestLevel()) { + node.incrementMisses(); + } + } else { + // If disk tier, or on-heap tier with a disabled disk tier, increment the leaf node and its parents + node.incrementMisses(); + } + }; + internalIncrement(dimensionValues, missIncrementer, true); + } + + @Override + public void incrementEvictions(List dimensionValues) { + final String tierValue = validateTierDimensionValue(dimensionValues); + + // If the disk tier is present, only evictions from the disk tier should be included in total values. + Consumer evictionsIncrementer = (node) -> { + if (tierValue.equals(TIER_DIMENSION_VALUE_ON_HEAP) && diskCacheEnabled) { + // If on-heap tier, increment only the leaf node corresponding to the on heap values; not the total values in its parent + // nodes + if (node.isAtLowestLevel()) { + node.incrementEvictions(); + } + } else { + // If disk tier, or on-heap tier with a disabled disk tier, increment the leaf node and its parents + node.incrementEvictions(); + } + }; + internalIncrement(dimensionValues, evictionsIncrementer, true); + } + + @Override + public void incrementSizeInBytes(List dimensionValues, long amountBytes) { + validateTierDimensionValue(dimensionValues); + // Size from either tier should be included in the total values. + super.incrementSizeInBytes(dimensionValues, amountBytes); + } + + // For decrements, we should not create nodes if they are absent. This protects us from erroneously decrementing values for keys + // which have been entirely deleted, for example in an async removal listener. 
+ @Override + public void decrementSizeInBytes(List dimensionValues, long amountBytes) { + validateTierDimensionValue(dimensionValues); + // Size from either tier should be included in the total values. + super.decrementSizeInBytes(dimensionValues, amountBytes); + } + + @Override + public void incrementItems(List dimensionValues) { + validateTierDimensionValue(dimensionValues); + // Entries from either tier should be included in the total values. + super.incrementItems(dimensionValues); + } + + @Override + public void decrementItems(List dimensionValues) { + validateTierDimensionValue(dimensionValues); + // Entries from either tier should be included in the total values. + super.decrementItems(dimensionValues); + } + + void setDiskCacheEnabled(boolean diskCacheEnabled) { + this.diskCacheEnabled = diskCacheEnabled; + } +} diff --git a/modules/cache-common/src/test/java/org/opensearch/cache/common/tier/MockDiskCache.java b/modules/cache-common/src/test/java/org/opensearch/cache/common/tier/MockDiskCache.java index 0d98503af635f..2058faa5181b1 100644 --- a/modules/cache-common/src/test/java/org/opensearch/cache/common/tier/MockDiskCache.java +++ b/modules/cache-common/src/test/java/org/opensearch/cache/common/tier/MockDiskCache.java @@ -16,11 +16,15 @@ import org.opensearch.common.cache.RemovalNotification; import org.opensearch.common.cache.RemovalReason; import org.opensearch.common.cache.serializer.Serializer; +import org.opensearch.common.cache.stats.CacheStatsHolder; +import org.opensearch.common.cache.stats.DefaultCacheStatsHolder; import org.opensearch.common.cache.stats.ImmutableCacheStatsHolder; +import org.opensearch.common.cache.stats.NoopCacheStatsHolder; import org.opensearch.common.cache.store.builders.ICacheBuilder; import org.opensearch.common.cache.store.config.CacheConfig; import java.util.Iterator; +import java.util.List; import java.util.Map; import java.util.NoSuchElementException; import java.util.concurrent.ConcurrentHashMap; @@ -32,12 +36,19 @@ 
public class MockDiskCache implements ICache { long delay; private final RemovalListener, V> removalListener; + private final CacheStatsHolder statsHolder; // Only update for number of entries; this is only used to test statsTrackingEnabled logic + // in TSC - public MockDiskCache(int maxSize, long delay, RemovalListener, V> removalListener) { + public MockDiskCache(int maxSize, long delay, RemovalListener, V> removalListener, boolean statsTrackingEnabled) { this.maxSize = maxSize; this.delay = delay; this.removalListener = removalListener; this.cache = new ConcurrentHashMap, V>(); + if (statsTrackingEnabled) { + this.statsHolder = new DefaultCacheStatsHolder(List.of(), "mock_disk_cache"); + } else { + this.statsHolder = NoopCacheStatsHolder.getInstance(); + } } @Override @@ -50,6 +61,7 @@ public V get(ICacheKey key) { public void put(ICacheKey key, V value) { if (this.cache.size() >= maxSize) { // For simplification this.removalListener.onRemoval(new RemovalNotification<>(key, value, RemovalReason.EVICTED)); + this.statsHolder.decrementItems(List.of()); } try { Thread.sleep(delay); @@ -57,6 +69,7 @@ public void put(ICacheKey key, V value) { throw new RuntimeException(e); } this.cache.put(key, value); + this.statsHolder.incrementItems(List.of()); } @Override @@ -73,6 +86,7 @@ public V computeIfAbsent(ICacheKey key, LoadAwareCacheLoader, V> @Override public void invalidate(ICacheKey key) { + removalListener.onRemoval(new RemovalNotification<>(key, cache.get(key), RemovalReason.INVALIDATED)); this.cache.remove(key); } @@ -96,6 +110,13 @@ public void refresh() {} @Override public ImmutableCacheStatsHolder stats() { + // To allow testing of statsTrackingEnabled logic in TSC, return a dummy ImmutableCacheStatsHolder with the + // right number of entries, unless statsTrackingEnabled is false + return statsHolder.getImmutableCacheStatsHolder(null); + } + + @Override + public ImmutableCacheStatsHolder stats(String[] levels) { return null; } @@ -109,10 +130,12 @@ public 
static class MockDiskCacheFactory implements Factory { public static final String NAME = "mockDiskCache"; final long delay; final int maxSize; + final boolean statsTrackingEnabled; - public MockDiskCacheFactory(long delay, int maxSize) { + public MockDiskCacheFactory(long delay, int maxSize, boolean statsTrackingEnabled) { this.delay = delay; this.maxSize = maxSize; + this.statsTrackingEnabled = statsTrackingEnabled; } @Override @@ -123,6 +146,7 @@ public ICache create(CacheConfig config, CacheType cacheType, .setMaxSize(maxSize) .setDeliberateDelay(delay) .setRemovalListener(config.getRemovalListener()) + .setStatsTrackingEnabled(config.getStatsTrackingEnabled()) .build(); } @@ -141,7 +165,7 @@ public static class Builder extends ICacheBuilder { @Override public ICache build() { - return new MockDiskCache(this.maxSize, this.delay, this.getRemovalListener()); + return new MockDiskCache(this.maxSize, this.delay, this.getRemovalListener(), getStatsTrackingEnabled()); } public Builder setMaxSize(int maxSize) { diff --git a/modules/cache-common/src/test/java/org/opensearch/cache/common/tier/TieredSpilloverCachePluginTests.java b/modules/cache-common/src/test/java/org/opensearch/cache/common/tier/TieredSpilloverCachePluginTests.java index 1172a48e97c6a..4a96ffe2069ec 100644 --- a/modules/cache-common/src/test/java/org/opensearch/cache/common/tier/TieredSpilloverCachePluginTests.java +++ b/modules/cache-common/src/test/java/org/opensearch/cache/common/tier/TieredSpilloverCachePluginTests.java @@ -9,6 +9,8 @@ package org.opensearch.cache.common.tier; import org.opensearch.common.cache.ICache; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.test.OpenSearchTestCase; import java.util.Map; @@ -16,9 +18,16 @@ public class TieredSpilloverCachePluginTests extends OpenSearchTestCase { public void testGetCacheFactoryMap() { - TieredSpilloverCachePlugin tieredSpilloverCachePlugin = new 
TieredSpilloverCachePlugin(); + TieredSpilloverCachePlugin tieredSpilloverCachePlugin = new TieredSpilloverCachePlugin(Settings.EMPTY); Map map = tieredSpilloverCachePlugin.getCacheFactoryMap(); assertNotNull(map.get(TieredSpilloverCache.TieredSpilloverCacheFactory.TIERED_SPILLOVER_CACHE_NAME)); assertEquals(TieredSpilloverCachePlugin.TIERED_CACHE_SPILLOVER_PLUGIN_NAME, tieredSpilloverCachePlugin.getName()); } + + public void testGetSettingsWithFeatureFlagOn() { + TieredSpilloverCachePlugin tieredSpilloverCachePlugin = new TieredSpilloverCachePlugin( + Settings.builder().put(FeatureFlags.PLUGGABLE_CACHE_SETTING.getKey(), true).build() + ); + assertFalse(tieredSpilloverCachePlugin.getSettings().isEmpty()); + } } diff --git a/modules/cache-common/src/test/java/org/opensearch/cache/common/tier/TieredSpilloverCacheTests.java b/modules/cache-common/src/test/java/org/opensearch/cache/common/tier/TieredSpilloverCacheTests.java index bf9f8fd22d793..6d5ee91326338 100644 --- a/modules/cache-common/src/test/java/org/opensearch/cache/common/tier/TieredSpilloverCacheTests.java +++ b/modules/cache-common/src/test/java/org/opensearch/cache/common/tier/TieredSpilloverCacheTests.java @@ -8,6 +8,7 @@ package org.opensearch.cache.common.tier; +import org.opensearch.common.Randomness; import org.opensearch.common.cache.CacheType; import org.opensearch.common.cache.ICache; import org.opensearch.common.cache.ICacheKey; @@ -16,6 +17,8 @@ import org.opensearch.common.cache.RemovalNotification; import org.opensearch.common.cache.policy.CachedQueryResult; import org.opensearch.common.cache.settings.CacheSettings; +import org.opensearch.common.cache.stats.ImmutableCacheStats; +import org.opensearch.common.cache.stats.ImmutableCacheStatsHolder; import org.opensearch.common.cache.store.OpenSearchOnHeapCache; import org.opensearch.common.cache.store.config.CacheConfig; import org.opensearch.common.cache.store.settings.OpenSearchOnHeapCacheSettings; @@ -28,11 +31,13 @@ import 
org.opensearch.test.OpenSearchTestCase; import org.junit.Before; +import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Random; import java.util.UUID; import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.CountDownLatch; @@ -42,11 +47,14 @@ import java.util.function.Function; import java.util.function.Predicate; +import static org.opensearch.cache.common.tier.TieredSpilloverCacheSettings.DISK_CACHE_ENABLED_SETTING_MAP; import static org.opensearch.cache.common.tier.TieredSpilloverCacheSettings.TOOK_TIME_POLICY_CONCRETE_SETTINGS_MAP; +import static org.opensearch.cache.common.tier.TieredSpilloverCacheStatsHolder.TIER_DIMENSION_NAME; +import static org.opensearch.cache.common.tier.TieredSpilloverCacheStatsHolder.TIER_DIMENSION_VALUE_DISK; +import static org.opensearch.cache.common.tier.TieredSpilloverCacheStatsHolder.TIER_DIMENSION_VALUE_ON_HEAP; import static org.opensearch.common.cache.store.settings.OpenSearchOnHeapCacheSettings.MAXIMUM_SIZE_IN_BYTES_KEY; public class TieredSpilloverCacheTests extends OpenSearchTestCase { - // TODO: TSC stats impl is in a future PR. Parts of tests which use stats values are missing for now. 
static final List dimensionNames = List.of("dim1", "dim2", "dim3"); private ClusterSettings clusterSettings; @@ -56,6 +64,7 @@ public void setup() { Settings settings = Settings.EMPTY; clusterSettings = new ClusterSettings(settings, new HashSet<>()); clusterSettings.registerSetting(TOOK_TIME_POLICY_CONCRETE_SETTINGS_MAP.get(CacheType.INDICES_REQUEST_CACHE)); + clusterSettings.registerSetting(DISK_CACHE_ENABLED_SETTING_MAP.get(CacheType.INDICES_REQUEST_CACHE)); } public void testComputeIfAbsentWithoutAnyOnHeapCacheEviction() throws Exception { @@ -87,6 +96,9 @@ public void testComputeIfAbsentWithoutAnyOnHeapCacheEviction() throws Exception tieredSpilloverCache.computeIfAbsent(key, tieredCacheLoader); } assertEquals(0, removalListener.evictionsMetric.count()); + assertEquals(numOfItems1, getMissesForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_ON_HEAP)); + assertEquals(0, getHitsForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_ON_HEAP)); + assertEquals(0, getEvictionsForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_ON_HEAP)); // Try to hit cache again with some randomization. 
int numOfItems2 = randomIntBetween(1, onHeapCacheSize / 2 - 1); @@ -105,6 +117,13 @@ public void testComputeIfAbsentWithoutAnyOnHeapCacheEviction() throws Exception } } assertEquals(0, removalListener.evictionsMetric.count()); + assertEquals(cacheHit, getHitsForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_ON_HEAP)); + assertEquals(numOfItems1 + cacheMiss, getMissesForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_ON_HEAP)); + assertEquals(0, getEvictionsForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_ON_HEAP)); + + assertEquals(0, getHitsForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_DISK)); + assertEquals(numOfItems1 + cacheMiss, getMissesForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_DISK)); + assertEquals(0, getEvictionsForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_DISK)); } public void testComputeIfAbsentWithFactoryBasedCacheCreation() throws Exception { @@ -158,7 +177,7 @@ public void testComputeIfAbsentWithFactoryBasedCacheCreation() throws Exception OpenSearchOnHeapCache.OpenSearchOnHeapCacheFactory.NAME, new OpenSearchOnHeapCache.OpenSearchOnHeapCacheFactory(), MockDiskCache.MockDiskCacheFactory.NAME, - new MockDiskCache.MockDiskCacheFactory(0, randomIntBetween(100, 300)) + new MockDiskCache.MockDiskCacheFactory(0, randomIntBetween(100, 300), false) ) ); @@ -172,12 +191,25 @@ public void testComputeIfAbsentWithFactoryBasedCacheCreation() throws Exception LoadAwareCacheLoader, String> tieredCacheLoader = getLoadAwareCacheLoader(); tieredSpilloverCache.computeIfAbsent(getICacheKey(key), tieredCacheLoader); } + + int expectedDiskEntries = numOfItems1 - onHeapCacheSize; tieredSpilloverCache.getOnHeapCache().keys().forEach(onHeapKeys::add); tieredSpilloverCache.getDiskCache().keys().forEach(diskTierKeys::add); - // Verify on heap cache size. + // Verify on heap cache stats. assertEquals(onHeapCacheSize, tieredSpilloverCache.getOnHeapCache().count()); - // Verify disk cache size. 
- assertEquals(numOfItems1 - onHeapCacheSize, tieredSpilloverCache.getDiskCache().count()); + assertEquals(onHeapCacheSize, getItemsForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_ON_HEAP)); + assertEquals(numOfItems1, getMissesForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_ON_HEAP)); + assertEquals(0, getHitsForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_ON_HEAP)); + assertEquals(expectedDiskEntries, getEvictionsForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_ON_HEAP)); + assertEquals(onHeapCacheSize * keyValueSize, getSizeInBytesForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_ON_HEAP)); + + // Verify disk cache stats. + assertEquals(expectedDiskEntries, tieredSpilloverCache.getDiskCache().count()); + assertEquals(expectedDiskEntries, getItemsForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_DISK)); + assertEquals(0, getHitsForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_DISK)); + assertEquals(numOfItems1, getMissesForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_DISK)); + assertEquals(0, getEvictionsForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_DISK)); + assertEquals(expectedDiskEntries * keyValueSize, getSizeInBytesForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_DISK)); } public void testWithFactoryCreationWithOnHeapCacheNotPresent() { @@ -220,7 +252,7 @@ public void testWithFactoryCreationWithOnHeapCacheNotPresent() { OpenSearchOnHeapCache.OpenSearchOnHeapCacheFactory.NAME, new OpenSearchOnHeapCache.OpenSearchOnHeapCacheFactory(), MockDiskCache.MockDiskCacheFactory.NAME, - new MockDiskCache.MockDiskCacheFactory(0, randomIntBetween(100, 300)) + new MockDiskCache.MockDiskCacheFactory(0, randomIntBetween(100, 300), false) ) ) ); @@ -265,7 +297,7 @@ public void testWithFactoryCreationWithDiskCacheNotPresent() { OpenSearchOnHeapCache.OpenSearchOnHeapCacheFactory.NAME, new OpenSearchOnHeapCache.OpenSearchOnHeapCacheFactory(), MockDiskCache.MockDiskCacheFactory.NAME, - new MockDiskCache.MockDiskCacheFactory(0, randomIntBetween(100, 300)) 
+ new MockDiskCache.MockDiskCacheFactory(0, randomIntBetween(100, 300), false) ) ) ); @@ -302,9 +334,10 @@ public void testComputeIfAbsentWithEvictionsFromOnHeapCache() throws Exception { ) .build() ) + .setClusterSettings(clusterSettings) .build(); - ICache.Factory mockDiskCacheFactory = new MockDiskCache.MockDiskCacheFactory(0, diskCacheSize); + ICache.Factory mockDiskCacheFactory = new MockDiskCache.MockDiskCacheFactory(0, diskCacheSize, false); TieredSpilloverCache tieredSpilloverCache = new TieredSpilloverCache.Builder() .setOnHeapCacheFactory(onHeapCacheFactory) @@ -324,6 +357,15 @@ public void testComputeIfAbsentWithEvictionsFromOnHeapCache() throws Exception { tieredSpilloverCache.computeIfAbsent(key, tieredCacheLoader); } + long actualDiskCacheSize = tieredSpilloverCache.getDiskCache().count(); + + assertEquals(numOfItems1, getMissesForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_ON_HEAP)); + assertEquals(0, getHitsForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_ON_HEAP)); + assertEquals(actualDiskCacheSize, getEvictionsForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_ON_HEAP)); + assertEquals(onHeapCacheSize, getItemsForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_ON_HEAP)); + assertEquals(onHeapCacheSize * keyValueSize, getSizeInBytesForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_ON_HEAP)); + assertEquals(actualDiskCacheSize, getItemsForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_DISK)); + tieredSpilloverCache.getOnHeapCache().keys().forEach(onHeapKeys::add); tieredSpilloverCache.getDiskCache().keys().forEach(diskTierKeys::add); @@ -347,12 +389,18 @@ public void testComputeIfAbsentWithEvictionsFromOnHeapCache() throws Exception { assertFalse(loadAwareCacheLoader.isLoaded()); } } - for (int iter = 0; iter < randomIntBetween(50, 200); iter++) { + int numRandom = randomIntBetween(50, 200); + for (int iter = 0; iter < numRandom; iter++) { // Hit cache with randomized key which is expected to miss cache always. 
LoadAwareCacheLoader, String> tieredCacheLoader = getLoadAwareCacheLoader(); tieredSpilloverCache.computeIfAbsent(getICacheKey(UUID.randomUUID().toString()), tieredCacheLoader); cacheMiss++; } + + assertEquals(numOfItems1 + cacheMiss + diskCacheHit, getMissesForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_ON_HEAP)); + assertEquals(onHeapCacheHit, getHitsForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_ON_HEAP)); + assertEquals(cacheMiss + numOfItems1, getMissesForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_DISK)); + assertEquals(diskCacheHit, getHitsForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_DISK)); } public void testComputeIfAbsentWithEvictionsFromTieredCache() throws Exception { @@ -382,8 +430,13 @@ public void testComputeIfAbsentWithEvictionsFromTieredCache() throws Exception { tieredSpilloverCache.computeIfAbsent(getICacheKey(UUID.randomUUID().toString()), tieredCacheLoader); } - int evictions = numOfItems - (totalSize); + int evictions = numOfItems - (totalSize); // Evictions from the cache as a whole assertEquals(evictions, removalListener.evictionsMetric.count()); + assertEquals(evictions, getEvictionsForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_DISK)); + assertEquals( + evictions + getItemsForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_DISK), + getEvictionsForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_ON_HEAP) + ); } public void testGetAndCount() throws Exception { @@ -439,7 +492,7 @@ public void testGetAndCount() throws Exception { assertEquals(numOfItems1, tieredSpilloverCache.count()); } - public void testPut() { + public void testPut() throws Exception { int onHeapCacheSize = randomIntBetween(10, 30); int diskCacheSize = randomIntBetween(onHeapCacheSize + 1, 100); int keyValueSize = 50; @@ -462,6 +515,8 @@ public void testPut() { ICacheKey key = getICacheKey(UUID.randomUUID().toString()); String value = UUID.randomUUID().toString(); tieredSpilloverCache.put(key, value); + assertEquals(1, 
getItemsForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_ON_HEAP)); + assertEquals(1, tieredSpilloverCache.count()); } public void testPutAndVerifyNewItemsArePresentOnHeapCache() throws Exception { @@ -494,6 +549,9 @@ public void testPutAndVerifyNewItemsArePresentOnHeapCache() throws Exception { tieredSpilloverCache.computeIfAbsent(getICacheKey(UUID.randomUUID().toString()), getLoadAwareCacheLoader()); } + assertEquals(onHeapCacheSize, getItemsForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_ON_HEAP)); + assertEquals(0, getItemsForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_DISK)); + // Again try to put OnHeap cache capacity amount of new items. List> newKeyList = new ArrayList<>(); for (int i = 0; i < onHeapCacheSize; i++) { @@ -512,9 +570,11 @@ public void testPutAndVerifyNewItemsArePresentOnHeapCache() throws Exception { for (int i = 0; i < actualOnHeapCacheKeys.size(); i++) { assertTrue(newKeyList.contains(actualOnHeapCacheKeys.get(i))); } + assertEquals(onHeapCacheSize, getItemsForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_ON_HEAP)); + assertEquals(onHeapCacheSize, getItemsForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_DISK)); } - public void testInvalidate() { + public void testInvalidate() throws Exception { int onHeapCacheSize = 1; int diskCacheSize = 10; int keyValueSize = 20; @@ -538,11 +598,12 @@ public void testInvalidate() { String value = UUID.randomUUID().toString(); // First try to invalidate without the key present in cache. tieredSpilloverCache.invalidate(key); - // assertEquals(0, tieredSpilloverCache.stats().getEvictionsByDimensions(HEAP_DIMS)); + assertEquals(0, getEvictionsForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_ON_HEAP)); // Now try to invalidate with the key present in onHeap cache. tieredSpilloverCache.put(key, value); tieredSpilloverCache.invalidate(key); + assertEquals(0, getEvictionsForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_ON_HEAP)); // Evictions metric shouldn't increase for invalidations. 
assertEquals(0, tieredSpilloverCache.count()); @@ -552,11 +613,15 @@ public void testInvalidate() { tieredSpilloverCache.put(key2, UUID.randomUUID().toString()); assertEquals(2, tieredSpilloverCache.count()); + assertEquals(1, getItemsForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_ON_HEAP)); + assertEquals(1, getItemsForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_DISK)); // Again invalidate older key, leaving one in heap tier and zero in disk tier tieredSpilloverCache.invalidate(key); + assertEquals(0, getEvictionsForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_DISK)); + assertEquals(0, getItemsForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_DISK)); + assertEquals(1, getItemsForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_ON_HEAP)); assertEquals(1, tieredSpilloverCache.count()); - } public void testCacheKeys() throws Exception { @@ -749,6 +814,9 @@ public String load(ICacheKey key) { } } assertEquals(1, numberOfTimesKeyLoaded); // It should be loaded only once. + // We should see only one heap miss, and the rest hits + assertEquals(1, getMissesForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_ON_HEAP)); + assertEquals(numberOfSameKeys - 1, getHitsForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_ON_HEAP)); } public void testConcurrencyForEvictionFlowFromOnHeapToDiskTier() throws Exception { @@ -757,7 +825,7 @@ public void testConcurrencyForEvictionFlowFromOnHeapToDiskTier() throws Exceptio MockCacheRemovalListener removalListener = new MockCacheRemovalListener<>(); ICache.Factory onHeapCacheFactory = new OpenSearchOnHeapCache.OpenSearchOnHeapCacheFactory(); - ICache.Factory diskCacheFactory = new MockDiskCache.MockDiskCacheFactory(500, diskCacheSize); + ICache.Factory diskCacheFactory = new MockDiskCache.MockDiskCacheFactory(500, diskCacheSize, false); CacheConfig cacheConfig = new CacheConfig.Builder().setKeyType(String.class) .setKeyType(String.class) .setWeigher((k, v) -> 150) @@ -777,6 +845,7 @@ public void 
testConcurrencyForEvictionFlowFromOnHeapToDiskTier() throws Exceptio ) .build() ) + .setClusterSettings(clusterSettings) .setDimensionNames(dimensionNames) .build(); TieredSpilloverCache tieredSpilloverCache = new TieredSpilloverCache.Builder() @@ -792,7 +861,7 @@ public void testConcurrencyForEvictionFlowFromOnHeapToDiskTier() throws Exceptio // Put first key on tiered cache. Will go into onHeap cache. tieredSpilloverCache.computeIfAbsent(keyToBeEvicted, getLoadAwareCacheLoader()); - // assertEquals(1, tieredSpilloverCache.stats().getEntriesByDimensions(HEAP_DIMS)); + assertEquals(1, getItemsForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_ON_HEAP)); CountDownLatch countDownLatch = new CountDownLatch(1); CountDownLatch countDownLatch1 = new CountDownLatch(1); // Put second key on tiered cache. Will cause eviction of first key from onHeap cache and should go into @@ -830,6 +899,10 @@ public void testConcurrencyForEvictionFlowFromOnHeapToDiskTier() throws Exceptio assertEquals(1, tieredSpilloverCache.getOnHeapCache().count()); assertEquals(1, onDiskCache.count()); + + assertEquals(1, getEvictionsForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_ON_HEAP)); + assertEquals(1, getItemsForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_ON_HEAP)); + assertEquals(1, getItemsForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_DISK)); assertNotNull(onDiskCache.get(keyToBeEvicted)); } @@ -957,7 +1030,7 @@ public CachedQueryResult.PolicyValues apply(String s) { OpenSearchOnHeapCache.OpenSearchOnHeapCacheFactory.NAME, new OpenSearchOnHeapCache.OpenSearchOnHeapCacheFactory(), MockDiskCache.MockDiskCacheFactory.NAME, - new MockDiskCache.MockDiskCacheFactory(0, randomIntBetween(100, 300)) + new MockDiskCache.MockDiskCacheFactory(0, randomIntBetween(100, 300), false) ) ); @@ -1008,6 +1081,237 @@ public void testMinimumThresholdSettingValue() throws Exception { assertEquals(validDuration, concreteSetting.get(validSettings)); } + public void testPutWithDiskCacheDisabledSetting() 
throws Exception { + int onHeapCacheSize = randomIntBetween(10, 30); + int diskCacheSize = randomIntBetween(onHeapCacheSize + 1, 100); + int keyValueSize = 50; + int totalSize = onHeapCacheSize + diskCacheSize; + + MockCacheRemovalListener removalListener = new MockCacheRemovalListener<>(); + TieredSpilloverCache tieredSpilloverCache = initializeTieredSpilloverCache( + keyValueSize, + diskCacheSize, + removalListener, + Settings.builder() + .put( + OpenSearchOnHeapCacheSettings.getSettingListForCacheType(CacheType.INDICES_REQUEST_CACHE) + .get(MAXIMUM_SIZE_IN_BYTES_KEY) + .getKey(), + onHeapCacheSize * keyValueSize + "b" + ) + .put(DISK_CACHE_ENABLED_SETTING_MAP.get(CacheType.INDICES_REQUEST_CACHE).getKey(), false) + .build(), + 0 + ); + + int numOfItems1 = randomIntBetween(onHeapCacheSize + 1, totalSize); // Create more items than onHeap cache. + for (int iter = 0; iter < numOfItems1; iter++) { + ICacheKey key = getICacheKey(UUID.randomUUID().toString()); + LoadAwareCacheLoader, String> loadAwareCacheLoader = getLoadAwareCacheLoader(); + tieredSpilloverCache.computeIfAbsent(key, loadAwareCacheLoader); + } + ICache onHeapCache = tieredSpilloverCache.getOnHeapCache(); + ICache diskCache = tieredSpilloverCache.getDiskCache(); + assertEquals(onHeapCacheSize, onHeapCache.count()); + assertEquals(0, diskCache.count()); // Disk cache shouldn't have anything considering it is disabled. 
+ assertEquals(numOfItems1 - onHeapCacheSize, removalListener.evictionsMetric.count()); + } + + public void testGetPutAndInvalidateWithDiskCacheDisabled() throws Exception { + int onHeapCacheSize = randomIntBetween(10, 30); + int diskCacheSize = randomIntBetween(onHeapCacheSize + 1, 100); + int keyValueSize = 50; + int totalSize = onHeapCacheSize + diskCacheSize; + MockCacheRemovalListener removalListener = new MockCacheRemovalListener<>(); + TieredSpilloverCache tieredSpilloverCache = initializeTieredSpilloverCache( + keyValueSize, + diskCacheSize, + removalListener, + Settings.builder() + .put( + OpenSearchOnHeapCacheSettings.getSettingListForCacheType(CacheType.INDICES_REQUEST_CACHE) + .get(MAXIMUM_SIZE_IN_BYTES_KEY) + .getKey(), + onHeapCacheSize * keyValueSize + "b" + ) + .build(), + 0 + ); + + int numOfItems1 = randomIntBetween(onHeapCacheSize + 1, totalSize - 1); // Create more items than onHeap + // cache to cause spillover. + for (int iter = 0; iter < numOfItems1; iter++) { + ICacheKey key = getICacheKey(UUID.randomUUID().toString()); + LoadAwareCacheLoader, String> loadAwareCacheLoader = getLoadAwareCacheLoader(); + tieredSpilloverCache.computeIfAbsent(key, loadAwareCacheLoader); + } + ICache onHeapCache = tieredSpilloverCache.getOnHeapCache(); + ICache diskCache = tieredSpilloverCache.getDiskCache(); + List> diskCacheKeys = new ArrayList<>(); + tieredSpilloverCache.getDiskCache().keys().forEach(diskCacheKeys::add); + long actualDiskCacheCount = diskCache.count(); + long actualTieredCacheCount = tieredSpilloverCache.count(); + assertEquals(onHeapCacheSize, onHeapCache.count()); + assertEquals(numOfItems1 - onHeapCacheSize, actualDiskCacheCount); + assertEquals(0, removalListener.evictionsMetric.count()); + assertEquals(numOfItems1, actualTieredCacheCount); + for (ICacheKey diskKey : diskCacheKeys) { + assertNotNull(tieredSpilloverCache.get(diskKey)); + } + + tieredSpilloverCache.enableDisableDiskCache(false); // Disable disk cache now. 
+ int numOfItems2 = totalSize - numOfItems1; + for (int iter = 0; iter < numOfItems2; iter++) { + ICacheKey key = getICacheKey(UUID.randomUUID().toString()); + LoadAwareCacheLoader, String> loadAwareCacheLoader = getLoadAwareCacheLoader(); + tieredSpilloverCache.computeIfAbsent(key, loadAwareCacheLoader); + } + for (ICacheKey diskKey : diskCacheKeys) { + assertNull(tieredSpilloverCache.get(diskKey)); // Considering disk cache is disabled, we shouldn't find + // these keys. + } + assertEquals(onHeapCacheSize, onHeapCache.count()); // Should remain same. + assertEquals(0, diskCache.count() - actualDiskCacheCount); // Considering it is disabled now, shouldn't cache + // any more items. + assertEquals(numOfItems2, removalListener.evictionsMetric.count()); // Considering onHeap cache was already + // full, we should all existing onHeap entries being evicted. + assertEquals(0, tieredSpilloverCache.count() - actualTieredCacheCount); // Count still returns disk cache + // entries count as they haven't been cleared yet. + long lastKnownTieredCacheEntriesCount = tieredSpilloverCache.count(); + + // Clear up disk cache keys. + for (ICacheKey diskKey : diskCacheKeys) { + tieredSpilloverCache.invalidate(diskKey); + } + assertEquals(0, diskCache.count()); + assertEquals(lastKnownTieredCacheEntriesCount - diskCacheKeys.size(), tieredSpilloverCache.count()); + + tieredSpilloverCache.invalidateAll(); // Clear up all the keys. 
+ assertEquals(0, tieredSpilloverCache.count()); + } + + public void testTiersDoNotTrackStats() throws Exception { + int onHeapCacheSize = randomIntBetween(10, 30); + int diskCacheSize = randomIntBetween(onHeapCacheSize + 1, 100); + int keyValueSize = 50; + MockCacheRemovalListener removalListener = new MockCacheRemovalListener<>(); + TieredSpilloverCache tieredSpilloverCache = initializeTieredSpilloverCache( + keyValueSize, + diskCacheSize, + removalListener, + Settings.builder() + .put( + OpenSearchOnHeapCacheSettings.getSettingListForCacheType(CacheType.INDICES_REQUEST_CACHE) + .get(MAXIMUM_SIZE_IN_BYTES_KEY) + .getKey(), + onHeapCacheSize * keyValueSize + "b" + ) + .build(), + 0 + ); + + // do some gets to put entries in both tiers + int numMisses = onHeapCacheSize + randomIntBetween(10, 20); + for (int iter = 0; iter < numMisses; iter++) { + ICacheKey key = getICacheKey(UUID.randomUUID().toString()); + LoadAwareCacheLoader, String> tieredCacheLoader = getLoadAwareCacheLoader(); + tieredSpilloverCache.computeIfAbsent(key, tieredCacheLoader); + } + assertNotEquals(new ImmutableCacheStats(0, 0, 0, 0, 0), tieredSpilloverCache.stats().getTotalStats()); + assertEquals(new ImmutableCacheStats(0, 0, 0, 0, 0), tieredSpilloverCache.getOnHeapCache().stats().getTotalStats()); + ImmutableCacheStats diskStats = tieredSpilloverCache.getDiskCache().stats().getTotalStats(); + assertEquals(new ImmutableCacheStats(0, 0, 0, 0, 0), diskStats); + } + + public void testTierStatsAddCorrectly() throws Exception { + /* We expect the total stats to be: + * totalHits = heapHits + diskHits + * totalMisses = diskMisses + * totalEvictions = diskEvictions + * totalSize = heapSize + diskSize + * totalEntries = heapEntries + diskEntries + */ + + int onHeapCacheSize = randomIntBetween(10, 30); + int diskCacheSize = randomIntBetween(onHeapCacheSize + 1, 100); + int keyValueSize = 50; + MockCacheRemovalListener removalListener = new MockCacheRemovalListener<>(); + TieredSpilloverCache 
tieredSpilloverCache = initializeTieredSpilloverCache( + keyValueSize, + diskCacheSize, + removalListener, + Settings.builder() + .put( + OpenSearchOnHeapCacheSettings.getSettingListForCacheType(CacheType.INDICES_REQUEST_CACHE) + .get(MAXIMUM_SIZE_IN_BYTES_KEY) + .getKey(), + onHeapCacheSize * keyValueSize + "b" + ) + .build(), + 0 + ); + + List> usedKeys = new ArrayList<>(); + // Fill the cache, getting some entries + evictions for both tiers + int numMisses = onHeapCacheSize + diskCacheSize + randomIntBetween(10, 20); + for (int iter = 0; iter < numMisses; iter++) { + ICacheKey key = getICacheKey(UUID.randomUUID().toString()); + usedKeys.add(key); + LoadAwareCacheLoader, String> tieredCacheLoader = getLoadAwareCacheLoader(); + tieredSpilloverCache.computeIfAbsent(key, tieredCacheLoader); + } + // Also do some random hits + Random rand = Randomness.get(); + int approxNumHits = 30; + for (int i = 0; i < approxNumHits; i++) { + LoadAwareCacheLoader, String> tieredCacheLoader = getLoadAwareCacheLoader(); + ICacheKey key = usedKeys.get(rand.nextInt(usedKeys.size())); + tieredSpilloverCache.computeIfAbsent(key, tieredCacheLoader); + } + + ImmutableCacheStats totalStats = tieredSpilloverCache.stats().getTotalStats(); + ImmutableCacheStats heapStats = getStatsSnapshotForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_ON_HEAP); + ImmutableCacheStats diskStats = getStatsSnapshotForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_DISK); + + assertEquals(totalStats.getHits(), heapStats.getHits() + diskStats.getHits()); + assertEquals(totalStats.getMisses(), diskStats.getMisses()); + assertEquals(totalStats.getEvictions(), diskStats.getEvictions()); + assertEquals(totalStats.getSizeInBytes(), heapStats.getSizeInBytes() + diskStats.getSizeInBytes()); + assertEquals(totalStats.getItems(), heapStats.getItems() + diskStats.getItems()); + + // Also check the heap stats don't have zero misses or evictions + assertNotEquals(0, heapStats.getMisses()); + assertNotEquals(0, 
heapStats.getEvictions()); + + // Now turn off the disk tier and do more misses and evictions from the heap tier. + // These should be added to the totals, as the disk tier is now absent + long missesBeforeDisablingDiskCache = totalStats.getMisses(); + long evictionsBeforeDisablingDiskCache = totalStats.getEvictions(); + long heapTierEvictionsBeforeDisablingDiskCache = heapStats.getEvictions(); + + clusterSettings.applySettings( + Settings.builder().put(DISK_CACHE_ENABLED_SETTING_MAP.get(CacheType.INDICES_REQUEST_CACHE).getKey(), false).build() + ); + + int newMisses = randomIntBetween(10, 30); + for (int i = 0; i < newMisses; i++) { + LoadAwareCacheLoader, String> tieredCacheLoader = getLoadAwareCacheLoader(); + tieredSpilloverCache.computeIfAbsent(getICacheKey(UUID.randomUUID().toString()), tieredCacheLoader); + } + + totalStats = tieredSpilloverCache.stats().getTotalStats(); + heapStats = getStatsSnapshotForTier(tieredSpilloverCache, TIER_DIMENSION_VALUE_ON_HEAP); + assertEquals(missesBeforeDisablingDiskCache + newMisses, totalStats.getMisses()); + assertEquals(heapTierEvictionsBeforeDisablingDiskCache + newMisses, heapStats.getEvictions()); + assertEquals(evictionsBeforeDisablingDiskCache + newMisses, totalStats.getEvictions()); + + // Turn the disk cache back on in cluster settings for other tests + clusterSettings.applySettings( + Settings.builder().put(DISK_CACHE_ENABLED_SETTING_MAP.get(CacheType.INDICES_REQUEST_CACHE).getKey(), true).build() + ); + + } + private List getMockDimensions() { List dims = new ArrayList<>(); for (String dimensionName : dimensionNames) { @@ -1121,8 +1425,9 @@ private TieredSpilloverCache intializeTieredSpilloverCache( .put(settings) .build() ) + .setClusterSettings(clusterSettings) .build(); - ICache.Factory mockDiskCacheFactory = new MockDiskCache.MockDiskCacheFactory(diskDeliberateDelay, diskCacheSize); + ICache.Factory mockDiskCacheFactory = new MockDiskCache.MockDiskCacheFactory(diskDeliberateDelay, diskCacheSize, false); 
TieredSpilloverCache.Builder builder = new TieredSpilloverCache.Builder().setCacheType( CacheType.INDICES_REQUEST_CACHE @@ -1136,4 +1441,42 @@ private TieredSpilloverCache intializeTieredSpilloverCache( } return builder.build(); } + + // Helper functions for extracting tier aggregated stats. + private long getHitsForTier(TieredSpilloverCache tsc, String tierValue) throws IOException { + return getStatsSnapshotForTier(tsc, tierValue).getHits(); + } + + private long getMissesForTier(TieredSpilloverCache tsc, String tierValue) throws IOException { + return getStatsSnapshotForTier(tsc, tierValue).getMisses(); + } + + private long getEvictionsForTier(TieredSpilloverCache tsc, String tierValue) throws IOException { + return getStatsSnapshotForTier(tsc, tierValue).getEvictions(); + } + + private long getSizeInBytesForTier(TieredSpilloverCache tsc, String tierValue) throws IOException { + return getStatsSnapshotForTier(tsc, tierValue).getSizeInBytes(); + } + + private long getItemsForTier(TieredSpilloverCache tsc, String tierValue) throws IOException { + return getStatsSnapshotForTier(tsc, tierValue).getItems(); + } + + private ImmutableCacheStats getStatsSnapshotForTier(TieredSpilloverCache tsc, String tierValue) throws IOException { + List levelsList = new ArrayList<>(dimensionNames); + levelsList.add(TIER_DIMENSION_NAME); + String[] levels = levelsList.toArray(new String[0]); + ImmutableCacheStatsHolder cacheStats = tsc.stats(levels); + // Since we always use the same list of dimensions from getMockDimensions() in keys for these tests, we can get all the stats values + // for a given tier with a single node in MDCS + List mockDimensions = getMockDimensions(); + mockDimensions.add(tierValue); + ImmutableCacheStats snapshot = cacheStats.getStatsForDimensionValues(mockDimensions); + if (snapshot == null) { + return new ImmutableCacheStats(0, 0, 0, 0, 0); // This can happen if no cache actions have happened for this set of + // dimensions yet + } + return snapshot; + } } 
diff --git a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/RenameProcessor.java b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/RenameProcessor.java index 7564bbdf95f45..6ec3ebb6ace81 100644 --- a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/RenameProcessor.java +++ b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/RenameProcessor.java @@ -52,18 +52,21 @@ public final class RenameProcessor extends AbstractProcessor { private final TemplateScript.Factory field; private final TemplateScript.Factory targetField; private final boolean ignoreMissing; + private final boolean overrideTarget; RenameProcessor( String tag, String description, TemplateScript.Factory field, TemplateScript.Factory targetField, - boolean ignoreMissing + boolean ignoreMissing, + boolean overrideTarget ) { super(tag, description); this.field = field; this.targetField = targetField; this.ignoreMissing = ignoreMissing; + this.overrideTarget = overrideTarget; } TemplateScript.Factory getField() { @@ -78,6 +81,10 @@ boolean isIgnoreMissing() { return ignoreMissing; } + boolean isOverrideTarget() { + return overrideTarget; + } + @Override public IngestDocument execute(IngestDocument document) { String path = document.renderTemplate(field); @@ -94,9 +101,10 @@ public IngestDocument execute(IngestDocument document) { // We fail here if the target field point to an array slot that is out of range. // If we didn't do this then we would fail if we set the value in the target_field // and then on failure processors would not see that value we tried to rename as we already - // removed it. + // removed it. If the target field is out of range, we throw the exception no matter + // what the parameter overrideTarget is. 
String target = document.renderTemplate(targetField); - if (document.hasField(target, true)) { + if (document.hasField(target, true) && !overrideTarget) { throw new IllegalArgumentException("field [" + target + "] already exists"); } @@ -143,7 +151,8 @@ public RenameProcessor create( scriptService ); boolean ignoreMissing = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false); - return new RenameProcessor(processorTag, description, fieldTemplate, targetFieldTemplate, ignoreMissing); + boolean overrideTarget = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "override_target", false); + return new RenameProcessor(processorTag, description, fieldTemplate, targetFieldTemplate, ignoreMissing, overrideTarget); } } } diff --git a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/DotExpanderProcessorTests.java b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/DotExpanderProcessorTests.java index cd912269a593d..73719b24c74ea 100644 --- a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/DotExpanderProcessorTests.java +++ b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/DotExpanderProcessorTests.java @@ -105,6 +105,7 @@ public void testEscapeFields_valueField() throws Exception { null, new TestTemplateService.MockTemplateScript.Factory("foo"), new TestTemplateService.MockTemplateScript.Factory("foo.bar"), + false, false ); processor.execute(document); diff --git a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RenameProcessorFactoryTests.java b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RenameProcessorFactoryTests.java index ec43be97689ee..8ce9203db43ce 100644 --- a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RenameProcessorFactoryTests.java +++ b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RenameProcessorFactoryTests.java @@ -77,6 +77,19 @@ public void 
testCreateWithIgnoreMissing() throws Exception { assertThat(renameProcessor.isIgnoreMissing(), equalTo(true)); } + public void testCreateWithOverrideTarget() throws Exception { + Map config = new HashMap<>(); + config.put("field", "old_field"); + config.put("target_field", "new_field"); + config.put("override_target", true); + String processorTag = randomAlphaOfLength(10); + RenameProcessor renameProcessor = factory.create(null, processorTag, null, config); + assertThat(renameProcessor.getTag(), equalTo(processorTag)); + assertThat(renameProcessor.getField().newInstance(Collections.emptyMap()).execute(), equalTo("old_field")); + assertThat(renameProcessor.getTargetField().newInstance(Collections.emptyMap()).execute(), equalTo("new_field")); + assertThat(renameProcessor.isOverrideTarget(), equalTo(true)); + } + public void testCreateNoFieldPresent() throws Exception { Map config = new HashMap<>(); config.put("target_field", "new_field"); diff --git a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RenameProcessorTests.java b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RenameProcessorTests.java index a600464371af8..ad5b46e924278 100644 --- a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RenameProcessorTests.java +++ b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RenameProcessorTests.java @@ -59,7 +59,7 @@ public void testRename() throws Exception { do { newFieldName = RandomDocumentPicks.randomFieldName(random()); } while (RandomDocumentPicks.canAddField(newFieldName, ingestDocument) == false || newFieldName.equals(fieldName)); - Processor processor = createRenameProcessor(fieldName, newFieldName, false); + Processor processor = createRenameProcessor(fieldName, newFieldName, false, false); processor.execute(ingestDocument); assertThat(ingestDocument.getFieldValue(newFieldName, Object.class), equalTo(fieldValue)); } @@ -77,7 +77,7 @@ public void testRenameArrayElement() throws Exception { 
document.put("one", one); IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document); - Processor processor = createRenameProcessor("list.0", "item", false); + Processor processor = createRenameProcessor("list.0", "item", false, false); processor.execute(ingestDocument); Object actualObject = ingestDocument.getSourceAndMetadata().get("list"); assertThat(actualObject, instanceOf(List.class)); @@ -90,7 +90,7 @@ public void testRenameArrayElement() throws Exception { assertThat(actualObject, instanceOf(String.class)); assertThat(actualObject, equalTo("item1")); - processor = createRenameProcessor("list.0", "list.3", false); + processor = createRenameProcessor("list.0", "list.3", false, randomBoolean()); try { processor.execute(ingestDocument); fail("processor execute should have failed"); @@ -105,7 +105,7 @@ public void testRenameArrayElement() throws Exception { public void testRenameNonExistingField() throws Exception { IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), new HashMap<>()); String fieldName = RandomDocumentPicks.randomFieldName(random()); - Processor processor = createRenameProcessor(fieldName, RandomDocumentPicks.randomFieldName(random()), false); + Processor processor = createRenameProcessor(fieldName, RandomDocumentPicks.randomFieldName(random()), false, false); try { processor.execute(ingestDocument); fail("processor execute should have failed"); @@ -114,7 +114,7 @@ public void testRenameNonExistingField() throws Exception { } // when using template snippet, the resolved field path maybe empty - processor = createRenameProcessor("", RandomDocumentPicks.randomFieldName(random()), false); + processor = createRenameProcessor("", RandomDocumentPicks.randomFieldName(random()), false, false); try { processor.execute(ingestDocument); fail("processor execute should have failed"); @@ -127,30 +127,36 @@ public void testRenameNonExistingFieldWithIgnoreMissing() throws Exception { IngestDocument 
originalIngestDocument = RandomDocumentPicks.randomIngestDocument(random(), new HashMap<>()); IngestDocument ingestDocument = new IngestDocument(originalIngestDocument); String fieldName = RandomDocumentPicks.randomFieldName(random()); - Processor processor = createRenameProcessor(fieldName, RandomDocumentPicks.randomFieldName(random()), true); + Processor processor = createRenameProcessor(fieldName, RandomDocumentPicks.randomFieldName(random()), true, false); processor.execute(ingestDocument); assertIngestDocument(originalIngestDocument, ingestDocument); // when using template snippet, the resolved field path maybe empty - processor = createRenameProcessor("", RandomDocumentPicks.randomFieldName(random()), true); + processor = createRenameProcessor("", RandomDocumentPicks.randomFieldName(random()), true, false); processor.execute(ingestDocument); assertIngestDocument(originalIngestDocument, ingestDocument); } public void testRenameNewFieldAlreadyExists() throws Exception { IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random()); - String fieldName = RandomDocumentPicks.randomExistingFieldName(random(), ingestDocument); - Processor processor = createRenameProcessor( - RandomDocumentPicks.randomExistingFieldName(random(), ingestDocument), - fieldName, - false - ); + String field = RandomDocumentPicks.randomExistingFieldName(random(), ingestDocument); + Object fieldValue = ingestDocument.getFieldValue(field, Object.class); + String targetField = RandomDocumentPicks.addRandomField(random(), ingestDocument, RandomDocumentPicks.randomFieldValue(random())); + + Processor processor = createRenameProcessor(field, targetField, false, false); try { processor.execute(ingestDocument); fail("processor execute should have failed"); } catch (IllegalArgumentException e) { - assertThat(e.getMessage(), equalTo("field [" + fieldName + "] already exists")); + assertThat(e.getMessage(), equalTo("field [" + targetField + "] already exists")); } + + Processor 
processorWithOverrideTarget = createRenameProcessor(field, targetField, false, true); + + processorWithOverrideTarget.execute(ingestDocument); + assertThat(ingestDocument.hasField(field), equalTo(false)); + assertThat(ingestDocument.hasField(targetField), equalTo(true)); + assertThat(ingestDocument.getFieldValue(targetField, Object.class), equalTo(fieldValue)); } public void testRenameExistingFieldNullValue() throws Exception { @@ -158,7 +164,7 @@ public void testRenameExistingFieldNullValue() throws Exception { String fieldName = RandomDocumentPicks.randomFieldName(random()); ingestDocument.setFieldValue(fieldName, null); String newFieldName = randomValueOtherThanMany(ingestDocument::hasField, () -> RandomDocumentPicks.randomFieldName(random())); - Processor processor = createRenameProcessor(fieldName, newFieldName, false); + Processor processor = createRenameProcessor(fieldName, newFieldName, false, false); processor.execute(ingestDocument); if (newFieldName.startsWith(fieldName + '.')) { assertThat(ingestDocument.getFieldValue(fieldName, Object.class), instanceOf(Map.class)); @@ -182,7 +188,7 @@ public Object put(String key, Object value) { source.put("list", Collections.singletonList("item")); IngestDocument ingestDocument = new IngestDocument(source, Collections.emptyMap()); - Processor processor = createRenameProcessor("list", "new_field", false); + Processor processor = createRenameProcessor("list", "new_field", false, false); try { processor.execute(ingestDocument); fail("processor execute should have failed"); @@ -206,7 +212,7 @@ public Object remove(Object key) { source.put("list", Collections.singletonList("item")); IngestDocument ingestDocument = new IngestDocument(source, Collections.emptyMap()); - Processor processor = createRenameProcessor("list", "new_field", false); + Processor processor = createRenameProcessor("list", "new_field", false, false); try { processor.execute(ingestDocument); fail("processor execute should have failed"); @@ -221,12 
+227,12 @@ public void testRenameLeafIntoBranch() throws Exception { Map source = new HashMap<>(); source.put("foo", "bar"); IngestDocument ingestDocument = new IngestDocument(source, Collections.emptyMap()); - Processor processor1 = createRenameProcessor("foo", "foo.bar", false); + Processor processor1 = createRenameProcessor("foo", "foo.bar", false, false); processor1.execute(ingestDocument); assertThat(ingestDocument.getFieldValue("foo", Map.class), equalTo(Collections.singletonMap("bar", "bar"))); assertThat(ingestDocument.getFieldValue("foo.bar", String.class), equalTo("bar")); - Processor processor2 = createRenameProcessor("foo.bar", "foo.bar.baz", false); + Processor processor2 = createRenameProcessor("foo.bar", "foo.bar.baz", false, false); processor2.execute(ingestDocument); assertThat( ingestDocument.getFieldValue("foo", Map.class), @@ -236,18 +242,19 @@ public void testRenameLeafIntoBranch() throws Exception { assertThat(ingestDocument.getFieldValue("foo.bar.baz", String.class), equalTo("bar")); // for fun lets try to restore it (which don't allow today) - Processor processor3 = createRenameProcessor("foo.bar.baz", "foo", false); + Processor processor3 = createRenameProcessor("foo.bar.baz", "foo", false, false); Exception e = expectThrows(IllegalArgumentException.class, () -> processor3.execute(ingestDocument)); assertThat(e.getMessage(), equalTo("field [foo] already exists")); } - private RenameProcessor createRenameProcessor(String field, String targetField, boolean ignoreMissing) { + private RenameProcessor createRenameProcessor(String field, String targetField, boolean ignoreMissing, boolean overrideTarget) { return new RenameProcessor( randomAlphaOfLength(10), null, new TestTemplateService.MockTemplateScript.Factory(field), new TestTemplateService.MockTemplateScript.Factory(targetField), - ignoreMissing + ignoreMissing, + overrideTarget ); } } diff --git 
a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/280_rename_processor.yml b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/280_rename_processor.yml index 96b2256bcc1dc..0ef658896ff0a 100644 --- a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/280_rename_processor.yml +++ b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/280_rename_processor.yml @@ -64,3 +64,42 @@ teardown: index: test id: 1 - match: { _source.message: "foo bar baz" } + +--- +"Test rename processor with override_target": + - skip: + version: " - 2.13.99" + reason: "introduced in 2.14.0" + - do: + ingest.put_pipeline: + id: "my_pipeline" + body: > + { + "description": "_description", + "processors": [ + { + "rename" : { + "field" : "foo", + "target_field" : "bar", + "override_target" : true + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 1 + pipeline: "my_pipeline" + body: { + foo: "foo", + bar: "bar" + } + + - do: + get: + index: test + id: 1 + - match: { _source: { "bar": "foo" } } diff --git a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/70_bulk.yml b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/70_bulk.yml index d7be48a92908c..edb7b77eb8d28 100644 --- a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/70_bulk.yml +++ b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/70_bulk.yml @@ -167,3 +167,90 @@ teardown: index: test_index id: test_id3 - match: { _source: {"f1": "v2", "f2": 47, "field1": "value1"}} + +--- +"Test bulk API with batch enabled happy case": + - skip: + version: " - 2.13.99" + reason: "Added in 2.14.0" + + - do: + bulk: + refresh: true + batch_size: 2 + pipeline: "pipeline1" + body: + - '{"index": {"_index": "test_index", "_id": "test_id1"}}' + - '{"text": "text1"}' + - '{"index": {"_index": "test_index", "_id": 
"test_id2"}}' + - '{"text": "text2"}' + - '{"index": {"_index": "test_index", "_id": "test_id3"}}' + - '{"text": "text3"}' + - '{"index": {"_index": "test_index", "_id": "test_id4"}}' + - '{"text": "text4"}' + - '{"index": {"_index": "test_index", "_id": "test_id5", "pipeline": "pipeline2"}}' + - '{"text": "text5"}' + - '{"index": {"_index": "test_index", "_id": "test_id6", "pipeline": "pipeline2"}}' + - '{"text": "text6"}' + + - match: { errors: false } + + - do: + get: + index: test_index + id: test_id5 + - match: { _source: {"text": "text5", "field2": "value2"}} + + - do: + get: + index: test_index + id: test_id3 + - match: { _source: { "text": "text3", "field1": "value1" } } + +--- +"Test bulk API with batch_size missing": + - skip: + version: " - 2.13.99" + reason: "Added in 2.14.0" + + - do: + bulk: + refresh: true + pipeline: "pipeline1" + body: + - '{"index": {"_index": "test_index", "_id": "test_id1"}}' + - '{"text": "text1"}' + - '{"index": {"_index": "test_index", "_id": "test_id2"}}' + - '{"text": "text2"}' + + - match: { errors: false } + + - do: + get: + index: test_index + id: test_id1 + - match: { _source: { "text": "text1", "field1": "value1" } } + + - do: + get: + index: test_index + id: test_id2 + - match: { _source: { "text": "text2", "field1": "value1" } } + +--- +"Test bulk API with invalid batch_size": + - skip: + version: " - 2.13.99" + reason: "Added in 2.14.0" + + - do: + catch: bad_request + bulk: + refresh: true + batch_size: -1 + pipeline: "pipeline1" + body: + - '{"index": {"_index": "test_index", "_id": "test_id1"}}' + - '{"text": "text1"}' + - '{"index": {"_index": "test_index", "_id": "test_id2"}}' + - '{"text": "text2"}' diff --git a/modules/lang-mustache/build.gradle b/modules/lang-mustache/build.gradle index 14eafd8d43e13..bcf5c07ea8c64 100644 --- a/modules/lang-mustache/build.gradle +++ b/modules/lang-mustache/build.gradle @@ -38,7 +38,7 @@ opensearchplugin { } dependencies { - api 
"com.github.spullara.mustache.java:compiler:0.9.10" + api "com.github.spullara.mustache.java:compiler:0.9.13" } restResources { diff --git a/modules/lang-mustache/licenses/compiler-0.9.10.jar.sha1 b/modules/lang-mustache/licenses/compiler-0.9.10.jar.sha1 deleted file mode 100644 index 6336318c2ce1a..0000000000000 --- a/modules/lang-mustache/licenses/compiler-0.9.10.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -6111ae24e3be9ecbd75f5fe908583fc14b4f0174 \ No newline at end of file diff --git a/modules/lang-mustache/licenses/compiler-0.9.13.jar.sha1 b/modules/lang-mustache/licenses/compiler-0.9.13.jar.sha1 new file mode 100644 index 0000000000000..70d53aac260eb --- /dev/null +++ b/modules/lang-mustache/licenses/compiler-0.9.13.jar.sha1 @@ -0,0 +1 @@ +60666500a7dce7a5d3e17c09b46ea6f037192bd5 \ No newline at end of file diff --git a/modules/lang-mustache/src/yamlRestTest/resources/rest-api-spec/test/lang_mustache/20_render_search_template.yml b/modules/lang-mustache/src/yamlRestTest/resources/rest-api-spec/test/lang_mustache/20_render_search_template.yml index 946b63a65d923..a0f6828e66d79 100644 --- a/modules/lang-mustache/src/yamlRestTest/resources/rest-api-spec/test/lang_mustache/20_render_search_template.yml +++ b/modules/lang-mustache/src/yamlRestTest/resources/rest-api-spec/test/lang_mustache/20_render_search_template.yml @@ -47,7 +47,7 @@ - match: { template_output.aggs.my_terms.terms.field: "my_other_field" } - do: - catch: /Improperly.closed.variable.in.query-template/ + catch: /Improperly.closed.variable:.my_value.in.query-template/ render_search_template: body: { "source": { "query": { "match": { "text": "{{{my_value}}" } }, "aggs": { "my_terms": { "terms": { "field": "{{my_field}}" } } } }, "params": { "my_value": "bar", "my_field": "field1" } } --- @@ -99,7 +99,7 @@ - match: { template_output.size: 100 } - do: - catch: /Improperly.closed.variable.in.query-template/ + catch: /Improperly.closed.variable:.my_value.in.query-template/ render_search_template: body: { 
"source": "{ \"query\": { \"match\": { \"text\": \"{{{my_value}}\" } }, \"size\": {{my_size}} }", "params": { "my_value": "bar", "my_size": 100 } } diff --git a/modules/rank-eval/src/internalClusterTest/java/org/opensearch/index/rankeval/RankEvalRequestIT.java b/modules/rank-eval/src/internalClusterTest/java/org/opensearch/index/rankeval/RankEvalRequestIT.java index 488c2e33648e7..0e3db9d1c78b3 100644 --- a/modules/rank-eval/src/internalClusterTest/java/org/opensearch/index/rankeval/RankEvalRequestIT.java +++ b/modules/rank-eval/src/internalClusterTest/java/org/opensearch/index/rankeval/RankEvalRequestIT.java @@ -345,7 +345,7 @@ public void testIndicesOptions() { request.indicesOptions(IndicesOptions.fromParameters("closed", null, null, "false", SearchRequest.DEFAULT_INDICES_OPTIONS)); response = client().execute(RankEvalAction.INSTANCE, request).actionGet(); assertEquals(1, response.getFailures().size()); - assertThat(response.getFailures().get("amsterdam_query"), instanceOf(IndexClosedException.class)); + assertThat(response.getFailures().get("amsterdam_query"), instanceOf(IllegalArgumentException.class)); // test allow_no_indices request = new RankEvalRequest(task, new String[] { "bad*" }); diff --git a/plugins/cache-ehcache/src/main/java/org/opensearch/cache/store/disk/EhcacheDiskCache.java b/plugins/cache-ehcache/src/main/java/org/opensearch/cache/store/disk/EhcacheDiskCache.java index 185d51732a116..9a4dce1067b61 100644 --- a/plugins/cache-ehcache/src/main/java/org/opensearch/cache/store/disk/EhcacheDiskCache.java +++ b/plugins/cache-ehcache/src/main/java/org/opensearch/cache/store/disk/EhcacheDiskCache.java @@ -25,7 +25,9 @@ import org.opensearch.common.cache.serializer.ICacheKeySerializer; import org.opensearch.common.cache.serializer.Serializer; import org.opensearch.common.cache.stats.CacheStatsHolder; +import org.opensearch.common.cache.stats.DefaultCacheStatsHolder; import org.opensearch.common.cache.stats.ImmutableCacheStatsHolder; +import 
org.opensearch.common.cache.stats.NoopCacheStatsHolder; import org.opensearch.common.cache.store.builders.ICacheBuilder; import org.opensearch.common.cache.store.config.CacheConfig; import org.opensearch.common.collect.Tuple; @@ -162,7 +164,13 @@ private EhcacheDiskCache(Builder builder) { this.ehCacheEventListener = new EhCacheEventListener(builder.getRemovalListener(), builder.getWeigher()); this.cache = buildCache(Duration.ofMillis(expireAfterAccess.getMillis()), builder); List dimensionNames = Objects.requireNonNull(builder.dimensionNames, "Dimension names can't be null"); - this.cacheStatsHolder = new CacheStatsHolder(dimensionNames); + if (builder.getStatsTrackingEnabled()) { + // If this cache is being used, FeatureFlags.PLUGGABLE_CACHE is already on, so we can always use the DefaultCacheStatsHolder + // unless statsTrackingEnabled is explicitly set to false in CacheConfig. + this.cacheStatsHolder = new DefaultCacheStatsHolder(dimensionNames, EhcacheDiskCacheFactory.EHCACHE_DISK_CACHE_NAME); + } else { + this.cacheStatsHolder = NoopCacheStatsHolder.getInstance(); + } } @SuppressWarnings({ "rawtypes" }) @@ -412,6 +420,7 @@ public Iterable> keys() { /** * Gives the current count of keys in disk cache. + * If enableStatsTracking is set to false in the builder, always returns 0. * @return current count of keys */ @Override @@ -444,12 +453,13 @@ public void close() { } /** - * Relevant stats for this cache. - * @return CacheStats + * Relevant stats for this cache, aggregated by levels. + * @param levels The levels to aggregate by. + * @return ImmutableCacheStatsHolder */ @Override - public ImmutableCacheStatsHolder stats() { - return cacheStatsHolder.getImmutableCacheStatsHolder(); + public ImmutableCacheStatsHolder stats(String[] levels) { + return cacheStatsHolder.getImmutableCacheStatsHolder(levels); } /** @@ -508,7 +518,7 @@ private long getNewValuePairSize(CacheEvent, ? extends By public void onEvent(CacheEvent, ? 
extends ByteArrayWrapper> event) { switch (event.getType()) { case CREATED: - cacheStatsHolder.incrementEntries(event.getKey().dimensions); + cacheStatsHolder.incrementItems(event.getKey().dimensions); cacheStatsHolder.incrementSizeInBytes(event.getKey().dimensions, getNewValuePairSize(event)); assert event.getOldValue() == null; break; @@ -516,7 +526,7 @@ public void onEvent(CacheEvent, ? extends ByteArrayWrappe this.removalListener.onRemoval( new RemovalNotification<>(event.getKey(), deserializeValue(event.getOldValue()), RemovalReason.EVICTED) ); - cacheStatsHolder.decrementEntries(event.getKey().dimensions); + cacheStatsHolder.decrementItems(event.getKey().dimensions); cacheStatsHolder.decrementSizeInBytes(event.getKey().dimensions, getOldValuePairSize(event)); cacheStatsHolder.incrementEvictions(event.getKey().dimensions); assert event.getNewValue() == null; @@ -525,7 +535,7 @@ public void onEvent(CacheEvent, ? extends ByteArrayWrappe this.removalListener.onRemoval( new RemovalNotification<>(event.getKey(), deserializeValue(event.getOldValue()), RemovalReason.EXPLICIT) ); - cacheStatsHolder.decrementEntries(event.getKey().dimensions); + cacheStatsHolder.decrementItems(event.getKey().dimensions); cacheStatsHolder.decrementSizeInBytes(event.getKey().dimensions, getOldValuePairSize(event)); assert event.getNewValue() == null; break; @@ -533,7 +543,7 @@ public void onEvent(CacheEvent, ? 
extends ByteArrayWrappe this.removalListener.onRemoval( new RemovalNotification<>(event.getKey(), deserializeValue(event.getOldValue()), RemovalReason.INVALIDATED) ); - cacheStatsHolder.decrementEntries(event.getKey().dimensions); + cacheStatsHolder.decrementItems(event.getKey().dimensions); cacheStatsHolder.decrementSizeInBytes(event.getKey().dimensions, getOldValuePairSize(event)); assert event.getNewValue() == null; break; diff --git a/plugins/cache-ehcache/src/test/java/org/opensearch/cache/store/disk/EhCacheDiskCacheTests.java b/plugins/cache-ehcache/src/test/java/org/opensearch/cache/store/disk/EhCacheDiskCacheTests.java index 06ebed08d7525..29551befd3e9f 100644 --- a/plugins/cache-ehcache/src/test/java/org/opensearch/cache/store/disk/EhCacheDiskCacheTests.java +++ b/plugins/cache-ehcache/src/test/java/org/opensearch/cache/store/disk/EhCacheDiskCacheTests.java @@ -93,7 +93,7 @@ public void testBasicGetAndPut() throws IOException { String value = ehcacheTest.get(getICacheKey(entry.getKey())); assertEquals(entry.getValue(), value); } - assertEquals(randomKeys, ehcacheTest.stats().getTotalEntries()); + assertEquals(randomKeys, ehcacheTest.stats().getTotalItems()); assertEquals(randomKeys, ehcacheTest.stats().getTotalHits()); assertEquals(expectedSize, ehcacheTest.stats().getTotalSizeInBytes()); assertEquals(randomKeys, ehcacheTest.count()); @@ -217,7 +217,7 @@ public void testConcurrentPut() throws Exception { assertEquals(entry.getValue(), value); } assertEquals(randomKeys, ehcacheTest.count()); - assertEquals(randomKeys, ehcacheTest.stats().getTotalEntries()); + assertEquals(randomKeys, ehcacheTest.stats().getTotalItems()); ehcacheTest.close(); } } @@ -416,7 +416,7 @@ public String load(ICacheKey key) { assertEquals(1, numberOfTimesValueLoaded); assertEquals(0, ((EhcacheDiskCache) ehcacheTest).getCompletableFutureMap().size()); assertEquals(1, ehcacheTest.stats().getTotalMisses()); - assertEquals(1, ehcacheTest.stats().getTotalEntries()); + assertEquals(1, 
ehcacheTest.stats().getTotalItems()); assertEquals(numberOfRequest - 1, ehcacheTest.stats().getTotalHits()); assertEquals(1, ehcacheTest.count()); ehcacheTest.close(); @@ -829,7 +829,8 @@ public void testInvalidateWithDropDimensions() throws Exception { ICacheKey keyToDrop = keysAdded.get(0); - ImmutableCacheStats snapshot = ehCacheDiskCachingTier.stats().getStatsForDimensionValues(keyToDrop.dimensions); + String[] levels = dimensionNames.toArray(new String[0]); + ImmutableCacheStats snapshot = ehCacheDiskCachingTier.stats(levels).getStatsForDimensionValues(keyToDrop.dimensions); assertNotNull(snapshot); keyToDrop.setDropStatsForDimensions(true); @@ -837,7 +838,7 @@ public void testInvalidateWithDropDimensions() throws Exception { // Now assert the stats are gone for any key that has this combination of dimensions, but still there otherwise for (ICacheKey keyAdded : keysAdded) { - snapshot = ehCacheDiskCachingTier.stats().getStatsForDimensionValues(keyAdded.dimensions); + snapshot = ehCacheDiskCachingTier.stats(levels).getStatsForDimensionValues(keyAdded.dimensions); if (keyAdded.dimensions.equals(keyToDrop.dimensions)) { assertNull(snapshot); } else { @@ -849,6 +850,38 @@ public void testInvalidateWithDropDimensions() throws Exception { } } + public void testStatsTrackingDisabled() throws Exception { + Settings settings = Settings.builder().build(); + MockRemovalListener removalListener = new MockRemovalListener<>(); + ToLongBiFunction, String> weigher = getWeigher(); + try (NodeEnvironment env = newNodeEnvironment(settings)) { + ICache ehcacheTest = new EhcacheDiskCache.Builder().setThreadPoolAlias("ehcacheTest") + .setStoragePath(env.nodePaths()[0].indicesPath.toString() + "/request_cache") + .setIsEventListenerModeSync(true) + .setKeyType(String.class) + .setValueType(String.class) + .setKeySerializer(new StringSerializer()) + .setValueSerializer(new StringSerializer()) + .setDimensionNames(List.of(dimensionName)) + 
.setCacheType(CacheType.INDICES_REQUEST_CACHE) + .setSettings(settings) + .setExpireAfterAccess(TimeValue.MAX_VALUE) + .setMaximumWeightInBytes(CACHE_SIZE_IN_BYTES) + .setRemovalListener(removalListener) + .setWeigher(weigher) + .setStatsTrackingEnabled(false) + .build(); + int randomKeys = randomIntBetween(10, 100); + for (int i = 0; i < randomKeys; i++) { + ICacheKey iCacheKey = getICacheKey(UUID.randomUUID().toString()); + ehcacheTest.put(iCacheKey, UUID.randomUUID().toString()); + assertEquals(0, ehcacheTest.count()); // Expect count of 0 if NoopCacheStatsHolder is used + assertEquals(new ImmutableCacheStats(0, 0, 0, 0, 0), ehcacheTest.stats().getTotalStats()); + } + ehcacheTest.close(); + } + } + private List getRandomDimensions(List dimensionNames) { Random rand = Randomness.get(); int bound = 3; diff --git a/plugins/discovery-gce/build.gradle b/plugins/discovery-gce/build.gradle index 92cdda59d1c99..80aae03bc0332 100644 --- a/plugins/discovery-gce/build.gradle +++ b/plugins/discovery-gce/build.gradle @@ -18,7 +18,7 @@ opensearchplugin { } dependencies { - api "com.google.apis:google-api-services-compute:v1-rev235-1.25.0" + api "com.google.apis:google-api-services-compute:v1-rev20240407-2.0.0" api "com.google.api-client:google-api-client:1.35.2" api "com.google.oauth-client:google-oauth-client:1.35.0" api "com.google.http-client:google-http-client:${versions.google_http_client}" diff --git a/plugins/discovery-gce/licenses/google-api-services-compute-v1-rev20240407-2.0.0.jar.sha1 b/plugins/discovery-gce/licenses/google-api-services-compute-v1-rev20240407-2.0.0.jar.sha1 new file mode 100644 index 0000000000000..834d718641a51 --- /dev/null +++ b/plugins/discovery-gce/licenses/google-api-services-compute-v1-rev20240407-2.0.0.jar.sha1 @@ -0,0 +1 @@ +edf93bc92c9b87fee51aa6c3545b565e58075c05 \ No newline at end of file diff --git a/plugins/discovery-gce/licenses/google-api-services-compute-v1-rev235-1.25.0.jar.sha1 
b/plugins/discovery-gce/licenses/google-api-services-compute-v1-rev235-1.25.0.jar.sha1 deleted file mode 100644 index f79af846281de..0000000000000 --- a/plugins/discovery-gce/licenses/google-api-services-compute-v1-rev235-1.25.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -67bf1ac84286b4f9ea996a90f6e91e36dc648aff \ No newline at end of file diff --git a/plugins/repository-gcs/build.gradle b/plugins/repository-gcs/build.gradle index c4b1ab8d6875e..110df89f25de8 100644 --- a/plugins/repository-gcs/build.gradle +++ b/plugins/repository-gcs/build.gradle @@ -61,7 +61,7 @@ dependencies { api 'com.google.api-client:google-api-client:2.2.0' api 'com.google.api.grpc:proto-google-common-protos:2.37.1' - api 'com.google.api.grpc:proto-google-iam-v1:0.12.0' + api 'com.google.api.grpc:proto-google-iam-v1:1.33.0' api "com.google.auth:google-auth-library-credentials:${versions.google_auth}" api "com.google.auth:google-auth-library-oauth2-http:${versions.google_auth}" diff --git a/plugins/repository-gcs/licenses/proto-google-iam-v1-0.12.0.jar.sha1 b/plugins/repository-gcs/licenses/proto-google-iam-v1-0.12.0.jar.sha1 deleted file mode 100644 index 2bfae3456d499..0000000000000 --- a/plugins/repository-gcs/licenses/proto-google-iam-v1-0.12.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -ea312c0250a5d0a7cdd1b20bc2c3259938b79855 \ No newline at end of file diff --git a/plugins/repository-gcs/licenses/proto-google-iam-v1-1.33.0.jar.sha1 b/plugins/repository-gcs/licenses/proto-google-iam-v1-1.33.0.jar.sha1 new file mode 100644 index 0000000000000..ba04056c54697 --- /dev/null +++ b/plugins/repository-gcs/licenses/proto-google-iam-v1-1.33.0.jar.sha1 @@ -0,0 +1 @@ +4766da92d1f36c8b612c1c142d5f3ace3774f098 \ No newline at end of file diff --git a/plugins/repository-hdfs/build.gradle b/plugins/repository-hdfs/build.gradle index cd7175e70e607..e019a878dfcf0 100644 --- a/plugins/repository-hdfs/build.gradle +++ b/plugins/repository-hdfs/build.gradle @@ -70,7 +70,7 @@ dependencies { api 
'com.google.code.gson:gson:2.10.1' runtimeOnly "com.google.guava:guava:${versions.guava}" api "commons-logging:commons-logging:${versions.commonslogging}" - api 'commons-cli:commons-cli:1.6.0' + api 'commons-cli:commons-cli:1.7.0' api "commons-codec:commons-codec:${versions.commonscodec}" api 'commons-collections:commons-collections:3.2.2' api "org.apache.commons:commons-compress:${versions.commonscompress}" diff --git a/plugins/repository-hdfs/licenses/commons-cli-1.6.0.jar.sha1 b/plugins/repository-hdfs/licenses/commons-cli-1.6.0.jar.sha1 deleted file mode 100644 index bb94eda6814ea..0000000000000 --- a/plugins/repository-hdfs/licenses/commons-cli-1.6.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -38166a23afb5bd5520f739b87b3be87f7f0fb96d \ No newline at end of file diff --git a/plugins/repository-hdfs/licenses/commons-cli-1.7.0.jar.sha1 b/plugins/repository-hdfs/licenses/commons-cli-1.7.0.jar.sha1 new file mode 100644 index 0000000000000..759bc9275d346 --- /dev/null +++ b/plugins/repository-hdfs/licenses/commons-cli-1.7.0.jar.sha1 @@ -0,0 +1 @@ +6504b3f17e8bc5adc6b6c8deecc90144d0154075 \ No newline at end of file diff --git a/plugins/repository-s3/src/internalClusterTest/java/org/opensearch/repositories/s3/S3BlobStoreRepositoryTests.java b/plugins/repository-s3/src/internalClusterTest/java/org/opensearch/repositories/s3/S3BlobStoreRepositoryTests.java index da2c6e8c1b0ee..21184380d54a9 100644 --- a/plugins/repository-s3/src/internalClusterTest/java/org/opensearch/repositories/s3/S3BlobStoreRepositoryTests.java +++ b/plugins/repository-s3/src/internalClusterTest/java/org/opensearch/repositories/s3/S3BlobStoreRepositoryTests.java @@ -249,7 +249,22 @@ protected S3Repository createRepository( ClusterService clusterService, RecoverySettings recoverySettings ) { - return new S3Repository(metadata, registry, service, clusterService, recoverySettings, null, null, null, null, null, false) { + return new S3Repository( + metadata, + registry, + service, + clusterService, + 
recoverySettings, + null, + null, + null, + null, + null, + false, + null, + null, + null + ) { @Override public BlobStore blobStore() { diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/GenericStatsMetricPublisher.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/GenericStatsMetricPublisher.java new file mode 100644 index 0000000000000..136fd68223354 --- /dev/null +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/GenericStatsMetricPublisher.java @@ -0,0 +1,90 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.repositories.s3; + +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; + +/** + * Generic stats of repository-s3 plugin. + */ +public class GenericStatsMetricPublisher { + + private final AtomicLong normalPriorityQSize = new AtomicLong(); + private final AtomicInteger normalPriorityPermits = new AtomicInteger(); + private final AtomicLong lowPriorityQSize = new AtomicLong(); + private final AtomicInteger lowPriorityPermits = new AtomicInteger(); + private final long normalPriorityQCapacity; + private final int maxNormalPriorityPermits; + private final long lowPriorityQCapacity; + private final int maxLowPriorityPermits; + + public GenericStatsMetricPublisher( + long normalPriorityQCapacity, + int maxNormalPriorityPermits, + long lowPriorityQCapacity, + int maxLowPriorityPermits + ) { + this.normalPriorityQCapacity = normalPriorityQCapacity; + this.maxNormalPriorityPermits = maxNormalPriorityPermits; + this.lowPriorityQCapacity = lowPriorityQCapacity; + this.maxLowPriorityPermits = maxLowPriorityPermits; + } + + public void updateNormalPriorityQSize(long qSize) { + normalPriorityQSize.addAndGet(qSize); + 
} + + public void updateLowPriorityQSize(long qSize) { + lowPriorityQSize.addAndGet(qSize); + } + + public void updateNormalPermits(boolean increment) { + if (increment) { + normalPriorityPermits.incrementAndGet(); + } else { + normalPriorityPermits.decrementAndGet(); + } + } + + public void updateLowPermits(boolean increment) { + if (increment) { + lowPriorityPermits.incrementAndGet(); + } else { + lowPriorityPermits.decrementAndGet(); + } + } + + public long getNormalPriorityQSize() { + return normalPriorityQSize.get(); + } + + public int getAcquiredNormalPriorityPermits() { + return normalPriorityPermits.get(); + } + + public long getLowPriorityQSize() { + return lowPriorityQSize.get(); + } + + public int getAcquiredLowPriorityPermits() { + return lowPriorityPermits.get(); + } + + Map stats() { + final Map results = new HashMap<>(); + results.put("NormalPriorityQUtilization", (normalPriorityQSize.get() * 100) / normalPriorityQCapacity); + results.put("LowPriorityQUtilization", (lowPriorityQSize.get() * 100) / lowPriorityQCapacity); + results.put("NormalPriorityPermitsUtilization", (normalPriorityPermits.get() * 100L) / maxNormalPriorityPermits); + results.put("LowPriorityPermitsUtilization", (lowPriorityPermits.get() * 100L) / maxLowPriorityPermits); + return results; + } +} diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobContainer.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobContainer.java index 14829a066ca3a..acf0c5e83a17b 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobContainer.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobContainer.java @@ -90,6 +90,7 @@ import org.opensearch.core.common.Strings; import org.opensearch.core.common.unit.ByteSizeUnit; import org.opensearch.core.common.unit.ByteSizeValue; +import org.opensearch.repositories.s3.async.SizeBasedBlockingQ; import 
org.opensearch.repositories.s3.async.UploadRequest; import org.opensearch.repositories.s3.utils.HttpRangeUtils; @@ -218,7 +219,14 @@ public void asyncBlobUpload(WriteContext writeContext, ActionListener comp writeContext.getMetadata() ); try { - if (uploadRequest.getContentLength() > ByteSizeUnit.GB.toBytes(10) && blobStore.isRedirectLargeUploads()) { + // If file size is greater than the queue capacity then SizeBasedBlockingQ will always reject the upload. + // Therefore, redirecting it to slow client. + if ((uploadRequest.getWritePriority() == WritePriority.LOW + && blobStore.getLowPrioritySizeBasedBlockingQ().isMaxCapacityBelowContentLength(uploadRequest.getContentLength()) == false) + || (uploadRequest.getWritePriority() != WritePriority.HIGH + && uploadRequest.getWritePriority() != WritePriority.URGENT + && blobStore.getNormalPrioritySizeBasedBlockingQ() + .isMaxCapacityBelowContentLength(uploadRequest.getContentLength()) == false)) { StreamContext streamContext = SocketAccess.doPrivileged( () -> writeContext.getStreamProvider(uploadRequest.getContentLength()) ); @@ -258,16 +266,30 @@ public void asyncBlobUpload(WriteContext writeContext, ActionListener comp } else { s3AsyncClient = amazonS3Reference.get().client(); } - CompletableFuture completableFuture = blobStore.getAsyncTransferManager() - .uploadObject(s3AsyncClient, uploadRequest, streamContext, blobStore.getStatsMetricPublisher()); - completableFuture.whenComplete((response, throwable) -> { - if (throwable == null) { - completionListener.onResponse(response); - } else { - Exception ex = throwable instanceof Error ?
new Exception(throwable) : (Exception) throwable; - completionListener.onFailure(ex); - } - }); + + if (writeContext.getWritePriority() == WritePriority.URGENT + || writeContext.getWritePriority() == WritePriority.HIGH + || blobStore.isPermitBackedTransferEnabled() == false) { + createFileCompletableFuture(s3AsyncClient, uploadRequest, streamContext, completionListener); + } else if (writeContext.getWritePriority() == WritePriority.LOW) { + blobStore.getLowPrioritySizeBasedBlockingQ() + .produce( + new SizeBasedBlockingQ.Item( + writeContext.getFileSize(), + () -> createFileCompletableFuture(s3AsyncClient, uploadRequest, streamContext, completionListener) + ) + ); + } else if (writeContext.getWritePriority() == WritePriority.NORMAL) { + blobStore.getNormalPrioritySizeBasedBlockingQ() + .produce( + new SizeBasedBlockingQ.Item( + writeContext.getFileSize(), + () -> createFileCompletableFuture(s3AsyncClient, uploadRequest, streamContext, completionListener) + ) + ); + } else { + throw new IllegalStateException("Cannot perform upload for other priority types."); + } } } catch (Exception e) { logger.info("exception error from blob container for file {}", writeContext.getFileName()); @@ -275,6 +297,24 @@ public void asyncBlobUpload(WriteContext writeContext, ActionListener comp } } + private CompletableFuture createFileCompletableFuture( + S3AsyncClient s3AsyncClient, + UploadRequest uploadRequest, + StreamContext streamContext, + ActionListener completionListener + ) { + CompletableFuture completableFuture = blobStore.getAsyncTransferManager() + .uploadObject(s3AsyncClient, uploadRequest, streamContext, blobStore.getStatsMetricPublisher()); + return completableFuture.whenComplete((response, throwable) -> { + if (throwable == null) { + completionListener.onResponse(response); + } else { + Exception ex = throwable instanceof Error ? 
new Exception(throwable) : (Exception) throwable; + completionListener.onFailure(ex); + } + }); + } + @ExperimentalApi @Override public void readBlobAsync(String blobName, ActionListener listener) { diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobStore.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobStore.java index fc70fbb0db00e..de815f9202f44 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobStore.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobStore.java @@ -45,6 +45,7 @@ import org.opensearch.core.common.unit.ByteSizeValue; import org.opensearch.repositories.s3.async.AsyncExecutorContainer; import org.opensearch.repositories.s3.async.AsyncTransferManager; +import org.opensearch.repositories.s3.async.SizeBasedBlockingQ; import java.io.IOException; import java.util.Collections; @@ -56,6 +57,7 @@ import static org.opensearch.repositories.s3.S3Repository.BUFFER_SIZE_SETTING; import static org.opensearch.repositories.s3.S3Repository.BULK_DELETE_SIZE; import static org.opensearch.repositories.s3.S3Repository.CANNED_ACL_SETTING; +import static org.opensearch.repositories.s3.S3Repository.PERMIT_BACKED_TRANSFER_ENABLED; import static org.opensearch.repositories.s3.S3Repository.REDIRECT_LARGE_S3_UPLOAD; import static org.opensearch.repositories.s3.S3Repository.SERVER_SIDE_ENCRYPTION_SETTING; import static org.opensearch.repositories.s3.S3Repository.STORAGE_CLASS_SETTING; @@ -77,6 +79,8 @@ class S3BlobStore implements BlobStore { private volatile boolean uploadRetryEnabled; + private volatile boolean permitBackedTransferEnabled; + private volatile boolean serverSideEncryption; private volatile ObjectCannedACL cannedACL; @@ -94,6 +98,9 @@ class S3BlobStore implements BlobStore { private final AsyncExecutorContainer priorityExecutorBuilder; private final AsyncExecutorContainer normalExecutorBuilder; private final boolean 
multipartUploadEnabled; + private final SizeBasedBlockingQ normalPrioritySizeBasedBlockingQ; + private final SizeBasedBlockingQ lowPrioritySizeBasedBlockingQ; + private final GenericStatsMetricPublisher genericStatsMetricPublisher; S3BlobStore( S3Service service, @@ -109,7 +116,10 @@ class S3BlobStore implements BlobStore { AsyncTransferManager asyncTransferManager, AsyncExecutorContainer urgentExecutorBuilder, AsyncExecutorContainer priorityExecutorBuilder, - AsyncExecutorContainer normalExecutorBuilder + AsyncExecutorContainer normalExecutorBuilder, + SizeBasedBlockingQ normalPrioritySizeBasedBlockingQ, + SizeBasedBlockingQ lowPrioritySizeBasedBlockingQ, + GenericStatsMetricPublisher genericStatsMetricPublisher ) { this.service = service; this.s3AsyncService = s3AsyncService; @@ -128,6 +138,10 @@ class S3BlobStore implements BlobStore { // Settings to initialize blobstore with. this.redirectLargeUploads = REDIRECT_LARGE_S3_UPLOAD.get(repositoryMetadata.settings()); this.uploadRetryEnabled = UPLOAD_RETRY_ENABLED.get(repositoryMetadata.settings()); + this.normalPrioritySizeBasedBlockingQ = normalPrioritySizeBasedBlockingQ; + this.lowPrioritySizeBasedBlockingQ = lowPrioritySizeBasedBlockingQ; + this.genericStatsMetricPublisher = genericStatsMetricPublisher; + this.permitBackedTransferEnabled = PERMIT_BACKED_TRANSFER_ENABLED.get(repositoryMetadata.settings()); } @Override @@ -141,6 +155,7 @@ public void reload(RepositoryMetadata repositoryMetadata) { this.bulkDeletesSize = BULK_DELETE_SIZE.get(repositoryMetadata.settings()); this.redirectLargeUploads = REDIRECT_LARGE_S3_UPLOAD.get(repositoryMetadata.settings()); this.uploadRetryEnabled = UPLOAD_RETRY_ENABLED.get(repositoryMetadata.settings()); + this.permitBackedTransferEnabled = PERMIT_BACKED_TRANSFER_ENABLED.get(repositoryMetadata.settings()); } @Override @@ -168,6 +183,10 @@ public boolean isUploadRetryEnabled() { return uploadRetryEnabled; } + public boolean isPermitBackedTransferEnabled() { + return 
permitBackedTransferEnabled; + } + public String bucket() { return bucket; } @@ -184,6 +203,14 @@ public int getBulkDeletesSize() { return bulkDeletesSize; } + public SizeBasedBlockingQ getNormalPrioritySizeBasedBlockingQ() { + return normalPrioritySizeBasedBlockingQ; + } + + public SizeBasedBlockingQ getLowPrioritySizeBasedBlockingQ() { + return lowPrioritySizeBasedBlockingQ; + } + @Override public BlobContainer blobContainer(BlobPath path) { return new S3BlobContainer(path, this); @@ -201,7 +228,9 @@ public void close() throws IOException { @Override public Map stats() { - return statsMetricPublisher.getStats().toMap(); + Map stats = statsMetricPublisher.getStats().toMap(); + stats.putAll(genericStatsMetricPublisher.stats()); + return stats; } @Override @@ -211,6 +240,7 @@ public Map> extendedStats() { } Map> extendedStats = new HashMap<>(); statsMetricPublisher.getExtendedStats().forEach((k, v) -> extendedStats.put(k, v.toMap())); + extendedStats.put(Metric.GENERIC_STATS, genericStatsMetricPublisher.stats()); return extendedStats; } diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3Repository.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3Repository.java index f7772a57c9afd..01b75c0b915f2 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3Repository.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3Repository.java @@ -49,6 +49,7 @@ import org.opensearch.common.settings.SecureSetting; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.concurrent.OpenSearchExecutors; import org.opensearch.core.action.ActionListener; import org.opensearch.core.common.Strings; import org.opensearch.core.common.settings.SecureString; @@ -63,6 +64,7 @@ import org.opensearch.repositories.blobstore.MeteredBlobStoreRepository; import 
org.opensearch.repositories.s3.async.AsyncExecutorContainer; import org.opensearch.repositories.s3.async.AsyncTransferManager; +import org.opensearch.repositories.s3.async.SizeBasedBlockingQ; import org.opensearch.snapshots.SnapshotId; import org.opensearch.snapshots.SnapshotInfo; import org.opensearch.threadpool.Scheduler; @@ -156,6 +158,15 @@ class S3Repository extends MeteredBlobStoreRepository { Setting.Property.NodeScope ); + /** + * Whether normal and low priority uploads are queued in the size based transfer queues; when disabled they are submitted directly. + */ + static final Setting<Boolean> PERMIT_BACKED_TRANSFER_ENABLED = Setting.boolSetting( + "permit_backed_transfer_enabled", + true, + Setting.Property.NodeScope + ); + /** * Whether retry on uploads are enabled. This setting wraps inputstream with buffered stream to enable retries. */ @@ -193,6 +204,37 @@ class S3Repository extends MeteredBlobStoreRepository { true, Setting.Property.NodeScope ); + /** + * Percentage of total available permits allocated to normal priority transfers; the remainder goes to low priority transfers. + */ + public static final Setting<Integer> S3_PRIORITY_PERMIT_ALLOCATION_PERCENT = Setting.intSetting( + "s3_priority_permit_alloc_perc", + 70, + 21, + 80, + Setting.Property.NodeScope + ); + + /** + * Duration in minutes to wait for a permit in case no permit is available. + */ + public static final Setting<Integer> S3_PERMIT_WAIT_DURATION_MIN = Setting.intSetting( + "s3_permit_wait_duration_min", + 5, + 1, + 10, + Setting.Property.NodeScope + ); + + /** + * Number of transfer queue consumers. Defaults to twice the allocated processors, with a minimum of 5. + */ + public static final Setting<Integer> S3_TRANSFER_QUEUE_CONSUMERS = new Setting<>( + "s3_transfer_queue_consumers", + (s) -> Integer.toString(Math.max(5, OpenSearchExecutors.allocatedProcessors(s) * 2)), + (s) -> Setting.parseInt(s, 5, "s3_transfer_queue_consumers"), + Setting.Property.NodeScope + ); /** * Big files can be broken down into chunks during snapshotting if needed. Defaults to 1g.
@@ -252,6 +294,9 @@ class S3Repository extends MeteredBlobStoreRepository { private final AsyncExecutorContainer priorityExecutorBuilder; private final AsyncExecutorContainer normalExecutorBuilder; private final Path pluginConfigPath; + private final SizeBasedBlockingQ normalPrioritySizeBasedBlockingQ; + private final SizeBasedBlockingQ lowPrioritySizeBasedBlockingQ; + private final GenericStatsMetricPublisher genericStatsMetricPublisher; private volatile int bulkDeletesSize; @@ -267,7 +312,10 @@ class S3Repository extends MeteredBlobStoreRepository { final AsyncExecutorContainer priorityExecutorBuilder, final AsyncExecutorContainer normalExecutorBuilder, final S3AsyncService s3AsyncService, - final boolean multipartUploadEnabled + final boolean multipartUploadEnabled, + final SizeBasedBlockingQ normalPrioritySizeBasedBlockingQ, + final SizeBasedBlockingQ lowPrioritySizeBasedBlockingQ, + final GenericStatsMetricPublisher genericStatsMetricPublisher ) { this( metadata, @@ -281,7 +329,10 @@ class S3Repository extends MeteredBlobStoreRepository { normalExecutorBuilder, s3AsyncService, multipartUploadEnabled, - Path.of("") + Path.of(""), + normalPrioritySizeBasedBlockingQ, + lowPrioritySizeBasedBlockingQ, + genericStatsMetricPublisher ); } @@ -300,7 +351,10 @@ class S3Repository extends MeteredBlobStoreRepository { final AsyncExecutorContainer normalExecutorBuilder, final S3AsyncService s3AsyncService, final boolean multipartUploadEnabled, - Path pluginConfigPath + Path pluginConfigPath, + final SizeBasedBlockingQ normalPrioritySizeBasedBlockingQ, + final SizeBasedBlockingQ lowPrioritySizeBasedBlockingQ, + final GenericStatsMetricPublisher genericStatsMetricPublisher ) { super(metadata, namedXContentRegistry, clusterService, recoverySettings, buildLocation(metadata)); this.service = service; @@ -311,6 +365,9 @@ class S3Repository extends MeteredBlobStoreRepository { this.urgentExecutorBuilder = urgentExecutorBuilder; this.priorityExecutorBuilder = 
priorityExecutorBuilder; this.normalExecutorBuilder = normalExecutorBuilder; + this.normalPrioritySizeBasedBlockingQ = normalPrioritySizeBasedBlockingQ; + this.lowPrioritySizeBasedBlockingQ = lowPrioritySizeBasedBlockingQ; + this.genericStatsMetricPublisher = genericStatsMetricPublisher; validateRepositoryMetadata(metadata); readRepositoryMetadata(); @@ -373,7 +430,10 @@ protected S3BlobStore createBlobStore() { asyncUploadUtils, urgentExecutorBuilder, priorityExecutorBuilder, - normalExecutorBuilder + normalExecutorBuilder, + normalPrioritySizeBasedBlockingQ, + lowPrioritySizeBasedBlockingQ, + genericStatsMetricPublisher ); } diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java index e7d2a4d024e60..110d91bfbd822 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java @@ -41,6 +41,9 @@ import org.opensearch.common.unit.TimeValue; import org.opensearch.common.util.concurrent.OpenSearchExecutors; import org.opensearch.core.common.io.stream.NamedWriteableRegistry; +import org.opensearch.core.common.unit.ByteSizeUnit; +import org.opensearch.core.common.unit.ByteSizeValue; +import org.opensearch.core.common.util.CollectionUtils; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.env.Environment; import org.opensearch.env.NodeEnvironment; @@ -53,6 +56,8 @@ import org.opensearch.repositories.s3.async.AsyncExecutorContainer; import org.opensearch.repositories.s3.async.AsyncTransferEventLoopGroup; import org.opensearch.repositories.s3.async.AsyncTransferManager; +import org.opensearch.repositories.s3.async.SizeBasedBlockingQ; +import org.opensearch.repositories.s3.async.TransferSemaphoresHolder; import org.opensearch.script.ScriptService; import 
org.opensearch.threadpool.ExecutorBuilder; import org.opensearch.threadpool.FixedExecutorBuilder; @@ -69,6 +74,8 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.TimeUnit; import java.util.function.Supplier; /** @@ -82,6 +89,8 @@ public class S3RepositoryPlugin extends Plugin implements RepositoryPlugin, Relo private static final String PRIORITY_STREAM_READER = "priority_stream_reader"; private static final String FUTURE_COMPLETION = "future_completion"; private static final String STREAM_READER = "stream_reader"; + private static final String LOW_TRANSFER_QUEUE_CONSUMER = "low_transfer_queue_consumer"; + private static final String NORMAL_TRANSFER_QUEUE_CONSUMER = "normal_transfer_queue_consumer"; protected final S3Service service; private final S3AsyncService s3AsyncService; @@ -91,6 +100,12 @@ public class S3RepositoryPlugin extends Plugin implements RepositoryPlugin, Relo private AsyncExecutorContainer urgentExecutorBuilder; private AsyncExecutorContainer priorityExecutorBuilder; private AsyncExecutorContainer normalExecutorBuilder; + private ExecutorService lowTransferQConsumerService; + private ExecutorService normalTransferQConsumerService; + private SizeBasedBlockingQ normalPrioritySizeBasedBlockingQ; + private SizeBasedBlockingQ lowPrioritySizeBasedBlockingQ; + private TransferSemaphoresHolder transferSemaphoresHolder; + private GenericStatsMetricPublisher genericStatsMetricPublisher; public S3RepositoryPlugin(final Settings settings, final Path configPath) { this(settings, configPath, new S3Service(configPath), new S3AsyncService(configPath)); @@ -120,9 +135,36 @@ public List> getExecutorBuilders(Settings settings) { TimeValue.timeValueMinutes(5) ) ); + executorBuilders.add( + new FixedExecutorBuilder( + settings, + LOW_TRANSFER_QUEUE_CONSUMER, + lowPriorityTransferQConsumers(settings), + 10, + "thread_pool." 
+ LOW_TRANSFER_QUEUE_CONSUMER + ) + ); + executorBuilders.add( + new FixedExecutorBuilder( + settings, + NORMAL_TRANSFER_QUEUE_CONSUMER, + normalPriorityTransferQConsumers(settings), + 10, + "thread_pool." + NORMAL_TRANSFER_QUEUE_CONSUMER + ) + ); return executorBuilders; } + private int lowPriorityTransferQConsumers(Settings settings) { + double lowPriorityAllocation = ((double) (100 - S3Repository.S3_PRIORITY_PERMIT_ALLOCATION_PERCENT.get(settings))) / 100; + return Math.max(2, (int) (lowPriorityAllocation * S3Repository.S3_TRANSFER_QUEUE_CONSUMERS.get(settings))); + } + + private int normalPriorityTransferQConsumers(Settings settings) { + return S3Repository.S3_TRANSFER_QUEUE_CONSUMERS.get(settings); + } + static int halfNumberOfProcessors(int numberOfProcessors) { return (numberOfProcessors + 1) / 2; } @@ -189,7 +231,67 @@ public Collection createComponents( threadPool.executor(STREAM_READER), new AsyncTransferEventLoopGroup(normalEventLoopThreads) ); - return Collections.emptyList(); + + this.lowTransferQConsumerService = threadPool.executor(LOW_TRANSFER_QUEUE_CONSUMER); + this.normalTransferQConsumerService = threadPool.executor(NORMAL_TRANSFER_QUEUE_CONSUMER); + + // High number of permit allocation because each op acquiring permit performs disk IO, computation and network IO. 
+ int availablePermits = Math.max(allocatedProcessors(clusterService.getSettings()) * 4, 10); + double priorityPermitAllocation = ((double) S3Repository.S3_PRIORITY_PERMIT_ALLOCATION_PERCENT.get(clusterService.getSettings())) + / 100; + int normalPriorityPermits = (int) (priorityPermitAllocation * availablePermits); + int lowPriorityPermits = availablePermits - normalPriorityPermits; + + int normalPriorityConsumers = normalPriorityTransferQConsumers(clusterService.getSettings()); + int lowPriorityConsumers = lowPriorityTransferQConsumers(clusterService.getSettings()); + + ByteSizeValue normalPriorityQCapacity = new ByteSizeValue(normalPriorityConsumers * 10L, ByteSizeUnit.GB); + ByteSizeValue lowPriorityQCapacity = new ByteSizeValue(lowPriorityConsumers * 20L, ByteSizeUnit.GB); + + this.genericStatsMetricPublisher = new GenericStatsMetricPublisher( + normalPriorityQCapacity.getBytes(), + normalPriorityPermits, + lowPriorityQCapacity.getBytes(), + lowPriorityPermits + ); + + this.normalPrioritySizeBasedBlockingQ = new SizeBasedBlockingQ( + normalPriorityQCapacity, + normalTransferQConsumerService, + normalPriorityConsumers, + genericStatsMetricPublisher, + SizeBasedBlockingQ.QueueEventType.NORMAL + ); + + LowPrioritySizeBasedBlockingQ lowPrioritySizeBasedBlockingQ = new LowPrioritySizeBasedBlockingQ( + lowPriorityQCapacity, + lowTransferQConsumerService, + lowPriorityConsumers, + genericStatsMetricPublisher + ); + this.lowPrioritySizeBasedBlockingQ = lowPrioritySizeBasedBlockingQ; + this.transferSemaphoresHolder = new TransferSemaphoresHolder( + normalPriorityPermits, + lowPriorityPermits, + S3Repository.S3_PERMIT_WAIT_DURATION_MIN.get(clusterService.getSettings()), + TimeUnit.MINUTES, + genericStatsMetricPublisher + ); + + return CollectionUtils.arrayAsArrayList(this.normalPrioritySizeBasedBlockingQ, lowPrioritySizeBasedBlockingQ); + } + + // New class because in core, components are injected via guice only by instance creation due to which + // same binding types 
fail. + private static final class LowPrioritySizeBasedBlockingQ extends SizeBasedBlockingQ { + public LowPrioritySizeBasedBlockingQ( + ByteSizeValue capacity, + ExecutorService executorService, + int consumers, + GenericStatsMetricPublisher genericStatsMetricPublisher + ) { + super(capacity, executorService, consumers, genericStatsMetricPublisher, QueueEventType.LOW); + } } // proxy method for testing @@ -204,7 +306,8 @@ protected S3Repository createRepository( S3Repository.PARALLEL_MULTIPART_UPLOAD_MINIMUM_PART_SIZE_SETTING.get(clusterService.getSettings()).getBytes(), normalExecutorBuilder.getStreamReader(), priorityExecutorBuilder.getStreamReader(), - urgentExecutorBuilder.getStreamReader() + urgentExecutorBuilder.getStreamReader(), + transferSemaphoresHolder ); return new S3Repository( metadata, @@ -218,7 +321,10 @@ protected S3Repository createRepository( normalExecutorBuilder, s3AsyncService, S3Repository.PARALLEL_MULTIPART_UPLOAD_ENABLED_SETTING.get(clusterService.getSettings()), - configPath + configPath, + normalPrioritySizeBasedBlockingQ, + lowPrioritySizeBasedBlockingQ, + genericStatsMetricPublisher ); } @@ -263,7 +369,9 @@ public List> getSettings() { S3Repository.PARALLEL_MULTIPART_UPLOAD_MINIMUM_PART_SIZE_SETTING, S3Repository.PARALLEL_MULTIPART_UPLOAD_ENABLED_SETTING, S3Repository.REDIRECT_LARGE_S3_UPLOAD, - S3Repository.UPLOAD_RETRY_ENABLED + S3Repository.UPLOAD_RETRY_ENABLED, + S3Repository.S3_PRIORITY_PERMIT_ALLOCATION_PERCENT, + S3Repository.PERMIT_BACKED_TRANSFER_ENABLED ); } diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3TransferRejectedException.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3TransferRejectedException.java new file mode 100644 index 0000000000000..c9fa93ea0f5c3 --- /dev/null +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3TransferRejectedException.java @@ -0,0 +1,20 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch 
Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.repositories.s3; + +import org.opensearch.OpenSearchException; + +/** + * Thrown when transfer event is rejected due to breach in event queue size. + */ +public class S3TransferRejectedException extends OpenSearchException { + public S3TransferRejectedException(String msg) { + super(msg); + } +} diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncPartsHandler.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncPartsHandler.java index b4c4ed0ecaa75..4c95a0ffc5ec3 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncPartsHandler.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncPartsHandler.java @@ -23,6 +23,7 @@ import org.opensearch.common.StreamContext; import org.opensearch.common.blobstore.stream.write.WritePriority; import org.opensearch.common.io.InputStreamContainer; +import org.opensearch.repositories.s3.S3TransferRejectedException; import org.opensearch.repositories.s3.SocketAccess; import org.opensearch.repositories.s3.StatsMetricPublisher; import org.opensearch.repositories.s3.io.CheckedContainer; @@ -34,6 +35,8 @@ import java.util.List; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutorService; +import java.util.concurrent.Semaphore; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReferenceArray; /** @@ -41,7 +44,7 @@ */ public class AsyncPartsHandler { - private static Logger log = LogManager.getLogger(AsyncPartsHandler.class); + private static final Logger log = LogManager.getLogger(AsyncPartsHandler.class); /** * Uploads parts of the upload multipart request* @@ -55,9 +58,10 @@ public class AsyncPartsHandler { * @param completedParts Reference of completed parts 
* @param inputStreamContainers Checksum containers * @param statsMetricPublisher sdk metric publisher + * @param maxRetryablePartSize Max content size which can be used for retries in buffered streams. * @return list of completable futures - * @throws IOException thrown in case of an IO error */ + @SuppressWarnings({ "rawtypes", "unchecked" }) public static List> uploadParts( S3AsyncClient s3AsyncClient, ExecutorService executorService, @@ -69,35 +73,52 @@ public static List> uploadParts( AtomicReferenceArray completedParts, AtomicReferenceArray inputStreamContainers, StatsMetricPublisher statsMetricPublisher, - boolean uploadRetryEnabled - ) throws IOException { + boolean uploadRetryEnabled, + TransferSemaphoresHolder transferSemaphoresHolder, + long maxRetryablePartSize + ) throws InterruptedException { List> futures = new ArrayList<>(); + TransferSemaphoresHolder.RequestContext requestContext = transferSemaphoresHolder.createRequestContext(); for (int partIdx = 0; partIdx < streamContext.getNumberOfParts(); partIdx++) { - InputStreamContainer inputStreamContainer = streamContext.provideStream(partIdx); - inputStreamContainers.set(partIdx, new CheckedContainer(inputStreamContainer.getContentLength())); - UploadPartRequest.Builder uploadPartRequestBuilder = UploadPartRequest.builder() - .bucket(uploadRequest.getBucket()) - .partNumber(partIdx + 1) - .key(uploadRequest.getKey()) - .uploadId(uploadId) - .overrideConfiguration(o -> o.addMetricPublisher(statsMetricPublisher.multipartUploadMetricCollector)) - .contentLength(inputStreamContainer.getContentLength()); - if (uploadRequest.doRemoteDataIntegrityCheck()) { - uploadPartRequestBuilder.checksumAlgorithm(ChecksumAlgorithm.CRC32); - } - uploadPart( - s3AsyncClient, - executorService, - priorityExecutorService, - urgentExecutorService, - completedParts, - inputStreamContainers, - futures, - uploadPartRequestBuilder.build(), - inputStreamContainer, - uploadRequest, - uploadRetryEnabled + Semaphore semaphore = 
maybeAcquireSemaphore( + transferSemaphoresHolder, + requestContext, + uploadRequest.getWritePriority(), + uploadRequest.getKey() ); + try { + InputStreamContainer inputStreamContainer = streamContext.provideStream(partIdx); + inputStreamContainers.set(partIdx, new CheckedContainer(inputStreamContainer.getContentLength())); + UploadPartRequest.Builder uploadPartRequestBuilder = UploadPartRequest.builder() + .bucket(uploadRequest.getBucket()) + .partNumber(partIdx + 1) + .key(uploadRequest.getKey()) + .uploadId(uploadId) + .overrideConfiguration(o -> o.addMetricPublisher(statsMetricPublisher.multipartUploadMetricCollector)) + .contentLength(inputStreamContainer.getContentLength()); + if (uploadRequest.doRemoteDataIntegrityCheck()) { + uploadPartRequestBuilder.checksumAlgorithm(ChecksumAlgorithm.CRC32); + } + uploadPart( + s3AsyncClient, + executorService, + priorityExecutorService, + urgentExecutorService, + completedParts, + inputStreamContainers, + futures, + uploadPartRequestBuilder.build(), + inputStreamContainer, + uploadRequest, + uploadRetryEnabled, + maxRetryablePartSize, + semaphore + ); + } catch (Exception ex) { + if (semaphore != null) { + semaphore.release(); + } + } } return futures; @@ -137,14 +158,54 @@ public static InputStream maybeRetryInputStream( InputStream inputStream, WritePriority writePriority, boolean uploadRetryEnabled, - long contentLength + long contentLength, + long maxRetryablePartSize ) { - if (uploadRetryEnabled == true && (writePriority == WritePriority.HIGH || writePriority == WritePriority.URGENT)) { - return new BufferedInputStream(inputStream, (int) (contentLength + 1)); + // Since we are backing uploads with limited permits, it is ok to use buffered stream. 
Maximum in-memory buffer + // would be (max permits * maxRetryablePartSize) excluding urgent + if (uploadRetryEnabled == true + && (contentLength <= maxRetryablePartSize || writePriority == WritePriority.HIGH || writePriority == WritePriority.URGENT)) { + return new UploadTrackedBufferedInputStream(inputStream, (int) (contentLength + 1)); } return inputStream; } + public static Semaphore maybeAcquireSemaphore( + TransferSemaphoresHolder transferSemaphoresHolder, + TransferSemaphoresHolder.RequestContext requestContext, + WritePriority writePriority, + String file + ) throws InterruptedException { + final TransferSemaphoresHolder.TypeSemaphore semaphore; + if (writePriority != WritePriority.HIGH && writePriority != WritePriority.URGENT) { + semaphore = transferSemaphoresHolder.acquirePermit(writePriority, requestContext); + if (semaphore == null) { + throw new S3TransferRejectedException("Permit not available for transfer of file " + file); + } + } else { + semaphore = null; + } + + return semaphore; + } + + /** + * Overridden stream to identify upload streams among all buffered stream instances for triaging. 
+ */ + static class UploadTrackedBufferedInputStream extends BufferedInputStream { + AtomicBoolean closed = new AtomicBoolean(); + + public UploadTrackedBufferedInputStream(InputStream in, int length) { + super(in, length); + } + + @Override + public void close() throws IOException { + super.close(); + closed.set(true); + } + } + private static void uploadPart( S3AsyncClient s3AsyncClient, ExecutorService executorService, @@ -156,8 +217,11 @@ private static void uploadPart( UploadPartRequest uploadPartRequest, InputStreamContainer inputStreamContainer, UploadRequest uploadRequest, - boolean uploadRetryEnabled + boolean uploadRetryEnabled, + long maxRetryablePartSize, + Semaphore semaphore ) { + Integer partNumber = uploadPartRequest.partNumber(); ExecutorService streamReadExecutor; @@ -173,7 +237,8 @@ private static void uploadPart( inputStreamContainer.getInputStream(), uploadRequest.getWritePriority(), uploadRetryEnabled, - uploadPartRequest.contentLength() + uploadPartRequest.contentLength(), + maxRetryablePartSize ); CompletableFuture uploadPartResponseFuture = SocketAccess.doPrivileged( () -> s3AsyncClient.uploadPart( @@ -183,6 +248,10 @@ private static void uploadPart( ); CompletableFuture convertFuture = uploadPartResponseFuture.whenComplete((resp, throwable) -> { + if (semaphore != null) { + semaphore.release(); + } + try { inputStream.close(); } catch (IOException ex) { diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncTransferManager.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncTransferManager.java index 80538059d17b8..0f9bf3be77d73 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncTransferManager.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncTransferManager.java @@ -21,6 +21,7 @@ import software.amazon.awssdk.services.s3.model.CreateMultipartUploadResponse; import 
software.amazon.awssdk.services.s3.model.DeleteObjectRequest; import software.amazon.awssdk.services.s3.model.PutObjectRequest; +import software.amazon.awssdk.services.s3.model.PutObjectResponse; import software.amazon.awssdk.services.s3.model.S3Exception; import software.amazon.awssdk.utils.CollectionUtils; import software.amazon.awssdk.utils.CompletableFutureUtils; @@ -48,6 +49,7 @@ import java.util.concurrent.CompletableFuture; import java.util.concurrent.CompletionException; import java.util.concurrent.ExecutorService; +import java.util.concurrent.Semaphore; import java.util.concurrent.atomic.AtomicReferenceArray; import java.util.function.BiFunction; import java.util.function.Supplier; @@ -64,6 +66,10 @@ public final class AsyncTransferManager { private final ExecutorService priorityExecutorService; private final ExecutorService urgentExecutorService; private final long minimumPartSize; + private final long maxRetryablePartSize; + + @SuppressWarnings("rawtypes") + private final TransferSemaphoresHolder transferSemaphoresHolder; /** * The max number of parts on S3 side is 10,000 @@ -74,19 +80,22 @@ public final class AsyncTransferManager { * Construct a new object of AsyncTransferManager * * @param minimumPartSize The minimum part size for parallel multipart uploads - * @param executorService The stream reader {@link ExecutorService} for normal priority uploads - * @param priorityExecutorService The stream read {@link ExecutorService} for high priority uploads */ + @SuppressWarnings("rawtypes") public AsyncTransferManager( long minimumPartSize, ExecutorService executorService, ExecutorService priorityExecutorService, - ExecutorService urgentExecutorService + ExecutorService urgentExecutorService, + TransferSemaphoresHolder transferSemaphoresHolder ) { this.executorService = executorService; this.priorityExecutorService = priorityExecutorService; this.minimumPartSize = minimumPartSize; + // 10% buffer to allow additional metadata size in content such as 
encryption. + this.maxRetryablePartSize = (long) (minimumPartSize + 0.1 * minimumPartSize); this.urgentExecutorService = urgentExecutorService; + this.transferSemaphoresHolder = transferSemaphoresHolder; } /** @@ -108,7 +117,21 @@ public CompletableFuture uploadObject( try { if (streamContext.getNumberOfParts() == 1) { log.debug(() -> "Starting the upload as a single upload part request"); - uploadInOneChunk(s3AsyncClient, uploadRequest, streamContext.provideStream(0), returnFuture, statsMetricPublisher); + TransferSemaphoresHolder.RequestContext requestContext = transferSemaphoresHolder.createRequestContext(); + Semaphore semaphore = AsyncPartsHandler.maybeAcquireSemaphore( + transferSemaphoresHolder, + requestContext, + uploadRequest.getWritePriority(), + uploadRequest.getKey() + ); + try { + uploadInOneChunk(s3AsyncClient, uploadRequest, streamContext, returnFuture, statsMetricPublisher, semaphore); + } catch (Exception ex) { + if (semaphore != null) { + semaphore.release(); + } + throw ex; + } } else { log.debug(() -> "Starting the upload as multipart upload request"); uploadInParts(s3AsyncClient, uploadRequest, streamContext, returnFuture, statsMetricPublisher); @@ -146,21 +169,19 @@ private void uploadInParts( // Ensure cancellations are forwarded to the createMultipartUploadFuture future CompletableFutureUtils.forwardExceptionTo(returnFuture, createMultipartUploadFuture); - createMultipartUploadFuture.whenComplete((createMultipartUploadResponse, throwable) -> { - if (throwable != null) { - handleException(returnFuture, () -> "Failed to initiate multipart upload", throwable); - } else { - log.debug(() -> "Initiated new multipart upload, uploadId: " + createMultipartUploadResponse.uploadId()); - doUploadInParts( - s3AsyncClient, - uploadRequest, - streamContext, - returnFuture, - createMultipartUploadResponse.uploadId(), - statsMetricPublisher - ); - } - }); + String uploadId; + try { + // Block main thread here so that upload of parts doesn't get executed in 
future completion thread. + // We should never execute latent operation like acquisition of permit in future completion pool. + CreateMultipartUploadResponse createMultipartUploadResponse = createMultipartUploadFuture.get(); + uploadId = createMultipartUploadResponse.uploadId(); + log.debug(() -> "Initiated new multipart upload, uploadId: " + createMultipartUploadResponse.uploadId()); + } catch (Exception ex) { + handleException(returnFuture, () -> "Failed to initiate multipart upload", ex); + return; + } + + doUploadInParts(s3AsyncClient, uploadRequest, streamContext, returnFuture, uploadId, statsMetricPublisher); } private void doUploadInParts( @@ -189,7 +210,9 @@ private void doUploadInParts( completedParts, inputStreamContainers, statsMetricPublisher, - uploadRequest.isUploadRetryEnabled() + uploadRequest.isUploadRetryEnabled(), + transferSemaphoresHolder, + maxRetryablePartSize ); } catch (Exception ex) { try { @@ -320,12 +343,14 @@ public long calculateOptimalPartSize(long contentLengthOfSource, WritePriority w return (long) Math.max(optimalPartSize, minimumPartSize); } + @SuppressWarnings("unchecked") private void uploadInOneChunk( S3AsyncClient s3AsyncClient, UploadRequest uploadRequest, - InputStreamContainer inputStreamContainer, + StreamContext streamContext, CompletableFuture returnFuture, - StatsMetricPublisher statsMetricPublisher + StatsMetricPublisher statsMetricPublisher, + Semaphore semaphore ) { PutObjectRequest.Builder putObjectRequestBuilder = PutObjectRequest.builder() .bucket(uploadRequest.getBucket()) @@ -340,6 +365,7 @@ private void uploadInOneChunk( putObjectRequestBuilder.checksumAlgorithm(ChecksumAlgorithm.CRC32); putObjectRequestBuilder.checksumCRC32(base64StringFromLong(uploadRequest.getExpectedChecksum())); } + PutObjectRequest putObjectRequest = putObjectRequestBuilder.build(); ExecutorService streamReadExecutor; if (uploadRequest.getWritePriority() == WritePriority.URGENT) { streamReadExecutor = urgentExecutorService; @@ -349,25 
+375,33 @@ private void uploadInOneChunk( streamReadExecutor = executorService; } - InputStream inputStream = AsyncPartsHandler.maybeRetryInputStream( - inputStreamContainer.getInputStream(), - uploadRequest.getWritePriority(), - uploadRequest.isUploadRetryEnabled(), - uploadRequest.getContentLength() - ); - CompletableFuture putObjectFuture = SocketAccess.doPrivileged( - () -> s3AsyncClient.putObject( - putObjectRequestBuilder.build(), - AsyncRequestBody.fromInputStream(inputStream, inputStreamContainer.getContentLength(), streamReadExecutor) - ).handle((resp, throwable) -> { - try { - inputStream.close(); - } catch (IOException e) { - log.error( - () -> new ParameterizedMessage("Failed to close stream while uploading single file {}.", uploadRequest.getKey()), - e - ); - } + CompletableFuture putObjectFuture = SocketAccess.doPrivileged(() -> { + InputStream inputStream = null; + CompletableFuture putObjectRespFuture; + try { + InputStreamContainer inputStreamContainer = streamContext.provideStream(0); + inputStream = AsyncPartsHandler.maybeRetryInputStream( + inputStreamContainer.getInputStream(), + uploadRequest.getWritePriority(), + uploadRequest.isUploadRetryEnabled(), + uploadRequest.getContentLength(), + maxRetryablePartSize + ); + AsyncRequestBody asyncRequestBody = AsyncRequestBody.fromInputStream( + inputStream, + inputStreamContainer.getContentLength(), + streamReadExecutor + ); + putObjectRespFuture = s3AsyncClient.putObject(putObjectRequest, asyncRequestBody); + } catch (Exception e) { + releaseResourcesSafely(semaphore, inputStream, uploadRequest.getKey()); + return CompletableFuture.failedFuture(e); + } + + InputStream finalInputStream = inputStream; + return putObjectRespFuture.handle((resp, throwable) -> { + releaseResourcesSafely(semaphore, finalInputStream, uploadRequest.getKey()); + if (throwable != null) { Throwable unwrappedThrowable = ExceptionsHelper.unwrap(throwable, S3Exception.class); if (unwrappedThrowable != null) { @@ -395,13 +429,27 @@ 
private void uploadInOneChunk( } return null; - }) - ); + }); + }); CompletableFutureUtils.forwardExceptionTo(returnFuture, putObjectFuture); CompletableFutureUtils.forwardResultTo(putObjectFuture, returnFuture); } + private void releaseResourcesSafely(Semaphore semaphore, InputStream inputStream, String file) { + if (semaphore != null) { + semaphore.release(); + } + + if (inputStream != null) { + try { + inputStream.close(); + } catch (IOException e) { + log.error(() -> new ParameterizedMessage("Failed to close stream while uploading single file {}.", file), e); + } + } + } + private void deleteUploadedObject(S3AsyncClient s3AsyncClient, UploadRequest uploadRequest) { DeleteObjectRequest deleteObjectRequest = DeleteObjectRequest.builder() .bucket(uploadRequest.getBucket()) diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/SizeBasedBlockingQ.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/SizeBasedBlockingQ.java new file mode 100644 index 0000000000000..170c80f5d4db6 --- /dev/null +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/SizeBasedBlockingQ.java @@ -0,0 +1,230 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.repositories.s3.async; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.store.AlreadyClosedException; +import org.opensearch.common.lifecycle.AbstractLifecycleComponent; +import org.opensearch.core.common.unit.ByteSizeValue; +import org.opensearch.repositories.s3.GenericStatsMetricPublisher; +import org.opensearch.repositories.s3.S3TransferRejectedException; + +import java.util.concurrent.ExecutorService; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.locks.Condition; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; + +/** + * Queue implementation that accepts transfer events based on their size. If adding an event would reach the queue's capacity, + * the transfer event is rejected. + */ +public class SizeBasedBlockingQ extends AbstractLifecycleComponent { + private static final Logger log = LogManager.getLogger(SizeBasedBlockingQ.class); + + protected final LinkedBlockingQueue queue; + protected final Lock lock; + protected final Condition notEmpty; + + protected final AtomicLong currentSize; + protected final ByteSizeValue capacity; + protected final AtomicBoolean closed; + protected final ExecutorService executorService; + protected final int consumers; + private final GenericStatsMetricPublisher genericStatsMetricPublisher; + private final QueueEventType queueEventType; + + /** + * Constructor to create a size-based blocking queue.
+ */ + public SizeBasedBlockingQ( + ByteSizeValue capacity, + ExecutorService executorService, + int consumers, + GenericStatsMetricPublisher genericStatsMetricPublisher, + QueueEventType queueEventType + ) { + this.queue = new LinkedBlockingQueue<>(); + this.lock = new ReentrantLock(); + this.notEmpty = lock.newCondition(); + this.currentSize = new AtomicLong(); + this.capacity = capacity; + this.closed = new AtomicBoolean(); + this.executorService = executorService; + this.consumers = consumers; + this.genericStatsMetricPublisher = genericStatsMetricPublisher; + this.queueEventType = queueEventType; + } + + public enum QueueEventType { + NORMAL, + LOW; + } + + @Override + protected void doStart() { + for (int worker = 0; worker < consumers; worker++) { + Thread consumer = new Consumer(queue, currentSize, lock, notEmpty, closed, genericStatsMetricPublisher, queueEventType); + executorService.submit(consumer); + } + } + + /** + * Add an item to the queue + */ + public void produce(Item item) throws InterruptedException { + if (item == null || item.size <= 0) { + throw new IllegalStateException("Invalid item input to produce."); + } + log.debug(() -> "Transfer queue event received of size: " + item.size + ". 
Current queue utilisation: " + currentSize.get()); + + if (currentSize.get() + item.size >= capacity.getBytes()) { + throw new S3TransferRejectedException("S3 Transfer queue capacity reached"); + } + + final Lock lock = this.lock; + final AtomicLong currentSize = this.currentSize; + lock.lock(); + try { + if (currentSize.get() + item.size >= capacity.getBytes()) { + throw new S3TransferRejectedException("S3 Transfer queue capacity reached"); + } + if (closed.get()) { + throw new AlreadyClosedException("Transfer queue is already closed."); + } + queue.put(item); + currentSize.addAndGet(item.size); + notEmpty.signalAll(); + updateStats(item.size, queueEventType, genericStatsMetricPublisher); + } finally { + lock.unlock(); + } + + } + + private static void updateStats(long itemSize, QueueEventType queueEventType, GenericStatsMetricPublisher genericStatsMetricPublisher) { + if (queueEventType == QueueEventType.NORMAL) { + genericStatsMetricPublisher.updateNormalPriorityQSize(itemSize); + } else if (queueEventType == QueueEventType.LOW) { + genericStatsMetricPublisher.updateLowPriorityQSize(itemSize); + } + } + + public int getSize() { + return queue.size(); + } + + public boolean isMaxCapacityBelowContentLength(long contentLength) { + return contentLength < capacity.getBytes(); + } + + protected static class Consumer extends Thread { + private final LinkedBlockingQueue queue; + private final Lock lock; + private final Condition notEmpty; + private final AtomicLong currentSize; + private final AtomicBoolean closed; + private final GenericStatsMetricPublisher genericStatsMetricPublisher; + private final QueueEventType queueEventType; + + public Consumer( + LinkedBlockingQueue queue, + AtomicLong currentSize, + Lock lock, + Condition notEmpty, + AtomicBoolean closed, + GenericStatsMetricPublisher genericStatsMetricPublisher, + QueueEventType queueEventType + ) { + this.queue = queue; + this.lock = lock; + this.notEmpty = notEmpty; + this.currentSize = currentSize; + 
this.closed = closed; + this.genericStatsMetricPublisher = genericStatsMetricPublisher; + this.queueEventType = queueEventType; + } + + @Override + public void run() { + while (true) { + try { + consume(); + } catch (AlreadyClosedException ex) { + return; + } catch (Exception ex) { + log.error("Failed to consume transfer event", ex); + } + } + } + + private void consume() throws InterruptedException { + final Lock lock = this.lock; + final AtomicLong currentSize = this.currentSize; + lock.lock(); + Item item; + try { + if (closed.get()) { + throw new AlreadyClosedException("transfer queue closed"); + } + while (currentSize.get() == 0) { + notEmpty.await(); + if (closed.get()) { + throw new AlreadyClosedException("transfer queue closed"); + } + } + + item = queue.take(); + currentSize.addAndGet(-item.size); + updateStats(-item.size, queueEventType, genericStatsMetricPublisher); + } finally { + lock.unlock(); + } + + try { + item.consumable.run(); + } catch (Exception ex) { + log.error("Exception on executing item consumable", ex); + } + } + + } + + public static class Item { + private final long size; + private final Runnable consumable; + + public Item(long size, Runnable consumable) { + this.size = size; + this.consumable = consumable; + } + } + + @Override + protected void doStop() { + doClose(); + } + + @Override + protected void doClose() { + lock.lock(); + try { + if (closed.get() == true) { + return; + } + closed.set(true); + notEmpty.signalAll(); + } finally { + lock.unlock(); + } + } +} diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/TransferSemaphoresHolder.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/TransferSemaphoresHolder.java new file mode 100644 index 0000000000000..7dccedb8d5278 --- /dev/null +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/TransferSemaphoresHolder.java @@ -0,0 +1,186 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The 
OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.repositories.s3.async; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.common.blobstore.stream.write.WritePriority; +import org.opensearch.repositories.s3.GenericStatsMetricPublisher; + +import java.util.Objects; +import java.util.concurrent.Semaphore; +import java.util.concurrent.TimeUnit; +import java.util.function.Consumer; + +/** + * Transfer semaphore holder for controlled transfer of data to remote. + */ +public class TransferSemaphoresHolder { + private static final Logger log = LogManager.getLogger(TransferSemaphoresHolder.class); + // For tests + protected TypeSemaphore lowPrioritySemaphore; + protected TypeSemaphore normalPrioritySemaphore; + private final int normalPriorityPermits; + private final int lowPriorityPermits; + private final int acquireWaitDuration; + private final TimeUnit acquireWaitDurationUnit; + + /** + * Constructor to create semaphores holder. + */ + public TransferSemaphoresHolder( + int normalPriorityPermits, + int lowPriorityPermits, + int acquireWaitDuration, + TimeUnit timeUnit, + GenericStatsMetricPublisher genericStatsPublisher + ) { + + this.normalPriorityPermits = normalPriorityPermits; + this.lowPriorityPermits = lowPriorityPermits; + this.normalPrioritySemaphore = new TypeSemaphore( + normalPriorityPermits, + TypeSemaphore.PermitType.NORMAL, + genericStatsPublisher::updateNormalPermits + ); + this.lowPrioritySemaphore = new TypeSemaphore( + lowPriorityPermits, + TypeSemaphore.PermitType.LOW, + genericStatsPublisher::updateLowPermits + ); + this.acquireWaitDuration = acquireWaitDuration; + this.acquireWaitDurationUnit = timeUnit; + } + + /** + * Overridden semaphore to identify transfer semaphores among all other semaphores for triaging. 
+ */ + public static class TypeSemaphore extends Semaphore { + private final PermitType permitType; + private final Consumer permitChangeConsumer; + + public enum PermitType { + NORMAL, + LOW; + } + + public TypeSemaphore(int permits, PermitType permitType, Consumer permitChangeConsumer) { + super(permits); + this.permitType = permitType; + this.permitChangeConsumer = permitChangeConsumer; + } + + public PermitType getType() { + return permitType; + } + + @Override + public boolean tryAcquire() { + boolean acquired = super.tryAcquire(); + if (acquired) { + permitChangeConsumer.accept(true); + } + return acquired; + } + + @Override + public boolean tryAcquire(long timeout, TimeUnit unit) throws InterruptedException { + boolean acquired = super.tryAcquire(timeout, unit); + if (acquired) { + permitChangeConsumer.accept(true); + } + return acquired; + } + + @Override + public void release() { + super.release(); + permitChangeConsumer.accept(false); + } + } + + /** + * For multiple part requests of a single file, the request context object records the decision whether low + * priority permits can also be utilized in high priority transfers of parts of the file. If high priority permits get fully + * consumed then low priority permits will be acquired for transfer. + * + * If a low priority transfer request comes in while a high priority transfer is in progress, then until the current + * high priority transfer finishes, the low priority transfer may have to compete. This is an acceptable side effect + * because low priority transfers are generally heavy and it is ok to have slow progress in the beginning.
+ * + */ + public static class RequestContext { + + private final boolean lowPriorityPermitsConsumable; + + private RequestContext(boolean lowPriorityPermitsConsumable) { + this.lowPriorityPermitsConsumable = lowPriorityPermitsConsumable; + } + + } + + public RequestContext createRequestContext() { + return new RequestContext(this.lowPrioritySemaphore.availablePermits() == lowPriorityPermits); + } + + /** + * Acquire permit based on the availability and based on the transfer priority. + * A high priority event can acquire a low priority semaphore if all low permits were available when its request context was created. + * A low priority event can acquire a high priority semaphore if more than 40% of high permits are available. We + * reserve this bandwidth to ensure that high priority events never wait for permits in case of ongoing low priority + * transfers. + */ + public TypeSemaphore acquirePermit(WritePriority writePriority, RequestContext requestContext) throws InterruptedException { + log.debug( + () -> "Acquire permit request for transfer type: " + + writePriority + + ". Available high priority permits: " + + normalPrioritySemaphore.availablePermits() + + " and low priority permits: " + + lowPrioritySemaphore.availablePermits() + ); + // Try acquiring low priority permit or high priority permit immediately if available. + // Otherwise, we wait for low priority permit. + if (Objects.requireNonNull(writePriority) == WritePriority.LOW) { + if (lowPrioritySemaphore.tryAcquire()) { + return lowPrioritySemaphore; + } else if (normalPrioritySemaphore.availablePermits() > 0.4 * normalPriorityPermits && normalPrioritySemaphore.tryAcquire()) { + return normalPrioritySemaphore; + } else if (lowPrioritySemaphore.tryAcquire(acquireWaitDuration, acquireWaitDurationUnit)) { + return lowPrioritySemaphore; + } + return null; + } + + // Try acquiring high priority permit or low priority permit immediately if available. + // Otherwise, we wait for high priority permit.
+ if (normalPrioritySemaphore.tryAcquire()) { + return normalPrioritySemaphore; + } else if (requestContext.lowPriorityPermitsConsumable && lowPrioritySemaphore.tryAcquire()) { + return lowPrioritySemaphore; + } else if (normalPrioritySemaphore.tryAcquire(acquireWaitDuration, acquireWaitDurationUnit)) { + return normalPrioritySemaphore; + } + return null; + } + + /** + * Used in tests. + */ + public int getNormalPriorityPermits() { + return normalPriorityPermits; + } + + /** + * Used in tests. + */ + public int getLowPriorityPermits() { + return lowPriorityPermits; + } +} diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/UploadRequest.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/UploadRequest.java index b944a72225d36..79b58ff215c54 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/UploadRequest.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/UploadRequest.java @@ -26,8 +26,8 @@ public class UploadRequest { private final CheckedConsumer uploadFinalizer; private final boolean doRemoteDataIntegrityCheck; private final Long expectedChecksum; - private boolean uploadRetryEnabled; private final Map metadata; + private final boolean uploadRetryEnabled; /** * Construct a new UploadRequest object diff --git a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/RepositoryCredentialsTests.java b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/RepositoryCredentialsTests.java index f84d953baae8e..573a4f3f51a41 100644 --- a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/RepositoryCredentialsTests.java +++ b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/RepositoryCredentialsTests.java @@ -303,7 +303,22 @@ protected S3Repository createRepository( ClusterService clusterService, RecoverySettings recoverySettings ) { - return new S3Repository(metadata, registry, 
service, clusterService, recoverySettings, null, null, null, null, null, false) { + return new S3Repository( + metadata, + registry, + service, + clusterService, + recoverySettings, + null, + null, + null, + null, + null, + false, + null, + null, + null + ) { @Override protected void assertSnapshotOrGenericThread() { // eliminate thread name check as we create repo manually on test/main threads diff --git a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobContainerMockClientTests.java b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobContainerMockClientTests.java index 4173f8b66387f..9b413ac81d766 100644 --- a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobContainerMockClientTests.java +++ b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobContainerMockClientTests.java @@ -47,7 +47,10 @@ import org.opensearch.repositories.s3.async.AsyncExecutorContainer; import org.opensearch.repositories.s3.async.AsyncTransferEventLoopGroup; import org.opensearch.repositories.s3.async.AsyncTransferManager; +import org.opensearch.repositories.s3.async.SizeBasedBlockingQ; +import org.opensearch.repositories.s3.async.TransferSemaphoresHolder; import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.threadpool.Scheduler; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -65,6 +68,7 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; @@ -91,8 +95,13 @@ public class S3BlobContainerMockClientTests extends OpenSearchTestCase implement private MockS3AsyncService asyncService; private ExecutorService futureCompletionService; private ExecutorService streamReaderService; + private 
ExecutorService remoteTransferRetry; + private ExecutorService transferQueueConsumerService; + private ScheduledExecutorService scheduler; private AsyncTransferEventLoopGroup transferNIOGroup; private S3BlobContainer blobContainer; + private SizeBasedBlockingQ normalPrioritySizeBasedBlockingQ; + private SizeBasedBlockingQ lowPrioritySizeBasedBlockingQ; static class MockS3AsyncService extends S3AsyncService { @@ -364,7 +373,27 @@ public void setUp() throws Exception { asyncService = new MockS3AsyncService(configPath(), 1000); futureCompletionService = Executors.newSingleThreadExecutor(); streamReaderService = Executors.newSingleThreadExecutor(); + remoteTransferRetry = Executors.newFixedThreadPool(20); + transferQueueConsumerService = Executors.newFixedThreadPool(20); + scheduler = new Scheduler.SafeScheduledThreadPoolExecutor(1); transferNIOGroup = new AsyncTransferEventLoopGroup(1); + GenericStatsMetricPublisher genericStatsMetricPublisher = new GenericStatsMetricPublisher(10000L, 10, 10000L, 10); + normalPrioritySizeBasedBlockingQ = new SizeBasedBlockingQ( + new ByteSizeValue(Runtime.getRuntime().availableProcessors() * 10L, ByteSizeUnit.GB), + transferQueueConsumerService, + 10, + genericStatsMetricPublisher, + SizeBasedBlockingQ.QueueEventType.NORMAL + ); + lowPrioritySizeBasedBlockingQ = new SizeBasedBlockingQ( + new ByteSizeValue(Runtime.getRuntime().availableProcessors() * 20L, ByteSizeUnit.GB), + transferQueueConsumerService, + 5, + genericStatsMetricPublisher, + SizeBasedBlockingQ.QueueEventType.NORMAL + ); + normalPrioritySizeBasedBlockingQ.start(); + lowPrioritySizeBasedBlockingQ.start(); blobContainer = createBlobContainer(); super.setUp(); } @@ -373,6 +402,14 @@ public void setUp() throws Exception { @After public void tearDown() throws Exception { IOUtils.close(asyncService); + futureCompletionService.shutdown(); + streamReaderService.shutdown(); + remoteTransferRetry.shutdown(); + transferQueueConsumerService.shutdown(); + 
normalPrioritySizeBasedBlockingQ.close(); + lowPrioritySizeBasedBlockingQ.close(); + scheduler.shutdown(); + transferNIOGroup.close(); super.tearDown(); } @@ -394,7 +431,7 @@ private S3BlobStore createBlobStore() { streamReaderService, transferNIOGroup ); - + GenericStatsMetricPublisher genericStatsMetricPublisher = new GenericStatsMetricPublisher(10000L, 10, 10000L, 10); return new S3BlobStore( null, asyncService, @@ -410,11 +447,21 @@ private S3BlobStore createBlobStore() { S3Repository.PARALLEL_MULTIPART_UPLOAD_MINIMUM_PART_SIZE_SETTING.getDefault(Settings.EMPTY).getBytes(), asyncExecutorContainer.getStreamReader(), asyncExecutorContainer.getStreamReader(), - asyncExecutorContainer.getStreamReader() + asyncExecutorContainer.getStreamReader(), + new TransferSemaphoresHolder( + 3, + Math.max(Runtime.getRuntime().availableProcessors() * 5, 10), + 5, + TimeUnit.MINUTES, + genericStatsMetricPublisher + ) ), asyncExecutorContainer, asyncExecutorContainer, - asyncExecutorContainer + asyncExecutorContainer, + normalPrioritySizeBasedBlockingQ, + lowPrioritySizeBasedBlockingQ, + genericStatsMetricPublisher ); } @@ -574,19 +621,32 @@ private int calculateNumberOfParts(long contentLength, long partSize) { return (int) ((contentLength % partSize) == 0 ? 
contentLength / partSize : (contentLength / partSize) + 1); } - public void testFailureWhenLargeFileRedirected() throws IOException, ExecutionException, InterruptedException { - testLargeFilesRedirectedToSlowSyncClient(true); + public void testFailureWhenLargeFileRedirected() throws IOException, InterruptedException { + testLargeFilesRedirectedToSlowSyncClient(true, WritePriority.LOW); + testLargeFilesRedirectedToSlowSyncClient(true, WritePriority.NORMAL); } - public void testLargeFileRedirected() throws IOException, ExecutionException, InterruptedException { - testLargeFilesRedirectedToSlowSyncClient(false); + public void testLargeFileRedirected() throws IOException, InterruptedException { + testLargeFilesRedirectedToSlowSyncClient(false, WritePriority.LOW); + testLargeFilesRedirectedToSlowSyncClient(false, WritePriority.NORMAL); } - private void testLargeFilesRedirectedToSlowSyncClient(boolean expectException) throws IOException, InterruptedException { - final ByteSizeValue partSize = new ByteSizeValue(1024, ByteSizeUnit.MB); - + private void testLargeFilesRedirectedToSlowSyncClient(boolean expectException, WritePriority writePriority) throws IOException, + InterruptedException { + ByteSizeValue capacity = new ByteSizeValue(1, ByteSizeUnit.GB); int numberOfParts = 20; - final long lastPartSize = new ByteSizeValue(20, ByteSizeUnit.MB).getBytes(); + final ByteSizeValue partSize = new ByteSizeValue(capacity.getBytes() / numberOfParts + 1, ByteSizeUnit.BYTES); + + GenericStatsMetricPublisher genericStatsMetricPublisher = new GenericStatsMetricPublisher(10000L, 10, 10000L, 10); + SizeBasedBlockingQ sizeBasedBlockingQ = new SizeBasedBlockingQ( + capacity, + transferQueueConsumerService, + 10, + genericStatsMetricPublisher, + SizeBasedBlockingQ.QueueEventType.NORMAL + ); + + final long lastPartSize = new ByteSizeValue(200, ByteSizeUnit.MB).getBytes(); final long blobSize = ((numberOfParts - 1) * partSize.getBytes()) + lastPartSize; CountDownLatch countDownLatch = new 
CountDownLatch(1); AtomicReference exceptionRef = new AtomicReference<>(); @@ -609,6 +669,9 @@ private void testLargeFilesRedirectedToSlowSyncClient(boolean expectException) t when(blobStore.getStatsMetricPublisher()).thenReturn(new StatsMetricPublisher()); when(blobStore.bufferSizeInBytes()).thenReturn(bufferSize); + when(blobStore.getLowPrioritySizeBasedBlockingQ()).thenReturn(sizeBasedBlockingQ); + when(blobStore.getNormalPrioritySizeBasedBlockingQ()).thenReturn(sizeBasedBlockingQ); + final boolean serverSideEncryption = randomBoolean(); when(blobStore.serverSideEncryption()).thenReturn(serverSideEncryption); @@ -658,7 +721,7 @@ private void testLargeFilesRedirectedToSlowSyncClient(boolean expectException) t .streamContextSupplier(streamContextSupplier) .fileSize(blobSize) .failIfAlreadyExists(false) - .writePriority(WritePriority.HIGH) + .writePriority(writePriority) .uploadFinalizer(Assert::assertTrue) .doRemoteDataIntegrityCheck(false) .metadata(new HashMap<>()) @@ -693,5 +756,4 @@ private void testLargeFilesRedirectedToSlowSyncClient(boolean expectException) t } }); } - } diff --git a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobContainerRetriesTests.java b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobContainerRetriesTests.java index 10578090da75c..96ef28d24c14f 100644 --- a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobContainerRetriesTests.java +++ b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobContainerRetriesTests.java @@ -67,6 +67,8 @@ import org.opensearch.repositories.s3.async.AsyncExecutorContainer; import org.opensearch.repositories.s3.async.AsyncTransferEventLoopGroup; import org.opensearch.repositories.s3.async.AsyncTransferManager; +import org.opensearch.repositories.s3.async.SizeBasedBlockingQ; +import org.opensearch.repositories.s3.async.TransferSemaphoresHolder; import org.junit.After; import org.junit.Assert; import org.junit.Before; 
@@ -87,6 +89,8 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledThreadPoolExecutor; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; @@ -114,7 +118,12 @@ public class S3BlobContainerRetriesTests extends AbstractBlobContainerRetriesTes private S3AsyncService asyncService; private ExecutorService futureCompletionService; private ExecutorService streamReaderService; + private ExecutorService remoteTransferRetry; + private ExecutorService transferQueueConsumerService; + private ScheduledExecutorService scheduler; private AsyncTransferEventLoopGroup transferNIOGroup; + private SizeBasedBlockingQ normalPrioritySizeBasedBlockingQ; + private SizeBasedBlockingQ lowPrioritySizeBasedBlockingQ; @Before public void setUp() throws Exception { @@ -125,7 +134,26 @@ public void setUp() throws Exception { futureCompletionService = Executors.newSingleThreadExecutor(); streamReaderService = Executors.newSingleThreadExecutor(); transferNIOGroup = new AsyncTransferEventLoopGroup(1); - + remoteTransferRetry = Executors.newFixedThreadPool(20); + transferQueueConsumerService = Executors.newFixedThreadPool(2); + scheduler = new ScheduledThreadPoolExecutor(1); + GenericStatsMetricPublisher genericStatsMetricPublisher = new GenericStatsMetricPublisher(10000L, 10, 10000L, 10); + normalPrioritySizeBasedBlockingQ = new SizeBasedBlockingQ( + new ByteSizeValue(Runtime.getRuntime().availableProcessors() * 5L, ByteSizeUnit.GB), + transferQueueConsumerService, + 2, + genericStatsMetricPublisher, + SizeBasedBlockingQ.QueueEventType.NORMAL + ); + lowPrioritySizeBasedBlockingQ = new SizeBasedBlockingQ( + new ByteSizeValue(Runtime.getRuntime().availableProcessors() * 5L, ByteSizeUnit.GB), + transferQueueConsumerService, + 2, + 
genericStatsMetricPublisher, + SizeBasedBlockingQ.QueueEventType.LOW + ); + normalPrioritySizeBasedBlockingQ.start(); + lowPrioritySizeBasedBlockingQ.start(); // needed by S3AsyncService SocketAccess.doPrivileged(() -> System.setProperty("opensearch.path.conf", configPath().toString())); super.setUp(); @@ -137,6 +165,11 @@ public void tearDown() throws Exception { streamReaderService.shutdown(); futureCompletionService.shutdown(); + remoteTransferRetry.shutdown(); + transferQueueConsumerService.shutdown(); + scheduler.shutdown(); + normalPrioritySizeBasedBlockingQ.close(); + lowPrioritySizeBasedBlockingQ.close(); IOUtils.close(transferNIOGroup); if (previousOpenSearchPathConf != null) { @@ -205,7 +238,7 @@ protected AsyncMultiStreamBlobContainer createBlobContainer( streamReaderService, transferNIOGroup ); - + GenericStatsMetricPublisher genericStatsMetricPublisher = new GenericStatsMetricPublisher(10000L, 10, 10000L, 10); return new S3BlobContainer( BlobPath.cleanPath(), new S3BlobStore( @@ -223,11 +256,21 @@ protected AsyncMultiStreamBlobContainer createBlobContainer( S3Repository.PARALLEL_MULTIPART_UPLOAD_MINIMUM_PART_SIZE_SETTING.getDefault(Settings.EMPTY).getBytes(), asyncExecutorContainer.getStreamReader(), asyncExecutorContainer.getStreamReader(), - asyncExecutorContainer.getStreamReader() + asyncExecutorContainer.getStreamReader(), + new TransferSemaphoresHolder( + 3, + Math.max(Runtime.getRuntime().availableProcessors() * 5, 10), + 5, + TimeUnit.MINUTES, + genericStatsMetricPublisher + ) ), asyncExecutorContainer, asyncExecutorContainer, - asyncExecutorContainer + asyncExecutorContainer, + normalPrioritySizeBasedBlockingQ, + lowPrioritySizeBasedBlockingQ, + genericStatsMetricPublisher ) ) { @Override diff --git a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3RepositoryTests.java b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3RepositoryTests.java index 6fec535ae6301..f8e9903bb3577 100644 --- 
a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3RepositoryTests.java +++ b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3RepositoryTests.java @@ -169,7 +169,11 @@ private S3Repository createS3Repo(RepositoryMetadata metadata) { null, null, null, - false + false, + null, + null, + null, + null ) { @Override protected void assertSnapshotOrGenericThread() { diff --git a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/async/AsyncTransferManagerTests.java b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/async/AsyncTransferManagerTests.java index 04d1819bef02b..89add3cdbfc60 100644 --- a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/async/AsyncTransferManagerTests.java +++ b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/async/AsyncTransferManagerTests.java @@ -33,6 +33,7 @@ import org.opensearch.common.io.InputStreamContainer; import org.opensearch.core.common.unit.ByteSizeUnit; import org.opensearch.repositories.blobstore.ZeroInputStream; +import org.opensearch.repositories.s3.GenericStatsMetricPublisher; import org.opensearch.repositories.s3.StatsMetricPublisher; import org.opensearch.test.OpenSearchTestCase; import org.junit.Before; @@ -46,6 +47,7 @@ import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; import static org.mockito.ArgumentMatchers.any; @@ -63,11 +65,19 @@ public class AsyncTransferManagerTests extends OpenSearchTestCase { @Before public void setUp() throws Exception { s3AsyncClient = mock(S3AsyncClient.class); + GenericStatsMetricPublisher genericStatsMetricPublisher = new GenericStatsMetricPublisher(10000L, 10, 10000L, 10); asyncTransferManager = new AsyncTransferManager( ByteSizeUnit.MB.toBytes(5), Executors.newSingleThreadExecutor(), 
Executors.newSingleThreadExecutor(), - Executors.newSingleThreadExecutor() + Executors.newSingleThreadExecutor(), + new TransferSemaphoresHolder( + 3, + Math.max(Runtime.getRuntime().availableProcessors() * 5, 10), + 5, + TimeUnit.MINUTES, + genericStatsMetricPublisher + ) ); super.setUp(); } diff --git a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/async/SizeBasedBlockingQTests.java b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/async/SizeBasedBlockingQTests.java new file mode 100644 index 0000000000000..5be4037407d23 --- /dev/null +++ b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/async/SizeBasedBlockingQTests.java @@ -0,0 +1,102 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.repositories.s3.async; + +import org.opensearch.core.common.unit.ByteSizeUnit; +import org.opensearch.core.common.unit.ByteSizeValue; +import org.opensearch.repositories.s3.GenericStatsMetricPublisher; +import org.opensearch.repositories.s3.S3TransferRejectedException; +import org.opensearch.test.OpenSearchTestCase; +import org.junit.After; +import org.junit.Before; + +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.atomic.AtomicBoolean; + +public class SizeBasedBlockingQTests extends OpenSearchTestCase { + private ExecutorService consumerService; + private ExecutorService producerService; + + @Override + @Before + public void setUp() throws Exception { + this.consumerService = Executors.newFixedThreadPool(10); + this.producerService = Executors.newFixedThreadPool(100); + super.setUp(); + } + + @After + public void tearDown() throws Exception { + consumerService.shutdown(); + producerService.shutdown(); + super.tearDown(); + } + 
+ public void testProducerConsumerOfBulkItems() throws InterruptedException { + GenericStatsMetricPublisher genericStatsMetricPublisher = new GenericStatsMetricPublisher(10000L, 10, 10000L, 10); + SizeBasedBlockingQ.QueueEventType queueEventType = randomBoolean() + ? SizeBasedBlockingQ.QueueEventType.NORMAL + : SizeBasedBlockingQ.QueueEventType.LOW; + SizeBasedBlockingQ sizeBasedBlockingQ = new SizeBasedBlockingQ( + new ByteSizeValue(ByteSizeUnit.BYTES.toBytes(10)), + consumerService, + 10, + genericStatsMetricPublisher, + queueEventType + ); + sizeBasedBlockingQ.start(); + int numOfItems = randomIntBetween(100, 1000); + CountDownLatch latch = new CountDownLatch(numOfItems); + AtomicBoolean unknownError = new AtomicBoolean(); + for (int i = 0; i < numOfItems; i++) { + final int idx = i; + producerService.submit(() -> { + boolean throwException = randomBoolean(); + + SizeBasedBlockingQ.Item item = new TestItemToStr(randomIntBetween(1, 5), () -> { + latch.countDown(); + if (throwException) { + throw new RuntimeException("throwing random exception"); + } + }, idx); + + try { + sizeBasedBlockingQ.produce(item); + } catch (InterruptedException e) { + latch.countDown(); + unknownError.set(true); + throw new RuntimeException(e); + } catch (S3TransferRejectedException ex) { + latch.countDown(); + } + }); + } + latch.await(); + sizeBasedBlockingQ.close(); + assertFalse(unknownError.get()); + assertEquals(0L, genericStatsMetricPublisher.getNormalPriorityQSize()); + assertEquals(0L, genericStatsMetricPublisher.getLowPriorityQSize()); + } + + static class TestItemToStr extends SizeBasedBlockingQ.Item { + private final int id; + + public TestItemToStr(long size, Runnable consumable, int id) { + super(size, consumable); + this.id = id; + } + + @Override + public String toString() { + return String.valueOf(id); + } + } +} diff --git a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/async/TransferSemaphoresHolderTests.java 
b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/async/TransferSemaphoresHolderTests.java new file mode 100644 index 0000000000000..236f02c5eb1f7 --- /dev/null +++ b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/async/TransferSemaphoresHolderTests.java @@ -0,0 +1,276 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.repositories.s3.async; + +import org.opensearch.common.blobstore.stream.write.WritePriority; +import org.opensearch.repositories.s3.GenericStatsMetricPublisher; +import org.opensearch.test.OpenSearchTestCase; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.Semaphore; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; + +import org.mockito.Mockito; + +import static org.opensearch.repositories.s3.async.TransferSemaphoresHolder.TypeSemaphore.PermitType; + +public class TransferSemaphoresHolderTests extends OpenSearchTestCase { + + public void testAllocation() { + int availablePermits = randomIntBetween(5, 20); + double priorityAllocation = randomDoubleBetween(0.1, 0.9, true); + int normalPermits = (int) (availablePermits * priorityAllocation); + int lowPermits = availablePermits - normalPermits; + GenericStatsMetricPublisher genericStatsPublisher = new GenericStatsMetricPublisher(10000L, 10, 10000L, 10); + TransferSemaphoresHolder transferSemaphoresHolder = new TransferSemaphoresHolder( + normalPermits, + lowPermits, + 1, + TimeUnit.NANOSECONDS, + genericStatsPublisher + ); + assertEquals(normalPermits, transferSemaphoresHolder.getNormalPriorityPermits()); + assertEquals(lowPermits, transferSemaphoresHolder.getLowPriorityPermits()); + assertEquals(0, genericStatsPublisher.getAcquiredNormalPriorityPermits()); + assertEquals(0, 
genericStatsPublisher.getAcquiredLowPriorityPermits()); + } + + public void testLowPriorityEventPermitAcquisition() throws InterruptedException { + int availablePermits = randomIntBetween(5, 50); + double priorityAllocation = randomDoubleBetween(0.1, 0.9, true); + int normalPermits = (int) (availablePermits * priorityAllocation); + int lowPermits = availablePermits - normalPermits; + GenericStatsMetricPublisher genericStatsPublisher = new GenericStatsMetricPublisher(10000L, 10, 10000L, 10); + TransferSemaphoresHolder transferSemaphoresHolder = new TransferSemaphoresHolder( + normalPermits, + lowPermits, + 1, + TimeUnit.NANOSECONDS, + genericStatsPublisher + ); + + List semaphores = new ArrayList<>(); + int normalPermitsEligibleForLowEvents = normalPermits - (int) (normalPermits * 0.4); + + int lowAcquisitionsExpected = (normalPermitsEligibleForLowEvents + lowPermits); + for (int i = 0; i < lowAcquisitionsExpected; i++) { + TransferSemaphoresHolder.RequestContext requestContext = transferSemaphoresHolder.createRequestContext(); + TransferSemaphoresHolder.TypeSemaphore acquiredSemaphore = transferSemaphoresHolder.acquirePermit( + WritePriority.LOW, + requestContext + ); + semaphores.add(acquiredSemaphore); + if (i >= lowPermits) { + assertEquals(PermitType.NORMAL, acquiredSemaphore.getType()); + } else { + assertEquals(PermitType.LOW, acquiredSemaphore.getType()); + } + } + + for (int i = 0; i < normalPermits - normalPermitsEligibleForLowEvents; i++) { + TransferSemaphoresHolder.RequestContext requestContext = transferSemaphoresHolder.createRequestContext(); + TransferSemaphoresHolder.TypeSemaphore acquiredSemaphore = transferSemaphoresHolder.acquirePermit( + WritePriority.NORMAL, + requestContext + ); + assertEquals(PermitType.NORMAL, acquiredSemaphore.getType()); + semaphores.add(acquiredSemaphore); + } + + TransferSemaphoresHolder.RequestContext requestContext = transferSemaphoresHolder.createRequestContext(); + TransferSemaphoresHolder.TypeSemaphore 
acquiredSemaphore = transferSemaphoresHolder.acquirePermit( + WritePriority.LOW, + requestContext + ); + assertNull(acquiredSemaphore); + + assertEquals(availablePermits, semaphores.size()); + semaphores.forEach(Semaphore::release); + assertEquals(normalPermits, transferSemaphoresHolder.getNormalPriorityPermits()); + assertEquals(lowPermits, transferSemaphoresHolder.getLowPriorityPermits()); + assertEquals(0, genericStatsPublisher.getAcquiredNormalPriorityPermits()); + assertEquals(0, genericStatsPublisher.getAcquiredLowPriorityPermits()); + + } + + public void testNormalPermitEventAcquisition() throws InterruptedException { + int availablePermits = randomIntBetween(5, 50); + double priorityAllocation = randomDoubleBetween(0.1, 0.9, true); + int normalPermits = (int) (availablePermits * priorityAllocation); + int lowPermits = availablePermits - normalPermits; + GenericStatsMetricPublisher genericStatsPublisher = new GenericStatsMetricPublisher(10000L, 10, 10000L, 10); + TransferSemaphoresHolder transferSemaphoresHolder = new TransferSemaphoresHolder( + normalPermits, + lowPermits, + 1, + TimeUnit.NANOSECONDS, + genericStatsPublisher + ); + + List semaphores = new ArrayList<>(); + List lowSemaphores = new ArrayList<>(); + int normalAcquisitionsExpected = normalPermits + lowPermits; + TransferSemaphoresHolder.RequestContext requestContext = transferSemaphoresHolder.createRequestContext(); + for (int i = 0; i < normalAcquisitionsExpected; i++) { + TransferSemaphoresHolder.TypeSemaphore acquiredSemaphore = transferSemaphoresHolder.acquirePermit( + WritePriority.NORMAL, + requestContext + ); + semaphores.add(acquiredSemaphore); + if (i >= normalPermits) { + assertEquals(PermitType.LOW, acquiredSemaphore.getType()); + lowSemaphores.add(acquiredSemaphore); + } else { + assertEquals(PermitType.NORMAL, acquiredSemaphore.getType()); + } + } + assertEquals(availablePermits, semaphores.size()); + + int lowAcquired = lowPermits; + + Semaphore removedLowSemaphore = 
lowSemaphores.remove(0); + removedLowSemaphore.release(); + semaphores.remove(removedLowSemaphore); + + requestContext = transferSemaphoresHolder.createRequestContext(); + TransferSemaphoresHolder.TypeSemaphore acquiredSemaphore = transferSemaphoresHolder.acquirePermit( + WritePriority.LOW, + requestContext + ); + semaphores.add(acquiredSemaphore); + lowSemaphores.add(acquiredSemaphore); + while (lowAcquired > 1) { + requestContext = transferSemaphoresHolder.createRequestContext(); + acquiredSemaphore = transferSemaphoresHolder.acquirePermit(WritePriority.NORMAL, requestContext); + assertNull(acquiredSemaphore); + lowAcquired--; + } + + semaphores.forEach(Semaphore::release); + assertEquals(normalPermits, transferSemaphoresHolder.getNormalPriorityPermits()); + assertEquals(lowPermits, transferSemaphoresHolder.getLowPriorityPermits()); + assertEquals(0, genericStatsPublisher.getAcquiredNormalPriorityPermits()); + assertEquals(0, genericStatsPublisher.getAcquiredLowPriorityPermits()); + } + + private static class TestTransferSemaphoresHolder extends TransferSemaphoresHolder { + AtomicInteger normalWaitCount = new AtomicInteger(); + AtomicInteger lowWaitCount = new AtomicInteger(); + + /** + * Constructor to create semaphores holder. 
+ */ + public TestTransferSemaphoresHolder( + int normalPermits, + int lowPermits, + int acquireWaitDuration, + TimeUnit timeUnit, + GenericStatsMetricPublisher genericStatsMetricPublisher + ) throws InterruptedException { + super(normalPermits, lowPermits, acquireWaitDuration, timeUnit, genericStatsMetricPublisher); + TypeSemaphore executingNormalSemaphore = normalPrioritySemaphore; + TypeSemaphore executingLowSemaphore = lowPrioritySemaphore; + + this.normalPrioritySemaphore = Mockito.spy(normalPrioritySemaphore); + this.lowPrioritySemaphore = Mockito.spy(lowPrioritySemaphore); + Mockito.doAnswer(invocation -> { + normalWaitCount.incrementAndGet(); + return false; + }).when(normalPrioritySemaphore).tryAcquire(Mockito.anyLong(), Mockito.any(TimeUnit.class)); + Mockito.doAnswer(invocation -> executingNormalSemaphore.availablePermits()).when(normalPrioritySemaphore).availablePermits(); + Mockito.doAnswer(invocation -> executingNormalSemaphore.tryAcquire()).when(normalPrioritySemaphore).tryAcquire(); + + Mockito.doAnswer(invocation -> { + lowWaitCount.incrementAndGet(); + return false; + }).when(lowPrioritySemaphore).tryAcquire(Mockito.anyLong(), Mockito.any(TimeUnit.class)); + Mockito.doAnswer(invocation -> executingLowSemaphore.availablePermits()).when(lowPrioritySemaphore).availablePermits(); + Mockito.doAnswer(invocation -> executingLowSemaphore.tryAcquire()).when(lowPrioritySemaphore).tryAcquire(); + } + } + + public void testNormalSemaphoreAcquiredWait() throws InterruptedException { + int availablePermits = randomIntBetween(10, 50); + double priorityAllocation = randomDoubleBetween(0.1, 0.9, true); + int normalPermits = (int) (availablePermits * priorityAllocation); + GenericStatsMetricPublisher genericStatsPublisher = new GenericStatsMetricPublisher(10000L, 10, 10000L, 10); + TestTransferSemaphoresHolder transferSemaphoresHolder = new TestTransferSemaphoresHolder( + normalPermits, + availablePermits - normalPermits, + 5, + TimeUnit.MINUTES, + 
genericStatsPublisher + ); + + TransferSemaphoresHolder.RequestContext requestContext = transferSemaphoresHolder.createRequestContext(); + TransferSemaphoresHolder.TypeSemaphore lowSemaphore = transferSemaphoresHolder.acquirePermit(WritePriority.LOW, requestContext); + assertEquals(PermitType.LOW, lowSemaphore.getType()); + for (int i = 0; i < normalPermits; i++) { + requestContext = transferSemaphoresHolder.createRequestContext(); + TransferSemaphoresHolder.TypeSemaphore acquiredSemaphore = transferSemaphoresHolder.acquirePermit( + WritePriority.NORMAL, + requestContext + ); + assertEquals(PermitType.NORMAL, acquiredSemaphore.getType()); + } + + TransferSemaphoresHolder.TypeSemaphore acquiredSemaphore = transferSemaphoresHolder.acquirePermit( + WritePriority.NORMAL, + requestContext + ); + assertNull(acquiredSemaphore); + assertEquals(1, transferSemaphoresHolder.normalWaitCount.get()); + assertEquals(0, transferSemaphoresHolder.lowWaitCount.get()); + } + + public void testLowSemaphoreAcquiredWait() throws InterruptedException { + int availablePermits = randomIntBetween(10, 50); + double priorityAllocation = randomDoubleBetween(0.1, 0.9, true); + int normalPermits = (int) (availablePermits * priorityAllocation); + int lowPermits = availablePermits - normalPermits; + GenericStatsMetricPublisher genericStatsPublisher = new GenericStatsMetricPublisher(10000L, 10, 10000L, 10); + TestTransferSemaphoresHolder transferSemaphoresHolder = new TestTransferSemaphoresHolder( + normalPermits, + lowPermits, + 5, + TimeUnit.MINUTES, + genericStatsPublisher + ); + + TransferSemaphoresHolder.RequestContext requestContext = transferSemaphoresHolder.createRequestContext(); + int normalPermitsEligibleForLowEvents = normalPermits - (int) (normalPermits * 0.4); + for (int i = 0; i < normalPermitsEligibleForLowEvents; i++) { + TransferSemaphoresHolder.TypeSemaphore lowSemaphore = transferSemaphoresHolder.acquirePermit( + WritePriority.NORMAL, + requestContext + ); + 
assertEquals(PermitType.NORMAL, lowSemaphore.getType()); + } + + for (int i = 0; i < lowPermits; i++) { + requestContext = transferSemaphoresHolder.createRequestContext(); + TransferSemaphoresHolder.TypeSemaphore acquiredSemaphore = transferSemaphoresHolder.acquirePermit( + WritePriority.LOW, + requestContext + ); + assertEquals(PermitType.LOW, acquiredSemaphore.getType()); + } + + TransferSemaphoresHolder.TypeSemaphore acquiredSemaphore = transferSemaphoresHolder.acquirePermit( + WritePriority.LOW, + requestContext + ); + assertNull(acquiredSemaphore); + assertEquals(1, transferSemaphoresHolder.lowWaitCount.get()); + assertEquals(0, transferSemaphoresHolder.normalWaitCount.get()); + } + +} diff --git a/qa/wildfly/build.gradle b/qa/wildfly/build.gradle index 5d37be47e782e..abf033fff378a 100644 --- a/qa/wildfly/build.gradle +++ b/qa/wildfly/build.gradle @@ -40,7 +40,7 @@ apply plugin: 'opensearch.internal-distribution-download' testFixtures.useFixture() dependencies { - providedCompile('jakarta.enterprise:jakarta.enterprise.cdi-api:4.0.1') { + providedCompile('jakarta.enterprise:jakarta.enterprise.cdi-api:4.1.0') { exclude module: 'jakarta.annotation-api' } providedCompile 'jakarta.ws.rs:jakarta.ws.rs-api:3.1.0' diff --git a/release-notes/opensearch.release-notes-2.14.0.md b/release-notes/opensearch.release-notes-2.14.0.md new file mode 100644 index 0000000000000..8ef0215baa67a --- /dev/null +++ b/release-notes/opensearch.release-notes-2.14.0.md @@ -0,0 +1,87 @@ +## 2024-05-07 Version 2.14.0 Release Notes + +## [2.14.0] +### Added +- Add explicit dependency to validatePom and generatePom tasks ([#12909](https://github.com/opensearch-project/OpenSearch/pull/12909)) +- Constant Keyword Field ([#12285](https://github.com/opensearch-project/OpenSearch/pull/12285)) +- [Concurrent Segment Search] Perform buildAggregation concurrently and support Composite Aggregations ([#12697](https://github.com/opensearch-project/OpenSearch/pull/12697)) +- Convert ingest processor 
supports ip type ([#12818](https://github.com/opensearch-project/OpenSearch/pull/12818)) +- Allow setting KEYSTORE_PASSWORD through env variable ([#12865](https://github.com/opensearch-project/OpenSearch/pull/12865)) +- Add a counter to node stat (and _cat/shards) api to track shard going from idle to non-idle ([#12768](https://github.com/opensearch-project/OpenSearch/pull/12768)) +- [Concurrent Segment Search] Disable concurrent segment search for system indices and throttled requests ([#12954](https://github.com/opensearch-project/OpenSearch/pull/12954)) +- Rename ingest processor supports overriding target field if exists ([#12990](https://github.com/opensearch-project/OpenSearch/pull/12990)) +- [Tiered Caching] Make took time caching policy setting dynamic ([#13063](https://github.com/opensearch-project/OpenSearch/pull/13063)) +- Detect breaking changes on pull requests ([#9044](https://github.com/opensearch-project/OpenSearch/pull/9044)) +- Add cluster primary balance constraint for rebalancing with buffer ([#12656](https://github.com/opensearch-project/OpenSearch/pull/12656)) +- [Remote Store] Make translog transfer timeout configurable ([#12704](https://github.com/opensearch-project/OpenSearch/pull/12704)) +- Derived fields support to derive field values at query time without indexing ([#12569](https://github.com/opensearch-project/OpenSearch/pull/12569)) +- Add support for more than one protocol for transport ([#12967](https://github.com/opensearch-project/OpenSearch/pull/12967)) +- [Tiered Caching] Add dimension-based stats to ICache implementations. ([#12531](https://github.com/opensearch-project/OpenSearch/pull/12531)) +- Add changes for overriding remote store and replication settings during snapshot restore.
([#11868](https://github.com/opensearch-project/OpenSearch/pull/11868)) +- Reject Resize index requests (i.e., split, shrink and clone) while DocRep to SegRep migration is in progress ([#12686](https://github.com/opensearch-project/OpenSearch/pull/12686)) +- Add an individual setting of rate limiter for segment replication ([#12959](https://github.com/opensearch-project/OpenSearch/pull/12959)) +- [Tiered Caching] Add dimension-based stats to TieredSpilloverCache ([#13236](https://github.com/opensearch-project/OpenSearch/pull/13236)) +- [Tiered Caching] Expose new cache stats API ([#13237](https://github.com/opensearch-project/OpenSearch/pull/13237)) +- [Streaming Indexing] Ensure support of the new transport by security plugin ([#13174](https://github.com/opensearch-project/OpenSearch/pull/13174)) +- Add cluster setting to dynamically configure the buckets for filter rewrite optimization. ([#13179](https://github.com/opensearch-project/OpenSearch/pull/13179)) +- [Tiered Caching] Make Indices Request Cache Stale Key Mgmt Threshold setting dynamic ([#12941](https://github.com/opensearch-project/OpenSearch/pull/12941)) +- Make search query counters dynamic to support all query types ([#12601](https://github.com/opensearch-project/OpenSearch/pull/12601)) +- [Tiered Caching] Gate new stats logic behind FeatureFlags.PLUGGABLE_CACHE ([#13238](https://github.com/opensearch-project/OpenSearch/pull/13238)) +- [Tiered Caching] Add a dynamic setting to disable/enable disk cache.
([#13373](https://github.com/opensearch-project/OpenSearch/pull/13373)) +- Batch mode for async fetching shard information in GatewayAllocator for unassigned shards ([#8746](https://github.com/opensearch-project/OpenSearch/pull/8746)) +- [Remote Store] Add settings for remote path type and hash algorithm ([#13225](https://github.com/opensearch-project/OpenSearch/pull/13225)) +- [Remote Store] Upload remote paths during remote enabled index creation ([#13386](https://github.com/opensearch-project/OpenSearch/pull/13386)) +- [Search Pipeline] Handle default pipeline for multiple indices ([#13276](https://github.com/opensearch-project/OpenSearch/pull/13276)) +- [Batch Ingestion] Add `batch_size` to `_bulk` API. ([#12457](https://github.com/opensearch-project/OpenSearch/issues/12457)) +- [Remote Store] Add capability of doing refresh as determined by the translog ([#12992](https://github.com/opensearch-project/OpenSearch/pull/12992)) +- Support multi ranges traversal when doing date histogram rewrite optimization. 
([#13317](https://github.com/opensearch-project/OpenSearch/pull/13317)) + +### Dependencies +- Bump `org.apache.commons:commons-configuration2` from 2.10.0 to 2.10.1 ([#12896](https://github.com/opensearch-project/OpenSearch/pull/12896)) +- Bump `asm` from 9.6 to 9.7 ([#12908](https://github.com/opensearch-project/OpenSearch/pull/12908)) +- Bump `net.minidev:json-smart` from 2.5.0 to 2.5.1 ([#12893](https://github.com/opensearch-project/OpenSearch/pull/12893), [#13117](https://github.com/opensearch-project/OpenSearch/pull/13117)) +- Bump `netty` from 4.1.107.Final to 4.1.109.Final ([#12924](https://github.com/opensearch-project/OpenSearch/pull/12924), [#13233](https://github.com/opensearch-project/OpenSearch/pull/13233)) +- Bump `commons-io:commons-io` from 2.15.1 to 2.16.0 ([#12996](https://github.com/opensearch-project/OpenSearch/pull/12996), [#12998](https://github.com/opensearch-project/OpenSearch/pull/12998), [#12999](https://github.com/opensearch-project/OpenSearch/pull/12999)) +- Bump `org.apache.commons:commons-compress` from 1.24.0 to 1.26.1 ([#12627](https://github.com/opensearch-project/OpenSearch/pull/12627)) +- Bump `org.apache.commons:commonscodec` from 1.15 to 1.16.1 ([#12627](https://github.com/opensearch-project/OpenSearch/pull/12627)) +- Bump `org.apache.commons:commonslang` from 3.13.0 to 3.14.0 ([#12627](https://github.com/opensearch-project/OpenSearch/pull/12627)) +- Bump Apache Tika from 2.6.0 to 2.9.2 ([#12627](https://github.com/opensearch-project/OpenSearch/pull/12627)) +- Bump `com.gradle.enterprise` from 3.16.2 to 3.17.2 ([#13116](https://github.com/opensearch-project/OpenSearch/pull/13116), [#13191](https://github.com/opensearch-project/OpenSearch/pull/13191), [#13327](https://github.com/opensearch-project/OpenSearch/pull/13327)) +- Bump `gradle/wrapper-validation-action` from 2 to 3 ([#13192](https://github.com/opensearch-project/OpenSearch/pull/13192)) +- Bump joda from 2.12.2 to 2.12.7 
([#13193](https://github.com/opensearch-project/OpenSearch/pull/13193)) +- Bump bouncycastle from 1.77 to 1.78 ([#13243](https://github.com/opensearch-project/OpenSearch/pull/13243)) +- Update google dependencies in repository-gcs and discovery-gce ([#13213](https://github.com/opensearch-project/OpenSearch/pull/13213)) +- Bump `com.google.apis:google-api-services-compute` from v1-rev235-1.25.0 to v1-rev20240407-2.0.0 ([#13333](https://github.com/opensearch-project/OpenSearch/pull/13333)) +- Bump `com.github.spullara.mustache.java:compiler` from 0.9.10 to 0.9.11 ([#13329](https://github.com/opensearch-project/OpenSearch/pull/13329)) +- Bump `commons-cli:commons-cli` from 1.6.0 to 1.7.0 ([#13331](https://github.com/opensearch-project/OpenSearch/pull/13331)) +- Bump `jakarta.enterprise:jakarta.enterprise.cdi-api` from 4.0.1 to 4.1.0 ([#13328](https://github.com/opensearch-project/OpenSearch/pull/13328)) +- Bump `com.google.api.grpc:proto-google-iam-v1` from 0.12.0 to 1.33.0 ([#13332](https://github.com/opensearch-project/OpenSearch/pull/13332)) +- Bump `com.squareup.okio:okio` from 3.8.0 to 3.9.0 ([#12997](https://github.com/opensearch-project/OpenSearch/pull/12997)) +- OpenJDK Update (April 2024 Patch releases), update to Eclipse Temurin 21.0.3+9 ([#13389](https://github.com/opensearch-project/OpenSearch/pull/13389)) +- Bump `org.bouncycastle:bc-fips` from 1.0.2.4 to 1.0.2.5 ([#13446](https://github.com/opensearch-project/OpenSearch/pull/13446)) +- Bump `lycheeverse/lychee-action` from 1.9.3 to 1.10.0 ([#13447](https://github.com/opensearch-project/OpenSearch/pull/13447)) +- Bump `com.netflix.nebula.ospackage-base` from 11.8.1 to 11.9.0 ([#13440](https://github.com/opensearch-project/OpenSearch/pull/13440)) + +### Changed +- [BWC and API enforcement] Enforcing the presence of API annotations at build time ([#12872](https://github.com/opensearch-project/OpenSearch/pull/12872)) +- Improve built-in secure transports support 
([#12907](https://github.com/opensearch-project/OpenSearch/pull/12907)) +- Update links to documentation in rest-api-spec ([#13043](https://github.com/opensearch-project/OpenSearch/pull/13043)) +- Ignoring unavailable shards during search request execution with ignore_available parameter ([#13298](https://github.com/opensearch-project/OpenSearch/pull/13298)) +- Refactoring globMatch using simpleMatchWithNormalizedStrings from Regex ([#13104](https://github.com/opensearch-project/OpenSearch/pull/13104)) +- [BWC and API enforcement] Reconsider the breaking changes check policy to detect breaking changes against released versions ([#13292](https://github.com/opensearch-project/OpenSearch/pull/13292)) +- Switch to macos-13 runner for precommit and assemble github actions due to macos-latest is now arm64 ([#13412](https://github.com/opensearch-project/OpenSearch/pull/13412)) +- [Revert] Prevent unnecessary fetch sub phase processor initialization during fetch phase execution ([#12503](https://github.com/opensearch-project/OpenSearch/pull/12503)) + +### Fixed +- Fix bulk API ignores ingest pipeline for upsert ([#12883](https://github.com/opensearch-project/OpenSearch/pull/12883)) +- Fix issue with feature flags where default value may not be honored ([#12849](https://github.com/opensearch-project/OpenSearch/pull/12849)) +- Fix UOE While building Exists query for nested search_as_you_type field ([#12048](https://github.com/opensearch-project/OpenSearch/pull/12048)) +- Enabled mockTelemetryPlugin for IT and fixed OOM issues ([#13054](https://github.com/opensearch-project/OpenSearch/pull/13054)) +- Fix from and size parameter can be negative when searching ([#13047](https://github.com/opensearch-project/OpenSearch/pull/13047)) +- Fix implement mark() and markSupported() in class FilterStreamInput ([#13098](https://github.com/opensearch-project/OpenSearch/pull/13098)) +- Fix IndicesRequestCache Stale calculation 
([#13070](https://github.com/opensearch-project/OpenSearch/pull/13070)) +- Fix snapshot _status API to return correct status for partial snapshots ([#12812](https://github.com/opensearch-project/OpenSearch/pull/12812)) +- Improve the error messages for _stats with closed indices ([#13012](https://github.com/opensearch-project/OpenSearch/pull/13012)) +- Ignore BaseRestHandler unconsumed content check as it's always consumed. ([#13290](https://github.com/opensearch-project/OpenSearch/pull/13290)) +- Fix mapper_parsing_exception when using flat_object fields with names longer than 11 characters ([#13259](https://github.com/opensearch-project/OpenSearch/pull/13259)) +- DATETIME_FORMATTER_CACHING_SETTING experimental feature should not default to 'true' ([#13532](https://github.com/opensearch-project/OpenSearch/pull/13532)) \ No newline at end of file diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/bulk.json b/rest-api-spec/src/main/resources/rest-api-spec/api/bulk.json index bb066cd131480..e0566b811ff07 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/bulk.json +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/bulk.json @@ -74,6 +74,10 @@ "require_alias": { "type": "boolean", "description": "Sets require_alias for all incoming documents. 
Defaults to unset (false)" + }, + "batch_size": { + "type": "int", + "description": "Sets the batch size" } }, "body":{ diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml index e7da9a0bc454c..fa71137912a91 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml @@ -644,3 +644,59 @@ setup: - match: { aggregations.histo.buckets.0.doc_count: 1 } - match: { aggregations.histo.buckets.20.key: 20 } - match: { aggregations.histo.buckets.20.doc_count: 1 } + +--- +"date_histogram profiler shows filter rewrite info": + - skip: + version: " - 2.99.99" + reason: debug info for filter rewrite added in 3.0.0 (to be backported to 2.14.0) + + - do: + indices.create: + index: test_2 + body: + settings: + number_of_replicas: 0 + number_of_shards: 1 + mappings: + properties: + date: + type: date + + - do: + bulk: + index: test_2 + refresh: true + body: + - '{"index": {}}' + - '{"date": "2016-01-01"}' + - '{"index": {}}' + - '{"date": "2016-01-02"}' + - '{"index": {}}' + - '{"date": "2016-02-01"}' + - '{"index": {}}' + - '{"date": "2016-03-01"}' + + - do: + search: + index: test_2 + body: + size: 0 + profile: true + aggs: + histo: + date_histogram: + field: date + calendar_interval: month + + - match: { hits.total.value: 4 } + - length: { aggregations.histo.buckets: 3 } + - match: { aggregations.histo.buckets.0.key_as_string: "2016-01-01T00:00:00.000Z" } + - match: { aggregations.histo.buckets.0.doc_count: 2 } + - match: { profile.shards.0.aggregations.0.type: DateHistogramAggregator } + - match: { profile.shards.0.aggregations.0.description: histo } + - match: { profile.shards.0.aggregations.0.debug.total_buckets: 3 } + - match: { profile.shards.0.aggregations.0.debug.optimized_segments: 1 } + - match: { 
profile.shards.0.aggregations.0.debug.unoptimized_segments: 0 } + - match: { profile.shards.0.aggregations.0.debug.leaf_visited: 1 } + - match: { profile.shards.0.aggregations.0.debug.inner_visited: 0 } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/230_composite.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/230_composite.yml index 2808be8cd7045..3a0099dae3b33 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/230_composite.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/230_composite.yml @@ -1069,3 +1069,61 @@ setup: - match: { aggregations.test.buckets.1.doc_count: 2 } - match: { aggregations.test.buckets.2.key.kw: null } - match: { aggregations.test.buckets.2.doc_count: 2 } + +--- +"composite aggregation date_histogram profile shows filter rewrite info": + - skip: + version: " - 2.99.99" + reason: debug info for filter rewrite added in 3.0.0 (to be backported to 2.14.0) + + - do: + indices.create: + index: test_2 + body: + settings: + number_of_replicas: 0 + number_of_shards: 1 + mappings: + properties: + date: + type: date + - do: + bulk: + index: test_2 + refresh: true + body: + - '{"index": {}}' + - '{"date": "2016-01-01"}' + - '{"index": {}}' + - '{"date": "2016-01-02"}' + - '{"index": {}}' + - '{"date": "2016-02-01"}' + - '{"index": {}}' + - '{"date": "2016-03-01"}' + - do: + search: + index: test_2 + body: + size: 0 + profile: true + aggregations: + test: + composite: + sources: [ + { + "date": { + "date_histogram": { + "field": "date", + "calendar_interval": "1d", + "format": "strict_date" + } + } + } + ] + + - match: { hits.total.value: 4 } + - length: { aggregations.test.buckets: 4 } + - match: { profile.shards.0.aggregations.0.debug.optimized_segments: 1 } + - match: { profile.shards.0.aggregations.0.debug.unoptimized_segments: 0 } + - match: { profile.shards.0.aggregations.0.debug.leaf_visited: 1 } + - match: { 
profile.shards.0.aggregations.0.debug.inner_visited: 0 } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/330_auto_date_histogram.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/330_auto_date_histogram.yml index 6b5e06a549be3..1356eac41ae79 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/330_auto_date_histogram.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/330_auto_date_histogram.yml @@ -99,3 +99,29 @@ setup: - length: { aggregations.histo.buckets: 2 } - match: { profile.shards.0.aggregations.0.type: AutoDateHistogramAggregator.FromSingle } - match: { profile.shards.0.aggregations.0.debug.surviving_buckets: 4 } + +--- +"auto_date_histogram profile shows filter rewrite info": + - skip: + version: " - 2.99.99" + reason: debug info for filter rewrite added in 3.0.0 (to be backported to 2.14.0) + + - do: + search: + body: + profile: true + size: 0 + aggs: + histo: + auto_date_histogram: + field: date + buckets: 2 + + - match: { hits.total.value: 4 } + - length: { aggregations.histo.buckets: 2 } + - match: { profile.shards.0.aggregations.0.type: AutoDateHistogramAggregator.FromSingle } + - match: { profile.shards.0.aggregations.0.debug.surviving_buckets: 4 } + - match: { profile.shards.0.aggregations.0.debug.optimized_segments: 1 } + - match: { profile.shards.0.aggregations.0.debug.unoptimized_segments: 0 } + - match: { profile.shards.0.aggregations.0.debug.leaf_visited: 1 } + - match: { profile.shards.0.aggregations.0.debug.inner_visited: 0 } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/370_multi_terms.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/370_multi_terms.yml index 7db5f31d8e761..f9354db46a384 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/370_multi_terms.yml +++ 
b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/370_multi_terms.yml @@ -760,3 +760,138 @@ setup: - match: { aggregations.histo.buckets.2.key_as_string: "2022-03-25T00:00:00.000Z" } - match: { aggregations.histo.buckets.2.m_terms.buckets.0.key: [ "a", "127.0.0.1" ] } - match: { aggregations.histo.buckets.2.m_terms.buckets.1.key: [ "b", "127.0.0.1" ] } + +--- +"multi_terms bucket and multiple orders test": + - skip: + version: "- 2.0.99" + reason: multi_terms aggregation is introduced in 2.1.0 + + - do: + bulk: + index: test_1 + refresh: true + body: + - '{"index": {}}' + - '{"str": "a", "integer": 1, "double": 1234.5, "boolean": true}' + - '{"index": {}}' + - '{"str": "a", "integer": 2, "double": 5678.9, "boolean": true}' + - '{"index": {}}' + - '{"str": "a", "integer": 1, "double": 123.4, "boolean": false}' + - '{"index": {}}' + - '{"str": "a", "integer": 2, "double": 456.7, "boolean": false}' + - '{"index": {}}' + - '{"str": "b", "integer": 3, "double": 1234.5, "boolean": true}' + - '{"index": {}}' + - '{"str": "b", "integer": 4, "double": 5678.9, "boolean": true}' + - '{"index": {}}' + - '{"str": "b", "integer": 3, "double": 234.5, "boolean": false}' + - '{"index": {}}' + - '{"str": "b", "integer": 4, "double": 456.7, "boolean": false}' + + - do: + search: + index: test_1 + size: 0 + body: + aggs: + m_terms: + multi_terms: + terms: + - field: str + - field: boolean + order: + - max_int: desc + - min_double: asc + aggs: + max_int: + max: + field: integer + min_double: + min: + field: double + + - length: { aggregations.m_terms.buckets: 4 } + - match: { aggregations.m_terms.buckets.0.key: ["b", false] } + - match: { aggregations.m_terms.buckets.0.key_as_string: "b|false" } + - match: { aggregations.m_terms.buckets.0.doc_count: 2 } + - match: { aggregations.m_terms.buckets.0.max_int.value: 4.0 } + - match: { aggregations.m_terms.buckets.0.min_double.value: 234.5 } + - match: { aggregations.m_terms.buckets.1.key: ["b", true] } + - match: { 
aggregations.m_terms.buckets.1.key_as_string: "b|true" } + - match: { aggregations.m_terms.buckets.1.doc_count: 2 } + - match: { aggregations.m_terms.buckets.1.max_int.value: 4.0 } + - match: { aggregations.m_terms.buckets.1.min_double.value: 1234.5 } + - match: { aggregations.m_terms.buckets.2.key: [ "a", false ] } + - match: { aggregations.m_terms.buckets.2.key_as_string: "a|false" } + - match: { aggregations.m_terms.buckets.2.doc_count: 2 } + - match: { aggregations.m_terms.buckets.2.max_int.value: 2.0 } + - match: { aggregations.m_terms.buckets.2.min_double.value: 123.4 } + - match: { aggregations.m_terms.buckets.3.key: [ "a", true ] } + - match: { aggregations.m_terms.buckets.3.key_as_string: "a|true" } + - match: { aggregations.m_terms.buckets.3.doc_count: 2 } + - match: { aggregations.m_terms.buckets.3.max_int.value: 2.0 } + - match: { aggregations.m_terms.buckets.3.min_double.value: 1234.5 } + +--- +"multi_terms bucket and multiple orders with size test": + - skip: + version: "- 2.0.99" + reason: multi_terms aggregation is introduced in 2.1.0 + + - do: + bulk: + index: test_1 + refresh: true + body: + - '{"index": {}}' + - '{"str": "a", "integer": 1, "double": 1234.5, "boolean": true}' + - '{"index": {}}' + - '{"str": "a", "integer": 2, "double": 5678.9, "boolean": true}' + - '{"index": {}}' + - '{"str": "a", "integer": 1, "double": 123.4, "boolean": false}' + - '{"index": {}}' + - '{"str": "a", "integer": 2, "double": 456.7, "boolean": false}' + - '{"index": {}}' + - '{"str": "b", "integer": 3, "double": 1234.5, "boolean": true}' + - '{"index": {}}' + - '{"str": "b", "integer": 4, "double": 5678.9, "boolean": true}' + - '{"index": {}}' + - '{"str": "b", "integer": 3, "double": 234.5, "boolean": false}' + - '{"index": {}}' + - '{"str": "b", "integer": 4, "double": 456.7, "boolean": false}' + + - do: + search: + index: test_1 + size: 0 + body: + aggs: + m_terms: + multi_terms: + terms: + - field: str + - field: boolean + order: + - max_int: desc + - 
min_double: asc + size: 2 + aggs: + max_int: + max: + field: integer + min_double: + min: + field: double + + - length: { aggregations.m_terms.buckets: 2 } + - match: { aggregations.m_terms.buckets.0.key: ["b", false] } + - match: { aggregations.m_terms.buckets.0.key_as_string: "b|false" } + - match: { aggregations.m_terms.buckets.0.doc_count: 2 } + - match: { aggregations.m_terms.buckets.0.max_int.value: 4.0 } + - match: { aggregations.m_terms.buckets.0.min_double.value: 234.5 } + - match: { aggregations.m_terms.buckets.1.key: ["b", true] } + - match: { aggregations.m_terms.buckets.1.key_as_string: "b|true" } + - match: { aggregations.m_terms.buckets.1.doc_count: 2 } + - match: { aggregations.m_terms.buckets.1.max_int.value: 4.0 } + - match: { aggregations.m_terms.buckets.1.min_double.value: 1234.5 } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/400_inner_hits.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/400_inner_hits.yml new file mode 100644 index 0000000000000..cbfc92a40835c --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/400_inner_hits.yml @@ -0,0 +1,68 @@ +setup: + - do: + indices.create: + index: test_1 + body: + settings: + number_of_replicas: 0 + mappings: + properties: + list_id: + type: integer + names: + type: nested + properties: + full_name: + type: text + + - do: + bulk: + refresh: true + body: + - index: + _index: test_1 + _id: 1 + - list_id: 1 + names: + - full_name: John Doe + - full_name: John Micheal Doe + - index: + _index: test_1 + _id: 2 + - list_id: 2 + names: + - full_name: Jane Doe + - full_name: Jane Michelle Doe + +--- +"Include inner hits in top hits": + - skip: + version: " - 2.13.99" + reason: "the regression was fixed in 2.14.0" + - do: + search: + rest_total_hits_as_int: true + body: + query: + nested: + path: names + query: + match: + names.full_name: Doe + inner_hits: { } + size: 0 + aggs: + lists: + terms: + field: 
list_id + aggs: + top_result: + top_hits: + size: 10 + + - length: { hits.hits: 0 } + - length: { aggregations.lists.buckets: 2 } + - length: { aggregations.lists.buckets.0.top_result.hits.hits: 1 } + - length: { aggregations.lists.buckets.0.top_result.hits.hits.0.inner_hits.names.hits.hits: 2 } + - length: { aggregations.lists.buckets.1.top_result.hits.hits: 1 } + - length: { aggregations.lists.buckets.1.top_result.hits.hits.0.inner_hits.names.hits.hits: 2 } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/340_doc_values_field.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/340_doc_values_field.yml index d5ece1719dc48..a133060f07c6f 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/340_doc_values_field.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/340_doc_values_field.yml @@ -46,6 +46,18 @@ type: ip index: true doc_values: true + boolean: + type: boolean + index: true + doc_values: true + date: + type: date + index: true + doc_values: true + date_nanos: + type: date_nanos + index: true + doc_values: true - do: bulk: @@ -53,11 +65,11 @@ refresh: true body: - '{"index": {"_index": "test-iodvq", "_id": "1" }}' - - '{ "some_keyword": "ingesting some random keyword data", "byte": 120, "double": 100.0, "float": "800.0", "half_float": "400.0", "integer": 1290, "long": 13456, "short": 150, "unsigned_long": 10223372036854775800, "ip_field": "192.168.0.1" }' + - '{ "some_keyword": "ingesting some random keyword data", "byte": 120, "double": 100.0, "float": "800.0", "half_float": "400.0", "integer": 1290, "long": 13456, "short": 150, "unsigned_long": 10223372036854775800, "ip_field": "192.168.0.1", "boolean": true, "date_nanos": "2018-10-29T12:12:12.123456789Z", "date": "2018-10-29T12:12:12.987Z" }' - '{ "index": { "_index": "test-iodvq", "_id": "2" }}' - - '{ "some_keyword": "400", "byte": 121, "double": 101.0, "float": "801.0", "half_float": "401.0", "integer": 1291, "long": 13457, 
"short": 151, "unsigned_long": 10223372036854775801, "ip_field": "192.168.0.2" }' + - '{ "some_keyword": "400", "byte": 121, "double": 101.0, "float": "801.0", "half_float": "401.0", "integer": 1291, "long": 13457, "short": 151, "unsigned_long": 10223372036854775801, "ip_field": "192.168.0.2", "boolean": true, "date_nanos": "2020-10-29T12:12:12.987654321Z", "date": "2020-10-29T12:12:12.987Z" }' - '{ "index": { "_index": "test-iodvq", "_id": "3" } }' - - '{ "some_keyword": "5", "byte": 122, "double": 102.0, "float": "802.0", "half_float": "402.0", "integer": 1292, "long": 13458, "short": 152, "unsigned_long": 10223372036854775802, "ip_field": "192.168.0.3" }' + - '{ "some_keyword": "5", "byte": 122, "double": 102.0, "float": "802.0", "half_float": "402.0", "integer": 1292, "long": 13458, "short": 152, "unsigned_long": 10223372036854775802, "ip_field": "192.168.0.3", "boolean": false, "date_nanos": "2024-10-29T12:12:12.987654321Z", "date": "2024-10-29T12:12:12.987Z" }' - do: search: @@ -183,6 +195,17 @@ - match: {hits.total: 1} + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + term: + boolean: true + + - match: { hits.total: 2 } + - do: search: rest_total_hits_as_int: true @@ -282,6 +305,17 @@ - match: { hits.total: 2 } + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + terms: + boolean: [true, false] + + - match: { hits.total: 3 } + - do: search: rest_total_hits_as_int: true @@ -415,6 +449,92 @@ - match: { hits.total: 2 } + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + range: { + date_nanos: { + gte: "2018-10-29T12:12:12.123456789Z" + }, + } + + - match: { hits.total: 3 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + range: { + date: { + gte: "2018-10-29T12:12:12.987Z", + lte: "2020-10-29T12:12:12.987Z" + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: 
test-iodvq + body: + query: + range: { + boolean: { + gte: true + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + range: { + boolean: { + lte: true + }, + } + + - match: { hits.total: 3 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + range: { + boolean: { + lte: true, + gte: false + }, + } + + - match: { hits.total: 3 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + range: { + boolean: { + lte: false, + gte: true + }, + } + + - match: { hits.total: 0 } --- "search on fields with only index enabled": - do: @@ -463,6 +583,18 @@ type: ip index: true doc_values: false + boolean: + type: boolean + index: true + doc_values: false + date_nanos: + type: date_nanos + index: true + doc_values: false + date: + type: date + index: true + doc_values: false - do: bulk: @@ -470,11 +602,11 @@ refresh: true body: - '{"index": {"_index": "test-index", "_id": "1" }}' - - '{ "some_keyword": "ingesting some random keyword data", "byte": 120, "double": 100.0, "float": "800.0", "half_float": "400.0", "integer": 1290, "long": 13456, "short": 150, "unsigned_long": 10223372036854775800, "ip_field": "192.168.0.1" }' + - '{ "some_keyword": "ingesting some random keyword data", "byte": 120, "double": 100.0, "float": "800.0", "half_float": "400.0", "integer": 1290, "long": 13456, "short": 150, "unsigned_long": 10223372036854775800, "ip_field": "192.168.0.1", "boolean": true, "date_nanos": "2018-10-29T12:12:12.123456789Z", "date": "2018-10-29T12:12:12.987Z" }' - '{ "index": { "_index": "test-index", "_id": "2" }}' - - '{ "some_keyword": "400", "byte": 121, "double": 101.0, "float": "801.0", "half_float": "401.0", "integer": 1291, "long": 13457, "short": 151, "unsigned_long": 10223372036854775801, "ip_field": "192.168.0.2" }' + - '{ "some_keyword": "400", "byte": 121, "double": 101.0, "float": "801.0", "half_float": "401.0", 
"integer": 1291, "long": 13457, "short": 151, "unsigned_long": 10223372036854775801, "ip_field": "192.168.0.2", "boolean": true, "date_nanos": "2020-10-29T12:12:12.123456789Z", "date": "2020-10-29T12:12:12.987Z" }' - '{ "index": { "_index": "test-index", "_id": "3" } }' - - '{ "some_keyword": "5", "byte": 122, "double": 102.0, "float": "802.0", "half_float": "402.0", "integer": 1292, "long": 13458, "short": 152, "unsigned_long": 10223372036854775802, "ip_field": "192.168.0.3" }' + - '{ "some_keyword": "5", "byte": 122, "double": 102.0, "float": "802.0", "half_float": "402.0", "integer": 1292, "long": 13458, "short": 152, "unsigned_long": 10223372036854775802, "ip_field": "192.168.0.3", "boolean": false, "date_nanos": "2024-10-29T12:12:12.123456789Z", "date": "2024-10-29T12:12:12.987Z" }' - do: search: @@ -600,6 +732,18 @@ - match: {hits.total: 1} + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + term: + boolean: true + + - match: { hits.total: 2 } + + - do: search: rest_total_hits_as_int: true @@ -699,6 +843,17 @@ - match: { hits.total: 2 } + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + terms: + boolean: [true, false] + + - match: { hits.total: 3 } + - do: search: rest_total_hits_as_int: true @@ -831,6 +986,93 @@ lte: "192.168.0.2" - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + range: { + date_nanos: { + gte: "2018-10-29T12:12:12.123456789Z" + }, + } + + - match: { hits.total: 3 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + range: { + date: { + gte: "2018-10-29T12:12:12.987Z", + lte: "2020-10-29T12:12:12.987Z" + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + range: { + boolean: { + gte: true + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: 
test-index + body: + query: + range: { + boolean: { + lte: true + }, + } + + - match: { hits.total: 3 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + range: { + boolean: { + lte: true, + gte: false + }, + } + + - match: { hits.total: 3 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + range: { + boolean: { + lte: false, + gte: true + }, + } + + - match: { hits.total: 0 } --- "search on fields with only doc_values enabled": - skip: @@ -883,6 +1125,18 @@ type: ip index: false doc_values: true + boolean: + type: boolean + index: false + doc_values: true + date_nanos: + type: date_nanos + index: false + doc_values: true + date: + type: date + index: false + doc_values: true - do: bulk: @@ -890,11 +1144,11 @@ refresh: true body: - '{"index": {"_index": "test-doc-values", "_id": "1" }}' - - '{ "some_keyword": "ingesting some random keyword data", "byte": 120, "double": 100.0, "float": "800.0", "half_float": "400.0", "integer": 1290, "long": 13456, "short": 150, "unsigned_long": 10223372036854775800, "ip_field": "192.168.0.1" }' + - '{ "some_keyword": "ingesting some random keyword data", "byte": 120, "double": 100.0, "float": "800.0", "half_float": "400.0", "integer": 1290, "long": 13456, "short": 150, "unsigned_long": 10223372036854775800, "ip_field": "192.168.0.1", "boolean": true, "date_nanos": "2018-10-29T12:12:12.123456789Z", "date": "2018-10-29T12:12:12.987Z" }' - '{ "index": { "_index": "test-doc-values", "_id": "2" }}' - - '{ "some_keyword": "400", "byte": 121, "double": 101.0, "float": "801.0", "half_float": "401.0", "integer": 1291, "long": 13457, "short": 151, "unsigned_long": 10223372036854775801, "ip_field": "192.168.0.2" }' + - '{ "some_keyword": "400", "byte": 121, "double": 101.0, "float": "801.0", "half_float": "401.0", "integer": 1291, "long": 13457, "short": 151, "unsigned_long": 10223372036854775801, "ip_field": "192.168.0.2", "boolean": true, "date_nanos": 
"2020-10-29T12:12:12.123456789Z", "date": "2020-10-29T12:12:12.987Z" }' - '{ "index": { "_index": "test-doc-values", "_id": "3" } }' - - '{ "some_keyword": "5", "byte": 122, "double": 102.0, "float": "802.0", "half_float": "402.0", "integer": 1292, "long": 13458, "short": 152, "unsigned_long": 10223372036854775802, "ip_field": "192.168.0.3" }' + - '{ "some_keyword": "5", "byte": 122, "double": 102.0, "float": "802.0", "half_float": "402.0", "integer": 1292, "long": 13458, "short": 152, "unsigned_long": 10223372036854775802, "ip_field": "192.168.0.3", "boolean": false, "date_nanos": "2024-10-29T12:12:12.123456789Z", "date": "2024-10-29T12:12:12.987Z" }' - do: search: @@ -1019,6 +1273,17 @@ - match: { hits.total: 1 } + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + term: + boolean: false + + - match: { hits.total: 1 } + - do: search: rest_total_hits_as_int: true @@ -1107,6 +1372,17 @@ - match: { hits.total: 2 } + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + terms: + boolean: [true, false] + + - match: { hits.total: 3 } + - do: search: rest_total_hits_as_int: true @@ -1239,3 +1515,90 @@ lte: "192.168.0.2" - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + range: { + date_nanos: { + gte: "2018-10-29T12:12:12.123456789Z" + }, + } + + - match: { hits.total: 3 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + range: { + date: { + gte: "2018-10-29T12:12:12.987Z", + lte: "2020-10-29T12:12:12.987Z" + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + range: { + boolean: { + gte: true + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + range: { + boolean: { + lte: true + }, + } + + - match: { hits.total: 3 } + 
+ - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + range: { + boolean: { + lte: true, + gte: false + }, + } + + - match: { hits.total: 3 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + range: { + boolean: { + lte: false, + gte: true + }, + } + + - match: { hits.total: 0 } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/360_from_and_size.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/360_from_and_size.yml new file mode 100644 index 0000000000000..7f3fb77b86366 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/360_from_and_size.yml @@ -0,0 +1,113 @@ +setup: + - do: + indices.create: + index: test_1 + - do: + index: + index: test_1 + id: 1 + body: { foo: bar } + - do: + index: + index: test_1 + id: 2 + body: { foo: bar } + - do: + index: + index: test_1 + id: 3 + body: { foo: bar } + + - do: + index: + index: test_1 + id: 4 + body: { foo: bar } + - do: + indices.refresh: + index: [test_1] + +--- +teardown: + - do: + indices.delete: + index: test_1 + ignore: 404 + +--- +"Throws exception if from or size query parameter is negative": + - skip: + version: " - 2.13.99" + reason: "fixed in 2.14.0" + - do: + catch: '/\[from\] parameter cannot be negative, found \[-5\]/' + search: + index: test_1 + from: -5 + size: 10 + body: + query: + match: + foo: bar + + - do: + catch: '/\[size\] parameter cannot be negative, found \[-1\]/' + search: + index: test_1 + from: 0 + size: -1 + body: + query: + match: + foo: bar + + - do: + search: + index: test_1 + from: 0 + size: 10 + body: + query: + match: + foo: bar + + - match: {hits.total.value: 4} + +--- +"Throws exception if from or size request body parameter is negative": + - skip: + version: " - 2.13.99" + reason: "fixed in 2.14.0" + - do: + catch: '/\[from\] parameter cannot be negative, found \[-5\]/' + search: + index: test_1 + body: + from: -5 + size: 10 + query: + 
match: + foo: bar + + - do: + catch: '/\[size\] parameter cannot be negative, found \[-1\]/' + search: + index: test_1 + body: + from: 0 + size: -1 + query: + match: + foo: bar + + - do: + search: + index: test_1 + body: + from: 0 + size: 10 + query: + match: + foo: bar + + - match: {hits.total.value: 4} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search_pipeline/10_basic.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search_pipeline/10_basic.yml index 60c0706415bc2..a4877975a0052 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search_pipeline/10_basic.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search_pipeline/10_basic.yml @@ -1,8 +1,8 @@ --- "Test basic pipeline crud": - skip: - version: " - 2.6.99" - reason: "Added in 2.7.0" + version: " - 2.8.99" + reason: "Added in 2.9.0" - do: search_pipeline.put: id: "my_pipeline" @@ -32,8 +32,8 @@ --- "Test Put Versioned Pipeline": - skip: - version: " - 2.6.99" - reason: "Added in 2.7.0" + version: " - 2.8.99" + reason: "Added in 2.9.0" - do: search_pipeline.put: id: "my_pipeline" @@ -125,8 +125,8 @@ --- "Test Get All Pipelines": - skip: - version: " - 2.6.99" - reason: "Added in 2.7.0" + version: " - 2.8.99" + reason: "Added in 2.9.0" - do: search_pipeline.put: id: "first_pipeline" @@ -152,8 +152,8 @@ --- "Test invalid config": - skip: - version: " - 2.6.99" - reason: "Added in 2.7.0" + version: " - 2.8.99" + reason: "Added in 2.9.0" - do: catch: /parse_exception/ search_pipeline.put: diff --git a/server/build.gradle b/server/build.gradle index a076a6bee36bf..9714f13ec67d6 100644 --- a/server/build.gradle +++ b/server/build.gradle @@ -98,7 +98,7 @@ dependencies { // percentiles aggregation api 'com.tdunning:t-digest:3.3' // precentil ranks aggregation - api 'org.hdrhistogram:HdrHistogram:2.1.12' + api 'org.hdrhistogram:HdrHistogram:2.2.1' // lucene spatial api "org.locationtech.spatial4j:spatial4j:${versions.spatial4j}", optional @@ -173,6 +173,22 @@ 
tasks.named("testingConventions").configure { } } +// Defaults to the latest released version unless the 'japicmp.compare.version' system property is set +def japicmpCompareTarget = System.getProperty("japicmp.compare.version") +if (japicmpCompareTarget == null) { /* use latest released version */ + // Read the list from maven central. + // Fetch the metadata and parse the xml into Version instances, pick the latest one + japicmpCompareTarget = new URL('https://repo1.maven.org/maven2/org/opensearch/opensearch/maven-metadata.xml').openStream().withStream { s -> + new XmlParser().parse(s) + .versioning.versions.version + .collect { it.text() }.findAll { it ==~ /\d+\.\d+\.\d+/ } + .collect { org.opensearch.gradle.Version.fromString(it) } + .toSorted() + .last() + .toString() + } +} + def generateModulesList = tasks.register("generateModulesList") { List modules = project(':modules').subprojects.collect { it.name } File modulesFile = new File(buildDir, 'generated-resources/modules.txt') @@ -380,9 +396,10 @@ tasks.named("sourcesJar").configure { } } -/** Compares the current build against a snapshot build */ +/** Compares the current build against the latest released version or the version supplied through 'japicmp.compare.version' system property */ tasks.register("japicmp", me.champeau.gradle.japicmp.JapicmpTask) { - oldClasspath.from(files("${buildDir}/snapshot/opensearch-${version}.jar")) + logger.info("Comparing public APIs from ${version} to ${japicmpCompareTarget}") + oldClasspath.from(files("${buildDir}/japicmp-target/opensearch-${japicmpCompareTarget}.jar")) newClasspath.from(tasks.named('jar')) onlyModified = true failOnModification = true @@ -390,50 +407,48 @@ tasks.register("japicmp", me.champeau.gradle.japicmp.JapicmpTask) { annotationIncludes = ['@org.opensearch.common.annotation.PublicApi', '@org.opensearch.common.annotation.DeprecatedApi'] txtOutputFile = layout.buildDirectory.file("reports/java-compatibility/report.txt") htmlOutputFile = layout.buildDirectory.file("reports/java-compatibility/report.html") - dependsOn 
downloadSnapshot + dependsOn downloadJapicmpCompareTarget } /** If the Java API Comparison task failed, print a hint if the change should be merged from its target branch */ gradle.taskGraph.afterTask { Task task, TaskState state -> if (task.name == 'japicmp' && state.failure != null) { - def sha = getGitShaFromJar("${buildDir}/snapshot/opensearch-${version}.jar") - logger.info("Incompatiable java api from snapshot jar built off of commit ${sha}") - - if (!inHistory(sha)) { - logger.warn('\u001B[33mPlease merge from the target branch and run this task again.\u001B[0m') - } + logger.info("Public APIs changes incompatiable with ${japicmpCompareTarget} target have been detected") } } -/** Downloads latest snapshot from maven repository */ -tasks.register("downloadSnapshot", Copy) { +/** Downloads latest released version from maven repository */ +tasks.register("downloadJapicmpCompareTarget", Copy) { def mavenSnapshotRepoUrl = "https://aws.oss.sonatype.org/content/repositories/snapshots/" def groupId = "org.opensearch" def artifactId = "opensearch" - def repos = project.getRepositories(); - MavenArtifactRepository opensearchRepo = repos.maven(repo -> { - repo.setName("opensearch-snapshots"); - repo.setUrl(mavenSnapshotRepoUrl); - }); - - repos.exclusiveContent(exclusiveRepo -> { - exclusiveRepo.filter(descriptor -> descriptor.includeGroup(groupId)); - exclusiveRepo.forRepositories(opensearchRepo); - }); + // Add repository for snapshot artifacts if japicmp compare target version is snapshot + if (japicmpCompareTarget.endsWith("-SNAPSHOT")) { + def repos = project.getRepositories(); + MavenArtifactRepository opensearchRepo = repos.maven(repo -> { + repo.setName("opensearch-snapshots"); + repo.setUrl(mavenSnapshotRepoUrl); + }); + + repos.exclusiveContent(exclusiveRepo -> { + exclusiveRepo.filter(descriptor -> descriptor.includeGroup(groupId)); + exclusiveRepo.forRepositories(opensearchRepo); + }); + } configurations { - snapshotArtifact { + japicmpCompareTargetArtifact 
{ exclude group: 'org.apache.lucene' } } dependencies { - snapshotArtifact("${groupId}:${artifactId}:${version}:") + japicmpCompareTargetArtifact("${groupId}:${artifactId}:${japicmpCompareTarget}:") } - from configurations.snapshotArtifact - into "$buildDir/snapshot" + from configurations.japicmpCompareTargetArtifact + into "$buildDir/japicmp-target" } /** Check if the sha is in the current history */ diff --git a/server/licenses/HdrHistogram-2.1.12.jar.sha1 b/server/licenses/HdrHistogram-2.1.12.jar.sha1 deleted file mode 100644 index 9d20fa0e5f22d..0000000000000 --- a/server/licenses/HdrHistogram-2.1.12.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -6eb7552156e0d517ae80cc2247be1427c8d90452 \ No newline at end of file diff --git a/server/licenses/HdrHistogram-2.2.1.jar.sha1 b/server/licenses/HdrHistogram-2.2.1.jar.sha1 new file mode 100644 index 0000000000000..68225950d4744 --- /dev/null +++ b/server/licenses/HdrHistogram-2.2.1.jar.sha1 @@ -0,0 +1 @@ +0eb1feb351f64176c377772a30174e582c0274d5 \ No newline at end of file diff --git a/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/create/RemoteCloneIndexIT.java b/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/create/RemoteCloneIndexIT.java index f50e8fd0a38cf..4be049c9a9109 100644 --- a/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/create/RemoteCloneIndexIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/create/RemoteCloneIndexIT.java @@ -40,13 +40,17 @@ */ import org.opensearch.Version; +import org.opensearch.action.admin.cluster.health.ClusterHealthRequest; +import org.opensearch.action.admin.cluster.health.ClusterHealthResponse; import org.opensearch.action.admin.indices.settings.get.GetSettingsResponse; import org.opensearch.action.admin.indices.shrink.ResizeType; import org.opensearch.action.admin.indices.stats.IndicesStatsResponse; +import org.opensearch.client.Requests; import 
org.opensearch.cluster.routing.allocation.decider.EnableAllocationDecider; import org.opensearch.common.settings.Settings; import org.opensearch.core.xcontent.MediaTypeRegistry; import org.opensearch.index.query.TermsQueryBuilder; +import org.opensearch.indices.recovery.RecoverySettings; import org.opensearch.remotestore.RemoteStoreBaseIntegTestCase; import org.opensearch.test.VersionUtils; @@ -125,7 +129,9 @@ public void testCreateCloneIndex() { .cluster() .prepareUpdateSettings() .setTransientSettings( - Settings.builder().put(EnableAllocationDecider.CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING.getKey(), (String) null) + Settings.builder() + .put(EnableAllocationDecider.CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING.getKey(), (String) null) + .put(RecoverySettings.INDICES_INTERNAL_REMOTE_UPLOAD_TIMEOUT.getKey(), (String) null) ) .get(); } @@ -156,7 +162,11 @@ public void testCreateCloneIndexFailure() throws ExecutionException, Interrupted client().admin() .cluster() .prepareUpdateSettings() - .setTransientSettings(Settings.builder().put(EnableAllocationDecider.CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING.getKey(), "none")) + .setTransientSettings( + Settings.builder() + .put(EnableAllocationDecider.CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING.getKey(), "none") + .put(RecoverySettings.INDICES_INTERNAL_REMOTE_UPLOAD_TIMEOUT.getKey(), "10s") + ) .get(); try { setFailRate(REPOSITORY_NAME, 100); @@ -168,9 +178,14 @@ public void testCreateCloneIndexFailure() throws ExecutionException, Interrupted .setWaitForActiveShards(0) .setSettings(Settings.builder().put("index.number_of_replicas", 0).putNull("index.blocks.write").build()) .get(); - - Thread.sleep(2000); - ensureYellow("target"); + // waiting more than waitForRemoteStoreSync's sleep time of 30 sec to deterministically fail + Thread.sleep(40000); + ensureRed("target"); + ClusterHealthRequest healthRequest = Requests.clusterHealthRequest() + .waitForNoRelocatingShards(true) + .waitForNoInitializingShards(true); + ClusterHealthResponse 
actionGet = client().admin().cluster().health(healthRequest).actionGet(); + assertEquals(actionGet.getUnassignedShards(), numPrimaryShards); } catch (ExecutionException | InterruptedException e) { throw new RuntimeException(e); @@ -182,11 +197,12 @@ public void testCreateCloneIndexFailure() throws ExecutionException, Interrupted .cluster() .prepareUpdateSettings() .setTransientSettings( - Settings.builder().put(EnableAllocationDecider.CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING.getKey(), (String) null) + Settings.builder() + .put(EnableAllocationDecider.CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING.getKey(), (String) null) + .put(RecoverySettings.INDICES_INTERNAL_REMOTE_UPLOAD_TIMEOUT.getKey(), (String) null) ) .get(); } - } } diff --git a/server/src/internalClusterTest/java/org/opensearch/cluster/ClusterHealthIT.java b/server/src/internalClusterTest/java/org/opensearch/cluster/ClusterHealthIT.java index d63b87cbee6f7..0304e00a49070 100644 --- a/server/src/internalClusterTest/java/org/opensearch/cluster/ClusterHealthIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/cluster/ClusterHealthIT.java @@ -377,7 +377,7 @@ public void testHealthOnClusterManagerFailover() throws Exception { .prepareHealth() .setWaitForEvents(Priority.LANGUID) .setWaitForGreenStatus() - .setClusterManagerNodeTimeout(TimeValue.timeValueMinutes(2)) + .setClusterManagerNodeTimeout(TimeValue.timeValueMinutes(3)) .execute() ); internalCluster().restartNode(internalCluster().getClusterManagerName(), InternalTestCluster.EMPTY_CALLBACK); diff --git a/server/src/internalClusterTest/java/org/opensearch/cluster/metadata/ClusterIndexRefreshIntervalIT.java b/server/src/internalClusterTest/java/org/opensearch/cluster/metadata/ClusterIndexRefreshIntervalIT.java index 25fa7ae7eb8eb..f936b53f52a7b 100644 --- a/server/src/internalClusterTest/java/org/opensearch/cluster/metadata/ClusterIndexRefreshIntervalIT.java +++ 
b/server/src/internalClusterTest/java/org/opensearch/cluster/metadata/ClusterIndexRefreshIntervalIT.java @@ -235,33 +235,19 @@ public void testDefaultRefreshIntervalWithUpdateClusterAndIndexSettings() throws } public void testRefreshIntervalDisabled() throws ExecutionException, InterruptedException { - TimeValue clusterMinimumRefreshInterval = client().settings() - .getAsTime(IndicesService.CLUSTER_MINIMUM_INDEX_REFRESH_INTERVAL_SETTING.getKey(), TimeValue.MINUS_ONE); - boolean createIndexSuccess = clusterMinimumRefreshInterval.equals(TimeValue.MINUS_ONE); String clusterManagerName = internalCluster().getClusterManagerName(); List dataNodes = new ArrayList<>(internalCluster().getDataNodeNames()); Settings settings = Settings.builder() .put(indexSettings()) .put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), IndexSettings.MINIMUM_REFRESH_INTERVAL) .build(); - if (createIndexSuccess) { - createIndex(INDEX_NAME, settings); - ensureYellowAndNoInitializingShards(INDEX_NAME); - ensureGreen(INDEX_NAME); - GetIndexResponse getIndexResponse = client(clusterManagerName).admin().indices().getIndex(new GetIndexRequest()).get(); - IndicesService indicesService = internalCluster().getInstance(IndicesService.class, randomFrom(dataNodes)); - String uuid = getIndexResponse.getSettings().get(INDEX_NAME).get(IndexMetadata.SETTING_INDEX_UUID); - IndexService indexService = indicesService.indexService(new Index(INDEX_NAME, uuid)); - assertEquals(IndexSettings.MINIMUM_REFRESH_INTERVAL, indexService.getRefreshTaskInterval()); - } else { - IllegalArgumentException exception = assertThrows(IllegalArgumentException.class, () -> createIndex(INDEX_NAME, settings)); - assertEquals( - "invalid index.refresh_interval [-1]: cannot be smaller than cluster.minimum.index.refresh_interval [" - + getMinRefreshIntervalForRefreshDisabled() - + "]", - exception.getMessage() - ); - } + createIndex(INDEX_NAME, settings); + ensureGreen(INDEX_NAME); + GetIndexResponse getIndexResponse = 
client(clusterManagerName).admin().indices().getIndex(new GetIndexRequest()).get(); + IndicesService indicesService = internalCluster().getInstance(IndicesService.class, randomFrom(dataNodes)); + String uuid = getIndexResponse.getSettings().get(INDEX_NAME).get(IndexMetadata.SETTING_INDEX_UUID); + IndexService indexService = indicesService.indexService(new Index(INDEX_NAME, uuid)); + assertEquals(IndexSettings.MINIMUM_REFRESH_INTERVAL, indexService.getRefreshTaskInterval()); } protected TimeValue getMinRefreshIntervalForRefreshDisabled() { @@ -366,6 +352,147 @@ public void testClusterMinimumChangeOnIndexWithCustomRefreshInterval() throws Ex assertEquals(customRefreshInterval, indexService.getRefreshTaskInterval()); } + public void testClusterMinimumRefreshIntervalOfMinusOneFails() { + // This test checks that we can not set cluster minimum refresh interval as -1 (or -1ms). + String clusterManagerName = internalCluster().getClusterManagerName(); + String refreshInterval = randomFrom("-1", "-1ms"); + IllegalArgumentException ex = assertThrows( + IllegalArgumentException.class, + () -> client(clusterManagerName).admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().put(CLUSTER_MINIMUM_INDEX_REFRESH_INTERVAL_SETTING.getKey(), refreshInterval)) + .get() + ); + assertEquals( + "failed to parse value [" + refreshInterval + "] for setting [cluster.minimum.index.refresh_interval], must be >= [0ms]", + ex.getMessage() + ); + } + + public void testClusterMinimumRefreshIntervalOfZero() { + // This test checks that we can set the cluster minimum refresh interval as 0. 
+ String clusterManagerName = internalCluster().getClusterManagerName(); + client(clusterManagerName).admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().put(CLUSTER_MINIMUM_INDEX_REFRESH_INTERVAL_SETTING.getKey(), "0")) + .get(); + } + + public void testDefaultRefreshIntervalOfMinusOneIrrespectiveOfMinimum() { + // This test checks that we are able to set the cluster default refresh interval to -1 regardless of what the + // minimum is set to. -1 corresponds to no periodic background refreshes. + String clusterManagerName = internalCluster().getClusterManagerName(); + client(clusterManagerName).admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings( + Settings.builder() + .put(CLUSTER_MINIMUM_INDEX_REFRESH_INTERVAL_SETTING.getKey(), randomFrom("0", "1ms", "1s", "10s")) + .put(CLUSTER_DEFAULT_INDEX_REFRESH_INTERVAL_SETTING.getKey(), randomFrom("-1", "-1ms")) + ) + .get(); + } + + public void testCreateIndexWithMinusOneRefreshInterval() throws ExecutionException, InterruptedException { + // This test checks that we are able to create index with -1 refresh interval using index settings and default interval both.
+ String clusterManagerName = internalCluster().getClusterManagerName(); + client(clusterManagerName).admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings( + Settings.builder() + .put(CLUSTER_MINIMUM_INDEX_REFRESH_INTERVAL_SETTING.getKey(), "10s") + .put(CLUSTER_DEFAULT_INDEX_REFRESH_INTERVAL_SETTING.getKey(), "10s") + ) + .get(); + + Settings indexSettings = Settings.builder() + .put(indexSettings()) + .put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), randomFrom("-1", "-1ms")) + .build(); + createIndex(INDEX_NAME, indexSettings); + ensureGreen(INDEX_NAME); + + IndexService indexService = getIndexServiceFromRandomDataNode(INDEX_NAME); + assertEquals(-1, indexService.getRefreshTaskInterval().millis()); + + client(clusterManagerName).admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().put(CLUSTER_DEFAULT_INDEX_REFRESH_INTERVAL_SETTING.getKey(), randomFrom("-1", "-1ms"))) + .get(); + createIndex(OTHER_INDEX_NAME); + ensureGreen(OTHER_INDEX_NAME); + indexService = getIndexServiceFromRandomDataNode(OTHER_INDEX_NAME); + assertEquals(-1, indexService.getRefreshTaskInterval().millis()); + } + + public void testUpdateIndexWithMinusOneRefreshInterval() throws ExecutionException, InterruptedException { + // This test checks that we are able to update index with -1 refresh interval using index settings and default interval both. 
+ String clusterManagerName = internalCluster().getClusterManagerName(); + client(clusterManagerName).admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings( + Settings.builder() + .put(CLUSTER_MINIMUM_INDEX_REFRESH_INTERVAL_SETTING.getKey(), "10s") + .put(CLUSTER_DEFAULT_INDEX_REFRESH_INTERVAL_SETTING.getKey(), "10s") + ) + .get(); + + createIndex(INDEX_NAME); + ensureGreen(INDEX_NAME); + IndexService indexService = getIndexServiceFromRandomDataNode(INDEX_NAME); + assertEquals(10, indexService.getRefreshTaskInterval().seconds()); + + client(clusterManagerName).admin() + .indices() + .updateSettings( + new UpdateSettingsRequest(INDEX_NAME).settings( + Settings.builder().put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), randomFrom("-1", "-1ms")) + ) + ) + .actionGet(); + assertEquals(-1, indexService.getRefreshTaskInterval().millis()); + + client(clusterManagerName).admin() + .indices() + .updateSettings( + new UpdateSettingsRequest(INDEX_NAME).settings( + Settings.builder().put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), "100s") + ) + ) + .actionGet(); + assertEquals(100, indexService.getRefreshTaskInterval().seconds()); + + client(clusterManagerName).admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().put(CLUSTER_DEFAULT_INDEX_REFRESH_INTERVAL_SETTING.getKey(), randomFrom("-1", "-1ms"))) + .get(); + + client(clusterManagerName).admin() + .indices() + .updateSettings( + new UpdateSettingsRequest(INDEX_NAME).settings( + Settings.builder().putNull(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey()) + ) + ) + .actionGet(); + assertEquals(-1, indexService.getRefreshTaskInterval().millis()); + } + + private IndexService getIndexServiceFromRandomDataNode(String indexName) throws ExecutionException, InterruptedException { + String clusterManagerName = internalCluster().getClusterManagerName(); + List dataNodes = new ArrayList<>(internalCluster().getDataNodeNames()); + GetIndexResponse 
getIndexResponse = client(clusterManagerName).admin().indices().getIndex(new GetIndexRequest()).get(); + IndicesService indicesService = internalCluster().getInstance(IndicesService.class, randomFrom(dataNodes)); + String uuid = getIndexResponse.getSettings().get(indexName).get(IndexMetadata.SETTING_INDEX_UUID); + return indicesService.indexService(new Index(indexName, uuid)); + } + protected TimeValue getDefaultRefreshInterval() { return IndexSettings.DEFAULT_REFRESH_INTERVAL; } diff --git a/server/src/internalClusterTest/java/org/opensearch/discovery/ClusterManagerDisruptionIT.java b/server/src/internalClusterTest/java/org/opensearch/discovery/ClusterManagerDisruptionIT.java index 79f6ba6dfa642..65ca8eab278ef 100644 --- a/server/src/internalClusterTest/java/org/opensearch/discovery/ClusterManagerDisruptionIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/discovery/ClusterManagerDisruptionIT.java @@ -198,11 +198,15 @@ public void testIsolateClusterManagerAndVerifyClusterStateConsensus() throws Exc + nodeState ); } - } - ClusterStateStats clusterStateStats = internalCluster().clusterService().getClusterManagerService().getClusterStateStats(); - assertTrue(clusterStateStats.getUpdateFailed() > 0); + }); + + ClusterStateStats clusterStateStats = internalCluster().clusterService(isolatedNode) + .getClusterManagerService() + .getClusterStateStats(); + assertTrue(clusterStateStats.getUpdateFailed() > 0); + } /** diff --git a/server/src/internalClusterTest/java/org/opensearch/discovery/StableClusterManagerDisruptionIT.java b/server/src/internalClusterTest/java/org/opensearch/discovery/StableClusterManagerDisruptionIT.java index f6f2b2dbfd096..4de79a31d0146 100644 --- a/server/src/internalClusterTest/java/org/opensearch/discovery/StableClusterManagerDisruptionIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/discovery/StableClusterManagerDisruptionIT.java @@ -203,7 +203,7 @@ public void testStaleClusterManagerNotHijackingMajority() throws 
Exception { 3, Settings.builder() .put(LeaderChecker.LEADER_CHECK_TIMEOUT_SETTING.getKey(), "1s") - .put(Coordinator.PUBLISH_TIMEOUT_SETTING.getKey(), "1s") + .put(Coordinator.PUBLISH_TIMEOUT_SETTING.getKey(), "2s") .build() ); ensureStableCluster(3); diff --git a/server/src/internalClusterTest/java/org/opensearch/gateway/RecoveryFromGatewayIT.java b/server/src/internalClusterTest/java/org/opensearch/gateway/RecoveryFromGatewayIT.java index ba03532a9aa2f..bc0557ddc2afa 100644 --- a/server/src/internalClusterTest/java/org/opensearch/gateway/RecoveryFromGatewayIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/gateway/RecoveryFromGatewayIT.java @@ -32,24 +32,31 @@ package org.opensearch.gateway; +import org.apache.lucene.index.CorruptIndexException; import org.opensearch.Version; import org.opensearch.action.admin.cluster.configuration.AddVotingConfigExclusionsAction; import org.opensearch.action.admin.cluster.configuration.AddVotingConfigExclusionsRequest; import org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsAction; import org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsRequest; +import org.opensearch.action.admin.cluster.health.ClusterHealthResponse; import org.opensearch.action.admin.cluster.reroute.ClusterRerouteResponse; import org.opensearch.action.admin.cluster.shards.ClusterSearchShardsGroup; import org.opensearch.action.admin.cluster.shards.ClusterSearchShardsResponse; +import org.opensearch.action.admin.indices.exists.indices.IndicesExistsResponse; import org.opensearch.action.admin.indices.recovery.RecoveryResponse; import org.opensearch.action.admin.indices.stats.IndexStats; +import org.opensearch.action.admin.indices.stats.IndicesStatsResponse; import org.opensearch.action.admin.indices.stats.ShardStats; import org.opensearch.action.support.ActionTestUtils; +import org.opensearch.action.support.master.AcknowledgedResponse; +import org.opensearch.client.Requests; import 
org.opensearch.cluster.ClusterState; import org.opensearch.cluster.coordination.ElectionSchedulerFactory; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.cluster.routing.UnassignedInfo; +import org.opensearch.cluster.routing.allocation.ExistingShardsAllocator; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.settings.Settings; import org.opensearch.common.xcontent.XContentFactory; @@ -62,6 +69,7 @@ import org.opensearch.index.MergePolicyProvider; import org.opensearch.index.engine.Engine; import org.opensearch.index.query.QueryBuilders; +import org.opensearch.index.shard.IndexShard; import org.opensearch.index.shard.ShardPath; import org.opensearch.indices.IndicesService; import org.opensearch.indices.recovery.RecoveryState; @@ -71,6 +79,7 @@ import org.opensearch.indices.store.TransportNodesListShardStoreMetadataHelper; import org.opensearch.plugins.Plugin; import org.opensearch.test.InternalSettingsPlugin; +import org.opensearch.test.InternalTestCluster; import org.opensearch.test.InternalTestCluster.RestartCallback; import org.opensearch.test.OpenSearchIntegTestCase; import org.opensearch.test.OpenSearchIntegTestCase.ClusterScope; @@ -94,6 +103,8 @@ import static java.util.Collections.emptyMap; import static java.util.Collections.emptySet; import static org.opensearch.cluster.coordination.ClusterBootstrapService.INITIAL_CLUSTER_MANAGER_NODES_SETTING; +import static org.opensearch.cluster.health.ClusterHealthStatus.GREEN; +import static org.opensearch.cluster.health.ClusterHealthStatus.RED; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS; import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder; @@ -750,6 +761,276 @@ public void 
testMessyElectionsStillMakeClusterGoGreen() throws Exception { ensureGreen("test"); } + public void testBatchModeEnabled() throws Exception { + internalCluster().startClusterManagerOnlyNodes( + 1, + Settings.builder().put(ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_BATCH_MODE.getKey(), true).build() + ); + List dataOnlyNodes = internalCluster().startDataOnlyNodes(2); + createIndex( + "test", + Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1).build() + ); + ensureGreen("test"); + Settings node0DataPathSettings = internalCluster().dataPathSettings(dataOnlyNodes.get(0)); + Settings node1DataPathSettings = internalCluster().dataPathSettings(dataOnlyNodes.get(1)); + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(dataOnlyNodes.get(0))); + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(dataOnlyNodes.get(1))); + ensureRed("test"); + ensureStableCluster(1); + + logger.info("--> Now do a protective reroute"); + ClusterRerouteResponse clusterRerouteResponse = client().admin().cluster().prepareReroute().setRetryFailed(true).get(); + assertTrue(clusterRerouteResponse.isAcknowledged()); + + ShardsBatchGatewayAllocator gatewayAllocator = internalCluster().getInstance( + ShardsBatchGatewayAllocator.class, + internalCluster().getClusterManagerName() + ); + assertTrue(ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_BATCH_MODE.get(internalCluster().clusterService().getSettings())); + assertEquals(1, gatewayAllocator.getNumberOfStartedShardBatches()); + assertEquals(1, gatewayAllocator.getNumberOfStoreShardBatches()); + + // Now start both data nodes and ensure batch mode is working + logger.info("--> restarting the stopped nodes"); + internalCluster().startDataOnlyNode(Settings.builder().put("node.name", dataOnlyNodes.get(0)).put(node0DataPathSettings).build()); + internalCluster().startDataOnlyNode(Settings.builder().put("node.name", 
dataOnlyNodes.get(1)).put(node1DataPathSettings).build()); + ensureStableCluster(3); + ensureGreen("test"); + assertEquals(0, gatewayAllocator.getNumberOfStartedShardBatches()); + assertEquals(0, gatewayAllocator.getNumberOfStoreShardBatches()); + assertEquals(0, gatewayAllocator.getNumberOfInFlightFetches()); + } + + public void testBatchModeDisabled() throws Exception { + internalCluster().startClusterManagerOnlyNodes( + 1, + Settings.builder().put(ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_BATCH_MODE.getKey(), false).build() + ); + List dataOnlyNodes = internalCluster().startDataOnlyNodes(2); + createIndex( + "test", + Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1).build() + ); + + ensureGreen("test"); + Settings node0DataPathSettings = internalCluster().dataPathSettings(dataOnlyNodes.get(0)); + Settings node1DataPathSettings = internalCluster().dataPathSettings(dataOnlyNodes.get(1)); + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(dataOnlyNodes.get(0))); + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(dataOnlyNodes.get(1))); + ensureStableCluster(1); + + logger.info("--> Now do a protective reroute"); + ClusterRerouteResponse clusterRerouteResponse = client().admin().cluster().prepareReroute().setRetryFailed(true).get(); + assertTrue(clusterRerouteResponse.isAcknowledged()); + + ShardsBatchGatewayAllocator gatewayAllocator = internalCluster().getInstance( + ShardsBatchGatewayAllocator.class, + internalCluster().getClusterManagerName() + ); + ensureRed("test"); + + assertFalse(ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_BATCH_MODE.get(internalCluster().clusterService().getSettings())); + + // assert no batches created + assertEquals(0, gatewayAllocator.getNumberOfStartedShardBatches()); + assertEquals(0, gatewayAllocator.getNumberOfStoreShardBatches()); + + logger.info("--> restarting the stopped nodes"); + 
internalCluster().startDataOnlyNode(Settings.builder().put("node.name", dataOnlyNodes.get(0)).put(node0DataPathSettings).build()); + internalCluster().startDataOnlyNode(Settings.builder().put("node.name", dataOnlyNodes.get(1)).put(node1DataPathSettings).build()); + ensureStableCluster(3); + ensureGreen("test"); + } + + public void testNBatchesCreationAndAssignment() throws Exception { + // we will reduce batch size to 5 to make sure we have enough batches to test assignment + // Total number of primary shards = 50 (50 indices*1) + // Total number of replica shards = 50 (50 indices*1) + // Total batches creation for primaries and replicas will be 10 each + + internalCluster().startClusterManagerOnlyNodes( + 1, + Settings.builder().put(ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_BATCH_MODE.getKey(), true).build() + ); + List dataOnlyNodes = internalCluster().startDataOnlyNodes(2); + createNIndices(50, "test"); + ensureStableCluster(3); + IndicesStatsResponse indicesStats = dataNodeClient().admin().indices().prepareStats().get(); + assertThat(indicesStats.getSuccessfulShards(), equalTo(100)); + ClusterHealthResponse health = client().admin() + .cluster() + .health(Requests.clusterHealthRequest().waitForGreenStatus().timeout("1m")) + .actionGet(); + assertFalse(health.isTimedOut()); + assertEquals(GREEN, health.getStatus()); + + String clusterManagerName = internalCluster().getClusterManagerName(); + Settings clusterManagerDataPathSettings = internalCluster().dataPathSettings(clusterManagerName); + Settings node0DataPathSettings = internalCluster().dataPathSettings(dataOnlyNodes.get(0)); + Settings node1DataPathSettings = internalCluster().dataPathSettings(dataOnlyNodes.get(1)); + + internalCluster().stopCurrentClusterManagerNode(); + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(dataOnlyNodes.get(0))); + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(dataOnlyNodes.get(1))); + + // Now start cluster manager node and post that 
verify batches created + internalCluster().startClusterManagerOnlyNodes( + 1, + Settings.builder() + .put("node.name", clusterManagerName) + .put(clusterManagerDataPathSettings) + .put(ShardsBatchGatewayAllocator.GATEWAY_ALLOCATOR_BATCH_SIZE.getKey(), 5) + .put(ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_BATCH_MODE.getKey(), true) + .build() + ); + ensureStableCluster(1); + + logger.info("--> Now do a protective reroute"); // to avoid any race condition in test + ClusterRerouteResponse clusterRerouteResponse = client().admin().cluster().prepareReroute().setRetryFailed(true).get(); + assertTrue(clusterRerouteResponse.isAcknowledged()); + + ShardsBatchGatewayAllocator gatewayAllocator = internalCluster().getInstance( + ShardsBatchGatewayAllocator.class, + internalCluster().getClusterManagerName() + ); + assertTrue(ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_BATCH_MODE.get(internalCluster().clusterService().getSettings())); + assertEquals(10, gatewayAllocator.getNumberOfStartedShardBatches()); + assertEquals(10, gatewayAllocator.getNumberOfStoreShardBatches()); + health = client(internalCluster().getClusterManagerName()).admin().cluster().health(Requests.clusterHealthRequest()).actionGet(); + assertFalse(health.isTimedOut()); + assertEquals(RED, health.getStatus()); + assertEquals(100, health.getUnassignedShards()); + assertEquals(0, health.getInitializingShards()); + assertEquals(0, health.getActiveShards()); + assertEquals(0, health.getRelocatingShards()); + assertEquals(0, health.getNumberOfDataNodes()); + + // Now start both data nodes and ensure batch mode is working + logger.info("--> restarting the stopped nodes"); + internalCluster().startDataOnlyNode(Settings.builder().put("node.name", dataOnlyNodes.get(0)).put(node0DataPathSettings).build()); + internalCluster().startDataOnlyNode(Settings.builder().put("node.name", dataOnlyNodes.get(1)).put(node1DataPathSettings).build()); + ensureStableCluster(3); + + // wait for cluster to turn green + health = 
client().admin().cluster().health(Requests.clusterHealthRequest().waitForGreenStatus().timeout("5m")).actionGet(); + assertFalse(health.isTimedOut()); + assertEquals(GREEN, health.getStatus()); + assertEquals(0, health.getUnassignedShards()); + assertEquals(0, health.getInitializingShards()); + assertEquals(100, health.getActiveShards()); + assertEquals(0, health.getRelocatingShards()); + assertEquals(2, health.getNumberOfDataNodes()); + assertEquals(0, gatewayAllocator.getNumberOfStartedShardBatches()); + assertEquals(0, gatewayAllocator.getNumberOfStoreShardBatches()); + } + + public void testCulpritShardInBatch() throws Exception { + internalCluster().startClusterManagerOnlyNodes( + 1, + Settings.builder().put(ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_BATCH_MODE.getKey(), true).build() + ); + List dataOnlyNodes = internalCluster().startDataOnlyNodes(3); + createNIndices(4, "test"); + ensureStableCluster(4); + ClusterHealthResponse health = client().admin() + .cluster() + .health(Requests.clusterHealthRequest().waitForGreenStatus().timeout("5m")) + .actionGet(); + assertFalse(health.isTimedOut()); + assertEquals(GREEN, health.getStatus()); + assertEquals(8, health.getActiveShards()); + + String culpritShardIndexName = "test0"; + Index idx = resolveIndex(culpritShardIndexName); + for (String node : internalCluster().nodesInclude(culpritShardIndexName)) { + IndicesService indexServices = internalCluster().getInstance(IndicesService.class, node); + IndexService indexShards = indexServices.indexServiceSafe(idx); + Integer shardId = 0; + IndexShard shard = indexShards.getShard(0); + logger.debug("--> failing shard [{}] on node [{}]", shardId, node); + shard.failShard("test", new CorruptIndexException("test corrupted", "")); + logger.debug("--> failed shard [{}] on node [{}]", shardId, node); + } + + String clusterManagerName = internalCluster().getClusterManagerName(); + Settings clusterManagerDataPathSettings = 
internalCluster().dataPathSettings(clusterManagerName); + Settings node0DataPathSettings = internalCluster().dataPathSettings(dataOnlyNodes.get(0)); + Settings node1DataPathSettings = internalCluster().dataPathSettings(dataOnlyNodes.get(1)); + Settings node2DataPathSettings = internalCluster().dataPathSettings(dataOnlyNodes.get(2)); + + internalCluster().stopCurrentClusterManagerNode(); + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(dataOnlyNodes.get(0))); + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(dataOnlyNodes.get(1))); + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(dataOnlyNodes.get(2))); + + // Now start cluster manager node and post that verify batches created + internalCluster().startClusterManagerOnlyNodes( + 1, + Settings.builder() + .put("node.name", clusterManagerName) + .put(clusterManagerDataPathSettings) + .put(ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_BATCH_MODE.getKey(), true) + .build() + ); + ensureStableCluster(1); + + logger.info("--> Now do a protective reroute"); // to avoid any race condition in test + ClusterRerouteResponse clusterRerouteResponse = client().admin().cluster().prepareReroute().setRetryFailed(true).get(); + assertTrue(clusterRerouteResponse.isAcknowledged()); + + ShardsBatchGatewayAllocator gatewayAllocator = internalCluster().getInstance( + ShardsBatchGatewayAllocator.class, + internalCluster().getClusterManagerName() + ); + assertTrue(ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_BATCH_MODE.get(internalCluster().clusterService().getSettings())); + assertEquals(1, gatewayAllocator.getNumberOfStartedShardBatches()); + assertEquals(1, gatewayAllocator.getNumberOfStoreShardBatches()); + assertTrue(clusterRerouteResponse.isAcknowledged()); + health = client(internalCluster().getClusterManagerName()).admin().cluster().health(Requests.clusterHealthRequest()).actionGet(); + assertFalse(health.isTimedOut()); + assertEquals(RED, health.getStatus()); + 
assertEquals(8, health.getUnassignedShards()); + assertEquals(0, health.getInitializingShards()); + assertEquals(0, health.getActiveShards()); + assertEquals(0, health.getRelocatingShards()); + assertEquals(0, health.getNumberOfDataNodes()); + + logger.info("--> restarting the stopped nodes"); + internalCluster().startDataOnlyNode(Settings.builder().put("node.name", dataOnlyNodes.get(0)).put(node0DataPathSettings).build()); + internalCluster().startDataOnlyNode(Settings.builder().put("node.name", dataOnlyNodes.get(1)).put(node1DataPathSettings).build()); + internalCluster().startDataOnlyNode(Settings.builder().put("node.name", dataOnlyNodes.get(2)).put(node2DataPathSettings).build()); + ensureStableCluster(4); + + health = client().admin().cluster().health(Requests.clusterHealthRequest().waitForGreenStatus().timeout("1m")).actionGet(); + + assertEquals(RED, health.getStatus()); + assertTrue(health.isTimedOut()); + assertEquals(0, health.getNumberOfPendingTasks()); + assertEquals(0, health.getNumberOfInFlightFetch()); + assertEquals(6, health.getActiveShards()); + assertEquals(2, health.getUnassignedShards()); + assertEquals(0, health.getInitializingShards()); + assertEquals(0, health.getRelocatingShards()); + assertEquals(3, health.getNumberOfDataNodes()); + } + + private void createNIndices(int n, String prefix) { + + for (int i = 0; i < n; i++) { + createIndex( + prefix + i, + Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1).build() + ); + // index doc 1 + client().prepareIndex(prefix + i).setId("1").setSource("foo", "bar").get(); + + // index doc 2 + client().prepareIndex(prefix + i).setId("2").setSource("foo2", "bar2").get(); + ensureGreen(prefix + i); + } + } + public void testSingleShardFetchUsingBatchAction() { String indexName = "test"; int numOfShards = 1; @@ -909,6 +1190,56 @@ public void testShardStoreFetchCorruptedIndexUsingBatchAction() throws Exception
assertNodeStoreFilesMetadataSuccessCase(nodeStoreFilesMetadata.get(shardId2), shardId2); } + /** Verifies that a red index can be deleted while batch-mode allocation is enabled: starts one cluster-manager-only node and two data-only nodes with EXISTING_SHARDS_ALLOCATOR_BATCH_MODE set to true, creates three indices with 2 shards and 0 replicas plus "testg" with 1 shard and 1 replica, stops a random data node, expects "test", "test1" and "test2" to be red, triggers a reroute, then deletes "test" and asserts the delete is acknowledged and the index no longer exists. */ public void testDeleteRedIndexInBatchMode() throws Exception { + internalCluster().startClusterManagerOnlyNodes( + 1, + Settings.builder().put(ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_BATCH_MODE.getKey(), true).build() + ); + List dataOnlyNodes = internalCluster().startDataOnlyNodes( + 2, + Settings.builder().put(ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_BATCH_MODE.getKey(), true).build() + ); + createIndex( + "test", + Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 2).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).build() + ); + createIndex( + "test1", + Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 2).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).build() + ); + createIndex( + "test2", + Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 2).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).build() + ); + createIndex( + "testg", + Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1).build() + ); + + ensureGreen("test", "test1", "test2", "testg"); + internalCluster().stopRandomDataNode(); + ensureStableCluster(2); + + ShardsBatchGatewayAllocator gatewayAllocator = internalCluster().getInstance( + ShardsBatchGatewayAllocator.class, + internalCluster().getClusterManagerName() + ); + ensureRed("test", "test1", "test2"); + + assertTrue(ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_BATCH_MODE.get(internalCluster().clusterService().getSettings())); + + logger.info("--> Now do a reroute so batches are created"); // to avoid any race condition in test + ClusterRerouteResponse clusterRerouteResponse = client().admin().cluster().prepareReroute().setRetryFailed(true).get(); + assertTrue(clusterRerouteResponse.isAcknowledged()); + + AcknowledgedResponse deleteIndexResponse = client().admin().indices().prepareDelete("test").get(); +
assertTrue(deleteIndexResponse.isAcknowledged()); + + ensureYellow("testg"); + IndicesExistsResponse indexExistResponse = client().admin().indices().prepareExists("test").get(); + assertFalse(indexExistResponse.isExists()); + } + private void prepareIndices(String[] indices, int numberOfPrimaryShards, int numberOfReplicaShards) { for (String index : indices) { createIndex( diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/CacheStatsAPIIndicesRequestCacheIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/CacheStatsAPIIndicesRequestCacheIT.java new file mode 100644 index 0000000000000..de7a52761c77c --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/indices/CacheStatsAPIIndicesRequestCacheIT.java @@ -0,0 +1,291 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.indices; + +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.opensearch.action.admin.cluster.node.stats.NodesStatsRequest; +import org.opensearch.action.admin.cluster.node.stats.NodesStatsResponse; +import org.opensearch.action.admin.indices.stats.CommonStatsFlags; +import org.opensearch.action.search.SearchResponse; +import org.opensearch.client.Client; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.Randomness; +import org.opensearch.common.cache.CacheType; +import org.opensearch.common.cache.service.NodeCacheStats; +import org.opensearch.common.cache.stats.ImmutableCacheStats; +import org.opensearch.common.cache.stats.ImmutableCacheStatsHolderTests; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; +import org.opensearch.common.xcontent.XContentFactory; +import org.opensearch.common.xcontent.XContentHelper; +import
org.opensearch.core.xcontent.MediaTypeRegistry; +import org.opensearch.core.xcontent.ToXContent; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.index.cache.request.RequestCacheStats; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.test.OpenSearchIntegTestCase; +import org.opensearch.test.ParameterizedStaticSettingsOpenSearchIntegTestCase; +import org.opensearch.test.hamcrest.OpenSearchAssertions; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertSearchResponse; + +// Use a single data node to simplify logic about cache stats across different shards. +/** Integration tests for the request cache statistics returned by the nodes stats API (CACHE_STATS metric), run with the PLUGGABLE_CACHE feature flag set to "true". */ @OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 1) +public class CacheStatsAPIIndicesRequestCacheIT extends ParameterizedStaticSettingsOpenSearchIntegTestCase { + /** Passes the parameterized settings to the superclass constructor. */ public CacheStatsAPIIndicesRequestCacheIT(Settings settings) { + super(settings); + } + + /** Supplies the single parameter set for this suite: settings with FeatureFlags.PLUGGABLE_CACHE set to "true". */ @ParametersFactory + public static Collection parameters() { + return Arrays.asList(new Object[] { Settings.builder().put(FeatureFlags.PLUGGABLE_CACHE, "true").build() }); + } + + /** Creates two indices, searches index1 twice and index2 once, then checks the cache stats returned when aggregating by index, by shard, and by index and shard together. */ public void testCacheStatsAPIWIthOnHeapCache() throws Exception { + String index1Name = "index1"; + String index2Name = "index2"; + Client client = client(); + + startIndex(client, index1Name); + startIndex(client, index2Name); + + // Search twice for the same doc in index 1 + for (int i = 0; i < 2; i++) { + searchIndex(client, index1Name, ""); + } + + // Search once for a doc in index 2 + searchIndex(client, index2Name, ""); + + // First, aggregate by indices only + Map xContentMap = getNodeCacheStatsXContentMap(client, List.of(IndicesRequestCache.INDEX_DIMENSION_NAME)); + + List index1Keys =
List.of(CacheType.INDICES_REQUEST_CACHE.getValue(), IndicesRequestCache.INDEX_DIMENSION_NAME, index1Name); + // Since we searched twice, we expect to see 1 hit, 1 miss and 1 entry for index 1 + ImmutableCacheStats expectedStats = new ImmutableCacheStats(1, 1, 0, 0, 1); + checkCacheStatsAPIResponse(xContentMap, index1Keys, expectedStats, false, true); + // Get the request size for one request, so we can reuse it for next index + int requestSize = (int) ((Map) ImmutableCacheStatsHolderTests.getValueFromNestedXContentMap( + xContentMap, + index1Keys + )).get(ImmutableCacheStats.Fields.SIZE_IN_BYTES); + assertTrue(requestSize > 0); + + List index2Keys = List.of(CacheType.INDICES_REQUEST_CACHE.getValue(), IndicesRequestCache.INDEX_DIMENSION_NAME, index2Name); + // We searched once in index 2, we expect 1 miss + 1 entry + expectedStats = new ImmutableCacheStats(0, 1, 0, requestSize, 1); + checkCacheStatsAPIResponse(xContentMap, index2Keys, expectedStats, true, true); + + // The total stats for the node should be 1 hit, 2 misses, and 2 entries + expectedStats = new ImmutableCacheStats(1, 2, 0, 2 * requestSize, 2); + List totalStatsKeys = List.of(CacheType.INDICES_REQUEST_CACHE.getValue()); + checkCacheStatsAPIResponse(xContentMap, totalStatsKeys, expectedStats, true, true); + + // Aggregate by shards only + xContentMap = getNodeCacheStatsXContentMap(client, List.of(IndicesRequestCache.SHARD_ID_DIMENSION_NAME)); + + List index1Shard0Keys = List.of( + CacheType.INDICES_REQUEST_CACHE.getValue(), + IndicesRequestCache.SHARD_ID_DIMENSION_NAME, + "[" + index1Name + "][0]" + ); + + expectedStats = new ImmutableCacheStats(1, 1, 0, requestSize, 1); + checkCacheStatsAPIResponse(xContentMap, index1Shard0Keys, expectedStats, true, true); + + List index2Shard0Keys = List.of( + CacheType.INDICES_REQUEST_CACHE.getValue(), + IndicesRequestCache.SHARD_ID_DIMENSION_NAME, + "[" + index2Name + "][0]" + ); + expectedStats = new ImmutableCacheStats(0, 1, 0, requestSize, 1); +
checkCacheStatsAPIResponse(xContentMap, index2Shard0Keys, expectedStats, true, true); + + // Aggregate by indices and shards + xContentMap = getNodeCacheStatsXContentMap( + client, + List.of(IndicesRequestCache.INDEX_DIMENSION_NAME, IndicesRequestCache.SHARD_ID_DIMENSION_NAME) + ); + + index1Keys = List.of( + CacheType.INDICES_REQUEST_CACHE.getValue(), + IndicesRequestCache.INDEX_DIMENSION_NAME, + index1Name, + IndicesRequestCache.SHARD_ID_DIMENSION_NAME, + "[" + index1Name + "][0]" + ); + + expectedStats = new ImmutableCacheStats(1, 1, 0, requestSize, 1); + checkCacheStatsAPIResponse(xContentMap, index1Keys, expectedStats, true, true); + + index2Keys = List.of( + CacheType.INDICES_REQUEST_CACHE.getValue(), + IndicesRequestCache.INDEX_DIMENSION_NAME, + index2Name, + IndicesRequestCache.SHARD_ID_DIMENSION_NAME, + "[" + index2Name + "][0]" + ); + + expectedStats = new ImmutableCacheStats(0, 1, 0, requestSize, 1); + checkCacheStatsAPIResponse(xContentMap, index2Keys, expectedStats, true, true); + + } + + // TODO: Add testCacheStatsAPIWithTieredCache when TSC stats implementation PR is merged + + /** Runs a random number (1 to 100) of distinct searches, repeats the first half of them, then checks that hits, misses, evictions and memory size from the indices stats API equal the node-level totals from the cache stats API. */ public void testStatsMatchOldApi() throws Exception { + // The main purpose of this test is to check that the new and old APIs are both correctly estimating memory size, + // using the logic that includes the overhead memory in ICacheKey.
+ String index = "index"; + Client client = client(); + startIndex(client, index); + + int numKeys = Randomness.get().nextInt(100) + 1; + for (int i = 0; i < numKeys; i++) { + searchIndex(client, index, String.valueOf(i)); + } + // Get some hits as well + for (int i = 0; i < numKeys / 2; i++) { + searchIndex(client, index, String.valueOf(i)); + } + + RequestCacheStats oldApiStats = client.admin() + .indices() + .prepareStats(index) + .setRequestCache(true) + .get() + .getTotal() + .getRequestCache(); + assertNotEquals(0, oldApiStats.getMemorySizeInBytes()); + + List xContentMapKeys = List.of(CacheType.INDICES_REQUEST_CACHE.getValue()); + Map xContentMap = getNodeCacheStatsXContentMap(client, List.of()); + ImmutableCacheStats expected = new ImmutableCacheStats( + oldApiStats.getHitCount(), + oldApiStats.getMissCount(), + oldApiStats.getEvictions(), + oldApiStats.getMemorySizeInBytes(), + 0 + ); + // Don't check entries, as the old API doesn't track this + checkCacheStatsAPIResponse(xContentMap, xContentMapKeys, expected, true, false); + } + + /** Requests cache stats with null aggregation levels after a random number of searches and asserts that the "request_cache" object in the response has exactly 6 fields. */ public void testNullLevels() throws Exception { + String index = "index"; + Client client = client(); + startIndex(client, index); + int numKeys = Randomness.get().nextInt(100) + 1; + for (int i = 0; i < numKeys; i++) { + searchIndex(client, index, String.valueOf(i)); + } + Map xContentMap = getNodeCacheStatsXContentMap(client, null); + // Null levels should result in only the total cache stats being returned -> 6 fields inside the response.
+ assertEquals(6, ((Map) xContentMap.get("request_cache")).size()); + } + + /** Creates {@code indexName} with a keyword field "k", the request cache enabled, 1 shard and 0 replicas, indexes one document with k = "hello", and calls ensureSearchable on the index. */ private void startIndex(Client client, String indexName) throws InterruptedException { + assertAcked( + client.admin() + .indices() + .prepareCreate(indexName) + .setMapping("k", "type=keyword") + .setSettings( + Settings.builder() + .put(IndicesRequestCache.INDEX_CACHE_REQUEST_ENABLED_SETTING.getKey(), true) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + ) + .get() + ); + indexRandom(true, client.prepareIndex(indexName).setSource("k", "hello")); + ensureSearchable(indexName); + } + + /** Runs a term query on field "k" for "hello" + {@code searchSuffix} against {@code index} with the request cache flag set, asserts the search succeeded on all shards, and returns the response. */ private SearchResponse searchIndex(Client client, String index, String searchSuffix) { + SearchResponse resp = client.prepareSearch(index) + .setRequestCache(true) + .setQuery(QueryBuilders.termQuery("k", "hello" + searchSuffix)) + .get(); + assertSearchResponse(resp); + OpenSearchAssertions.assertAllSuccessful(resp); + return resp; + } + + /** Requests the CACHE_STATS metric from nodes matching "data:true" with the given aggregation levels, asserts exactly one node responded, renders that node's NodeCacheStats to JSON and returns it parsed as a map. A null {@code aggregationLevels} is passed through as null levels; the "level" param is only set when the list is non-null and non-empty. */ private static Map getNodeCacheStatsXContentMap(Client client, List aggregationLevels) throws IOException { + + CommonStatsFlags statsFlags = new CommonStatsFlags(); + statsFlags.includeAllCacheTypes(); + String[] flagsLevels; + if (aggregationLevels == null) { + flagsLevels = null; + } else { + flagsLevels = aggregationLevels.toArray(new String[0]); + } + statsFlags.setLevels(flagsLevels); + + NodesStatsResponse nodeStatsResponse = client.admin() + .cluster() + .prepareNodesStats("data:true") + .addMetric(NodesStatsRequest.Metric.CACHE_STATS.metricName()) + .setIndices(statsFlags) + .get(); + // Can always get the first data node as there's only one in this test suite + assertEquals(1, nodeStatsResponse.getNodes().size()); + NodeCacheStats ncs = nodeStatsResponse.getNodes().get(0).getNodeCacheStats(); + + XContentBuilder builder = XContentFactory.jsonBuilder(); + Map paramMap = new HashMap<>(); + if (aggregationLevels != null && !aggregationLevels.isEmpty()) { + paramMap.put("level", String.join(",",
aggregationLevels)); + } + ToXContent.Params params = new ToXContent.MapParams(paramMap); + + builder.startObject(); + ncs.toXContent(builder, params); + builder.endObject(); + + String resultString = builder.toString(); + return XContentHelper.convertToMap(MediaTypeRegistry.JSON.xContent(), resultString, true); + } + + /** Looks up the stats object at the nested key path {@code xContentMapKeys} and asserts its hit, miss and eviction counts equal {@code expectedStats}; size in bytes and item count are compared only when {@code checkMemorySize} / {@code checkEntries} are true. NOTE(review): response values are cast to int, which presumably relies on the parsed JSON numbers being Integers - confirm for very large sizes. */ private static void checkCacheStatsAPIResponse( + Map xContentMap, + List xContentMapKeys, + ImmutableCacheStats expectedStats, + boolean checkMemorySize, + boolean checkEntries + ) { + // Assumes the keys point to a level whose keys are the field values ("size_in_bytes", "evictions", etc) and whose values store + // those stats + Map aggregatedStatsResponse = (Map) ImmutableCacheStatsHolderTests.getValueFromNestedXContentMap( + xContentMap, + xContentMapKeys + ); + assertNotNull(aggregatedStatsResponse); + assertEquals(expectedStats.getHits(), (int) aggregatedStatsResponse.get(ImmutableCacheStats.Fields.HIT_COUNT)); + assertEquals(expectedStats.getMisses(), (int) aggregatedStatsResponse.get(ImmutableCacheStats.Fields.MISS_COUNT)); + assertEquals(expectedStats.getEvictions(), (int) aggregatedStatsResponse.get(ImmutableCacheStats.Fields.EVICTIONS)); + if (checkMemorySize) { + assertEquals(expectedStats.getSizeInBytes(), (int) aggregatedStatsResponse.get(ImmutableCacheStats.Fields.SIZE_IN_BYTES)); + } + if (checkEntries) { + assertEquals(expectedStats.getItems(), (int) aggregatedStatsResponse.get(ImmutableCacheStats.Fields.ITEM_COUNT)); + } + } +} diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/IndicesRequestCacheIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/IndicesRequestCacheIT.java index ec5637cec6485..ae2295cb874f5 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/IndicesRequestCacheIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/IndicesRequestCacheIT.java @@ -34,24 +34,40 @@ import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; +import
org.opensearch.action.admin.cluster.health.ClusterHealthResponse; +import org.opensearch.action.admin.cluster.node.stats.NodeStats; +import org.opensearch.action.admin.cluster.node.stats.NodesStatsResponse; +import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest; import org.opensearch.action.admin.indices.alias.Alias; +import org.opensearch.action.admin.indices.cache.clear.ClearIndicesCacheRequest; import org.opensearch.action.admin.indices.forcemerge.ForceMergeResponse; import org.opensearch.action.search.SearchResponse; import org.opensearch.action.search.SearchType; import org.opensearch.client.Client; +import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.routing.allocation.command.MoveAllocationCommand; +import org.opensearch.cluster.routing.allocation.decider.EnableAllocationDecider; import org.opensearch.common.settings.Settings; import org.opensearch.common.time.DateFormatter; +import org.opensearch.common.unit.TimeValue; import org.opensearch.common.util.FeatureFlags; +import org.opensearch.core.index.Index; +import org.opensearch.core.index.shard.ShardId; +import org.opensearch.env.NodeEnvironment; +import org.opensearch.index.IndexNotFoundException; import org.opensearch.index.cache.request.RequestCacheStats; import org.opensearch.index.query.QueryBuilders; import org.opensearch.search.aggregations.bucket.global.GlobalAggregationBuilder; import org.opensearch.search.aggregations.bucket.histogram.DateHistogramInterval; import org.opensearch.search.aggregations.bucket.histogram.Histogram; import org.opensearch.search.aggregations.bucket.histogram.Histogram.Bucket; +import org.opensearch.test.OpenSearchIntegTestCase; import org.opensearch.test.ParameterizedStaticSettingsOpenSearchIntegTestCase; import org.opensearch.test.hamcrest.OpenSearchAssertions; +import java.nio.file.Files; +import java.nio.file.Path; import java.time.ZoneId; import 
java.time.ZoneOffset; import java.time.ZonedDateTime; @@ -59,7 +75,13 @@ import java.util.Arrays; import java.util.Collection; import java.util.List; +import java.util.concurrent.TimeUnit; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS; +import static org.opensearch.cluster.routing.allocation.decider.EnableAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ENABLE_SETTING; +import static org.opensearch.indices.IndicesRequestCache.INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING; +import static org.opensearch.indices.IndicesService.INDICES_CACHE_CLEANUP_INTERVAL_SETTING_KEY; import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING; import static org.opensearch.search.aggregations.AggregationBuilders.dateHistogram; import static org.opensearch.search.aggregations.AggregationBuilders.dateRange; @@ -69,6 +91,7 @@ import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0, supportsDedicatedMasters = false) public class IndicesRequestCacheIT extends ParameterizedStaticSettingsOpenSearchIntegTestCase { public IndicesRequestCacheIT(Settings settings) { super(settings); @@ -92,25 +115,31 @@ protected boolean useRandomReplicationStrategy() { // One of the primary purposes of the query cache is to cache aggs results public void testCacheAggs() throws Exception { Client client = client(); + String index = "index"; assertAcked( client.admin() .indices() - .prepareCreate("index") + .prepareCreate(index) .setMapping("f", "type=date") - .setSettings(Settings.builder().put(IndicesRequestCache.INDEX_CACHE_REQUEST_ENABLED_SETTING.getKey(), true)) + .setSettings( + Settings.builder() + .put(IndicesRequestCache.INDEX_CACHE_REQUEST_ENABLED_SETTING.getKey(), true) + .put(SETTING_NUMBER_OF_SHARDS, 1) + 
.put(SETTING_NUMBER_OF_REPLICAS, 0) + ) .get() ); indexRandom( true, - client.prepareIndex("index").setSource("f", "2014-03-10T00:00:00.000Z"), - client.prepareIndex("index").setSource("f", "2014-05-13T00:00:00.000Z") + client.prepareIndex(index).setSource("f", "2014-03-10T00:00:00.000Z"), + client.prepareIndex(index).setSource("f", "2014-05-13T00:00:00.000Z") ); - ensureSearchable("index"); + ensureSearchable(index); // This is not a random example: serialization with time zones writes shared strings // which used to not work well with the query cache because of the handles stream output // see #9500 - final SearchResponse r1 = client.prepareSearch("index") + final SearchResponse r1 = client.prepareSearch(index) .setSize(0) .setSearchType(SearchType.QUERY_THEN_FETCH) .addAggregation( @@ -124,12 +153,12 @@ public void testCacheAggs() throws Exception { // The cached is actually used assertThat( - client.admin().indices().prepareStats("index").setRequestCache(true).get().getTotal().getRequestCache().getMemorySizeInBytes(), + client.admin().indices().prepareStats(index).setRequestCache(true).get().getTotal().getRequestCache().getMemorySizeInBytes(), greaterThan(0L) ); for (int i = 0; i < 10; ++i) { - final SearchResponse r2 = client.prepareSearch("index") + final SearchResponse r2 = client.prepareSearch(index) .setSize(0) .setSearchType(SearchType.QUERY_THEN_FETCH) .addAggregation( @@ -156,10 +185,11 @@ public void testCacheAggs() throws Exception { public void testQueryRewrite() throws Exception { Client client = client(); + String index = "index"; assertAcked( client.admin() .indices() - .prepareCreate("index") + .prepareCreate(index) .setMapping("s", "type=date") .setSettings( Settings.builder() @@ -172,28 +202,28 @@ public void testQueryRewrite() throws Exception { ); indexRandom( true, - client.prepareIndex("index").setId("1").setRouting("1").setSource("s", "2016-03-19"), - client.prepareIndex("index").setId("2").setRouting("1").setSource("s", "2016-03-20"), - 
client.prepareIndex("index").setId("3").setRouting("1").setSource("s", "2016-03-21"), - client.prepareIndex("index").setId("4").setRouting("2").setSource("s", "2016-03-22"), - client.prepareIndex("index").setId("5").setRouting("2").setSource("s", "2016-03-23"), - client.prepareIndex("index").setId("6").setRouting("2").setSource("s", "2016-03-24"), - client.prepareIndex("index").setId("7").setRouting("3").setSource("s", "2016-03-25"), - client.prepareIndex("index").setId("8").setRouting("3").setSource("s", "2016-03-26"), - client.prepareIndex("index").setId("9").setRouting("3").setSource("s", "2016-03-27") + client.prepareIndex(index).setId("1").setRouting("1").setSource("s", "2016-03-19"), + client.prepareIndex(index).setId("2").setRouting("1").setSource("s", "2016-03-20"), + client.prepareIndex(index).setId("3").setRouting("1").setSource("s", "2016-03-21"), + client.prepareIndex(index).setId("4").setRouting("2").setSource("s", "2016-03-22"), + client.prepareIndex(index).setId("5").setRouting("2").setSource("s", "2016-03-23"), + client.prepareIndex(index).setId("6").setRouting("2").setSource("s", "2016-03-24"), + client.prepareIndex(index).setId("7").setRouting("3").setSource("s", "2016-03-25"), + client.prepareIndex(index).setId("8").setRouting("3").setSource("s", "2016-03-26"), + client.prepareIndex(index).setId("9").setRouting("3").setSource("s", "2016-03-27") ); - ensureSearchable("index"); - assertCacheState(client, "index", 0, 0); + ensureSearchable(index); + assertCacheState(client, index, 0, 0); // Force merge the index to ensure there can be no background merges during the subsequent searches that would invalidate the cache - ForceMergeResponse forceMergeResponse = client.admin().indices().prepareForceMerge("index").setFlush(true).get(); + ForceMergeResponse forceMergeResponse = client.admin().indices().prepareForceMerge(index).setFlush(true).get(); OpenSearchAssertions.assertAllSuccessful(forceMergeResponse); refreshAndWaitForReplication(); - 
ensureSearchable("index"); + ensureSearchable(index); - assertCacheState(client, "index", 0, 0); + assertCacheState(client, index, 0, 0); - final SearchResponse r1 = client.prepareSearch("index") + final SearchResponse r1 = client.prepareSearch(index) .setSearchType(SearchType.QUERY_THEN_FETCH) .setSize(0) .setQuery(QueryBuilders.rangeQuery("s").gte("2016-03-19").lte("2016-03-25")) @@ -202,9 +232,9 @@ public void testQueryRewrite() throws Exception { .get(); OpenSearchAssertions.assertAllSuccessful(r1); assertThat(r1.getHits().getTotalHits().value, equalTo(7L)); - assertCacheState(client, "index", 0, 5); + assertCacheState(client, index, 0, 5); - final SearchResponse r2 = client.prepareSearch("index") + final SearchResponse r2 = client.prepareSearch(index) .setSearchType(SearchType.QUERY_THEN_FETCH) .setSize(0) .setQuery(QueryBuilders.rangeQuery("s").gte("2016-03-20").lte("2016-03-26")) @@ -212,9 +242,9 @@ public void testQueryRewrite() throws Exception { .get(); OpenSearchAssertions.assertAllSuccessful(r2); assertThat(r2.getHits().getTotalHits().value, equalTo(7L)); - assertCacheState(client, "index", 3, 7); + assertCacheState(client, index, 3, 7); - final SearchResponse r3 = client.prepareSearch("index") + final SearchResponse r3 = client.prepareSearch(index) .setSearchType(SearchType.QUERY_THEN_FETCH) .setSize(0) .setQuery(QueryBuilders.rangeQuery("s").gte("2016-03-21").lte("2016-03-27")) @@ -222,15 +252,16 @@ public void testQueryRewrite() throws Exception { .get(); OpenSearchAssertions.assertAllSuccessful(r3); assertThat(r3.getHits().getTotalHits().value, equalTo(7L)); - assertCacheState(client, "index", 6, 9); + assertCacheState(client, index, 6, 9); } public void testQueryRewriteMissingValues() throws Exception { Client client = client(); + String index = "index"; assertAcked( client.admin() .indices() - .prepareCreate("index") + .prepareCreate(index) .setMapping("s", "type=date") .setSettings( Settings.builder() @@ -242,61 +273,62 @@ public void 
testQueryRewriteMissingValues() throws Exception { ); indexRandom( true, - client.prepareIndex("index").setId("1").setSource("s", "2016-03-19"), - client.prepareIndex("index").setId("2").setSource("s", "2016-03-20"), - client.prepareIndex("index").setId("3").setSource("s", "2016-03-21"), - client.prepareIndex("index").setId("4").setSource("s", "2016-03-22"), - client.prepareIndex("index").setId("5").setSource("s", "2016-03-23"), - client.prepareIndex("index").setId("6").setSource("s", "2016-03-24"), - client.prepareIndex("index").setId("7").setSource("other", "value"), - client.prepareIndex("index").setId("8").setSource("s", "2016-03-26"), - client.prepareIndex("index").setId("9").setSource("s", "2016-03-27") + client.prepareIndex(index).setId("1").setSource("s", "2016-03-19"), + client.prepareIndex(index).setId("2").setSource("s", "2016-03-20"), + client.prepareIndex(index).setId("3").setSource("s", "2016-03-21"), + client.prepareIndex(index).setId("4").setSource("s", "2016-03-22"), + client.prepareIndex(index).setId("5").setSource("s", "2016-03-23"), + client.prepareIndex(index).setId("6").setSource("s", "2016-03-24"), + client.prepareIndex(index).setId("7").setSource("other", "value"), + client.prepareIndex(index).setId("8").setSource("s", "2016-03-26"), + client.prepareIndex(index).setId("9").setSource("s", "2016-03-27") ); - ensureSearchable("index"); - assertCacheState(client, "index", 0, 0); + ensureSearchable(index); + assertCacheState(client, index, 0, 0); // Force merge the index to ensure there can be no background merges during the subsequent searches that would invalidate the cache - ForceMergeResponse forceMergeResponse = client.admin().indices().prepareForceMerge("index").setFlush(true).get(); + ForceMergeResponse forceMergeResponse = client.admin().indices().prepareForceMerge(index).setFlush(true).get(); OpenSearchAssertions.assertAllSuccessful(forceMergeResponse); refreshAndWaitForReplication(); - ensureSearchable("index"); + 
ensureSearchable(index); - assertCacheState(client, "index", 0, 0); + assertCacheState(client, index, 0, 0); - final SearchResponse r1 = client.prepareSearch("index") + final SearchResponse r1 = client.prepareSearch(index) .setSearchType(SearchType.QUERY_THEN_FETCH) .setSize(0) .setQuery(QueryBuilders.rangeQuery("s").gte("2016-03-19").lte("2016-03-28")) .get(); OpenSearchAssertions.assertAllSuccessful(r1); assertThat(r1.getHits().getTotalHits().value, equalTo(8L)); - assertCacheState(client, "index", 0, 1); + assertCacheState(client, index, 0, 1); - final SearchResponse r2 = client.prepareSearch("index") + final SearchResponse r2 = client.prepareSearch(index) .setSearchType(SearchType.QUERY_THEN_FETCH) .setSize(0) .setQuery(QueryBuilders.rangeQuery("s").gte("2016-03-19").lte("2016-03-28")) .get(); OpenSearchAssertions.assertAllSuccessful(r2); assertThat(r2.getHits().getTotalHits().value, equalTo(8L)); - assertCacheState(client, "index", 1, 1); + assertCacheState(client, index, 1, 1); - final SearchResponse r3 = client.prepareSearch("index") + final SearchResponse r3 = client.prepareSearch(index) .setSearchType(SearchType.QUERY_THEN_FETCH) .setSize(0) .setQuery(QueryBuilders.rangeQuery("s").gte("2016-03-19").lte("2016-03-28")) .get(); OpenSearchAssertions.assertAllSuccessful(r3); assertThat(r3.getHits().getTotalHits().value, equalTo(8L)); - assertCacheState(client, "index", 2, 1); + assertCacheState(client, index, 2, 1); } public void testQueryRewriteDates() throws Exception { Client client = client(); + String index = "index"; assertAcked( client.admin() .indices() - .prepareCreate("index") + .prepareCreate(index) .setMapping("d", "type=date") .setSettings( Settings.builder() @@ -308,28 +340,28 @@ public void testQueryRewriteDates() throws Exception { ); indexRandom( true, - client.prepareIndex("index").setId("1").setSource("d", "2014-01-01T00:00:00"), - client.prepareIndex("index").setId("2").setSource("d", "2014-02-01T00:00:00"), - 
client.prepareIndex("index").setId("3").setSource("d", "2014-03-01T00:00:00"), - client.prepareIndex("index").setId("4").setSource("d", "2014-04-01T00:00:00"), - client.prepareIndex("index").setId("5").setSource("d", "2014-05-01T00:00:00"), - client.prepareIndex("index").setId("6").setSource("d", "2014-06-01T00:00:00"), - client.prepareIndex("index").setId("7").setSource("d", "2014-07-01T00:00:00"), - client.prepareIndex("index").setId("8").setSource("d", "2014-08-01T00:00:00"), - client.prepareIndex("index").setId("9").setSource("d", "2014-09-01T00:00:00") + client.prepareIndex(index).setId("1").setSource("d", "2014-01-01T00:00:00"), + client.prepareIndex(index).setId("2").setSource("d", "2014-02-01T00:00:00"), + client.prepareIndex(index).setId("3").setSource("d", "2014-03-01T00:00:00"), + client.prepareIndex(index).setId("4").setSource("d", "2014-04-01T00:00:00"), + client.prepareIndex(index).setId("5").setSource("d", "2014-05-01T00:00:00"), + client.prepareIndex(index).setId("6").setSource("d", "2014-06-01T00:00:00"), + client.prepareIndex(index).setId("7").setSource("d", "2014-07-01T00:00:00"), + client.prepareIndex(index).setId("8").setSource("d", "2014-08-01T00:00:00"), + client.prepareIndex(index).setId("9").setSource("d", "2014-09-01T00:00:00") ); - ensureSearchable("index"); - assertCacheState(client, "index", 0, 0); + ensureSearchable(index); + assertCacheState(client, index, 0, 0); // Force merge the index to ensure there can be no background merges during the subsequent searches that would invalidate the cache - ForceMergeResponse forceMergeResponse = client.admin().indices().prepareForceMerge("index").setFlush(true).get(); + ForceMergeResponse forceMergeResponse = client.admin().indices().prepareForceMerge(index).setFlush(true).get(); OpenSearchAssertions.assertAllSuccessful(forceMergeResponse); refreshAndWaitForReplication(); - ensureSearchable("index"); + ensureSearchable(index); - assertCacheState(client, "index", 0, 0); + assertCacheState(client, 
index, 0, 0); - final SearchResponse r1 = client.prepareSearch("index") + final SearchResponse r1 = client.prepareSearch(index) .setSearchType(SearchType.QUERY_THEN_FETCH) .setSize(0) .setQuery(QueryBuilders.rangeQuery("d").gte("2013-01-01T00:00:00").lte("now")) @@ -338,9 +370,9 @@ public void testQueryRewriteDates() throws Exception { .get(); OpenSearchAssertions.assertAllSuccessful(r1); assertThat(r1.getHits().getTotalHits().value, equalTo(9L)); - assertCacheState(client, "index", 0, 1); + assertCacheState(client, index, 0, 1); - final SearchResponse r2 = client.prepareSearch("index") + final SearchResponse r2 = client.prepareSearch(index) .setSearchType(SearchType.QUERY_THEN_FETCH) .setSize(0) .setQuery(QueryBuilders.rangeQuery("d").gte("2013-01-01T00:00:00").lte("now")) @@ -348,9 +380,9 @@ public void testQueryRewriteDates() throws Exception { .get(); OpenSearchAssertions.assertAllSuccessful(r2); assertThat(r2.getHits().getTotalHits().value, equalTo(9L)); - assertCacheState(client, "index", 1, 1); + assertCacheState(client, index, 1, 1); - final SearchResponse r3 = client.prepareSearch("index") + final SearchResponse r3 = client.prepareSearch(index) .setSearchType(SearchType.QUERY_THEN_FETCH) .setSize(0) .setQuery(QueryBuilders.rangeQuery("d").gte("2013-01-01T00:00:00").lte("now")) @@ -358,7 +390,7 @@ public void testQueryRewriteDates() throws Exception { .get(); OpenSearchAssertions.assertAllSuccessful(r3); assertThat(r3.getHits().getTotalHits().value, equalTo(9L)); - assertCacheState(client, "index", 2, 1); + assertCacheState(client, index, 2, 1); } public void testQueryRewriteDatesWithNow() throws Exception { @@ -449,53 +481,54 @@ public void testCanCache() throws Exception { .put("index.number_of_routing_shards", 2) .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) .build(); - assertAcked(client.admin().indices().prepareCreate("index").setMapping("s", "type=date").setSettings(settings).get()); + String index = "index"; + 
assertAcked(client.admin().indices().prepareCreate(index).setMapping("s", "type=date").setSettings(settings).get()); indexRandom( true, - client.prepareIndex("index").setId("1").setRouting("1").setSource("s", "2016-03-19"), - client.prepareIndex("index").setId("2").setRouting("1").setSource("s", "2016-03-20"), - client.prepareIndex("index").setId("3").setRouting("1").setSource("s", "2016-03-21"), - client.prepareIndex("index").setId("4").setRouting("2").setSource("s", "2016-03-22"), - client.prepareIndex("index").setId("5").setRouting("2").setSource("s", "2016-03-23"), - client.prepareIndex("index").setId("6").setRouting("2").setSource("s", "2016-03-24"), - client.prepareIndex("index").setId("7").setRouting("3").setSource("s", "2016-03-25"), - client.prepareIndex("index").setId("8").setRouting("3").setSource("s", "2016-03-26"), - client.prepareIndex("index").setId("9").setRouting("3").setSource("s", "2016-03-27") + client.prepareIndex(index).setId("1").setRouting("1").setSource("s", "2016-03-19"), + client.prepareIndex(index).setId("2").setRouting("1").setSource("s", "2016-03-20"), + client.prepareIndex(index).setId("3").setRouting("1").setSource("s", "2016-03-21"), + client.prepareIndex(index).setId("4").setRouting("2").setSource("s", "2016-03-22"), + client.prepareIndex(index).setId("5").setRouting("2").setSource("s", "2016-03-23"), + client.prepareIndex(index).setId("6").setRouting("2").setSource("s", "2016-03-24"), + client.prepareIndex(index).setId("7").setRouting("3").setSource("s", "2016-03-25"), + client.prepareIndex(index).setId("8").setRouting("3").setSource("s", "2016-03-26"), + client.prepareIndex(index).setId("9").setRouting("3").setSource("s", "2016-03-27") ); - ensureSearchable("index"); - assertCacheState(client, "index", 0, 0); + ensureSearchable(index); + assertCacheState(client, index, 0, 0); // Force merge the index to ensure there can be no background merges during the subsequent searches that would invalidate the cache - ForceMergeResponse 
forceMergeResponse = client.admin().indices().prepareForceMerge("index").setFlush(true).get(); + ForceMergeResponse forceMergeResponse = client.admin().indices().prepareForceMerge(index).setFlush(true).get(); OpenSearchAssertions.assertAllSuccessful(forceMergeResponse); refreshAndWaitForReplication(); - ensureSearchable("index"); + ensureSearchable(index); - assertCacheState(client, "index", 0, 0); + assertCacheState(client, index, 0, 0); // If size > 0 we should no cache by default - final SearchResponse r1 = client.prepareSearch("index") + final SearchResponse r1 = client.prepareSearch(index) .setSearchType(SearchType.QUERY_THEN_FETCH) .setSize(1) .setQuery(QueryBuilders.rangeQuery("s").gte("2016-03-19").lte("2016-03-25")) .get(); OpenSearchAssertions.assertAllSuccessful(r1); assertThat(r1.getHits().getTotalHits().value, equalTo(7L)); - assertCacheState(client, "index", 0, 0); + assertCacheState(client, index, 0, 0); // If search type is DFS_QUERY_THEN_FETCH we should not cache - final SearchResponse r2 = client.prepareSearch("index") + final SearchResponse r2 = client.prepareSearch(index) .setSearchType(SearchType.DFS_QUERY_THEN_FETCH) .setSize(0) .setQuery(QueryBuilders.rangeQuery("s").gte("2016-03-20").lte("2016-03-26")) .get(); OpenSearchAssertions.assertAllSuccessful(r2); assertThat(r2.getHits().getTotalHits().value, equalTo(7L)); - assertCacheState(client, "index", 0, 0); + assertCacheState(client, index, 0, 0); // If search type is DFS_QUERY_THEN_FETCH we should not cache even if // the cache flag is explicitly set on the request - final SearchResponse r3 = client.prepareSearch("index") + final SearchResponse r3 = client.prepareSearch(index) .setSearchType(SearchType.DFS_QUERY_THEN_FETCH) .setSize(0) .setRequestCache(true) @@ -503,10 +536,10 @@ public void testCanCache() throws Exception { .get(); OpenSearchAssertions.assertAllSuccessful(r3); assertThat(r3.getHits().getTotalHits().value, equalTo(7L)); - assertCacheState(client, "index", 0, 0); + 
assertCacheState(client, index, 0, 0); // If the request has an non-filter aggregation containing now we should not cache - final SearchResponse r5 = client.prepareSearch("index") + final SearchResponse r5 = client.prepareSearch(index) .setSearchType(SearchType.QUERY_THEN_FETCH) .setSize(0) .setRequestCache(true) @@ -515,10 +548,10 @@ public void testCanCache() throws Exception { .get(); OpenSearchAssertions.assertAllSuccessful(r5); assertThat(r5.getHits().getTotalHits().value, equalTo(7L)); - assertCacheState(client, "index", 0, 0); + assertCacheState(client, index, 0, 0); // If size > 1 and cache flag is set on the request we should cache - final SearchResponse r6 = client.prepareSearch("index") + final SearchResponse r6 = client.prepareSearch(index) .setSearchType(SearchType.QUERY_THEN_FETCH) .setSize(1) .setRequestCache(true) @@ -526,10 +559,10 @@ public void testCanCache() throws Exception { .get(); OpenSearchAssertions.assertAllSuccessful(r6); assertThat(r6.getHits().getTotalHits().value, equalTo(7L)); - assertCacheState(client, "index", 0, 2); + assertCacheState(client, index, 0, 2); // If the request has a filter aggregation containing now we should cache since it gets rewritten - final SearchResponse r4 = client.prepareSearch("index") + final SearchResponse r4 = client.prepareSearch(index) .setSearchType(SearchType.QUERY_THEN_FETCH) .setSize(0) .setRequestCache(true) @@ -538,7 +571,7 @@ public void testCanCache() throws Exception { .get(); OpenSearchAssertions.assertAllSuccessful(r4); assertThat(r4.getHits().getTotalHits().value, equalTo(7L)); - assertCacheState(client, "index", 0, 4); + assertCacheState(client, index, 0, 4); } public void testCacheWithFilteredAlias() throws InterruptedException { @@ -548,61 +581,63 @@ public void testCacheWithFilteredAlias() throws InterruptedException { .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) .build(); + String index = "index"; assertAcked( client.admin() 
.indices() - .prepareCreate("index") + .prepareCreate(index) .setMapping("created_at", "type=date") .setSettings(settings) .addAlias(new Alias("last_week").filter(QueryBuilders.rangeQuery("created_at").gte("now-7d/d"))) .get() ); ZonedDateTime now = ZonedDateTime.now(ZoneOffset.UTC); - client.prepareIndex("index").setId("1").setRouting("1").setSource("created_at", DateTimeFormatter.ISO_LOCAL_DATE.format(now)).get(); + client.prepareIndex(index).setId("1").setRouting("1").setSource("created_at", DateTimeFormatter.ISO_LOCAL_DATE.format(now)).get(); // Force merge the index to ensure there can be no background merges during the subsequent searches that would invalidate the cache - ForceMergeResponse forceMergeResponse = client.admin().indices().prepareForceMerge("index").setFlush(true).get(); + ForceMergeResponse forceMergeResponse = client.admin().indices().prepareForceMerge(index).setFlush(true).get(); OpenSearchAssertions.assertAllSuccessful(forceMergeResponse); refreshAndWaitForReplication(); - indexRandomForConcurrentSearch("index"); + indexRandomForConcurrentSearch(index); - assertCacheState(client, "index", 0, 0); + assertCacheState(client, index, 0, 0); - SearchResponse r1 = client.prepareSearch("index") + SearchResponse r1 = client.prepareSearch(index) .setSearchType(SearchType.QUERY_THEN_FETCH) .setSize(0) .setQuery(QueryBuilders.rangeQuery("created_at").gte("now-7d/d")) .get(); OpenSearchAssertions.assertAllSuccessful(r1); assertThat(r1.getHits().getTotalHits().value, equalTo(1L)); - assertCacheState(client, "index", 0, 1); + assertCacheState(client, index, 0, 1); - r1 = client.prepareSearch("index") + r1 = client.prepareSearch(index) .setSearchType(SearchType.QUERY_THEN_FETCH) .setSize(0) .setQuery(QueryBuilders.rangeQuery("created_at").gte("now-7d/d")) .get(); OpenSearchAssertions.assertAllSuccessful(r1); assertThat(r1.getHits().getTotalHits().value, equalTo(1L)); - assertCacheState(client, "index", 1, 1); + assertCacheState(client, index, 1, 1); r1 = 
client.prepareSearch("last_week").setSearchType(SearchType.QUERY_THEN_FETCH).setSize(0).get(); OpenSearchAssertions.assertAllSuccessful(r1); assertThat(r1.getHits().getTotalHits().value, equalTo(1L)); - assertCacheState(client, "index", 1, 2); + assertCacheState(client, index, 1, 2); r1 = client.prepareSearch("last_week").setSearchType(SearchType.QUERY_THEN_FETCH).setSize(0).get(); OpenSearchAssertions.assertAllSuccessful(r1); assertThat(r1.getHits().getTotalHits().value, equalTo(1L)); - assertCacheState(client, "index", 2, 2); + assertCacheState(client, index, 2, 2); } public void testProfileDisableCache() throws Exception { Client client = client(); + String index = "index"; assertAcked( client.admin() .indices() - .prepareCreate("index") + .prepareCreate(index) .setMapping("k", "type=keyword") .setSettings( Settings.builder() @@ -612,14 +647,14 @@ public void testProfileDisableCache() throws Exception { ) .get() ); - indexRandom(true, client.prepareIndex("index").setSource("k", "hello")); - ensureSearchable("index"); + indexRandom(true, client.prepareIndex(index).setSource("k", "hello")); + ensureSearchable(index); int expectedHits = 0; int expectedMisses = 0; for (int i = 0; i < 5; i++) { boolean profile = i % 2 == 0; - SearchResponse resp = client.prepareSearch("index") + SearchResponse resp = client.prepareSearch(index) .setRequestCache(true) .setProfile(profile) .setQuery(QueryBuilders.termQuery("k", "hello")) @@ -634,16 +669,17 @@ public void testProfileDisableCache() throws Exception { expectedHits++; } } - assertCacheState(client, "index", expectedHits, expectedMisses); + assertCacheState(client, index, expectedHits, expectedMisses); } } public void testCacheWithInvalidation() throws Exception { Client client = client(); + String index = "index"; assertAcked( client.admin() .indices() - .prepareCreate("index") + .prepareCreate(index) .setMapping("k", "type=keyword") .setSettings( Settings.builder() @@ -654,38 +690,688 @@ public void 
testCacheWithInvalidation() throws Exception { ) .get() ); - indexRandom(true, client.prepareIndex("index").setSource("k", "hello")); - ensureSearchable("index"); - SearchResponse resp = client.prepareSearch("index").setRequestCache(true).setQuery(QueryBuilders.termQuery("k", "hello")).get(); + indexRandom(true, client.prepareIndex(index).setSource("k", "hello")); + ensureSearchable(index); + SearchResponse resp = client.prepareSearch(index).setRequestCache(true).setQuery(QueryBuilders.termQuery("k", "hello")).get(); assertSearchResponse(resp); OpenSearchAssertions.assertAllSuccessful(resp); assertThat(resp.getHits().getTotalHits().value, equalTo(1L)); - assertCacheState(client, "index", 0, 1); + assertCacheState(client, index, 0, 1); // Index but don't refresh - indexRandom(false, client.prepareIndex("index").setSource("k", "hello2")); - resp = client.prepareSearch("index").setRequestCache(true).setQuery(QueryBuilders.termQuery("k", "hello")).get(); + indexRandom(false, client.prepareIndex(index).setSource("k", "hello2")); + resp = client.prepareSearch(index).setRequestCache(true).setQuery(QueryBuilders.termQuery("k", "hello")).get(); assertSearchResponse(resp); // Should expect hit as here as refresh didn't happen - assertCacheState(client, "index", 1, 1); + assertCacheState(client, index, 1, 1); // Explicit refresh would invalidate cache refreshAndWaitForReplication(); // Hit same query again - resp = client.prepareSearch("index").setRequestCache(true).setQuery(QueryBuilders.termQuery("k", "hello")).get(); + resp = client.prepareSearch(index).setRequestCache(true).setQuery(QueryBuilders.termQuery("k", "hello")).get(); assertSearchResponse(resp); // Should expect miss as key has changed due to change in IndexReader.CacheKey (due to refresh) - assertCacheState(client, "index", 1, 2); + assertCacheState(client, index, 1, 2); + } + + // calling cache clear api, when staleness threshold is lower than staleness, it should clean the stale keys from cache + public void 
testCacheClearAPIRemovesStaleKeysWhenStalenessThresholdIsLow() throws Exception { + String node = internalCluster().startNode( + Settings.builder() + .put(IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_STALENESS_THRESHOLD_SETTING_KEY, 0.10) + .put( + IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_INTERVAL_SETTING_KEY, + // setting intentionally high to avoid cache cleaner interfering + TimeValue.timeValueMillis(300) + ) + ); + Client client = client(node); + String index1 = "index1"; + String index2 = "index2"; + setupIndex(client, index1); + setupIndex(client, index2); + + // create first cache entry in index1 + createCacheEntry(client, index1, "hello"); + assertCacheState(client, index1, 0, 1); + long memorySizeForIndex1 = getRequestCacheStats(client, index1).getMemorySizeInBytes(); + assertTrue(memorySizeForIndex1 > 0); + + // create second cache entry in index1 + createCacheEntry(client, index1, "there"); + assertCacheState(client, index1, 0, 2); + long finalMemorySizeForIndex1 = getRequestCacheStats(client, index1).getMemorySizeInBytes(); + assertTrue(finalMemorySizeForIndex1 > memorySizeForIndex1); + + // create first cache entry in index2 + createCacheEntry(client, index2, "hello"); + assertCacheState(client, index2, 0, 1); + assertTrue(getRequestCacheStats(client, index2).getMemorySizeInBytes() > 0); + + ClearIndicesCacheRequest clearIndicesCacheRequest = new ClearIndicesCacheRequest(index2); + client.admin().indices().clearCache(clearIndicesCacheRequest).actionGet(); + + // cache cleaner should have cleaned up the stale key from index 2 + assertEquals(0, getRequestCacheStats(client, index2).getMemorySizeInBytes()); + // cache cleaner should NOT have cleaned from index 1 + assertEquals(finalMemorySizeForIndex1, getRequestCacheStats(client, index1).getMemorySizeInBytes()); + } + + // when staleness threshold is lower than staleness, it should clean the stale keys from cache + public void testStaleKeysCleanupWithLowThreshold() throws Exception { + int 
cacheCleanIntervalInMillis = 1; + String node = internalCluster().startNode( + Settings.builder() + .put(IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_STALENESS_THRESHOLD_SETTING_KEY, 0.10) + .put( + IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_INTERVAL_SETTING_KEY, + TimeValue.timeValueMillis(cacheCleanIntervalInMillis) + ) + ); + Client client = client(node); + String index1 = "index1"; + String index2 = "index2"; + setupIndex(client, index1); + setupIndex(client, index2); + + // create first cache entry in index1 + createCacheEntry(client, index1, "hello"); + assertCacheState(client, index1, 0, 1); + long memorySizeForIndex1 = getRequestCacheStats(client, index1).getMemorySizeInBytes(); + assertTrue(memorySizeForIndex1 > 0); + + // create second cache entry in index1 + createCacheEntry(client, index1, "there"); + assertCacheState(client, index1, 0, 2); + long finalMemorySizeForIndex1 = getRequestCacheStats(client, index1).getMemorySizeInBytes(); + assertTrue(finalMemorySizeForIndex1 > memorySizeForIndex1); + + // create first cache entry in index2 + createCacheEntry(client, index2, "hello"); + assertCacheState(client, index2, 0, 1); + assertTrue(getRequestCacheStats(client, index2).getMemorySizeInBytes() > 0); + + // force refresh so that it creates 1 stale key + flushAndRefresh(index2); + // sleep until cache cleaner would have cleaned up the stale key from index 2 + assertBusy(() -> { + // cache cleaner should have cleaned up the stale key from index 2 + assertEquals(0, getRequestCacheStats(client, index2).getMemorySizeInBytes()); + // cache cleaner should NOT have cleaned from index 1 + assertEquals(finalMemorySizeForIndex1, getRequestCacheStats(client, index1).getMemorySizeInBytes()); + }, cacheCleanIntervalInMillis * 2, TimeUnit.MILLISECONDS); + // sleep until cache cleaner would have cleaned up the stale key from index 2 + } + + // when staleness threshold is equal to staleness, it should clean the stale keys from cache + public void 
testCacheCleanupOnEqualStalenessAndThreshold() throws Exception { + int cacheCleanIntervalInMillis = 1; + String node = internalCluster().startNode( + Settings.builder() + .put(IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_STALENESS_THRESHOLD_SETTING_KEY, 0.33) + .put( + IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_INTERVAL_SETTING_KEY, + TimeValue.timeValueMillis(cacheCleanIntervalInMillis) + ) + ); + Client client = client(node); + String index1 = "index1"; + String index2 = "index2"; + setupIndex(client, index1); + setupIndex(client, index2); + + // create first cache entry in index1 + createCacheEntry(client, index1, "hello"); + assertCacheState(client, index1, 0, 1); + long memorySizeForIndex1 = getRequestCacheStats(client, index1).getMemorySizeInBytes(); + assertTrue(memorySizeForIndex1 > 0); + + // create second cache entry in index1 + createCacheEntry(client, index1, "there"); + assertCacheState(client, index1, 0, 2); + long finalMemorySizeForIndex1 = getRequestCacheStats(client, index1).getMemorySizeInBytes(); + assertTrue(finalMemorySizeForIndex1 > memorySizeForIndex1); + + // create first cache entry in index2 + createCacheEntry(client, index2, "hello"); + assertCacheState(client, index2, 0, 1); + assertTrue(getRequestCacheStats(client, index2).getMemorySizeInBytes() > 0); + + // force refresh so that it creates 1 stale key + flushAndRefresh(index2); + // sleep until cache cleaner would have cleaned up the stale key from index 2 + assertBusy(() -> { + // cache cleaner should have cleaned up the stale key from index 2 + assertEquals(0, getRequestCacheStats(client, index2).getMemorySizeInBytes()); + // cache cleaner should NOT have cleaned from index 1 + assertEquals(finalMemorySizeForIndex1, getRequestCacheStats(client, index1).getMemorySizeInBytes()); + }, cacheCleanIntervalInMillis * 2, TimeUnit.MILLISECONDS); + } + + // when staleness threshold is higher than staleness, it should NOT clean the cache + public void 
testCacheCleanupSkipsWithHighStalenessThreshold() throws Exception { + int cacheCleanIntervalInMillis = 1; + String node = internalCluster().startNode( + Settings.builder() + .put(IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_STALENESS_THRESHOLD_SETTING_KEY, 0.90) + .put( + IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_INTERVAL_SETTING_KEY, + TimeValue.timeValueMillis(cacheCleanIntervalInMillis) + ) + ); + Client client = client(node); + String index1 = "index1"; + String index2 = "index2"; + setupIndex(client, index1); + setupIndex(client, index2); + + // create first cache entry in index1 + createCacheEntry(client, index1, "hello"); + assertCacheState(client, index1, 0, 1); + long memorySizeForIndex1 = getRequestCacheStats(client, index1).getMemorySizeInBytes(); + assertTrue(memorySizeForIndex1 > 0); + + // create second cache entry in index1 + createCacheEntry(client, index1, "there"); + assertCacheState(client, index1, 0, 2); + long finalMemorySizeForIndex1 = getRequestCacheStats(client, index1).getMemorySizeInBytes(); + assertTrue(finalMemorySizeForIndex1 > memorySizeForIndex1); + + // create first cache entry in index2 + createCacheEntry(client, index2, "hello"); + assertCacheState(client, index2, 0, 1); + assertTrue(getRequestCacheStats(client, index2).getMemorySizeInBytes() > 0); + + // force refresh so that it creates 1 stale key + flushAndRefresh(index2); + // sleep until cache cleaner would have cleaned up the stale key from index 2 + assertBusy(() -> { + // cache cleaner should NOT have cleaned up the stale key from index 2 + assertTrue(getRequestCacheStats(client, index2).getMemorySizeInBytes() > 0); + // cache cleaner should NOT have cleaned from index 1 + assertEquals(finalMemorySizeForIndex1, getRequestCacheStats(client, index1).getMemorySizeInBytes()); + }, cacheCleanIntervalInMillis * 2, TimeUnit.MILLISECONDS); + } + + // when staleness threshold is explicitly set to 0, cache cleaner regularly cleans up stale keys. 
+ public void testCacheCleanupOnZeroStalenessThreshold() throws Exception { + int cacheCleanIntervalInMillis = 50; + String node = internalCluster().startNode( + Settings.builder() + .put(IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_STALENESS_THRESHOLD_SETTING_KEY, 0) + .put( + IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_INTERVAL_SETTING_KEY, + TimeValue.timeValueMillis(cacheCleanIntervalInMillis) + ) + ); + Client client = client(node); + String index1 = "index1"; + String index2 = "index2"; + setupIndex(client, index1); + setupIndex(client, index2); + + // create 10 index1 cache entries + for (int i = 1; i <= 10; i++) { + long cacheSizeBefore = getRequestCacheStats(client, index1).getMemorySizeInBytes(); + createCacheEntry(client, index1, "hello" + i); + assertCacheState(client, index1, 0, i); + long cacheSizeAfter = getRequestCacheStats(client, index1).getMemorySizeInBytes(); + assertTrue(cacheSizeAfter > cacheSizeBefore); + } + + long finalMemorySizeForIndex1 = getRequestCacheStats(client, index1).getMemorySizeInBytes(); + + // create first cache entry in index2 + createCacheEntry(client, index2, "hello"); + assertCacheState(client, index2, 0, 1); + assertTrue(getRequestCacheStats(client, index2).getMemorySizeInBytes() > 0); + + // force refresh so that it creates 1 stale key + flushAndRefresh(index2); + // sleep until cache cleaner would have cleaned up the stale key from index 2 + assertBusy(() -> { + // cache cleaner should have cleaned up the stale key from index 2 + assertEquals(0, getRequestCacheStats(client, index2).getMemorySizeInBytes()); + // cache cleaner should NOT have cleaned from index 1 + assertEquals(finalMemorySizeForIndex1, getRequestCacheStats(client, index1).getMemorySizeInBytes()); + }, cacheCleanIntervalInMillis * 2, TimeUnit.MILLISECONDS); + } + + // when staleness threshold is not explicitly set, cache cleaner regularly cleans up stale keys + public void testStaleKeysRemovalWithoutExplicitThreshold() throws Exception { + int 
cacheCleanIntervalInMillis = 1; + String node = internalCluster().startNode( + Settings.builder() + .put( + IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_INTERVAL_SETTING_KEY, + TimeValue.timeValueMillis(cacheCleanIntervalInMillis) + ) + ); + String index1 = "index1"; + String index2 = "index2"; + Client client = client(node); + setupIndex(client, index1); + setupIndex(client, index2); + + // create first cache entry in index1 + createCacheEntry(client, index1, "hello"); + assertCacheState(client, index1, 0, 1); + long memorySizeForIndex1 = getRequestCacheStats(client, index1).getMemorySizeInBytes(); + assertTrue(memorySizeForIndex1 > 0); + + // create second cache entry in index1 + createCacheEntry(client, index1, "there"); + assertCacheState(client, index1, 0, 2); + long finalMemorySizeForIndex1 = getRequestCacheStats(client, index1).getMemorySizeInBytes(); + assertTrue(finalMemorySizeForIndex1 > memorySizeForIndex1); + + // create first cache entry in index2 + createCacheEntry(client, index2, "hello"); + assertCacheState(client, index2, 0, 1); + assertTrue(getRequestCacheStats(client, index2).getMemorySizeInBytes() > 0); + + // force refresh so that it creates 1 stale key + flushAndRefresh(index2); + // sleep until cache cleaner would have cleaned up the stale key from index 2 + assertBusy(() -> { + // cache cleaner should have cleaned up the stale key from index 2 + assertEquals(0, getRequestCacheStats(client, index2).getMemorySizeInBytes()); + // cache cleaner should NOT have cleaned from index 1 + assertEquals(finalMemorySizeForIndex1, getRequestCacheStats(client, index1).getMemorySizeInBytes()); + }, cacheCleanIntervalInMillis * 2, TimeUnit.MILLISECONDS); + } + + // when cache cleaner interval setting is not set, cache cleaner is configured appropriately with the fall-back setting + public void testCacheCleanupWithDefaultSettings() throws Exception { + int cacheCleanIntervalInMillis = 1; + String node = internalCluster().startNode( + 
Settings.builder().put(INDICES_CACHE_CLEANUP_INTERVAL_SETTING_KEY, TimeValue.timeValueMillis(cacheCleanIntervalInMillis)) + ); + Client client = client(node); + String index1 = "index1"; + String index2 = "index2"; + setupIndex(client, index1); + setupIndex(client, index2); + + // create first cache entry in index1 + createCacheEntry(client, index1, "hello"); + assertCacheState(client, index1, 0, 1); + long memorySizeForIndex1 = getRequestCacheStats(client, index1).getMemorySizeInBytes(); + assertTrue(memorySizeForIndex1 > 0); + + // create second cache entry in index1 + createCacheEntry(client, index1, "there"); + assertCacheState(client, index1, 0, 2); + long finalMemorySizeForIndex1 = getRequestCacheStats(client, index1).getMemorySizeInBytes(); + assertTrue(finalMemorySizeForIndex1 > memorySizeForIndex1); + + // create first cache entry in index2 + createCacheEntry(client, index2, "hello"); + assertCacheState(client, index2, 0, 1); + assertTrue(getRequestCacheStats(client, index2).getMemorySizeInBytes() > 0); + + // force refresh so that it creates 1 stale key + flushAndRefresh(index2); + // sleep until cache cleaner would have cleaned up the stale key from index 2 + assertBusy(() -> { + // cache cleaner should have cleaned up the stale key from index 2 + assertEquals(0, getRequestCacheStats(client, index2).getMemorySizeInBytes()); + // cache cleaner should NOT have cleaned from index 1 + assertEquals(finalMemorySizeForIndex1, getRequestCacheStats(client, index1).getMemorySizeInBytes()); + }, cacheCleanIntervalInMillis * 2, TimeUnit.MILLISECONDS); + } + + // staleness threshold updates flows through to the cache cleaner + public void testDynamicStalenessThresholdUpdate() throws Exception { + int cacheCleanIntervalInMillis = 1; + String node = internalCluster().startNode( + Settings.builder() + .put(IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_STALENESS_THRESHOLD_SETTING_KEY, 0.90) + .put( + 
IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_INTERVAL_SETTING_KEY, + TimeValue.timeValueMillis(cacheCleanIntervalInMillis) + ) + ); + Client client = client(node); + String index1 = "index1"; + String index2 = "index2"; + setupIndex(client, index1); + setupIndex(client, index2); + + // create first cache entry in index1 + createCacheEntry(client, index1, "hello"); + assertCacheState(client, index1, 0, 1); + long memorySizeForIndex1 = getRequestCacheStats(client, index1).getMemorySizeInBytes(); + assertTrue(memorySizeForIndex1 > 0); + + // create second cache entry in index1 + createCacheEntry(client, index1, "there"); + assertCacheState(client, index1, 0, 2); + assertTrue(getRequestCacheStats(client, index1).getMemorySizeInBytes() > memorySizeForIndex1); + + // create first cache entry in index2 + createCacheEntry(client, index2, "hello"); + assertCacheState(client, index2, 0, 1); + long finalMemorySizeForIndex1 = getRequestCacheStats(client, index1).getMemorySizeInBytes(); + assertTrue(finalMemorySizeForIndex1 > 0); + + // force refresh so that it creates 1 stale key + flushAndRefresh(index2); + assertBusy(() -> { + // cache cleaner should NOT have cleaned up the stale key from index 2 + assertTrue(getRequestCacheStats(client, index2).getMemorySizeInBytes() > 0); + }, cacheCleanIntervalInMillis * 2, TimeUnit.MILLISECONDS); + + // Update indices.requests.cache.cleanup.staleness_threshold to "10%" + ClusterUpdateSettingsRequest updateSettingsRequest = new ClusterUpdateSettingsRequest(); + updateSettingsRequest.persistentSettings(Settings.builder().put(INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING.getKey(), 0.10)); + assertAcked(client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); + + assertBusy(() -> { + // cache cleaner should have cleaned up the stale key from index 2 + assertEquals(0, getRequestCacheStats(client, index2).getMemorySizeInBytes()); + // cache cleaner should NOT have cleaned from index 1 + 
assertEquals(finalMemorySizeForIndex1, getRequestCacheStats(client, index1).getMemorySizeInBytes()); + }, cacheCleanIntervalInMillis * 2, TimeUnit.MILLISECONDS); + } + + // staleness threshold dynamic updates should throw exceptions on invalid input + public void testInvalidStalenessThresholdUpdateThrowsException() throws Exception { + int cacheCleanIntervalInMillis = 1; + String node = internalCluster().startNode( + Settings.builder() + .put(IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_STALENESS_THRESHOLD_SETTING_KEY, 0.90) + .put( + IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_INTERVAL_SETTING_KEY, + TimeValue.timeValueMillis(cacheCleanIntervalInMillis) + ) + ); + Client client = client(node); + String index1 = "index1"; + setupIndex(client, index1); + + // create first cache entry in index1 + createCacheEntry(client, index1, "hello"); + assertCacheState(client, index1, 0, 1); + assertTrue(getRequestCacheStats(client, index1).getMemorySizeInBytes() > 0); + + // Update indices.requests.cache.cleanup.staleness_threshold to "10%" with illegal argument + assertThrows("Ratio should be in [0-1.0]", IllegalArgumentException.class, () -> { + ClusterUpdateSettingsRequest updateSettingsRequest = new ClusterUpdateSettingsRequest(); + updateSettingsRequest.persistentSettings( + Settings.builder().put(IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_STALENESS_THRESHOLD_SETTING_KEY, 10) + ); + client().admin().cluster().updateSettings(updateSettingsRequest).actionGet(); + }); + + // everything else should continue to work fine later on. 
+ // force refresh so that it creates 1 stale key + flushAndRefresh(index1); + // sleep until cache cleaner would have cleaned up the stale key from index 2 + assertBusy(() -> { + // cache cleaner should NOT have cleaned from index 1 + assertEquals(0, getRequestCacheStats(client, index1).getMemorySizeInBytes()); + }, cacheCleanIntervalInMillis * 2, TimeUnit.MILLISECONDS); + } + + // closing the Index after caching will clean up from Indices Request Cache + public void testCacheClearanceAfterIndexClosure() throws Exception { + int cacheCleanIntervalInMillis = 100; + String node = internalCluster().startNode( + Settings.builder() + .put(IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_STALENESS_THRESHOLD_SETTING_KEY, 0.10) + .put( + IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_INTERVAL_SETTING_KEY, + TimeValue.timeValueMillis(cacheCleanIntervalInMillis) + ) + ); + Client client = client(node); + String index = "index"; + setupIndex(client, index); + + // assert there are no entries in the cache for index + assertEquals(0, getRequestCacheStats(client, index).getMemorySizeInBytes()); + // assert there are no entries in the cache from other indices in the node + assertEquals(0, getNodeCacheStats(client).getMemorySizeInBytes()); + // create first cache entry in index + createCacheEntry(client, index, "hello"); + assertCacheState(client, index, 0, 1); + assertTrue(getRequestCacheStats(client, index).getMemorySizeInBytes() > 0); + assertTrue(getNodeCacheStats(client).getMemorySizeInBytes() > 0); + + // close index + assertAcked(client.admin().indices().prepareClose(index)); + // request cache stats cannot be access since Index should be closed + try { + getRequestCacheStats(client, index); + } catch (Exception e) { + assert (e instanceof IndexClosedException); + } + // sleep until cache cleaner would have cleaned up the stale key from index + assertBusy(() -> { + // cache cleaner should have cleaned up the stale keys from index + assertEquals(0, 
getNodeCacheStats(client).getMemorySizeInBytes()); + }, cacheCleanIntervalInMillis * 2, TimeUnit.MILLISECONDS); + } + + // deleting the Index after caching will clean up from Indices Request Cache + public void testCacheCleanupAfterIndexDeletion() throws Exception { + int cacheCleanIntervalInMillis = 100; + String node = internalCluster().startNode( + Settings.builder() + .put(IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_STALENESS_THRESHOLD_SETTING_KEY, 0.10) + .put( + IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_INTERVAL_SETTING_KEY, + TimeValue.timeValueMillis(cacheCleanIntervalInMillis) + ) + ); + Client client = client(node); + String index = "index"; + setupIndex(client, index); + + // assert there are no entries in the cache for index + assertEquals(0, getRequestCacheStats(client, index).getMemorySizeInBytes()); + // assert there are no entries in the cache from other indices in the node + assertEquals(0, getNodeCacheStats(client).getMemorySizeInBytes()); + // create first cache entry in index + createCacheEntry(client, index, "hello"); + assertCacheState(client, index, 0, 1); + assertTrue(getRequestCacheStats(client, index).getMemorySizeInBytes() > 0); + assertTrue(getNodeCacheStats(client).getMemorySizeInBytes() > 0); + + // delete index + assertAcked(client.admin().indices().prepareDelete(index)); + // request cache stats cannot be access since Index should be deleted + try { + getRequestCacheStats(client, index); + } catch (Exception e) { + assert (e instanceof IndexNotFoundException); + } + + // sleep until cache cleaner would have cleaned up the stale key from index + assertBusy(() -> { + // cache cleaner should have cleaned up the stale keys from index + assertEquals(0, getNodeCacheStats(client).getMemorySizeInBytes()); + }, cacheCleanIntervalInMillis * 2, TimeUnit.MILLISECONDS); + } + + // when staleness threshold is lower than staleness, it should clean the cache from all indices having stale keys + public void 
testStaleKeysCleanupWithMultipleIndices() throws Exception { + int cacheCleanIntervalInMillis = 10; + String node = internalCluster().startNode( + Settings.builder() + .put(IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_STALENESS_THRESHOLD_SETTING_KEY, 0.10) + .put( + IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_INTERVAL_SETTING_KEY, + TimeValue.timeValueMillis(cacheCleanIntervalInMillis) + ) + ); + Client client = client(node); + String index1 = "index1"; + String index2 = "index2"; + setupIndex(client, index1); + setupIndex(client, index2); + + // assert cache is empty for index1 + assertEquals(0, getRequestCacheStats(client, index1).getMemorySizeInBytes()); + // create first cache entry in index1 + createCacheEntry(client, index1, "hello"); + assertCacheState(client, index1, 0, 1); + long memorySizeForIndex1With1Entries = getRequestCacheStats(client, index1).getMemorySizeInBytes(); + assertTrue(memorySizeForIndex1With1Entries > 0); + + // create second cache entry in index1 + createCacheEntry(client, index1, "there"); + assertCacheState(client, index1, 0, 2); + long memorySizeForIndex1With2Entries = getRequestCacheStats(client, index1).getMemorySizeInBytes(); + assertTrue(memorySizeForIndex1With2Entries > memorySizeForIndex1With1Entries); + + // assert cache is empty for index2 + assertEquals(0, getRequestCacheStats(client, index2).getMemorySizeInBytes()); + // create first cache entry in index2 + createCacheEntry(client, index2, "hello"); + assertCacheState(client, index2, 0, 1); + assertTrue(getRequestCacheStats(client, index2).getMemorySizeInBytes() > 0); + + // force refresh both index1 and index2 + flushAndRefresh(index1, index2); + // create another cache entry in index 1 same as memorySizeForIndex1With1Entries, this should not be cleaned up. 
+ createCacheEntry(client, index1, "hello"); + // sleep until cache cleaner would have cleaned up the stale key from index2 + assertBusy(() -> { + // cache cleaner should have cleaned up the stale key from index2 and hence cache should be empty + assertEquals(0, getRequestCacheStats(client, index2).getMemorySizeInBytes()); + // cache cleaner should have only cleaned up the stale entities for index1 + long currentMemorySizeInBytesForIndex1 = getRequestCacheStats(client, index1).getMemorySizeInBytes(); + // assert the memory size of index1 to only contain 1 entry added after flushAndRefresh + assertEquals(memorySizeForIndex1With1Entries, currentMemorySizeInBytesForIndex1); + // cache for index1 should not be empty since there was an item cached after flushAndRefresh + assertTrue(currentMemorySizeInBytesForIndex1 > 0); + }, cacheCleanIntervalInMillis * 2, TimeUnit.MILLISECONDS); + } + + public void testDeleteAndCreateSameIndexShardOnSameNode() throws Exception { + String node_1 = internalCluster().startNode(Settings.builder().build()); + Client client = client(node_1); + + logger.info("Starting a node in the cluster"); + + assertThat(cluster().size(), equalTo(1)); + ClusterHealthResponse healthResponse = client().admin().cluster().prepareHealth().setWaitForNodes("1").execute().actionGet(); + assertThat(healthResponse.isTimedOut(), equalTo(false)); + + String indexName = "test"; + + logger.info("Creating an index: {} with 2 shards", indexName); + createIndex( + indexName, + Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 2).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).build() + ); + + ensureGreen(indexName); + + logger.info("Writing few docs and searching those which will cache items in RequestCache"); + indexRandom(true, client.prepareIndex(indexName).setSource("k", "hello")); + indexRandom(true, client.prepareIndex(indexName).setSource("y", "hello again")); + SearchResponse resp = 
client.prepareSearch(indexName).setRequestCache(true).setQuery(QueryBuilders.termQuery("k", "hello")).get(); + assertSearchResponse(resp); + resp = client.prepareSearch(indexName).setRequestCache(true).setQuery(QueryBuilders.termQuery("y", "hello")).get(); + + RequestCacheStats stats = getNodeCacheStats(client); + assertTrue(stats.getMemorySizeInBytes() > 0); + + logger.info("Disabling allocation"); + Settings newSettings = Settings.builder() + .put(CLUSTER_ROUTING_ALLOCATION_ENABLE_SETTING.getKey(), EnableAllocationDecider.Allocation.NONE.name()) + .build(); + client().admin().cluster().prepareUpdateSettings().setTransientSettings(newSettings).execute().actionGet(); + + logger.info("Starting a second node"); + String node_2 = internalCluster().startDataOnlyNode(Settings.builder().build()); + assertThat(cluster().size(), equalTo(2)); + healthResponse = client().admin().cluster().prepareHealth().setWaitForNodes("2").execute().actionGet(); + assertThat(healthResponse.isTimedOut(), equalTo(false)); + + logger.info("Moving the shard:{} from node:{} to node:{}", indexName + "#0", node_1, node_2); + MoveAllocationCommand cmd = new MoveAllocationCommand(indexName, 0, node_1, node_2); + internalCluster().client().admin().cluster().prepareReroute().add(cmd).get(); + ClusterHealthResponse clusterHealth = client().admin() + .cluster() + .prepareHealth() + .setWaitForNoRelocatingShards(true) + .setWaitForNoInitializingShards(true) + .get(); + assertThat(clusterHealth.isTimedOut(), equalTo(false)); + + ClusterState state = client().admin().cluster().prepareState().get().getState(); + final Index index = state.metadata().index(indexName).getIndex(); + + assertBusy(() -> { + assertThat(Files.exists(shardDirectory(node_1, index, 0)), equalTo(false)); + assertThat(Files.exists(shardDirectory(node_2, index, 0)), equalTo(true)); + }); + + logger.info("Moving the shard: {} again from node:{} to node:{}", indexName + "#0", node_2, node_1); + cmd = new MoveAllocationCommand(indexName, 
0, node_2, node_1); + internalCluster().client().admin().cluster().prepareReroute().add(cmd).get(); + clusterHealth = client().admin() + .cluster() + .prepareHealth() + .setWaitForNoRelocatingShards(true) + .setWaitForNoInitializingShards(true) + .get(); + assertThat(clusterHealth.isTimedOut(), equalTo(false)); + assertThat(Files.exists(shardDirectory(node_1, index, 0)), equalTo(true)); + + assertBusy(() -> { + assertThat(Files.exists(shardDirectory(node_1, index, 0)), equalTo(true)); + assertThat(Files.exists(shardDirectory(node_2, index, 0)), equalTo(false)); + }); + + logger.info("Clearing the cache for index:{}. And verify the request stats doesn't go negative", indexName); + ClearIndicesCacheRequest clearIndicesCacheRequest = new ClearIndicesCacheRequest(indexName); + client.admin().indices().clearCache(clearIndicesCacheRequest).actionGet(); + + stats = getNodeCacheStats(client(node_1)); + assertTrue(stats.getMemorySizeInBytes() == 0); + stats = getNodeCacheStats(client(node_2)); + assertTrue(stats.getMemorySizeInBytes() == 0); + } + + private Path shardDirectory(String server, Index index, int shard) { + NodeEnvironment env = internalCluster().getInstance(NodeEnvironment.class, server); + final Path[] paths = env.availableShardPaths(new ShardId(index, shard)); + assert paths.length == 1; + return paths[0]; + } + + private void setupIndex(Client client, String index) throws Exception { + assertAcked( + client.admin() + .indices() + .prepareCreate(index) + .setMapping("k", "type=keyword") + .setSettings( + Settings.builder() + .put(IndicesRequestCache.INDEX_CACHE_REQUEST_ENABLED_SETTING.getKey(), true) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + ) + .get() + ); + indexRandom(true, client.prepareIndex(index).setSource("k", "hello")); + indexRandom(true, client.prepareIndex(index).setSource("k", "there")); + ensureSearchable(index); + } + + private void createCacheEntry(Client client, String index, 
String value) { + SearchResponse resp = client.prepareSearch(index).setRequestCache(true).setQuery(QueryBuilders.termQuery("k", value)).get(); + assertSearchResponse(resp); + OpenSearchAssertions.assertAllSuccessful(resp); } private static void assertCacheState(Client client, String index, long expectedHits, long expectedMisses) { - RequestCacheStats requestCacheStats = client.admin() - .indices() - .prepareStats(index) - .setRequestCache(true) - .get() - .getTotal() - .getRequestCache(); + RequestCacheStats requestCacheStats = getRequestCacheStats(client, index); // Check the hit count and miss count together so if they are not // correct we can see both values assertEquals( @@ -695,4 +1381,17 @@ private static void assertCacheState(Client client, String index, long expectedH } + private static RequestCacheStats getRequestCacheStats(Client client, String index) { + return client.admin().indices().prepareStats(index).setRequestCache(true).get().getTotal().getRequestCache(); + } + + private static RequestCacheStats getNodeCacheStats(Client client) { + NodesStatsResponse stats = client.admin().cluster().prepareNodesStats().execute().actionGet(); + for (NodeStats stat : stats.getNodes()) { + if (stat.getNode().isDataNode()) { + return stat.getIndices().getRequestCache(); + } + } + return null; + } } diff --git a/server/src/internalClusterTest/java/org/opensearch/persistent/PersistentTasksExecutorFullRestartIT.java b/server/src/internalClusterTest/java/org/opensearch/persistent/PersistentTasksExecutorFullRestartIT.java index 708388b3328f0..151a207d2c191 100644 --- a/server/src/internalClusterTest/java/org/opensearch/persistent/PersistentTasksExecutorFullRestartIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/persistent/PersistentTasksExecutorFullRestartIT.java @@ -43,6 +43,7 @@ import java.util.Collection; import java.util.Collections; import java.util.List; +import java.util.concurrent.TimeUnit; import static org.hamcrest.Matchers.empty; import static 
org.hamcrest.Matchers.equalTo; @@ -129,7 +130,7 @@ public void testFullClusterRestart() throws Exception { .custom(PersistentTasksCustomMetadata.TYPE)).tasks(), empty() ); - }); + }, 20, TimeUnit.SECONDS); } } diff --git a/server/src/internalClusterTest/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionForClusterManagerIT.java b/server/src/internalClusterTest/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionForClusterManagerIT.java index 4d1964326820e..b9da5ffb86af0 100644 --- a/server/src/internalClusterTest/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionForClusterManagerIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionForClusterManagerIT.java @@ -170,8 +170,8 @@ public void testAdmissionControlResponseStatus() throws Exception { @Override public void sendResponse(RestResponse response) { - waitForResponse.countDown(); aliasResponse.set(response); + waitForResponse.countDown(); } }; diff --git a/server/src/internalClusterTest/java/org/opensearch/remotemigration/DocRepMigrationTestCase.java b/server/src/internalClusterTest/java/org/opensearch/remotemigration/DocRepMigrationTestCase.java index 5240949ff87b9..61def4ec6e2a4 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotemigration/DocRepMigrationTestCase.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotemigration/DocRepMigrationTestCase.java @@ -29,7 +29,7 @@ public void testMixedModeAddDocRep() throws Exception { ClusterUpdateSettingsRequest updateSettingsRequest = new ClusterUpdateSettingsRequest(); updateSettingsRequest.persistentSettings(Settings.builder().put(REMOTE_STORE_COMPATIBILITY_MODE_SETTING.getKey(), "mixed")); assertAcked(client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); - addRemote = false; + setAddRemote(false); internalCluster().startNode(); String[] allNodes = internalCluster().getNodeNames(); assertBusy(() -> { 
assertEquals(client.admin().cluster().prepareClusterStats().get().getNodes().size(), allNodes.length); }); diff --git a/server/src/internalClusterTest/java/org/opensearch/remotemigration/MigrationBaseTestCase.java b/server/src/internalClusterTest/java/org/opensearch/remotemigration/MigrationBaseTestCase.java index 0c35f91121059..611dfc2756b29 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotemigration/MigrationBaseTestCase.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotemigration/MigrationBaseTestCase.java @@ -16,19 +16,25 @@ import org.opensearch.action.delete.DeleteResponse; import org.opensearch.action.index.IndexRequest; import org.opensearch.action.index.IndexResponse; +import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.metadata.RepositoryMetadata; +import org.opensearch.cluster.routing.RoutingNode; import org.opensearch.common.UUIDs; import org.opensearch.common.settings.Settings; import org.opensearch.common.util.FeatureFlags; import org.opensearch.repositories.fs.ReloadableFsRepository; import org.opensearch.test.OpenSearchIntegTestCase; +import org.junit.Before; import java.nio.file.Path; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.concurrent.ExecutionException; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; +import static org.opensearch.cluster.routing.allocation.decider.EnableAllocationDecider.CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING; import static org.opensearch.node.remotestore.RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING; import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING; import static org.opensearch.repositories.fs.ReloadableFsRepository.REPOSITORIES_FAILRATE_SETTING; @@ -51,6 +57,16 @@ public class MigrationBaseTestCase extends OpenSearchIntegTestCase { randomAlphaOfLength(5) ); + void setAddRemote(boolean addRemote) { + 
this.addRemote = addRemote; + } + + @Before + public void setUp() throws Exception { + super.setUp(); + setAddRemote(false); + } + protected Settings nodeSettings(int nodeOrdinal) { if (segmentRepoPath == null || translogRepoPath == null) { segmentRepoPath = randomRepoPath().toAbsolutePath(); @@ -114,6 +130,20 @@ public BulkResponse indexBulk(String indexName, int numDocs) { return client().bulk(bulkRequest).actionGet(); } + Map getShardCountByNodeId() { + final Map shardCountByNodeId = new HashMap<>(); + final ClusterState clusterState = client().admin().cluster().prepareState().get().getState(); + for (final RoutingNode node : clusterState.getRoutingNodes()) { + logger.info( + "----> node {} has {} shards", + node.nodeId(), + clusterState.getRoutingNodes().node(node.nodeId()).numberOfOwningShards() + ); + shardCountByNodeId.put(node.nodeId(), clusterState.getRoutingNodes().node(node.nodeId()).numberOfOwningShards()); + } + return shardCountByNodeId; + } + private void indexSingleDoc(String indexName) { IndexResponse indexResponse = client().prepareIndex(indexName).setId("id").setSource("field", "value").get(); assertEquals(DocWriteResponse.Result.CREATED, indexResponse.getResult()); @@ -128,6 +158,8 @@ public class AsyncIndexingService { private AtomicBoolean finished = new AtomicBoolean(); private Thread indexingThread; + private int refreshFrequency = 3; + AsyncIndexingService(String indexName) { this.indexName = indexName; } @@ -151,10 +183,42 @@ private Thread getIndexingThread() { while (finished.get() == false) { indexSingleDoc(indexName); long currentDocCount = indexedDocs.incrementAndGet(); + if (currentDocCount > 0 && currentDocCount % refreshFrequency == 0) { + logger.info("--> [iteration {}] flushing index", currentDocCount); + if (rarely()) { + client().admin().indices().prepareFlush(indexName).get(); + } else { + client().admin().indices().prepareRefresh(indexName).get(); + } + } logger.info("Completed ingestion of {} docs", currentDocCount); - } }); 
} + + public void setRefreshFrequency(int refreshFrequency) { + this.refreshFrequency = refreshFrequency; + } + } + + public void excludeNodeSet(String attr, String value) { + assertAcked( + internalCluster().client() + .admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().put("cluster.routing.allocation.exclude._" + attr, value)) + .get() + ); + } + + public void stopShardRebalancing() { + assertAcked( + client().admin() + .cluster() + .prepareUpdateSettings() + .setPersistentSettings(Settings.builder().put(CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING.getKey(), "none").build()) + .get() + ); } } diff --git a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteDualReplicationIT.java b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteDualReplicationIT.java index e316bae5d8ebc..5094a7cf29c6a 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteDualReplicationIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteDualReplicationIT.java @@ -30,6 +30,7 @@ import org.opensearch.test.transport.MockTransportService; import java.util.Collection; +import java.util.List; import java.util.Map; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -89,7 +90,7 @@ public void testRemotePrimaryDocRepReplica() throws Exception { initDocRepToRemoteMigration(); logger.info("---> Starting 1 remote enabled data node"); - addRemote = true; + setAddRemote(true); String remoteNodeName = internalCluster().startDataOnlyNode(); internalCluster().validateClusterFormed(); assertEquals( @@ -132,8 +133,8 @@ public void testRemotePrimaryDocRepReplica() throws Exception { /* Scenario: - - Starts 1 docrep backed data node - - Creates an index with 0 replica + - Starts 2 docrep backed data node + - Creates an index with 1 replica - Starts 1 remote backed data node - Index some docs - Move primary copy from docrep to remote through 
_cluster/reroute @@ -145,14 +146,14 @@ public void testRemotePrimaryDocRepReplica() throws Exception { public void testRemotePrimaryDocRepAndRemoteReplica() throws Exception { internalCluster().startClusterManagerOnlyNode(); - logger.info("---> Starting 1 docrep data nodes"); - String docrepNodeName = internalCluster().startDataOnlyNode(); + logger.info("---> Starting 2 docrep data nodes"); + internalCluster().startDataOnlyNodes(2); internalCluster().validateClusterFormed(); assertEquals(internalCluster().client().admin().cluster().prepareGetRepositories().get().repositories().size(), 0); - logger.info("---> Creating index with 0 replica"); + logger.info("---> Creating index with 1 replica"); Settings zeroReplicas = Settings.builder() - .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) .put(IndexService.RETENTION_LEASE_SYNC_INTERVAL_SETTING.getKey(), "1s") .put(IndexService.GLOBAL_CHECKPOINT_SYNC_INTERVAL_SETTING.getKey(), "1s") .build(); @@ -161,7 +162,7 @@ public void testRemotePrimaryDocRepAndRemoteReplica() throws Exception { initDocRepToRemoteMigration(); logger.info("---> Starting 1 remote enabled data node"); - addRemote = true; + setAddRemote(true); String remoteNodeName = internalCluster().startDataOnlyNode(); internalCluster().validateClusterFormed(); @@ -245,14 +246,26 @@ RLs on remote enabled copies are brought up to (GlobalCkp + 1) upon a flush requ pollAndCheckRetentionLeases(REMOTE_PRI_DOCREP_REMOTE_REP); } + /* + Scenario: + - Starts 2 docrep backed data node + - Creates an index with 1 replica + - Starts 1 remote backed data node + - Index some docs + - Move primary copy from docrep to remote through _cluster/reroute + - Starts another remote backed data node + - Expands index to 2 replicas. 
One replica copy lies in remote backed node and other in docrep backed node + - Index some more docs + - Assert retention lease consistency + */ public void testMissingRetentionLeaseCreatedOnFailedOverRemoteReplica() throws Exception { internalCluster().startClusterManagerOnlyNode(); - logger.info("---> Starting docrep data node"); - internalCluster().startDataOnlyNode(); + logger.info("---> Starting 2 docrep data nodes"); + internalCluster().startDataOnlyNodes(2); Settings zeroReplicasAndOverridenSyncIntervals = Settings.builder() - .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) .put(IndexService.GLOBAL_CHECKPOINT_SYNC_INTERVAL_SETTING.getKey(), "100ms") .put(IndexService.RETENTION_LEASE_SYNC_INTERVAL_SETTING.getKey(), "100ms") .build(); @@ -323,11 +336,10 @@ private void pollAndCheckRetentionLeases(String indexName) throws Exception { /* Scenario: - - Starts 1 docrep backed data node - - Creates an index with 0 replica + - Starts 2 docrep backed data node + - Creates an index with 1 replica - Starts 1 remote backed data node - Move primary copy from docrep to remote through _cluster/reroute - - Expands index to 1 replica - Stops remote enabled node - Ensure doc count is same after failover - Index some more docs to ensure working of failed-over primary @@ -335,18 +347,18 @@ private void pollAndCheckRetentionLeases(String indexName) throws Exception { public void testFailoverRemotePrimaryToDocrepReplica() throws Exception { internalCluster().startClusterManagerOnlyNode(); - logger.info("---> Starting 1 docrep data nodes"); - String docrepNodeName = internalCluster().startDataOnlyNode(); + logger.info("---> Starting 2 docrep data nodes"); + internalCluster().startDataOnlyNodes(2); internalCluster().validateClusterFormed(); assertEquals(internalCluster().client().admin().cluster().prepareGetRepositories().get().repositories().size(), 0); logger.info("---> Creating index with 0 replica"); - Settings 
excludeRemoteNode = Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).build(); + Settings excludeRemoteNode = Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1).build(); createIndex(FAILOVER_REMOTE_TO_DOCREP, excludeRemoteNode); ensureGreen(FAILOVER_REMOTE_TO_DOCREP); initDocRepToRemoteMigration(); logger.info("---> Starting 1 remote enabled data node"); - addRemote = true; + setAddRemote(true); String remoteNodeName = internalCluster().startDataOnlyNode(); internalCluster().validateClusterFormed(); assertEquals( @@ -376,8 +388,8 @@ public void testFailoverRemotePrimaryToDocrepReplica() throws Exception { ); ensureGreen(FAILOVER_REMOTE_TO_DOCREP); - logger.info("---> Expanding index to 1 replica copy"); - Settings twoReplicas = Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1).build(); + logger.info("---> Expanding index to 2 replica copies"); + Settings twoReplicas = Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 2).build(); assertAcked( internalCluster().client() .admin() @@ -412,7 +424,7 @@ public void testFailoverRemotePrimaryToDocrepReplica() throws Exception { logger.info("---> Stop remote store enabled node"); internalCluster().stopRandomNode(InternalTestCluster.nameFilter(remoteNodeName)); - ensureStableCluster(2); + ensureStableCluster(3); ensureYellow(FAILOVER_REMOTE_TO_DOCREP); shardStatsMap = internalCluster().client().admin().indices().prepareStats(FAILOVER_REMOTE_TO_DOCREP).setDocs(true).get().asMap(); @@ -433,7 +445,150 @@ public void testFailoverRemotePrimaryToDocrepReplica() throws Exception { refreshAndWaitForReplication(FAILOVER_REMOTE_TO_DOCREP); shardStatsMap = internalCluster().client().admin().indices().prepareStats(FAILOVER_REMOTE_TO_DOCREP).setDocs(true).get().asMap(); - assertEquals(1, shardStatsMap.size()); + assertEquals(2, shardStatsMap.size()); + shardStatsMap.forEach( + (shardRouting, shardStats) -> { assertEquals(firstBatch + secondBatch, 
shardStats.getStats().getDocs().getCount()); } + ); + } + + /* + Scenario: + - Starts 2 docrep backed data nodes + - Creates an index with 1 replica + - Starts 1 remote backed data node + - Moves primary copy from docrep to remote through _cluster/reroute + - Starts 1 more remote backed data node + - Expands index to 2 replicas, one each on new remote node and docrep node + - Stops remote enabled node hosting the primary + - Ensures remote replica gets promoted to primary + - Ensures doc count is same after failover + - Indexes some more docs to ensure working of failed-over primary + */ + public void testFailoverRemotePrimaryToRemoteReplica() throws Exception { + internalCluster().startClusterManagerOnlyNode(); + + logger.info("---> Starting 2 docrep data nodes"); + List docrepNodeNames = internalCluster().startDataOnlyNodes(2); + internalCluster().validateClusterFormed(); + assertEquals(internalCluster().client().admin().cluster().prepareGetRepositories().get().repositories().size(), 0); + + logger.info("---> Creating index with 1 replica"); + createIndex(FAILOVER_REMOTE_TO_REMOTE, Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1).build()); + ensureGreen(FAILOVER_REMOTE_TO_REMOTE); + initDocRepToRemoteMigration(); + + logger.info("---> Starting 1 remote enabled data node"); + addRemote = true; + String remoteNodeName1 = internalCluster().startDataOnlyNode(); + internalCluster().validateClusterFormed(); + assertEquals( + internalCluster().client() + .admin() + .cluster() + .prepareGetRepositories(REPOSITORY_NAME, REPOSITORY_2_NAME) + .get() + .repositories() + .size(), + 2 + ); + + logger.info("---> Starting doc ingestion in parallel thread"); + AsyncIndexingService asyncIndexingService = new AsyncIndexingService(FAILOVER_REMOTE_TO_REMOTE); + asyncIndexingService.startIndexing(); + + String primaryNodeName = primaryNodeName(FAILOVER_REMOTE_TO_REMOTE); + logger.info("---> Moving primary copy from docrep node {} to remote enabled node {}", 
primaryNodeName, remoteNodeName1); + assertAcked( + internalCluster().client() + .admin() + .cluster() + .prepareReroute() + .add(new MoveAllocationCommand(FAILOVER_REMOTE_TO_REMOTE, 0, primaryNodeName, remoteNodeName1)) + .get() + ); + waitForRelocation(); + ensureGreen(FAILOVER_REMOTE_TO_REMOTE); + assertEquals(primaryNodeName(FAILOVER_REMOTE_TO_REMOTE), remoteNodeName1); + + logger.info("---> Starting 1 more remote enabled data node"); + String remoteNodeName2 = internalCluster().startDataOnlyNode(); + internalCluster().validateClusterFormed(); + + logger.info("---> Expanding index to 2 replica copies, on docrepNode and remoteNode2"); + assertAcked( + internalCluster().client() + .admin() + .indices() + .prepareUpdateSettings() + .setIndices(FAILOVER_REMOTE_TO_REMOTE) + .setSettings( + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 2) + // prevent replica copy from being allocated to the extra docrep node + .put("index.routing.allocation.exclude._name", primaryNodeName) + .build() + ) + .get() + ); + ensureGreen(FAILOVER_REMOTE_TO_REMOTE); + + logger.info("---> Stopping indexing thread"); + asyncIndexingService.stopIndexing(); + + refreshAndWaitForReplication(FAILOVER_REMOTE_TO_REMOTE); + Map shardStatsMap = internalCluster().client() + .admin() + .indices() + .prepareStats(FAILOVER_REMOTE_TO_REMOTE) + .setDocs(true) + .get() + .asMap(); + DiscoveryNodes nodes = internalCluster().client().admin().cluster().prepareState().get().getState().getNodes(); + long initialPrimaryDocCount = 0; + for (ShardRouting shardRouting : shardStatsMap.keySet()) { + if (shardRouting.primary()) { + assertTrue(nodes.get(shardRouting.currentNodeId()).isRemoteStoreNode()); + initialPrimaryDocCount = shardStatsMap.get(shardRouting).getStats().getDocs().getCount(); + } + } + int firstBatch = (int) asyncIndexingService.getIndexedDocs(); + assertReplicaAndPrimaryConsistency(FAILOVER_REMOTE_TO_REMOTE, firstBatch, 0); + + logger.info("---> Stop remote store enabled node 
hosting the primary"); + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(remoteNodeName1)); + ensureStableCluster(4); + ensureYellowAndNoInitializingShards(FAILOVER_REMOTE_TO_REMOTE); + DiscoveryNodes finalNodes = internalCluster().client().admin().cluster().prepareState().get().getState().getNodes(); + + waitUntil(() -> { + ClusterState clusterState = client().admin().cluster().prepareState().get().getState(); + String nodeId = clusterState.getRoutingTable().index(FAILOVER_REMOTE_TO_REMOTE).shard(0).primaryShard().currentNodeId(); + if (nodeId == null) { + return false; + } else { + assertEquals(finalNodes.get(nodeId).getName(), remoteNodeName2); + return finalNodes.get(nodeId).isRemoteStoreNode(); + } + }); + + shardStatsMap = internalCluster().client().admin().indices().prepareStats(FAILOVER_REMOTE_TO_REMOTE).setDocs(true).get().asMap(); + long primaryDocCountAfterFailover = 0; + for (ShardRouting shardRouting : shardStatsMap.keySet()) { + if (shardRouting.primary()) { + assertTrue(finalNodes.get(shardRouting.currentNodeId()).isRemoteStoreNode()); + primaryDocCountAfterFailover = shardStatsMap.get(shardRouting).getStats().getDocs().getCount(); + } + } + assertEquals(initialPrimaryDocCount, primaryDocCountAfterFailover); + + logger.info("---> Index some more docs to ensure that the failed over primary is ingesting new docs"); + int secondBatch = randomIntBetween(1, 10); + logger.info("---> Indexing {} more docs", secondBatch); + indexBulk(FAILOVER_REMOTE_TO_REMOTE, secondBatch); + refreshAndWaitForReplication(FAILOVER_REMOTE_TO_REMOTE); + + shardStatsMap = internalCluster().client().admin().indices().prepareStats(FAILOVER_REMOTE_TO_REMOTE).setDocs(true).get().asMap(); + assertEquals(2, shardStatsMap.size()); shardStatsMap.forEach( (shardRouting, shardStats) -> { assertEquals(firstBatch + secondBatch, shardStats.getStats().getDocs().getCount()); } ); @@ -445,7 +600,6 @@ public void testFailoverRemotePrimaryToDocrepReplica() throws Exception { - 
Creates an index with 0 replica - Starts 1 remote backed data node - Move primary copy from docrep to remote through _cluster/reroute - - Expands index to 1 replica - Stops remote enabled node - Ensure doc count is same after failover - Index some more docs to ensure working of failed-over primary @@ -468,7 +622,7 @@ public void testFailoverRemotePrimaryToDocrepReplicaReseedToRemotePrimary() thro ensureGreen(FAILOVER_REMOTE_TO_DOCREP); logger.info("---> Starting a new remote enabled node"); - addRemote = true; + setAddRemote(true); String remoteNodeName = internalCluster().startDataOnlyNode(); internalCluster().validateClusterFormed(); assertEquals( @@ -529,7 +683,8 @@ private void assertReplicaAndPrimaryConsistency(String indexName, int firstBatch RemoteSegmentStats remoteSegmentStats = shardStats.getSegments().getRemoteSegmentStats(); assertTrue(remoteSegmentStats.getUploadBytesSucceeded() > 0); assertTrue(remoteSegmentStats.getTotalUploadTime() > 0); - } else { + } + if (shardRouting.unassigned() == false && shardRouting.primary() == false) { boolean remoteNode = nodes.get(shardRouting.currentNodeId()).isRemoteStoreNode(); assertEquals( "Mismatched doc count. Is this on remote node ? 
" + remoteNode, diff --git a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteMigrationAllocationDeciderIT.java b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteMigrationAllocationDeciderIT.java index de425ffc63816..eeb6a5a5626e4 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteMigrationAllocationDeciderIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteMigrationAllocationDeciderIT.java @@ -11,8 +11,11 @@ import org.opensearch.action.admin.cluster.health.ClusterHealthResponse; import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest; import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.cluster.routing.UnassignedInfo; import org.opensearch.cluster.routing.allocation.command.MoveAllocationCommand; +import org.opensearch.cluster.routing.allocation.decider.Decision; import org.opensearch.common.Priority; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; @@ -21,13 +24,17 @@ import java.io.IOException; import java.util.List; +import java.util.Locale; +import java.util.Optional; +import static org.opensearch.node.remotestore.RemoteStoreNodeService.CompatibilityMode.MIXED; +import static org.opensearch.node.remotestore.RemoteStoreNodeService.Direction.REMOTE_STORE; import static org.opensearch.node.remotestore.RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING; import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; @OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) -public class RemoteMigrationAllocationDeciderIT extends MigrationBaseTestCase { +public class 
RemoteMigrationAllocationDeciderIT extends RemoteStoreMigrationShardAllocationBaseTestCase { // When the primary is on doc rep node, existing replica copy can get allocated on excluded docrep node. public void testFilterAllocationSkipsReplica() throws IOException { @@ -127,4 +134,404 @@ public void testFilterAllocationSkipsReplicaOnExcludedNode() throws IOException assertTrue(clusterHealthResponse.isTimedOut()); ensureYellow("test"); } + + // When under mixed mode and remote_store direction, a primary shard can only be allocated to a remote node + + public void testNewPrimaryShardAllocationForRemoteStoreMigration() throws Exception { + logger.info("Initialize cluster"); + internalCluster().startClusterManagerOnlyNode(); + + logger.info("Add non-remote data node"); + String nonRemoteNodeName = internalCluster().startDataOnlyNode(); + internalCluster().validateClusterFormed(); + DiscoveryNode nonRemoteNode = assertNodeInCluster(nonRemoteNodeName); + + logger.info("Set mixed mode and remote_store direction"); + setClusterMode(MIXED.mode); + setDirection(REMOTE_STORE.direction); + + logger.info("Verify expected decision for allocating a new primary shard on a non-remote node"); + prepareIndexWithoutReplica(Optional.empty()); + Decision decision = getDecisionForTargetNode(nonRemoteNode, true, true, false); + assertEquals(Decision.Type.NO, decision.type()); + assertEquals( + "[remote_store migration_direction]: primary shard copy can not be allocated to a non-remote node", + decision.getExplanation().toLowerCase(Locale.ROOT) + ); + + logger.info("Attempt allocation on non-remote node"); + attemptAllocation(null); + + logger.info("Verify non-allocation of primary shard on non-remote node"); + assertNonAllocation(true); + + logger.info("Add remote data node"); + setAddRemote(true); + String remoteNodeName = internalCluster().startDataOnlyNode(); + internalCluster().validateClusterFormed(); + DiscoveryNode remoteNode = assertNodeInCluster(remoteNodeName); + + 
logger.info("Verify expected decision for allocating a new primary shard on a remote node"); + excludeAllNodes(); + decision = getDecisionForTargetNode(remoteNode, true, true, false); + assertEquals(Decision.Type.YES, decision.type()); + assertEquals( + "[remote_store migration_direction]: primary shard copy can be allocated to a remote node", + decision.getExplanation().toLowerCase(Locale.ROOT) + ); + + logger.info("Attempt free allocation"); + attemptAllocation(null); + ensureGreen(TEST_INDEX); + + logger.info("Verify allocation of primary shard on remote node"); + assertAllocation(true, remoteNode); + } + + // When under mixed mode and remote_store direction, a replica shard can only be allocated to a remote node if the primary has relocated + // to another remote node + + public void testNewReplicaShardAllocationIfPrimaryShardOnNonRemoteNodeForRemoteStoreMigration() throws Exception { + logger.info("Initialize cluster"); + internalCluster().startClusterManagerOnlyNode(); + + logger.info("Add non-remote data node"); + String nonRemoteNodeName1 = internalCluster().startDataOnlyNode(); + internalCluster().validateClusterFormed(); + DiscoveryNode nonRemoteNode1 = assertNodeInCluster(nonRemoteNodeName1); + + logger.info("Allocate primary shard on non-remote node"); + prepareIndexWithAllocatedPrimary(nonRemoteNode1, Optional.empty()); + + logger.info("Add remote data node"); + setClusterMode(MIXED.mode); + setAddRemote(true); + String remoteNodeName = internalCluster().startDataOnlyNode(); + internalCluster().validateClusterFormed(); + DiscoveryNode remoteNode = assertNodeInCluster(remoteNodeName); + + logger.info("Set remote_store direction"); + setDirection(REMOTE_STORE.direction); + + logger.info("Verify expected decision for allocating a replica shard on a remote node"); + excludeAllNodes(); + Decision decision = getDecisionForTargetNode(remoteNode, false, true, false); + assertEquals(Decision.Type.NO, decision.type()); + assertEquals( + "[remote_store 
migration_direction]: replica shard copy can not be allocated to a remote node since primary shard copy is not yet migrated to remote", + decision.getExplanation().toLowerCase(Locale.ROOT) + ); + + logger.info("Attempt free allocation of replica shard"); + attemptAllocation(null); + + logger.info("Verify non-allocation of replica shard"); + assertNonAllocation(false); + + logger.info("Add another non-remote data node"); + setAddRemote(false); + String nonRemoteNodeName2 = internalCluster().startDataOnlyNode(); + internalCluster().validateClusterFormed(); + DiscoveryNode nonRemoteNode2 = assertNodeInCluster(nonRemoteNodeName2); + + logger.info("Verify expected decision for allocating the replica shard on a non-remote node"); + excludeAllNodes(); + decision = getDecisionForTargetNode(nonRemoteNode2, false, true, false); + assertEquals(Decision.Type.YES, decision.type()); + assertEquals( + "[remote_store migration_direction]: replica shard copy can be allocated to a non-remote node", + decision.getExplanation().toLowerCase(Locale.ROOT) + ); + + logger.info("Attempt free allocation of replica shard"); + attemptAllocation(null); + ensureGreen(TEST_INDEX); + + logger.info("Verify allocation of replica shard on non-remote node"); + assertAllocation(false, nonRemoteNode2); + } + + public void testNewReplicaShardAllocationIfPrimaryShardOnRemoteNodeForRemoteStoreMigration() throws Exception { + logger.info("Initialize cluster"); + internalCluster().startClusterManagerOnlyNode(); + + logger.info("Add non-remote data nodes"); + String nonRemoteNodeName1 = internalCluster().startDataOnlyNode(); + String nonRemoteNodeName2 = internalCluster().startDataOnlyNode(); + internalCluster().validateClusterFormed(); + DiscoveryNode nonRemoteNode1 = assertNodeInCluster(nonRemoteNodeName1); + DiscoveryNode nonRemoteNode2 = assertNodeInCluster(nonRemoteNodeName2); + + logger.info("Allocate primary and replica shard on non-remote nodes"); + createIndex(TEST_INDEX, 1); + 
ensureGreen(TEST_INDEX); + + logger.info("Set mixed mode"); + setClusterMode(MIXED.mode); + + logger.info("Add remote data nodes"); + setAddRemote(true); + String remoteNodeName1 = internalCluster().startDataOnlyNode(); + String remoteNodeName2 = internalCluster().startDataOnlyNode(); + internalCluster().validateClusterFormed(); + DiscoveryNode remoteNode1 = assertNodeInCluster(remoteNodeName1); + DiscoveryNode remoteNode2 = assertNodeInCluster(remoteNodeName2); + + logger.info("Set remote_store direction"); + setDirection(REMOTE_STORE.direction); + + logger.info("Relocate primary shard to remote node"); + DiscoveryNode initialPrimaryNode = primaryNodeName(TEST_INDEX).equals(nonRemoteNodeName1) ? nonRemoteNode1 : nonRemoteNode2; + DiscoveryNode initialReplicaNode = initialPrimaryNode.equals(nonRemoteNode1) ? nonRemoteNode2 : nonRemoteNode1; + assertAcked( + internalCluster().client() + .admin() + .cluster() + .prepareReroute() + .add(new MoveAllocationCommand(TEST_INDEX, 0, initialPrimaryNode.getName(), remoteNodeName1)) + .get() + ); + ensureGreen(TEST_INDEX); + assertAllocation(true, remoteNode1); + + logger.info("Verify expected decision for relocating a replica shard on non-remote node"); + Decision decision = getDecisionForTargetNode(initialPrimaryNode, false, true, true); + assertEquals(Decision.Type.YES, decision.type()); + assertEquals( + "[remote_store migration_direction]: replica shard copy can be relocated to a non-remote node", + decision.getExplanation().toLowerCase(Locale.ROOT) + ); + + logger.info("Attempt relocation of replica shard to non-remote node"); + assertAcked( + internalCluster().client() + .admin() + .cluster() + .prepareReroute() + .add(new MoveAllocationCommand(TEST_INDEX, 0, initialReplicaNode.getName(), initialPrimaryNode.getName())) + .get() + ); + + logger.info("Verify relocation of replica shard to non-remote node"); + ensureGreen(TEST_INDEX); + assertAllocation(false, initialPrimaryNode); + + logger.info("Verify expected decision 
for relocating a replica shard on remote node"); + decision = getDecisionForTargetNode(remoteNode2, false, true, true); + assertEquals(Decision.Type.YES, decision.type()); + assertEquals( + "[remote_store migration_direction]: replica shard copy can be relocated to a remote node since primary shard copy has been migrated to remote", + decision.getExplanation().toLowerCase(Locale.ROOT) + ); + + logger.info("Attempt relocation of replica shard to remote node"); + assertAcked( + internalCluster().client() + .admin() + .cluster() + .prepareReroute() + .add(new MoveAllocationCommand(TEST_INDEX, 0, initialPrimaryNode.getName(), remoteNodeName2)) + .get() + ); + + logger.info("Verify relocation of replica shard to non-remote node"); + ensureGreen(TEST_INDEX); + assertAllocation(false, remoteNode2); + } + + // When under strict mode, a shard can be allocated to any node + + public void testAlwaysAllocateNewShardForStrictMode() throws Exception { + boolean isRemoteCluster = randomBoolean(); + boolean isReplicaAllocation = randomBoolean(); + + logger.info("Initialize cluster and add nodes"); + setAddRemote(isRemoteCluster); + internalCluster().startClusterManagerOnlyNode(); + String nodeName1 = internalCluster().startDataOnlyNode(); + String nodeName2 = internalCluster().startDataOnlyNode(); + internalCluster().validateClusterFormed(); + DiscoveryNode node1 = assertNodeInCluster(nodeName1); + DiscoveryNode node2 = assertNodeInCluster(nodeName2); + + if (isReplicaAllocation) { + prepareIndexWithAllocatedPrimary(node1, Optional.empty()); + } else { + prepareIndexWithoutReplica(Optional.empty()); + } + + if (isRemoteCluster) { + assertRemoteStoreBackedIndex(TEST_INDEX); + } else { + assertNonRemoteStoreBackedIndex(TEST_INDEX); + } + + logger.info("Verify expected decision for allocation of a shard"); + excludeAllNodes(); + Decision decision = getDecisionForTargetNode( + isReplicaAllocation ? 
node2 : randomFrom(node1, node2), + !isReplicaAllocation, + true, + false + ); + assertEquals(Decision.Type.YES, decision.type()); + String expectedReason = String.format( + Locale.ROOT, + "[none migration_direction]: %s shard copy can be allocated to a %s node for strict compatibility mode", + (isReplicaAllocation ? "replica" : "primary"), + (isRemoteCluster ? "remote" : "non-remote") + ); + assertEquals(expectedReason, decision.getExplanation().toLowerCase(Locale.ROOT)); + + logger.info("Attempt free allocation"); + attemptAllocation(null); + ensureGreen(TEST_INDEX); + + logger.info("Verify allocation of shard"); + assertAllocation(!isReplicaAllocation, !isReplicaAllocation ? null : node2); + } + + // When under mixed mode and remote_store direction, shard of a remote store backed index can not be allocated to a non-remote node + + public void testRemoteStoreBackedIndexShardAllocationForRemoteStoreMigration() throws Exception { + logger.info("Initialize cluster"); + internalCluster().startClusterManagerOnlyNode(); + + logger.info("Set mixed mode"); + setClusterMode(MIXED.mode); + + logger.info("Add remote and non-remote nodes"); + String nonRemoteNodeName = internalCluster().startDataOnlyNode(); + setAddRemote(true); + String remoteNodeName = internalCluster().startDataOnlyNode(); + internalCluster().validateClusterFormed(); + DiscoveryNode remoteNode = assertNodeInCluster(remoteNodeName); + DiscoveryNode nonRemoteNode = assertNodeInCluster(nonRemoteNodeName); + + logger.info("Set remote_store direction"); + setDirection(REMOTE_STORE.direction); + + boolean isReplicaAllocation = randomBoolean(); + if (isReplicaAllocation) { + logger.info("Create index with primary allocated on remote node"); + prepareIndexWithAllocatedPrimary(remoteNode, Optional.empty()); + } else { + logger.info("Create index with unallocated primary"); + prepareIndexWithoutReplica(Optional.empty()); + } + + logger.info("Verify remote store backed index"); + 
assertRemoteStoreBackedIndex(TEST_INDEX); + + logger.info("Verify expected decision for allocation of shard on a non-remote node"); + excludeAllNodes(); + Decision decision = getDecisionForTargetNode(nonRemoteNode, !isReplicaAllocation, false, false); + assertEquals(Decision.Type.NO, decision.type()); + String expectedReason = String.format( + Locale.ROOT, + "[remote_store migration_direction]: %s shard copy can not be allocated to a non-remote node because a remote store backed index's shard copy can only be allocated to a remote node", + (isReplicaAllocation ? "replica" : "primary") + ); + assertEquals(expectedReason, decision.getExplanation().toLowerCase(Locale.ROOT)); + + logger.info("Attempt allocation of shard on non-remote node"); + attemptAllocation(nonRemoteNodeName); + + logger.info("Verify non-allocation of shard"); + assertNonAllocation(!isReplicaAllocation); + } + + // When under mixed mode and none direction, allocate shard of a remote store backed index to a remote node and shard of a non remote + // store backed index to a non-remote node only + + public void testAllocationForNoneDirectionAndMixedMode() throws Exception { + boolean isRemoteStoreBackedIndex = randomBoolean(); + boolean isReplicaAllocation = randomBoolean(); + logger.info( + String.format( + Locale.ROOT, + "Test for allocation decisions for %s shard of a %s store backed index under NONE direction", + (isReplicaAllocation ? "replica" : "primary"), + (isRemoteStoreBackedIndex ? 
"remote" : "non remote") + ) + ); + + logger.info("Initialize cluster"); + setAddRemote(isRemoteStoreBackedIndex); + internalCluster().startClusterManagerOnlyNode(); + + logger.info("Add data nodes"); + String previousNodeName1 = internalCluster().startDataOnlyNode(); + String previousNodeName2 = internalCluster().startDataOnlyNode(); + internalCluster().validateClusterFormed(); + DiscoveryNode previousNode1 = assertNodeInCluster(previousNodeName1); + DiscoveryNode previousNode2 = assertNodeInCluster(previousNodeName2); + + logger.info("Prepare test index"); + if (isReplicaAllocation) { + prepareIndexWithAllocatedPrimary(previousNode1, Optional.empty()); + } else { + prepareIndexWithoutReplica(Optional.empty()); + } + + if (isRemoteStoreBackedIndex) { + assertRemoteStoreBackedIndex(TEST_INDEX); + } else { + assertNonRemoteStoreBackedIndex(TEST_INDEX); + } + + logger.info("Switch to MIXED cluster compatibility mode"); + setClusterMode(MIXED.mode); + setAddRemote(!addRemote); + String newNodeName = internalCluster().startDataOnlyNode(); + internalCluster().validateClusterFormed(); + DiscoveryNode newNode = assertNodeInCluster(newNodeName); + + logger.info("Verify decision for allocation on the new node"); + excludeAllNodes(); + Decision decision = getDecisionForTargetNode(newNode, !isReplicaAllocation, false, false); + assertEquals(Decision.Type.NO, decision.type()); + String expectedReason = String.format( + Locale.ROOT, + "[none migration_direction]: %s shard copy can not be allocated to a %s node for %s store backed index", + (isReplicaAllocation ? "replica" : "primary"), + (isRemoteStoreBackedIndex ? "non-remote" : "remote"), + (isRemoteStoreBackedIndex ? 
"remote" : "non remote") + ); + assertEquals(expectedReason, decision.getExplanation().toLowerCase(Locale.ROOT)); + + logger.info("Attempt allocation of shard on new node"); + attemptAllocation(newNodeName); + + logger.info("Verify non-allocation of shard"); + assertNonAllocation(!isReplicaAllocation); + + logger.info("Verify decision for allocation on previous node"); + decision = getDecisionForTargetNode(previousNode2, !isReplicaAllocation, true, false); + assertEquals(Decision.Type.YES, decision.type()); + expectedReason = String.format( + Locale.ROOT, + "[none migration_direction]: %s shard copy can be allocated to a %s node for %s store backed index", + (isReplicaAllocation ? "replica" : "primary"), + (isRemoteStoreBackedIndex ? "remote" : "non-remote"), + (isRemoteStoreBackedIndex ? "remote" : "non remote") + ); + assertEquals(expectedReason, decision.getExplanation().toLowerCase(Locale.ROOT)); + + logger.info("Attempt free allocation of shard"); + attemptAllocation(null); + + logger.info("Verify successful allocation of shard"); + if (!isReplicaAllocation) { + ensureGreen(TEST_INDEX); + } else { + ensureYellowAndNoInitializingShards(TEST_INDEX); + } + assertAllocation(!isReplicaAllocation, null); + logger.info("Verify allocation on one of the previous nodes"); + ShardRouting shardRouting = getShardRouting(!isReplicaAllocation); + assertTrue( + shardRouting.currentNodeId().equals(previousNode1.getId()) || shardRouting.currentNodeId().equals(previousNode2.getId()) + ); + } } diff --git a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteMigrationIndexMetadataUpdateIT.java b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteMigrationIndexMetadataUpdateIT.java new file mode 100644 index 0000000000000..45679598dc551 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteMigrationIndexMetadataUpdateIT.java @@ -0,0 +1,516 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The 
OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.remotemigration; + +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.health.ClusterHealthStatus; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.cluster.routing.allocation.command.MoveAllocationCommand; +import org.opensearch.common.settings.Settings; +import org.opensearch.indices.replication.common.ReplicationType; +import org.opensearch.test.InternalTestCluster; +import org.opensearch.test.OpenSearchIntegTestCase; + +import java.util.List; +import java.util.function.Function; +import java.util.stream.Collectors; + +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; + +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) +public class RemoteMigrationIndexMetadataUpdateIT extends MigrationBaseTestCase { + /** + * Scenario: + * Performs a blue/green type migration from docrep to remote enabled cluster. 
+ * Asserts that remote based index settings are applied after all shards move over + */ + public void testIndexSettingsUpdateAfterIndexMovedToRemoteThroughAllocationExclude() throws Exception { + internalCluster().startClusterManagerOnlyNode(); + + logger.info("---> Starting 2 docrep nodes"); + addRemote = false; + internalCluster().startDataOnlyNodes(2, Settings.builder().put("node.attr._type", "docrep").build()); + internalCluster().validateClusterFormed(); + + logger.info("---> Creates an index with 1 primary and 1 replica"); + String indexName = "migration-index-allocation-exclude"; + Settings oneReplica = Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .build(); + logger.info("---> Asserts index still has docrep index settings"); + createIndexAndAssertDocrepProperties(indexName, oneReplica); + + logger.info("---> Start indexing in parallel thread"); + AsyncIndexingService asyncIndexingService = new AsyncIndexingService(indexName); + asyncIndexingService.startIndexing(); + initDocRepToRemoteMigration(); + + logger.info("---> Adding 2 remote enabled nodes to the cluster"); + addRemote = true; + internalCluster().startDataOnlyNodes(2, Settings.builder().put("node.attr._type", "remote").build()); + internalCluster().validateClusterFormed(); + + logger.info("---> Excluding docrep nodes from allocation"); + excludeNodeSet("type", "docrep"); + waitForRelocation(); + waitNoPendingTasksOnAll(); + + logger.info("---> Stop indexing and assert remote enabled index settings have been applied"); + asyncIndexingService.stopIndexing(); + assertRemoteProperties(indexName); + } + + /** + * Scenario: + * Performs a manual _cluster/reroute to move shards from docrep to remote enabled nodes. 
+ * Asserts that remote based index settings are only applied for indices whose shards + * have completely moved over to remote enabled nodes + */ + public void testIndexSettingsUpdateAfterIndexMovedToRemoteThroughManualReroute() throws Exception { + internalCluster().startClusterManagerOnlyNode(); + + logger.info("---> Starting 2 docrep nodes"); + List docrepNodeNames = internalCluster().startDataOnlyNodes(2); + internalCluster().validateClusterFormed(); + + logger.info("---> Creating 2 indices with 1 primary and 1 replica"); + String indexName1 = "migration-index-manual-reroute-1"; + String indexName2 = "migration-index-manual-reroute-2"; + Settings oneReplica = Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .build(); + createIndexAndAssertDocrepProperties(indexName1, oneReplica); + createIndexAndAssertDocrepProperties(indexName2, oneReplica); + + logger.info("---> Starting parallel indexing on both indices"); + AsyncIndexingService indexOne = new AsyncIndexingService(indexName1); + indexOne.startIndexing(); + + AsyncIndexingService indexTwo = new AsyncIndexingService(indexName2); + indexTwo.startIndexing(); + + logger.info( + "---> Stopping shard rebalancing to ensure shards do not automatically move over to newer nodes after they are launched" + ); + stopShardRebalancing(); + + logger.info("---> Starting 2 remote store enabled nodes"); + initDocRepToRemoteMigration(); + addRemote = true; + List remoteNodeNames = internalCluster().startDataOnlyNodes(2); + internalCluster().validateClusterFormed(); + + String primaryNode = primaryNodeName(indexName1); + String replicaNode = docrepNodeNames.stream() + .filter(nodeName -> nodeName.equals(primaryNodeName(indexName1)) == false) + .collect(Collectors.toList()) + .get(0); + + logger.info("---> Moving over both shard copies for the first index to remote enabled nodes"); + assertAcked( + client().admin() + .cluster() + .prepareReroute() + 
.add(new MoveAllocationCommand(indexName1, 0, primaryNode, remoteNodeNames.get(0))) + .execute() + .actionGet() + ); + waitForRelocation(); + + assertAcked( + client().admin() + .cluster() + .prepareReroute() + .add(new MoveAllocationCommand(indexName1, 0, replicaNode, remoteNodeNames.get(1))) + .execute() + .actionGet() + ); + waitForRelocation(); + + logger.info("---> Moving only primary for the second index to remote enabled nodes"); + assertAcked( + client().admin() + .cluster() + .prepareReroute() + .add(new MoveAllocationCommand(indexName2, 0, primaryNodeName(indexName2), remoteNodeNames.get(0))) + .execute() + .actionGet() + ); + waitForRelocation(); + waitNoPendingTasksOnAll(); + + logger.info("---> Stopping indexing"); + indexOne.stopIndexing(); + indexTwo.stopIndexing(); + + logger.info("---> Assert remote settings are applied for index one but not for index two"); + assertRemoteProperties(indexName1); + assertDocrepProperties(indexName2); + } + + /** + * Scenario: + * Creates a mixed mode cluster. One index gets created before remote nodes are introduced, + * while the other one is created after remote nodes are added. + *
<p>
+ * For the first index, asserts docrep settings at first, excludes docrep nodes from + * allocation and asserts that remote index settings are applied after all shards + * have been relocated. + * <p>
+ * For the second index, asserts that it already has remote enabled settings. + * Indexes some more docs and asserts that the index metadata version does not increment + */ + public void testIndexSettingsUpdatedOnlyForMigratingIndex() throws Exception { + internalCluster().startClusterManagerOnlyNode(); + + logger.info("---> Starting 2 docrep nodes"); + addRemote = false; + internalCluster().startDataOnlyNodes(2, Settings.builder().put("node.attr._type", "docrep").build()); + internalCluster().validateClusterFormed(); + + logger.info("---> Creating the first index with 1 primary and 1 replica"); + String indexName = "migration-index"; + Settings oneReplica = Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .build(); + createIndexAndAssertDocrepProperties(indexName, oneReplica); + + logger.info("---> Starting indexing in parallel"); + AsyncIndexingService indexingService = new AsyncIndexingService(indexName); + indexingService.startIndexing(); + + logger.info("---> Storing current index metadata version"); + long initalMetadataVersion = internalCluster().client() + .admin() + .cluster() + .prepareState() + .get() + .getState() + .metadata() + .index(indexName) + .getVersion(); + + logger.info("---> Adding 2 remote enabled nodes to the cluster"); + initDocRepToRemoteMigration(); + addRemote = true; + internalCluster().startDataOnlyNodes(2, Settings.builder().put("node.attr._type", "remote").build()); + internalCluster().validateClusterFormed(); + + logger.info("---> Excluding docrep nodes from allocation"); + excludeNodeSet("type", "docrep"); + + waitForRelocation(); + waitNoPendingTasksOnAll(); + indexingService.stopIndexing(); + + logger.info("---> Assert remote settings are applied"); + assertRemoteProperties(indexName); + assertTrue( + initalMetadataVersion < internalCluster().client() + .admin() + .cluster() + .prepareState() + .get() + .getState() + .metadata() + .index(indexName) + 
.getVersion() + ); + + logger.info("---> Creating a new index on remote enabled nodes"); + String secondIndex = "remote-index"; + createIndex( + secondIndex, + Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1).put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).build() + ); + indexBulk(secondIndex, 100); + initalMetadataVersion = internalCluster().client() + .admin() + .cluster() + .prepareState() + .get() + .getState() + .metadata() + .index(secondIndex) + .getVersion(); + refresh(secondIndex); + ensureGreen(secondIndex); + + waitNoPendingTasksOnAll(); + + assertRemoteProperties(secondIndex); + + logger.info("---> Assert metadata version is not changed"); + assertEquals( + initalMetadataVersion, + internalCluster().client().admin().cluster().prepareState().get().getState().metadata().index(secondIndex).getVersion() + ); + } + + /** + * Scenario: + * Creates an index with 1 primary, 2 replicas on 2 docrep nodes. Since the replica + * configuration is incorrect, the index stays YELLOW. + * Starts 2 more remote nodes and initiates shard relocation through allocation exclusion. 
+ * After shard relocation completes, shuts down the docrep nodes and asserts remote + * index settings are applied even when the index is in YELLOW state + */ + public void testIndexSettingsUpdatedEvenForMisconfiguredReplicas() throws Exception { + internalCluster().startClusterManagerOnlyNode(); + + logger.info("---> Starting 2 docrep nodes"); + addRemote = false; + List docrepNodes = internalCluster().startDataOnlyNodes(2, Settings.builder().put("node.attr._type", "docrep").build()); + internalCluster().validateClusterFormed(); + + logger.info("---> Creating index with 1 primary and 2 replicas"); + String indexName = "migration-index-allocation-exclude"; + Settings oneReplica = Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 2) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .build(); + createIndexAssertHealthAndDocrepProperties(indexName, oneReplica, this::ensureYellowAndNoInitializingShards); + + logger.info("---> Starting indexing in parallel"); + AsyncIndexingService asyncIndexingService = new AsyncIndexingService(indexName); + asyncIndexingService.startIndexing(); + + logger.info("---> Starts 2 remote enabled nodes"); + initDocRepToRemoteMigration(); + addRemote = true; + internalCluster().startDataOnlyNodes(2, Settings.builder().put("node.attr._type", "remote").build()); + internalCluster().validateClusterFormed(); + + logger.info("---> Excluding docrep nodes from allocation"); + excludeNodeSet("type", "docrep"); + waitForRelocation(); + waitNoPendingTasksOnAll(); + asyncIndexingService.stopIndexing(); + + logger.info("---> Assert cluster has turned green since more nodes are added to the cluster"); + ensureGreen(indexName); + + logger.info("---> Assert index still has dcorep settings since replica copies are still on docrep nodes"); + assertDocrepProperties(indexName); + + logger.info("---> Stopping docrep nodes"); + for (String node : docrepNodes) { + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(node)); + } + 
waitNoPendingTasksOnAll(); + ensureYellowAndNoInitializingShards(indexName); + + logger.info("---> Assert remote settings are applied"); + assertRemoteProperties(indexName); + } + + /** + * Scenario: + * Creates an index with 1 primary, 2 replicas on 2 docrep nodes. + * Starts 2 more remote nodes and initiates shard relocation through allocation exclusion. + * After shard relocation completes, restarts the docrep node holding extra replica shard copy + * and asserts remote index settings are applied as soon as the docrep replica copy is unassigned + */ + public void testIndexSettingsUpdatedWhenDocrepNodeIsRestarted() throws Exception { + internalCluster().startClusterManagerOnlyNode(); + + logger.info("---> Starting 2 docrep nodes"); + addRemote = false; + List docrepNodes = internalCluster().startDataOnlyNodes(2, Settings.builder().put("node.attr._type", "docrep").build()); + internalCluster().validateClusterFormed(); + + logger.info("---> Creating index with 1 primary and 2 replicas"); + String indexName = "migration-index-allocation-exclude"; + Settings oneReplica = Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 2) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .build(); + createIndexAssertHealthAndDocrepProperties(indexName, oneReplica, this::ensureYellowAndNoInitializingShards); + + logger.info("---> Starting indexing in parallel"); + AsyncIndexingService asyncIndexingService = new AsyncIndexingService(indexName); + asyncIndexingService.startIndexing(); + + logger.info("---> Starts 2 remote enabled nodes"); + initDocRepToRemoteMigration(); + addRemote = true; + internalCluster().startDataOnlyNodes(2, Settings.builder().put("node.attr._type", "remote").build()); + internalCluster().validateClusterFormed(); + + logger.info("---> Excluding docrep nodes from allocation"); + excludeNodeSet("type", "docrep"); + waitForRelocation(); + waitNoPendingTasksOnAll(); + asyncIndexingService.stopIndexing(); + + logger.info("---> Assert cluster has 
turned green since more nodes are added to the cluster"); + ensureGreen(indexName); + + logger.info("---> Assert index still has dcorep settings since replica copies are still on docrep nodes"); + assertDocrepProperties(indexName); + + ClusterState clusterState = internalCluster().client().admin().cluster().prepareState().get().getState(); + DiscoveryNodes nodes = clusterState.nodes(); + + String docrepReplicaNodeName = ""; + for (ShardRouting shardRouting : clusterState.routingTable().index(indexName).shard(0).getShards()) { + if (nodes.get(shardRouting.currentNodeId()).isRemoteStoreNode() == false) { + docrepReplicaNodeName = nodes.get(shardRouting.currentNodeId()).getName(); + break; + } + } + excludeNodeSet("type", null); + + logger.info("---> Stopping docrep node holding the replica copy"); + internalCluster().restartNode(docrepReplicaNodeName); + ensureStableCluster(5); + waitNoPendingTasksOnAll(); + + logger.info("---> Assert remote index settings have been applied"); + assertRemoteProperties(indexName); + logger.info("---> Assert cluster is yellow since remote index settings have been applied"); + ensureYellowAndNoInitializingShards(indexName); + } + + /** + * Scenario: + * Creates a docrep cluster with 3 nodes and an index with 1 primary and 2 replicas. + * Adds 3 more remote nodes to the cluster and moves over the primary copy from docrep + * to remote through _cluster/reroute. Asserts that the remote store path based metadata + * have been applied to the index. 
+ * Moves over the first replica copy and asserts that the remote store based settings have not been applied + * Excludes docrep nodes from allocation to force migration of the 3rd replica copy and asserts remote + * store settings have been applied as all shards have moved over + */ + public void testRemotePathMetadataAddedWithFirstPrimaryMovingToRemote() throws Exception { + String indexName = "index-1"; + internalCluster().startClusterManagerOnlyNode(); + + logger.info("---> Starting 3 docrep nodes"); + internalCluster().startDataOnlyNodes(3, Settings.builder().put("node.attr._type", "docrep").build()); + internalCluster().validateClusterFormed(); + + logger.info("---> Creating index with 1 primary and 2 replicas"); + Settings oneReplica = Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 2).build(); + createIndexAndAssertDocrepProperties(indexName, oneReplica); + + logger.info("---> Adding 3 remote enabled nodes"); + initDocRepToRemoteMigration(); + addRemote = true; + List remoteEnabledNodes = internalCluster().startDataOnlyNodes( + 3, + Settings.builder().put("node.attr._type", "remote").build() + ); + + logger.info("---> Moving primary copy to remote enabled node"); + String primaryNodeName = primaryNodeName(indexName); + assertAcked( + client().admin() + .cluster() + .prepareReroute() + .add(new MoveAllocationCommand(indexName, 0, primaryNodeName, remoteEnabledNodes.get(0))) + .execute() + .actionGet() + ); + waitForRelocation(); + waitNoPendingTasksOnAll(); + + logger.info("---> Assert custom remote path based metadata is applied"); + assertCustomIndexMetadata(indexName); + + logger.info("---> Moving over one replica copy to remote enabled node"); + String replicaNodeName = replicaNodeName(indexName); + assertAcked( + client().admin() + .cluster() + .prepareReroute() + .add(new MoveAllocationCommand(indexName, 0, replicaNodeName, remoteEnabledNodes.get(1))) + .execute() + .actionGet() + ); + waitForRelocation(); + waitNoPendingTasksOnAll(); +
+ logger.info("---> Assert index still has docrep settings"); + assertDocrepProperties(indexName); + + logger.info("---> Excluding docrep nodes from allocation"); + excludeNodeSet("type", "docrep"); + waitForRelocation(); + waitNoPendingTasksOnAll(); + + logger.info("---> Assert index has remote store settings"); + assertRemoteProperties(indexName); + } + + private void createIndexAndAssertDocrepProperties(String index, Settings settings) { + createIndexAssertHealthAndDocrepProperties(index, settings, this::ensureGreen); + } + + private void createIndexAssertHealthAndDocrepProperties( + String index, + Settings settings, + Function ensureState + ) { + createIndex(index, settings); + refresh(index); + ensureState.apply(index); + assertDocrepProperties(index); + } + + /** + * Assert current index settings have: + * - index.remote_store.enabled == false + * - index.remote_store.segment.repository == null + * - index.remote_store.translog.repository == null + * - index.replication.type == DOCUMENT + */ + private void assertDocrepProperties(String index) { + logger.info("---> Asserting docrep index settings"); + IndexMetadata iMd = internalCluster().client().admin().cluster().prepareState().get().getState().metadata().index(index); + Settings settings = iMd.getSettings(); + assertFalse(IndexMetadata.INDEX_REMOTE_STORE_ENABLED_SETTING.get(settings)); + assertFalse(IndexMetadata.INDEX_REMOTE_TRANSLOG_REPOSITORY_SETTING.exists(settings)); + assertFalse(IndexMetadata.INDEX_REMOTE_SEGMENT_STORE_REPOSITORY_SETTING.exists(settings)); + assertEquals(ReplicationType.DOCUMENT, IndexMetadata.INDEX_REPLICATION_TYPE_SETTING.get(settings)); + } + + /** + * Assert current index settings have: + * - index.remote_store.enabled == true + * - index.remote_store.segment.repository != null + * - index.remote_store.translog.repository != null + * - index.replication.type == SEGMENT + * Asserts index metadata customs has the remote_store key + */ + private void assertRemoteProperties(String 
index) { + logger.info("---> Asserting remote index settings"); + IndexMetadata iMd = internalCluster().client().admin().cluster().prepareState().get().getState().metadata().index(index); + Settings settings = iMd.getSettings(); + assertTrue(IndexMetadata.INDEX_REMOTE_STORE_ENABLED_SETTING.get(settings)); + assertTrue(IndexMetadata.INDEX_REMOTE_TRANSLOG_REPOSITORY_SETTING.exists(settings)); + assertTrue(IndexMetadata.INDEX_REMOTE_SEGMENT_STORE_REPOSITORY_SETTING.exists(settings)); + assertEquals(ReplicationType.SEGMENT, IndexMetadata.INDEX_REPLICATION_TYPE_SETTING.get(settings)); + assertNotNull(iMd.getCustomData(IndexMetadata.REMOTE_STORE_CUSTOM_KEY)); + } + + /** + * Asserts index metadata customs has the remote_store key + */ + private void assertCustomIndexMetadata(String index) { + logger.info("---> Asserting custom index metadata"); + IndexMetadata iMd = internalCluster().client().admin().cluster().prepareState().get().getState().metadata().index(index); + assertNotNull(iMd.getCustomData(IndexMetadata.REMOTE_STORE_CUSTOM_KEY)); + } +} diff --git a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemotePrimaryLocalRecoveryIT.java b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemotePrimaryLocalRecoveryIT.java new file mode 100644 index 0000000000000..024fc68602a19 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemotePrimaryLocalRecoveryIT.java @@ -0,0 +1,179 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.remotemigration; + +import org.opensearch.action.admin.indices.stats.ShardStats; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.common.blobstore.BlobPath; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.core.util.FileSystemUtils; +import org.opensearch.index.remote.RemoteSegmentStats; +import org.opensearch.index.translog.RemoteTranslogStats; +import org.opensearch.test.InternalTestCluster; +import org.opensearch.test.OpenSearchIntegTestCase; + +import java.nio.file.Path; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; + +import static org.opensearch.index.remote.RemoteStoreEnums.DataCategory.SEGMENTS; +import static org.opensearch.index.remote.RemoteStoreEnums.DataType.DATA; +import static org.opensearch.index.store.RemoteSegmentStoreDirectory.SEGMENT_NAME_UUID_SEPARATOR; + +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) +public class RemotePrimaryLocalRecoveryIT extends MigrationBaseTestCase { + String indexName = "idx1"; + int numOfNodes = randomIntBetween(6, 9); + + /** + * Tests local recovery sanity in the happy path flow + */ + public void testLocalRecoveryRollingRestart() throws Exception { + triggerRollingRestartForRemoteMigration(0); + internalCluster().stopAllNodes(); + } + + /** + * Tests local recovery sanity during remote migration with a node restart in between + */ + public void testLocalRecoveryRollingRestartAndNodeFailure() throws Exception { + triggerRollingRestartForRemoteMigration(0); + + DiscoveryNodes discoveryNodes = 
internalCluster().client().admin().cluster().prepareState().get().getState().getNodes(); + DiscoveryNode nodeToRestart = (DiscoveryNode) discoveryNodes.getDataNodes().values().toArray()[randomIntBetween(0, numOfNodes - 4)]; + internalCluster().restartNode(nodeToRestart.getName()); + + Map shardStatsMap = internalCluster().client().admin().indices().prepareStats(indexName).get().asMap(); + for (Map.Entry entry : shardStatsMap.entrySet()) { + ShardRouting shardRouting = entry.getKey(); + ShardStats shardStats = entry.getValue(); + if (nodeToRestart.equals(shardRouting.currentNodeId())) { + RemoteSegmentStats remoteSegmentStats = shardStats.getStats().getSegments().getRemoteSegmentStats(); + assertTrue(remoteSegmentStats.getTotalUploadTime() > 0); + assertTrue(remoteSegmentStats.getUploadBytesSucceeded() > 0); + } + + assertBusy(() -> { + String shardPath = getShardLevelBlobPath( + client(), + indexName, + new BlobPath(), + String.valueOf(shardRouting.getId()), + SEGMENTS, + DATA + ).buildAsString(); + Path segmentDataRepoPath = segmentRepoPath.resolve(shardPath); + List segmentsNFilesInRepo = Arrays.stream(FileSystemUtils.files(segmentDataRepoPath)) + .filter(path -> path.getFileName().toString().contains("segments_")) + .map(path -> path.getFileName().toString()) + .collect(Collectors.toList()); + Set expectedUniqueSegmentsNFiles = segmentsNFilesInRepo.stream() + .map(fileName -> fileName.split(SEGMENT_NAME_UUID_SEPARATOR)[0]) + .collect(Collectors.toSet()); + assertEquals( + "Expected no duplicate segments_N files in remote but duplicates were found " + segmentsNFilesInRepo, + expectedUniqueSegmentsNFiles.size(), + segmentsNFilesInRepo.size() + ); + }, 90, TimeUnit.SECONDS); + } + + internalCluster().stopAllNodes(); + } + + /** + * Tests local recovery flow sanity in the happy path flow with replicas in place + */ + public void testLocalRecoveryFlowWithReplicas() throws Exception { + triggerRollingRestartForRemoteMigration(randomIntBetween(1, 2)); + 
internalCluster().stopAllNodes(); + } + + /** + * Helper method to run a rolling restart for migration to remote backed cluster + */ + private void triggerRollingRestartForRemoteMigration(int replicaCount) throws Exception { + internalCluster().startClusterManagerOnlyNodes(3); + internalCluster().startNodes(numOfNodes - 3); + + // create index + Settings indexSettings = Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, replicaCount) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, randomIntBetween(1, 10)) + .build(); + createIndex(indexName, indexSettings); + ensureGreen(indexName); + indexBulk(indexName, randomIntBetween(100, 10000)); + refresh(indexName); + indexBulk(indexName, randomIntBetween(100, 10000)); + + initDocRepToRemoteMigration(); + + // rolling restart + final Settings remoteNodeAttributes = remoteStoreClusterSettings( + REPOSITORY_NAME, + segmentRepoPath, + REPOSITORY_2_NAME, + translogRepoPath + ); + internalCluster().rollingRestart(new InternalTestCluster.RestartCallback() { + // Update remote attributes + @Override + public Settings onNodeStopped(String nodeName) { + return remoteNodeAttributes; + } + }); + ensureStableCluster(numOfNodes); + ensureGreen(TimeValue.timeValueSeconds(90), indexName); + assertEquals(internalCluster().size(), numOfNodes); + + // Assert on remote uploads + Map shardStatsMap = internalCluster().client().admin().indices().prepareStats(indexName).get().asMap(); + DiscoveryNodes discoveryNodes = internalCluster().client().admin().cluster().prepareState().get().getState().getNodes(); + shardStatsMap.forEach((shardRouting, shardStats) -> { + if (discoveryNodes.get(shardRouting.currentNodeId()).isRemoteStoreNode() && shardRouting.primary()) { + RemoteSegmentStats remoteSegmentStats = shardStats.getStats().getSegments().getRemoteSegmentStats(); + assertTrue(remoteSegmentStats.getTotalUploadTime() > 0); + assertTrue(remoteSegmentStats.getUploadBytesSucceeded() > 0); + } + }); + + // Assert on new remote uploads 
after seeding + indexBulk(indexName, randomIntBetween(100, 10000)); + refresh(indexName); + indexBulk(indexName, randomIntBetween(100, 10000)); + Map newShardStatsMap = internalCluster().client().admin().indices().prepareStats(indexName).get().asMap(); + newShardStatsMap.forEach((shardRouting, shardStats) -> { + if (discoveryNodes.get(shardRouting.currentNodeId()).isRemoteStoreNode() && shardRouting.primary()) { + RemoteSegmentStats prevRemoteSegmentStats = shardStatsMap.get(shardRouting) + .getStats() + .getSegments() + .getRemoteSegmentStats(); + RemoteSegmentStats newRemoteSegmentStats = shardStats.getStats().getSegments().getRemoteSegmentStats(); + assertTrue(newRemoteSegmentStats.getTotalUploadTime() > prevRemoteSegmentStats.getTotalUploadTime()); + assertTrue(newRemoteSegmentStats.getUploadBytesSucceeded() > prevRemoteSegmentStats.getUploadBytesSucceeded()); + + RemoteTranslogStats prevRemoteTranslogStats = shardStatsMap.get(shardRouting) + .getStats() + .getTranslog() + .getRemoteTranslogStats(); + RemoteTranslogStats newRemoteTranslogStats = shardStats.getStats().getTranslog().getRemoteTranslogStats(); + assertTrue(newRemoteTranslogStats.getUploadBytesSucceeded() > prevRemoteTranslogStats.getUploadBytesSucceeded()); + } + }); + } +} diff --git a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemotePrimaryRelocationIT.java b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemotePrimaryRelocationIT.java index b1c429a45a1a1..293691ace2edd 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemotePrimaryRelocationIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemotePrimaryRelocationIT.java @@ -8,9 +8,8 @@ package org.opensearch.remotemigration; -import com.carrotsearch.randomizedtesting.generators.RandomNumbers; - import org.opensearch.action.DocWriteResponse; +import org.opensearch.action.admin.cluster.health.ClusterHealthRequest; import 
org.opensearch.action.admin.cluster.health.ClusterHealthResponse; import org.opensearch.action.admin.cluster.repositories.get.GetRepositoriesRequest; import org.opensearch.action.admin.cluster.repositories.get.GetRepositoriesResponse; @@ -18,21 +17,25 @@ import org.opensearch.action.delete.DeleteResponse; import org.opensearch.action.index.IndexResponse; import org.opensearch.client.Client; +import org.opensearch.client.Requests; import org.opensearch.cluster.routing.allocation.command.MoveAllocationCommand; import org.opensearch.common.Priority; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.index.query.QueryBuilders; +import org.opensearch.indices.recovery.RecoverySettings; import org.opensearch.plugins.Plugin; import org.opensearch.test.OpenSearchIntegTestCase; import org.opensearch.test.hamcrest.OpenSearchAssertions; import org.opensearch.test.transport.MockTransportService; import java.util.Collection; +import java.util.List; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import static java.util.Arrays.asList; +import static org.opensearch.node.remotestore.RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING; import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; @@ -42,7 +45,6 @@ protected int maximumNumberOfShards() { return 1; } - // ToDo : Fix me when we support migration of replicas protected int maximumNumberOfReplicas() { return 0; } @@ -51,9 +53,9 @@ protected Collection> nodePlugins() { return asList(MockTransportService.TestPlugin.class); } - public void testMixedModeRelocation() throws Exception { - String docRepNode = internalCluster().startNode(); - Client client = internalCluster().client(docRepNode); + public void testRemotePrimaryRelocation() throws Exception { + List docRepNodes = 
internalCluster().startNodes(2); + Client client = internalCluster().client(docRepNodes.get(0)); ClusterUpdateSettingsRequest updateSettingsRequest = new ClusterUpdateSettingsRequest(); updateSettingsRequest.persistentSettings(Settings.builder().put(REMOTE_STORE_COMPATIBILITY_MODE_SETTING.getKey(), "mixed")); assertAcked(client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); @@ -69,10 +71,13 @@ public void testMixedModeRelocation() throws Exception { refresh("test"); // add remote node in mixed mode cluster - addRemote = true; + setAddRemote(true); String remoteNode = internalCluster().startNode(); internalCluster().validateClusterFormed(); + updateSettingsRequest.persistentSettings(Settings.builder().put(MIGRATION_DIRECTION_SETTING.getKey(), "remote_store")); + assertAcked(client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); + String remoteNode2 = internalCluster().startNode(); internalCluster().validateClusterFormed(); @@ -86,8 +91,17 @@ public void testMixedModeRelocation() throws Exception { int finalCurrentDoc1 = currentDoc; waitUntil(() -> numAutoGenDocs.get() > finalCurrentDoc1 + 5); - logger.info("--> relocating from {} to {} ", docRepNode, remoteNode); - client().admin().cluster().prepareReroute().add(new MoveAllocationCommand("test", 0, docRepNode, remoteNode)).execute().actionGet(); + // Change direction to remote store + updateSettingsRequest.persistentSettings(Settings.builder().put(MIGRATION_DIRECTION_SETTING.getKey(), "remote_store")); + assertAcked(client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); + + logger.info("--> relocating from {} to {} ", docRepNodes, remoteNode); + client().admin() + .cluster() + .prepareReroute() + .add(new MoveAllocationCommand("test", 0, primaryNodeName("test"), remoteNode)) + .execute() + .actionGet(); ClusterHealthResponse clusterHealthResponse = client().admin() .cluster() .prepareHealth() @@ -158,16 +172,20 @@ public void 
testMixedModeRelocation_RemoteSeedingFail() throws Exception { refresh("test"); // add remote node in mixed mode cluster - addRemote = true; + setAddRemote(true); String remoteNode = internalCluster().startNode(); internalCluster().validateClusterFormed(); - // assert repo gets registered - GetRepositoriesRequest gr = new GetRepositoriesRequest(new String[] { REPOSITORY_NAME }); - GetRepositoriesResponse getRepositoriesResponse = client.admin().cluster().getRepositories(gr).actionGet(); - assertEquals(1, getRepositoriesResponse.repositories().size()); - setFailRate(REPOSITORY_NAME, 100); + client().admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().put(RecoverySettings.INDICES_INTERNAL_REMOTE_UPLOAD_TIMEOUT.getKey(), "10s")) + .get(); + + // Change direction to remote store + updateSettingsRequest.persistentSettings(Settings.builder().put(MIGRATION_DIRECTION_SETTING.getKey(), "remote_store")); + assertAcked(client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); logger.info("--> relocating from {} to {} ", docRepNode, remoteNode); client().admin().cluster().prepareReroute().add(new MoveAllocationCommand("test", 0, docRepNode, remoteNode)).execute().actionGet(); @@ -181,29 +199,23 @@ public void testMixedModeRelocation_RemoteSeedingFail() throws Exception { .actionGet(); assertTrue(clusterHealthResponse.getRelocatingShards() == 1); - setFailRate(REPOSITORY_NAME, 0); - Thread.sleep(RandomNumbers.randomIntBetween(random(), 0, 2000)); - clusterHealthResponse = client().admin() - .cluster() - .prepareHealth() - .setTimeout(TimeValue.timeValueSeconds(45)) - .setWaitForEvents(Priority.LANGUID) - .setWaitForNoRelocatingShards(true) - .execute() - .actionGet(); - assertTrue(clusterHealthResponse.getRelocatingShards() == 0); - logger.info("--> remote to remote relocation complete"); + // waiting more than waitForRemoteStoreSync's sleep time of 30 sec to deterministically fail + Thread.sleep(40000); + + 
ClusterHealthRequest healthRequest = Requests.clusterHealthRequest() + .waitForNoRelocatingShards(true) + .waitForNoInitializingShards(true); + ClusterHealthResponse actionGet = client().admin().cluster().health(healthRequest).actionGet(); + assertEquals(actionGet.getRelocatingShards(), 0); + assertEquals(docRepNode, primaryNodeName("test")); + finished.set(true); indexingThread.join(); - refresh("test"); - OpenSearchAssertions.assertHitCount(client().prepareSearch("test").setTrackTotalHits(true).get(), numAutoGenDocs.get()); - OpenSearchAssertions.assertHitCount( - client().prepareSearch("test") - .setTrackTotalHits(true)// extra paranoia ;) - .setQuery(QueryBuilders.termQuery("auto", true)) - .get(), - numAutoGenDocs.get() - ); + client().admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().put(RecoverySettings.INDICES_INTERNAL_REMOTE_UPLOAD_TIMEOUT.getKey(), (String) null)) + .get(); } private static Thread getIndexingThread(AtomicBoolean finished, AtomicInteger numAutoGenDocs) { diff --git a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteReplicaRecoveryIT.java b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteReplicaRecoveryIT.java new file mode 100644 index 0000000000000..196ecb991bbc0 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteReplicaRecoveryIT.java @@ -0,0 +1,180 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.remotemigration; + +import com.carrotsearch.randomizedtesting.generators.RandomNumbers; + +import org.opensearch.action.DocWriteResponse; +import org.opensearch.action.admin.cluster.health.ClusterHealthResponse; +import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest; +import org.opensearch.action.admin.indices.settings.put.UpdateSettingsRequest; +import org.opensearch.action.delete.DeleteResponse; +import org.opensearch.action.index.IndexResponse; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.routing.allocation.command.MoveAllocationCommand; +import org.opensearch.common.Priority; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.test.OpenSearchIntegTestCase; +import org.opensearch.test.hamcrest.OpenSearchAssertions; + +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.opensearch.node.remotestore.RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING; +import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; + +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0, autoManageMasterNodes = false) + +public class RemoteReplicaRecoveryIT extends MigrationBaseTestCase { + + protected int maximumNumberOfShards() { + return 1; + } + + protected int maximumNumberOfReplicas() { + return 1; + } + + protected int minimumNumberOfReplicas() { + return 1; + } + + /* + Brings up new replica copies on remote and docrep nodes, when primary is on a remote node + Live indexing is happening meanwhile + */ + public void testReplicaRecovery() throws Exception { + internalCluster().setBootstrapClusterManagerNodeIndex(0); + String 
primaryNode = internalCluster().startNode(); + ClusterUpdateSettingsRequest updateSettingsRequest = new ClusterUpdateSettingsRequest(); + updateSettingsRequest.persistentSettings(Settings.builder().put(REMOTE_STORE_COMPATIBILITY_MODE_SETTING.getKey(), "mixed")); + assertAcked(client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); + + // create shard with 0 replica and 1 shard + client().admin().indices().prepareCreate("test").setSettings(indexSettings()).setMapping("field", "type=text").get(); + String replicaNode = internalCluster().startNode(); + ensureGreen("test"); + + AtomicInteger numAutoGenDocs = new AtomicInteger(); + final AtomicBoolean finished = new AtomicBoolean(false); + Thread indexingThread = getThread(finished, numAutoGenDocs); + + refresh("test"); + + // add remote node in mixed mode cluster + setAddRemote(true); + String remoteNode = internalCluster().startNode(); + internalCluster().validateClusterFormed(); + + updateSettingsRequest.persistentSettings(Settings.builder().put(MIGRATION_DIRECTION_SETTING.getKey(), "remote_store")); + assertAcked(client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); + + String remoteNode2 = internalCluster().startNode(); + internalCluster().validateClusterFormed(); + + // identify the primary + + Thread.sleep(RandomNumbers.randomIntBetween(random(), 0, 2000)); + logger.info("--> relocating primary from {} to {} ", primaryNode, remoteNode); + client().admin() + .cluster() + .prepareReroute() + .add(new MoveAllocationCommand("test", 0, primaryNode, remoteNode)) + .execute() + .actionGet(); + ClusterHealthResponse clusterHealthResponse = client().admin() + .cluster() + .prepareHealth() + .setTimeout(TimeValue.timeValueSeconds(60)) + .setWaitForEvents(Priority.LANGUID) + .setWaitForNoRelocatingShards(true) + .execute() + .actionGet(); + + assertEquals(0, clusterHealthResponse.getRelocatingShards()); + logger.info("--> relocation of primary from docrep to remote complete"); 
+ Thread.sleep(RandomNumbers.randomIntBetween(random(), 0, 2000)); + + logger.info("--> getting up the new replicas now to doc rep node as well as remote node "); + // Increase replica count to 3 + client().admin() + .indices() + .updateSettings( + new UpdateSettingsRequest("test").settings( + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 3) + .put("index.routing.allocation.exclude._name", remoteNode) + .build() + ) + ) + .get(); + + clusterHealthResponse = client().admin() + .cluster() + .prepareHealth() + .setTimeout(TimeValue.timeValueSeconds(60)) + .setWaitForEvents(Priority.LANGUID) + .setWaitForGreenStatus() + .execute() + .actionGet(); // keep this response so the assertion below checks the state after the replica increase + logger.info("--> replica is up now on another docrep now as well as remote node"); + + assertEquals(0, clusterHealthResponse.getRelocatingShards()); + + Thread.sleep(RandomNumbers.randomIntBetween(random(), 0, 2000)); + + // Stop replicas on docrep now. + // ToDo : Remove once we have dual replication enabled + client().admin() + .indices() + .updateSettings( + new UpdateSettingsRequest("test").settings( + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) + .put("index.routing.allocation.exclude._name", primaryNode + "," + replicaNode) + .build() + ) + ) + .get(); + + finished.set(true); + indexingThread.join(); + refresh("test"); + OpenSearchAssertions.assertHitCount(client().prepareSearch("test").setTrackTotalHits(true).get(), numAutoGenDocs.get()); + OpenSearchAssertions.assertHitCount( + client().prepareSearch("test") + .setTrackTotalHits(true)// extra paranoia ;) + .setQuery(QueryBuilders.termQuery("auto", true)) + // .setPreference("_prefer_nodes:" + (remoteNode+ "," + remoteNode2)) + .get(), + numAutoGenDocs.get() + ); + + } + + private Thread getThread(AtomicBoolean finished, AtomicInteger numAutoGenDocs) { + Thread indexingThread = new Thread(() -> { + while (finished.get() == false && numAutoGenDocs.get() < 100) { + IndexResponse indexResponse = 
client().prepareIndex("test").setId("id").setSource("field", "value").get(); + assertEquals(DocWriteResponse.Result.CREATED, indexResponse.getResult()); + DeleteResponse deleteResponse = client().prepareDelete("test", "id").get(); + assertEquals(DocWriteResponse.Result.DELETED, deleteResponse.getResult()); + client().prepareIndex("test").setSource("auto", true).get(); + numAutoGenDocs.incrementAndGet(); + logger.info("Indexed {} docs here", numAutoGenDocs.get()); + } + }); + indexingThread.start(); + return indexingThread; + } + +} diff --git a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationSettingsUpdateIT.java b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationSettingsUpdateIT.java index c3720e6fbbd09..b71f7d7cf7e4a 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationSettingsUpdateIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationSettingsUpdateIT.java @@ -8,27 +8,15 @@ package org.opensearch.remotemigration; -import org.opensearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse; import org.opensearch.client.Client; -import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.common.settings.Settings; import org.opensearch.common.settings.SettingsException; -import org.opensearch.core.rest.RestStatus; -import org.opensearch.index.IndexSettings; -import org.opensearch.indices.replication.common.ReplicationType; -import org.opensearch.snapshots.SnapshotInfo; -import org.opensearch.snapshots.SnapshotState; import org.opensearch.test.InternalTestCluster; import org.opensearch.test.OpenSearchIntegTestCase; import java.nio.file.Path; import java.util.Optional; -import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_SEGMENT_STORE_REPOSITORY; -import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_STORE_ENABLED; -import static 
org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_TRANSLOG_STORE_REPOSITORY; -import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REPLICATION_TYPE; -import static org.opensearch.index.IndexSettings.INDEX_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING; import static org.opensearch.node.remotestore.RemoteStoreNodeService.CompatibilityMode.MIXED; import static org.opensearch.node.remotestore.RemoteStoreNodeService.CompatibilityMode.STRICT; import static org.opensearch.node.remotestore.RemoteStoreNodeService.Direction.REMOTE_STORE; @@ -92,13 +80,7 @@ public void testNewRestoredIndexIsRemoteStoreBackedForRemoteStoreDirectionAndMix assertNodeInCluster(remoteNodeName); logger.info("Create a non remote-backed index"); - client.admin() - .indices() - .prepareCreate(TEST_INDEX) - .setSettings( - Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).build() - ) - .get(); + createIndex(TEST_INDEX, 0); logger.info("Verify that non remote stored backed index is created"); assertNonRemoteStoreBackedIndex(TEST_INDEX); @@ -115,21 +97,12 @@ public void testNewRestoredIndexIsRemoteStoreBackedForRemoteStoreDirectionAndMix logger.info("Create snapshot of non remote stored backed index"); - SnapshotInfo snapshotInfo = client().admin() - .cluster() - .prepareCreateSnapshot(snapshotRepoName, snapshotName) - .setIndices(TEST_INDEX) - .setWaitForCompletion(true) - .get() - .getSnapshotInfo(); - - assertEquals(SnapshotState.SUCCESS, snapshotInfo.state()); - assertTrue(snapshotInfo.successfulShards() > 0); - assertEquals(0, snapshotInfo.failedShards()); + createSnapshot(snapshotRepoName, snapshotName, TEST_INDEX); logger.info("Restore index from snapshot under NONE direction"); String restoredIndexName1 = TEST_INDEX + "-restored1"; restoreSnapshot(snapshotRepoName, snapshotName, restoredIndexName1); + ensureGreen(restoredIndexName1); logger.info("Verify that restored index is non remote-backed"); 
assertNonRemoteStoreBackedIndex(restoredIndexName1); @@ -138,6 +111,7 @@ public void testNewRestoredIndexIsRemoteStoreBackedForRemoteStoreDirectionAndMix setDirection(REMOTE_STORE.direction); String restoredIndexName2 = TEST_INDEX + "-restored2"; restoreSnapshot(snapshotRepoName, snapshotName, restoredIndexName2); + ensureGreen(restoredIndexName2); logger.info("Verify that restored index is non remote-backed"); assertRemoteStoreBackedIndex(restoredIndexName2); @@ -146,10 +120,10 @@ public void testNewRestoredIndexIsRemoteStoreBackedForRemoteStoreDirectionAndMix // compatibility mode setting test public void testSwitchToStrictMode() throws Exception { - logger.info(" --> initialize cluster"); + logger.info("Initialize cluster"); initializeCluster(false); - logger.info(" --> create a mixed mode cluster"); + logger.info("Create a mixed mode cluster"); setClusterMode(MIXED.mode); addRemote = true; String remoteNodeName = internalCluster().startNode(); @@ -159,58 +133,21 @@ public void testSwitchToStrictMode() throws Exception { assertNodeInCluster(remoteNodeName); assertNodeInCluster(nonRemoteNodeName); - logger.info(" --> attempt switching to strict mode"); + logger.info("Attempt switching to strict mode"); SettingsException exception = assertThrows(SettingsException.class, () -> setClusterMode(STRICT.mode)); assertEquals( "can not switch to STRICT compatibility mode when the cluster contains both remote and non-remote nodes", exception.getMessage() ); - logger.info(" --> stop remote node so that cluster had only non-remote nodes"); + logger.info("Stop remote node so that cluster had only non-remote nodes"); internalCluster().stopRandomNode(InternalTestCluster.nameFilter(remoteNodeName)); ensureStableCluster(2); - logger.info(" --> attempt switching to strict mode"); + logger.info("Attempt switching to strict mode"); setClusterMode(STRICT.mode); } - // restore indices from a snapshot - private void restoreSnapshot(String snapshotRepoName, String snapshotName, String 
restoredIndexName) { - RestoreSnapshotResponse restoreSnapshotResponse = client.admin() - .cluster() - .prepareRestoreSnapshot(snapshotRepoName, snapshotName) - .setWaitForCompletion(false) - .setIndices(TEST_INDEX) - .setRenamePattern(TEST_INDEX) - .setRenameReplacement(restoredIndexName) - .get(); - - assertEquals(restoreSnapshotResponse.status(), RestStatus.ACCEPTED); - ensureGreen(restoredIndexName); - } - - // verify that the created index is not remote store backed - private void assertNonRemoteStoreBackedIndex(String indexName) { - Settings indexSettings = client.admin().indices().prepareGetIndex().execute().actionGet().getSettings().get(indexName); - assertEquals(ReplicationType.DOCUMENT.toString(), indexSettings.get(SETTING_REPLICATION_TYPE)); - assertNull(indexSettings.get(SETTING_REMOTE_STORE_ENABLED)); - assertNull(indexSettings.get(SETTING_REMOTE_SEGMENT_STORE_REPOSITORY)); - assertNull(indexSettings.get(SETTING_REMOTE_TRANSLOG_STORE_REPOSITORY)); - } - - // verify that the created index is remote store backed - private void assertRemoteStoreBackedIndex(String indexName) { - Settings indexSettings = client.admin().indices().prepareGetIndex().execute().actionGet().getSettings().get(indexName); - assertEquals(ReplicationType.SEGMENT.toString(), indexSettings.get(SETTING_REPLICATION_TYPE)); - assertEquals("true", indexSettings.get(SETTING_REMOTE_STORE_ENABLED)); - assertEquals(REPOSITORY_NAME, indexSettings.get(SETTING_REMOTE_SEGMENT_STORE_REPOSITORY)); - assertEquals(REPOSITORY_2_NAME, indexSettings.get(SETTING_REMOTE_TRANSLOG_STORE_REPOSITORY)); - assertEquals( - IndexSettings.DEFAULT_REMOTE_TRANSLOG_BUFFER_INTERVAL, - INDEX_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING.get(indexSettings) - ); - } - // bootstrap a cluster private void initializeCluster(boolean remoteClusterManager) { addRemote = remoteClusterManager; diff --git a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationShardAllocationBaseTestCase.java 
b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationShardAllocationBaseTestCase.java index ad2302d1ab2e1..cf689aa554c8b 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationShardAllocationBaseTestCase.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationShardAllocationBaseTestCase.java @@ -8,16 +8,37 @@ package org.opensearch.remotemigration; +import org.opensearch.action.admin.cluster.allocation.ClusterAllocationExplanation; import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest; +import org.opensearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse; +import org.opensearch.action.support.ActiveShardCount; +import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.cluster.routing.IndexShardRoutingTable; import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.cluster.routing.ShardRoutingState; +import org.opensearch.cluster.routing.allocation.AllocateUnassignedDecision; +import org.opensearch.cluster.routing.allocation.MoveDecision; +import org.opensearch.cluster.routing.allocation.NodeAllocationResult; +import org.opensearch.cluster.routing.allocation.decider.Decision; +import org.opensearch.common.Nullable; import org.opensearch.common.settings.Settings; +import org.opensearch.core.rest.RestStatus; +import org.opensearch.index.IndexSettings; +import org.opensearch.indices.replication.common.ReplicationType; +import org.opensearch.snapshots.SnapshotInfo; +import org.opensearch.snapshots.SnapshotState; +import java.util.List; import java.util.Map; import java.util.Optional; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_SEGMENT_STORE_REPOSITORY; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_STORE_ENABLED; 
+import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_TRANSLOG_STORE_REPOSITORY; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REPLICATION_TYPE; +import static org.opensearch.index.IndexSettings.INDEX_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING; import static org.opensearch.node.remotestore.RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING; import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; @@ -35,7 +56,7 @@ protected void setClusterMode(String mode) { } // set the migration direction for cluster [remote_store, docrep, none] - public void setDirection(String direction) { + protected void setDirection(String direction) { updateSettingsRequest.persistentSettings(Settings.builder().put(MIGRATION_DIRECTION_SETTING.getKey(), direction)); assertAcked(internalCluster().client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); } @@ -67,7 +88,7 @@ protected String allNodesExcept(String except) { return exclude.toString(); } - // create a new test index + // create a new test index with un-allocated primary and no replicas protected void prepareIndexWithoutReplica(Optional name) { String indexName = name.orElse(TEST_INDEX); internalCluster().client() @@ -84,6 +105,33 @@ protected void prepareIndexWithoutReplica(Optional name) { .actionGet(); } + // create a new test index with allocated primary and 1 unallocated replica + public void prepareIndexWithAllocatedPrimary(DiscoveryNode primaryShardNode, Optional name) { + String indexName = name.orElse(TEST_INDEX); + internalCluster().client() + .admin() + .indices() + .prepareCreate(indexName) + .setSettings( + Settings.builder() + .put("index.number_of_shards", 1) + .put("index.number_of_replicas", 1) + .put("index.routing.allocation.include._name", primaryShardNode.getName()) + .put("index.routing.allocation.exclude._name", 
allNodesExcept(primaryShardNode.getName())) + ) + .setWaitForActiveShards(ActiveShardCount.ONE) + .execute() + .actionGet(); + + ensureYellowAndNoInitializingShards(TEST_INDEX); + + logger.info(" --> verify allocation of primary shard"); + assertAllocation(true, primaryShardNode); + + logger.info(" --> verify non-allocation of replica shard"); + assertNonAllocation(false); + } + protected ShardRouting getShardRouting(boolean isPrimary) { IndexShardRoutingTable table = internalCluster().client() .admin() @@ -98,4 +146,213 @@ protected ShardRouting getShardRouting(boolean isPrimary) { return (isPrimary ? table.primaryShard() : table.replicaShards().get(0)); } + // obtain decision for allocation/relocation of a shard to a given node + protected Decision getDecisionForTargetNode( + DiscoveryNode targetNode, + boolean isPrimary, + boolean includeYesDecisions, + boolean isRelocation + ) { + ClusterAllocationExplanation explanation = internalCluster().client() + .admin() + .cluster() + .prepareAllocationExplain() + .setIndex(TEST_INDEX) + .setShard(0) + .setPrimary(isPrimary) + .setIncludeYesDecisions(includeYesDecisions) + .get() + .getExplanation(); + + Decision requiredDecision = null; + List nodeAllocationResults; + if (isRelocation) { + MoveDecision moveDecision = explanation.getShardAllocationDecision().getMoveDecision(); + nodeAllocationResults = moveDecision.getNodeDecisions(); + } else { + AllocateUnassignedDecision allocateUnassignedDecision = explanation.getShardAllocationDecision().getAllocateDecision(); + nodeAllocationResults = allocateUnassignedDecision.getNodeDecisions(); + } + + for (NodeAllocationResult nodeAllocationResult : nodeAllocationResults) { + if (nodeAllocationResult.getNode().equals(targetNode)) { + for (Decision decision : nodeAllocationResult.getCanAllocateDecision().getDecisions()) { + if (decision.label().equals(NAME)) { + requiredDecision = decision; + break; + } + } + } + } + + assertNotNull(requiredDecision); + return requiredDecision; 
+ } + + // get allocation and relocation decisions for all nodes + protected void excludeAllNodes() { + assertAcked( + internalCluster().client() + .admin() + .indices() + .prepareUpdateSettings(TEST_INDEX) + .setSettings( + Settings.builder() + .put("index.routing.allocation.include._name", "") + .put("index.routing.allocation.exclude._name", allNodesExcept(null)) + ) + .execute() + .actionGet() + ); + } + + protected void includeAllNodes() { + assertAcked( + internalCluster().client() + .admin() + .indices() + .prepareUpdateSettings(TEST_INDEX) + .setSettings( + Settings.builder() + .put("index.routing.allocation.exclude._name", "") + .put("index.routing.allocation.include._name", allNodesExcept(null)) + ) + .execute() + .actionGet() + ); + } + + protected void attemptAllocation(@Nullable String targetNodeName) { + Settings.Builder settingsBuilder; + if (targetNodeName != null) { + settingsBuilder = Settings.builder() + .put("index.routing.allocation.include._name", targetNodeName) + .put("index.routing.allocation.exclude._name", allNodesExcept(targetNodeName)); + } else { + String clusterManagerNodeName = internalCluster().client() + .admin() + .cluster() + .prepareState() + .execute() + .actionGet() + .getState() + .getNodes() + .getClusterManagerNode() + .getName(); + // to allocate freely among all nodes other than cluster-manager node + settingsBuilder = Settings.builder() + .put("index.routing.allocation.include._name", allNodesExcept(clusterManagerNodeName)) + .put("index.routing.allocation.exclude._name", clusterManagerNodeName); + } + internalCluster().client().admin().indices().prepareUpdateSettings(TEST_INDEX).setSettings(settingsBuilder).execute().actionGet(); + } + + // verify that shard does not exist at targetNode + protected void assertNonAllocation(boolean isPrimary) { + if (isPrimary) { + ensureRed(TEST_INDEX); + } else { + ensureYellowAndNoInitializingShards(TEST_INDEX); + } + ShardRouting shardRouting = getShardRouting(isPrimary); + 
assertFalse(shardRouting.active()); + assertNull(shardRouting.currentNodeId()); + assertEquals(ShardRoutingState.UNASSIGNED, shardRouting.state()); + } + + // verify that shard exists at targetNode + protected void assertAllocation(boolean isPrimary, @Nullable DiscoveryNode targetNode) { + ShardRouting shardRouting = getShardRouting(isPrimary); + assertTrue(shardRouting.active()); + assertNotNull(shardRouting.currentNodeId()); + if (targetNode != null) { + assertEquals(targetNode.getId(), shardRouting.currentNodeId()); + } + } + + // create a snapshot + public static SnapshotInfo createSnapshot(String snapshotRepoName, String snapshotName, String... indices) { + SnapshotInfo snapshotInfo = internalCluster().client() + .admin() + .cluster() + .prepareCreateSnapshot(snapshotRepoName, snapshotName) + .setIndices(indices) + .setWaitForCompletion(true) + .get() + .getSnapshotInfo(); + + assertEquals(SnapshotState.SUCCESS, snapshotInfo.state()); + assertTrue(snapshotInfo.successfulShards() > 0); + assertEquals(0, snapshotInfo.failedShards()); + return snapshotInfo; + } + + // create new index + public static void createIndex(String indexName, int replicaCount) { + assertAcked( + internalCluster().client() + .admin() + .indices() + .prepareCreate(indexName) + .setSettings( + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, replicaCount) + .build() + ) + .get() + ); + } + + // restore indices from a snapshot + public static RestoreSnapshotResponse restoreSnapshot(String snapshotRepoName, String snapshotName, String restoredIndexName) { + RestoreSnapshotResponse restoreSnapshotResponse = internalCluster().client() + .admin() + .cluster() + .prepareRestoreSnapshot(snapshotRepoName, snapshotName) + .setWaitForCompletion(false) + .setIndices(TEST_INDEX) + .setRenamePattern(TEST_INDEX) + .setRenameReplacement(restoredIndexName) + .get(); + assertEquals(restoreSnapshotResponse.status(), RestStatus.ACCEPTED); + 
return restoreSnapshotResponse; + } + + // verify that the created index is not remote store backed + public static void assertNonRemoteStoreBackedIndex(String indexName) { + Settings indexSettings = internalCluster().client() + .admin() + .indices() + .prepareGetIndex() + .execute() + .actionGet() + .getSettings() + .get(indexName); + assertEquals(ReplicationType.DOCUMENT.toString(), indexSettings.get(SETTING_REPLICATION_TYPE)); + assertNull(indexSettings.get(SETTING_REMOTE_STORE_ENABLED)); + assertNull(indexSettings.get(SETTING_REMOTE_SEGMENT_STORE_REPOSITORY)); + assertNull(indexSettings.get(SETTING_REMOTE_TRANSLOG_STORE_REPOSITORY)); + } + + // verify that the created index is remote store backed + public static void assertRemoteStoreBackedIndex(String indexName) { + Settings indexSettings = internalCluster().client() + .admin() + .indices() + .prepareGetIndex() + .execute() + .actionGet() + .getSettings() + .get(indexName); + assertEquals(ReplicationType.SEGMENT.toString(), indexSettings.get(SETTING_REPLICATION_TYPE)); + assertEquals("true", indexSettings.get(SETTING_REMOTE_STORE_ENABLED)); + assertEquals(REPOSITORY_NAME, indexSettings.get(SETTING_REMOTE_SEGMENT_STORE_REPOSITORY)); + assertEquals(REPOSITORY_2_NAME, indexSettings.get(SETTING_REMOTE_TRANSLOG_STORE_REPOSITORY)); + assertEquals( + IndexSettings.DEFAULT_REMOTE_TRANSLOG_BUFFER_INTERVAL, + INDEX_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING.get(indexSettings) + ); + } + } diff --git a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationTestCase.java b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationTestCase.java index 640b83f194c1c..4b1c91f1d57ca 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationTestCase.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationTestCase.java @@ -8,31 +8,48 @@ package org.opensearch.remotemigration; +import 
org.opensearch.action.admin.cluster.health.ClusterHealthResponse; import org.opensearch.action.admin.cluster.repositories.get.GetRepositoriesRequest; import org.opensearch.action.admin.cluster.repositories.get.GetRepositoriesResponse; import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest; import org.opensearch.client.Client; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.Priority; import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.repositories.blobstore.BlobStoreRepository; +import org.opensearch.snapshots.SnapshotInfo; import org.opensearch.test.OpenSearchIntegTestCase; +import org.opensearch.test.hamcrest.OpenSearchAssertions; +import java.nio.file.Path; import java.util.List; +import java.util.Map; import static org.opensearch.node.remotestore.RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING; import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; +import static org.hamcrest.Matchers.equalTo; @OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0, autoManageMasterNodes = false) public class RemoteStoreMigrationTestCase extends MigrationBaseTestCase { + protected int maximumNumberOfReplicas() { + return 1; + } + + protected int minimumNumberOfReplicas() { + return 1; + } + public void testMixedModeAddRemoteNodes() throws Exception { internalCluster().setBootstrapClusterManagerNodeIndex(0); List cmNodes = internalCluster().startNodes(1); Client client = internalCluster().client(cmNodes.get(0)); - ClusterUpdateSettingsRequest updateSettingsRequest = new ClusterUpdateSettingsRequest(); - updateSettingsRequest.persistentSettings(Settings.builder().put(REMOTE_STORE_COMPATIBILITY_MODE_SETTING.getKey(), "mixed")); - 
assertAcked(client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); + initDocRepToRemoteMigration(); // add remote node in mixed mode cluster - addRemote = true; + setAddRemote(true); internalCluster().startNode(); internalCluster().startNode(); internalCluster().validateClusterFormed(); @@ -43,7 +60,7 @@ public void testMixedModeAddRemoteNodes() throws Exception { assertEquals(1, getRepositoriesResponse.repositories().size()); // add docrep mode in mixed mode cluster - addRemote = true; + setAddRemote(true); internalCluster().startNode(); assertBusy(() -> { assertEquals(client.admin().cluster().prepareClusterStats().get().getNodes().size(), internalCluster().getNodeNames().length); @@ -70,4 +87,129 @@ public void testMigrationDirections() { updateSettingsRequest.persistentSettings(Settings.builder().put(MIGRATION_DIRECTION_SETTING.getKey(), "random")); assertThrows(IllegalArgumentException.class, () -> client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); } + + public void testNoShallowSnapshotInMixedMode() throws Exception { + logger.info("Initialize remote cluster"); + addRemote = true; + internalCluster().setBootstrapClusterManagerNodeIndex(0); + List cmNodes = internalCluster().startNodes(1); + Client client = internalCluster().client(cmNodes.get(0)); + + logger.info("Add remote node"); + internalCluster().startNode(); + internalCluster().validateClusterFormed(); + + logger.info("Create remote backed index"); + RemoteStoreMigrationShardAllocationBaseTestCase.createIndex("test", 0); + RemoteStoreMigrationShardAllocationBaseTestCase.assertRemoteStoreBackedIndex("test"); + + logger.info("Create shallow snapshot setting enabled repo"); + String shallowSnapshotRepoName = "shallow-snapshot-repo-name"; + Path shallowSnapshotRepoPath = randomRepoPath(); + assertAcked( + clusterAdmin().preparePutRepository(shallowSnapshotRepoName) + .setType("fs") + .setSettings( + Settings.builder() + .put("location", 
shallowSnapshotRepoPath) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), Boolean.TRUE) + ) + ); + + logger.info("Verify shallow snapshot creation"); + final String snapshot1 = "snapshot1"; + SnapshotInfo snapshotInfo1 = RemoteStoreMigrationShardAllocationBaseTestCase.createSnapshot( + shallowSnapshotRepoName, + snapshot1, + "test" + ); + assertEquals(snapshotInfo1.isRemoteStoreIndexShallowCopyEnabled(), true); + + logger.info("Set MIXED compatibility mode"); + ClusterUpdateSettingsRequest updateSettingsRequest = new ClusterUpdateSettingsRequest(); + updateSettingsRequest.persistentSettings(Settings.builder().put(REMOTE_STORE_COMPATIBILITY_MODE_SETTING.getKey(), "mixed")); + assertAcked(client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); + + logger.info("Verify that new snapshot is not shallow"); + final String snapshot2 = "snapshot2"; + SnapshotInfo snapshotInfo2 = RemoteStoreMigrationShardAllocationBaseTestCase.createSnapshot(shallowSnapshotRepoName, snapshot2); + assertEquals(snapshotInfo2.isRemoteStoreIndexShallowCopyEnabled(), false); + } + + /* + Tests end to end remote migration via Blue Green mechanism + - Starts docrep nodes with multiple nodes, indices, replicas copies + - Adds remote nodes to cluster + - Excludes docrep nodes. 
+ - Asserts all shards are migrated to remote store + - Asserts doc count across all shards + - Continuos indexing with refresh/flush happening + */ + public void testEndToEndRemoteMigration() throws Exception { + internalCluster().setBootstrapClusterManagerNodeIndex(0); + List docRepNodes = internalCluster().startNodes(2); + ClusterUpdateSettingsRequest updateSettingsRequest = new ClusterUpdateSettingsRequest(); + updateSettingsRequest.persistentSettings(Settings.builder().put(REMOTE_STORE_COMPATIBILITY_MODE_SETTING.getKey(), "mixed")); + assertAcked(client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); + client().admin().indices().prepareCreate("test").setSettings(indexSettings()).setMapping("field", "type=text").get(); + ensureGreen("test"); + + logger.info("---> Starting doc ingestion in parallel thread"); + AsyncIndexingService asyncIndexingService = new AsyncIndexingService("test"); + asyncIndexingService.startIndexing(); + + setAddRemote(true); + + updateSettingsRequest.persistentSettings( + Settings.builder() + .put(REMOTE_STORE_COMPATIBILITY_MODE_SETTING.getKey(), "mixed") + .put(MIGRATION_DIRECTION_SETTING.getKey(), "remote_store") + ); + assertAcked(client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); + + internalCluster().startNodes(2); + + assertAcked( + internalCluster().client() + .admin() + .indices() + .prepareUpdateSettings() + .setIndices("test") + .setSettings( + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) + .put("index.routing.allocation.exclude._name", String.join(",", docRepNodes)) + .build() + ) + .get() + ); + + ClusterHealthResponse clusterHealthResponse = client().admin() + .cluster() + .prepareHealth() + .setTimeout(TimeValue.timeValueSeconds(45)) + .setWaitForEvents(Priority.LANGUID) + .setWaitForNoRelocatingShards(true) + .execute() + .actionGet(); + assertTrue(clusterHealthResponse.getRelocatingShards() == 0); + logger.info("---> Stopping indexing thread"); 
+ asyncIndexingService.stopIndexing(); + Map shardCountByNodeId = getShardCountByNodeId(); + assertThat("node0 has 0 shards", shardCountByNodeId.get(docRepNodes.get(0)), equalTo(null)); + assertThat("node1 has 0 shards", shardCountByNodeId.get(docRepNodes.get(1)), equalTo(null)); + refresh("test"); + waitForReplication("test"); + OpenSearchAssertions.assertHitCount( + client().prepareSearch("test").setTrackTotalHits(true).get(), + asyncIndexingService.getIndexedDocs() + ); + OpenSearchAssertions.assertHitCount( + client().prepareSearch("test") + .setTrackTotalHits(true)// extra paranoia ;) + .setQuery(QueryBuilders.termQuery("auto", true)) + .get(), + asyncIndexingService.getIndexedDocs() + ); + } } diff --git a/server/src/internalClusterTest/java/org/opensearch/remotemigration/ResizeIndexMigrationTestCase.java b/server/src/internalClusterTest/java/org/opensearch/remotemigration/ResizeIndexMigrationTestCase.java index b57bc60c50e8c..b817906a8f828 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotemigration/ResizeIndexMigrationTestCase.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotemigration/ResizeIndexMigrationTestCase.java @@ -32,7 +32,7 @@ public class ResizeIndexMigrationTestCase extends MigrationBaseTestCase { * and index is on DocRep node, and migration to remote store is in progress. 
* */ public void testFailResizeIndexWhileDocRepToRemoteStoreMigration() throws Exception { - addRemote = false; + setAddRemote(false); // create a docrep cluster internalCluster().startClusterManagerOnlyNode(); internalCluster().validateClusterFormed(); @@ -127,7 +127,7 @@ public void testFailResizeIndexWhileDocRepToRemoteStoreMigration() throws Except * */ public void testFailResizeIndexWhileRemoteStoreToDocRepMigration() throws Exception { // creates a remote cluster - addRemote = true; + setAddRemote(true); internalCluster().startClusterManagerOnlyNode(); internalCluster().validateClusterFormed(); diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java index 78441f74f6b4f..ca0ae3ca9a700 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java @@ -30,6 +30,7 @@ import org.opensearch.index.IndexSettings; import org.opensearch.index.shard.IndexShard; import org.opensearch.index.shard.IndexShardClosedException; +import org.opensearch.index.translog.Translog; import org.opensearch.index.translog.Translog.Durability; import org.opensearch.indices.IndicesService; import org.opensearch.indices.RemoteStoreSettings; @@ -63,6 +64,7 @@ import static org.opensearch.index.remote.RemoteStoreEnums.DataCategory.TRANSLOG; import static org.opensearch.index.remote.RemoteStoreEnums.DataType.DATA; import static org.opensearch.index.remote.RemoteStoreEnums.DataType.METADATA; +import static org.opensearch.index.shard.IndexShardTestCase.getTranslog; import static org.opensearch.indices.RemoteStoreSettings.CLUSTER_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING; import static org.opensearch.test.OpenSearchTestCase.getShardLevelBlobPath; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; @@ -859,4 +861,45 @@ public void 
testLocalOnlyTranslogCleanupOnNodeRestart() throws Exception { refresh(INDEX_NAME); assertHitCount(client(dataNode).prepareSearch(INDEX_NAME).setSize(0).get(), searchableDocs + 15); } + + public void testFlushOnTooManyRemoteTranslogFiles() throws Exception { + internalCluster().startClusterManagerOnlyNode(); + String datanode = internalCluster().startDataOnlyNodes(1).get(0); + createIndex(INDEX_NAME, remoteStoreIndexSettings(0, 10000L, -1)); + ensureGreen(INDEX_NAME); + + ClusterUpdateSettingsRequest updateSettingsRequest = new ClusterUpdateSettingsRequest(); + updateSettingsRequest.persistentSettings( + Settings.builder().put(RemoteStoreSettings.CLUSTER_REMOTE_MAX_TRANSLOG_READERS.getKey(), "100") + ); + assertAcked(client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); + + IndexShard indexShard = getIndexShard(datanode, INDEX_NAME); + Path translogLocation = getTranslog(indexShard).location(); + assertFalse(indexShard.shouldPeriodicallyFlush()); + + try (Stream files = Files.list(translogLocation)) { + long totalFiles = files.filter(f -> f.getFileName().toString().endsWith(Translog.TRANSLOG_FILE_SUFFIX)).count(); + assertEquals(totalFiles, 1L); + } + + // indexing 100 documents (100 bulk requests), no flush will be triggered yet + for (int i = 0; i < 100; i++) { + indexBulk(INDEX_NAME, 1); + } + + try (Stream files = Files.list(translogLocation)) { + long totalFiles = files.filter(f -> f.getFileName().toString().endsWith(Translog.TRANSLOG_FILE_SUFFIX)).count(); + assertEquals(totalFiles, 101L); + } + // Will flush and trim the translog readers + indexBulk(INDEX_NAME, 1); + + assertBusy(() -> { + try (Stream files = Files.list(translogLocation)) { + long totalFiles = files.filter(f -> f.getFileName().toString().endsWith(Translog.TRANSLOG_FILE_SUFFIX)).count(); + assertEquals(totalFiles, 1L); + } + }, 30, TimeUnit.SECONDS); + } } diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreRestoreIT.java 
b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreRestoreIT.java index 94acf2b1dbb27..bd84ab026dcea 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreRestoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreRestoreIT.java @@ -27,6 +27,7 @@ import org.opensearch.repositories.fs.ReloadableFsRepository; import org.opensearch.test.InternalTestCluster; import org.opensearch.test.OpenSearchIntegTestCase; +import org.opensearch.test.junit.annotations.TestIssueLogging; import java.io.IOException; import java.nio.file.Path; @@ -42,7 +43,7 @@ import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertHitCount; import static org.hamcrest.Matchers.greaterThan; -@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.SUITE, numDataNodes = 0) +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) public class RemoteStoreRestoreIT extends BaseRemoteStoreRestoreIT { /** @@ -91,6 +92,7 @@ public void testRTSRestoreWithNoDataPostCommitPrimaryReplicaDown() throws Except * Simulates all data restored using Remote Translog Store. * @throws IOException IO Exception. */ + @TestIssueLogging(value = "_root:TRACE", issueUrl = "https://github.com/opensearch-project/OpenSearch/issues/11085") public void testRTSRestoreWithNoDataPostRefreshPrimaryReplicaDown() throws Exception { testRestoreFlowBothPrimaryReplicasDown(1, false, true, randomIntBetween(1, 5)); } @@ -295,7 +297,6 @@ public void testRestoreFlowNoRedIndex() throws Exception { * for multiple indices matching a wildcard name pattern. * @throws IOException IO Exception. 
*/ - @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/8480") public void testRTSRestoreWithCommittedDataMultipleIndicesPatterns() throws Exception { testRestoreFlowMultipleIndices(2, true, randomIntBetween(1, 5)); } @@ -306,16 +307,16 @@ public void testRTSRestoreWithCommittedDataMultipleIndicesPatterns() throws Exce * with all remote-enabled red indices considered for the restore by default. * @throws IOException IO Exception. */ - @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/8480") public void testRTSRestoreWithCommittedDataDefaultAllIndices() throws Exception { int shardCount = randomIntBetween(1, 5); - prepareCluster(1, 3, INDEX_NAMES, 1, shardCount); + int replicaCount = 1; + prepareCluster(1, 3, INDEX_NAMES, replicaCount, shardCount); String[] indices = INDEX_NAMES.split(","); Map> indicesStats = new HashMap<>(); for (String index : indices) { Map indexStats = indexData(2, true, index); indicesStats.put(index, indexStats); - assertEquals(shardCount, getNumShards(index).totalNumShards); + assertEquals(shardCount * (replicaCount + 1), getNumShards(index).totalNumShards); } for (String index : indices) { @@ -337,7 +338,7 @@ public void testRTSRestoreWithCommittedDataDefaultAllIndices() throws Exception ensureGreen(indices); for (String index : indices) { - assertEquals(shardCount, getNumShards(index).totalNumShards); + assertEquals(shardCount * (replicaCount + 1), getNumShards(index).totalNumShards); verifyRestoredData(indicesStats.get(index), index); } } @@ -395,16 +396,16 @@ public void testRTSRestoreWithCommittedDataNotAllRedRemoteIndices() throws Excep * except those matching the specified exclusion pattern. * @throws IOException IO Exception. 
*/ - @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/8480") public void testRTSRestoreWithCommittedDataExcludeIndicesPatterns() throws Exception { int shardCount = randomIntBetween(1, 5); - prepareCluster(1, 3, INDEX_NAMES, 1, shardCount); + int replicaCount = 1; + prepareCluster(1, 3, INDEX_NAMES, replicaCount, shardCount); String[] indices = INDEX_NAMES.split(","); Map> indicesStats = new HashMap<>(); for (String index : indices) { Map indexStats = indexData(2, true, index); indicesStats.put(index, indexStats); - assertEquals(shardCount, getNumShards(index).totalNumShards); + assertEquals(shardCount * (replicaCount + 1), getNumShards(index).totalNumShards); } for (String index : indices) { @@ -433,9 +434,9 @@ public void testRTSRestoreWithCommittedDataExcludeIndicesPatterns() throws Excep PlainActionFuture.newFuture() ); ensureGreen(indices[0], indices[1]); - assertEquals(shardCount, getNumShards(indices[0]).totalNumShards); + assertEquals(shardCount * (replicaCount + 1), getNumShards(indices[0]).totalNumShards); verifyRestoredData(indicesStats.get(indices[0]), indices[0]); - assertEquals(shardCount, getNumShards(indices[1]).totalNumShards); + assertEquals(shardCount * (replicaCount + 1), getNumShards(indices[1]).totalNumShards); verifyRestoredData(indicesStats.get(indices[1]), indices[1]); ensureRed(indices[2], indices[3]); } diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreUploadIndexPathIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreUploadIndexPathIT.java new file mode 100644 index 0000000000000..9b30dacfced13 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreUploadIndexPathIT.java @@ -0,0 +1,111 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.remotestore; + +import org.opensearch.action.admin.indices.delete.DeleteIndexRequest; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.settings.Settings; +import org.opensearch.core.util.FileSystemUtils; +import org.opensearch.index.remote.RemoteIndexPath; +import org.opensearch.index.remote.RemoteIndexPathUploader; +import org.opensearch.index.remote.RemoteStoreEnums; +import org.opensearch.test.OpenSearchIntegTestCase; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.concurrent.ExecutionException; + +import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING; +import static org.opensearch.indices.RemoteStoreSettings.CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; + +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) +public class RemoteStoreUploadIndexPathIT extends RemoteStoreBaseIntegTestCase { + + private static final String INDEX_NAME = "remote-store-test-idx-1"; + + @Override + protected Settings nodeSettings(int nodeOrdinal) { + return Settings.builder().put(super.nodeSettings(nodeOrdinal)).put(REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), true).build(); + } + + /** + * Checks that the remote index path file is created when the cluster path type is HASHED_PREFIX and is not created + * when the path type is HASHED_INFIX or FIXED. + */ + public void testRemoteIndexPathFileCreation() throws ExecutionException, InterruptedException, IOException { + String clusterManagerNode = internalCluster().startClusterManagerOnlyNode(); + internalCluster().startDataOnlyNodes(2); + + // Case 1 - Hashed_prefix, we would need the remote index path file to be created.
+ client(clusterManagerNode).admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings( + Settings.builder().put(CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING.getKey(), RemoteStoreEnums.PathType.HASHED_PREFIX) + ) + .get(); + + createIndex(INDEX_NAME, remoteStoreIndexSettings(0, 1)); + validateRemoteIndexPathFile(true); + assertAcked(client().admin().indices().delete(new DeleteIndexRequest(INDEX_NAME)).get()); + FileSystemUtils.deleteSubDirectories(translogRepoPath); + FileSystemUtils.deleteSubDirectories(segmentRepoPath); + + // Case 2 - Hashed_infix, we would not have the remote index path file created here. + client(clusterManagerNode).admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings( + Settings.builder().put(CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING.getKey(), RemoteStoreEnums.PathType.HASHED_INFIX) + ) + .get(); + createIndex(INDEX_NAME, remoteStoreIndexSettings(0, 1)); + validateRemoteIndexPathFile(false); + assertAcked(client().admin().indices().delete(new DeleteIndexRequest(INDEX_NAME)).get()); + + // Case 3 - fixed, we would not have the remote index path file created here either.
+ client(clusterManagerNode).admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().put(CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING.getKey(), RemoteStoreEnums.PathType.FIXED)) + .get(); + createIndex(INDEX_NAME, remoteStoreIndexSettings(0, 1)); + validateRemoteIndexPathFile(false); + assertAcked(client().admin().indices().delete(new DeleteIndexRequest(INDEX_NAME)).get()); + + } + + private void validateRemoteIndexPathFile(boolean exists) throws IOException { + String indexUUID = client().admin() + .indices() + .prepareGetSettings(INDEX_NAME) + .get() + .getSetting(INDEX_NAME, IndexMetadata.SETTING_INDEX_UUID); + String fileName = generatePartFileName(indexUUID); + assertEquals(exists, FileSystemUtils.exists(translogRepoPath.resolve(RemoteIndexPath.DIR))); + if (exists) { + Path[] files = FileSystemUtils.files(translogRepoPath.resolve(RemoteIndexPath.DIR)); + assertEquals(1, files.length); + assertTrue(Arrays.stream(files).anyMatch(file -> file.toString().contains(fileName))); + String translogPathFile = files[0].toString(); + assertTrue(FileSystemUtils.exists(segmentRepoPath.resolve(RemoteIndexPath.DIR))); + files = FileSystemUtils.files(segmentRepoPath.resolve(RemoteIndexPath.DIR)); + assertEquals(1, files.length); + assertTrue(Arrays.stream(files).anyMatch(file -> file.toString().contains(fileName))); + String segmentPathFile = files[0].toString(); + assertNotEquals(translogPathFile, segmentPathFile); + } + } + + private String generatePartFileName(String indexUUID) { + return String.join(RemoteIndexPathUploader.DELIMITER, indexUUID, "2", RemoteIndexPath.DEFAULT_VERSION); + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java index 8562a7eb37709..0917a0baff1ab 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java +++
b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java @@ -39,6 +39,7 @@ import org.opensearch.cluster.routing.WeightedRoutingStats; import org.opensearch.cluster.service.ClusterManagerThrottlingStats; import org.opensearch.common.Nullable; +import org.opensearch.common.cache.service.NodeCacheStats; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.indices.breaker.AllCircuitBreakerStats; @@ -158,6 +159,9 @@ public class NodeStats extends BaseNodeResponse implements ToXContentFragment { @Nullable private AdmissionControlStats admissionControlStats; + @Nullable + private NodeCacheStats nodeCacheStats; + public NodeStats(StreamInput in) throws IOException { super(in); timestamp = in.readVLong(); @@ -234,6 +238,11 @@ public NodeStats(StreamInput in) throws IOException { } else { admissionControlStats = null; } + if (in.getVersion().onOrAfter(Version.V_2_14_0)) { + nodeCacheStats = in.readOptionalWriteable(NodeCacheStats::new); + } else { + nodeCacheStats = null; + } } public NodeStats( @@ -264,7 +273,8 @@ public NodeStats( @Nullable SearchPipelineStats searchPipelineStats, @Nullable SegmentReplicationRejectionStats segmentReplicationRejectionStats, @Nullable RepositoriesStats repositoriesStats, - @Nullable AdmissionControlStats admissionControlStats + @Nullable AdmissionControlStats admissionControlStats, + @Nullable NodeCacheStats nodeCacheStats ) { super(node); this.timestamp = timestamp; @@ -294,6 +304,7 @@ public NodeStats( this.segmentReplicationRejectionStats = segmentReplicationRejectionStats; this.repositoriesStats = repositoriesStats; this.admissionControlStats = admissionControlStats; + this.nodeCacheStats = nodeCacheStats; } public long getTimestamp() { @@ -451,6 +462,11 @@ public AdmissionControlStats getAdmissionControlStats() { return admissionControlStats; } + @Nullable + public NodeCacheStats getNodeCacheStats() { + return 
nodeCacheStats; + } + @Override public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); @@ -506,6 +522,9 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getVersion().onOrAfter(Version.V_2_12_0)) { out.writeOptionalWriteable(admissionControlStats); } + if (out.getVersion().onOrAfter(Version.V_2_14_0)) { + out.writeOptionalWriteable(nodeCacheStats); + } } @Override @@ -609,6 +628,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (getAdmissionControlStats() != null) { getAdmissionControlStats().toXContent(builder, params); } + if (getNodeCacheStats() != null) { + getNodeCacheStats().toXContent(builder, params); + } return builder; } } diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java index 1af56f10b95ee..379836cf442e3 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java @@ -219,7 +219,8 @@ public enum Metric { RESOURCE_USAGE_STATS("resource_usage_stats"), SEGMENT_REPLICATION_BACKPRESSURE("segment_replication_backpressure"), REPOSITORIES("repositories"), - ADMISSION_CONTROL("admission_control"); + ADMISSION_CONTROL("admission_control"), + CACHE_STATS("caches"); private String metricName; diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java index 1df73d3b4394d..2e93e5e7841cb 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java @@ -128,7 +128,8 @@ protected NodeStats 
nodeOperation(NodeStatsRequest nodeStatsRequest) { NodesStatsRequest.Metric.RESOURCE_USAGE_STATS.containedIn(metrics), NodesStatsRequest.Metric.SEGMENT_REPLICATION_BACKPRESSURE.containedIn(metrics), NodesStatsRequest.Metric.REPOSITORIES.containedIn(metrics), - NodesStatsRequest.Metric.ADMISSION_CONTROL.containedIn(metrics) + NodesStatsRequest.Metric.ADMISSION_CONTROL.containedIn(metrics), + NodesStatsRequest.Metric.CACHE_STATS.containedIn(metrics) ); } diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/settings/TransportClusterUpdateSettingsAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/settings/TransportClusterUpdateSettingsAction.java index e6c149216da09..6292d32fee26d 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/settings/TransportClusterUpdateSettingsAction.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/settings/TransportClusterUpdateSettingsAction.java @@ -42,6 +42,7 @@ import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.block.ClusterBlockException; import org.opensearch.cluster.block.ClusterBlockLevel; +import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.IndexNameExpressionResolver; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; @@ -58,15 +59,19 @@ import org.opensearch.common.settings.SettingsException; import org.opensearch.core.action.ActionListener; import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.index.remote.RemoteMigrationIndexMetadataUpdater; import org.opensearch.node.remotestore.RemoteStoreNodeService; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.TransportService; import java.io.IOException; +import java.util.Collection; import java.util.Locale; import java.util.Set; import java.util.stream.Collectors; +import static 
org.opensearch.index.remote.RemoteMigrationIndexMetadataUpdater.indexHasAllRemoteStoreRelatedMetadata; + /** * Transport action for updating cluster settings * @@ -284,6 +289,7 @@ public void validateCompatibilityModeSettingRequest(ClusterUpdateSettingsRequest validateAllNodesOfSameVersion(clusterState.nodes()); if (value.equals(RemoteStoreNodeService.CompatibilityMode.STRICT.mode)) { validateAllNodesOfSameType(clusterState.nodes()); + validateIndexSettings(clusterState); } } } @@ -317,4 +323,19 @@ private void validateAllNodesOfSameType(DiscoveryNodes discoveryNodes) { } } + /** + * Verifies that while trying to switch to STRICT compatibility mode, + * all indices in the cluster have {@link RemoteMigrationIndexMetadataUpdater#indexHasAllRemoteStoreRelatedMetadata(IndexMetadata)} as true. + * If not, throws {@link SettingsException}; a cluster with no indices passes validation. + * @param clusterState current cluster state + */ + private void validateIndexSettings(ClusterState clusterState) { + Collection<IndexMetadata> allIndicesMetadata = clusterState.metadata().indices().values(); + if (allIndicesMetadata.isEmpty() == false + && allIndicesMetadata.stream().anyMatch(indexMetadata -> indexHasAllRemoteStoreRelatedMetadata(indexMetadata) == false)) { + throw new SettingsException( + "can not switch to STRICT compatibility mode since all indices in the cluster does not have remote store based index settings" + ); + } + } } diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java index 9c5dcc9e9de3f..e4f483f796f44 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java @@ -172,6 +172,7 @@ protected ClusterStatsNodeResponse nodeOperation(ClusterStatsNodeRequest nodeReq false, false, false, + false, false ); List shardsStats = new
ArrayList<>(); diff --git a/server/src/main/java/org/opensearch/action/admin/indices/mapping/get/GetFieldMappingsResponse.java b/server/src/main/java/org/opensearch/action/admin/indices/mapping/get/GetFieldMappingsResponse.java index 86533f14e83e1..7edba143a72f0 100644 --- a/server/src/main/java/org/opensearch/action/admin/indices/mapping/get/GetFieldMappingsResponse.java +++ b/server/src/main/java/org/opensearch/action/admin/indices/mapping/get/GetFieldMappingsResponse.java @@ -52,6 +52,7 @@ import java.io.IOException; import java.io.InputStream; +import java.util.Collections; import java.util.HashMap; import java.util.Map; import java.util.Objects; @@ -116,6 +117,11 @@ public class GetFieldMappingsResponse extends ActionResponse implements ToXConte String index = in.readString(); if (in.getVersion().before(Version.V_2_0_0)) { int typesSize = in.readVInt(); + // if the requested field doesn't exist, type size in the received response from 1.x node is 0 + if (typesSize == 0) { + indexMapBuilder.put(index, Collections.emptyMap()); + continue; + } if (typesSize != 1) { throw new IllegalStateException("Expected single type but received [" + typesSize + "]"); } diff --git a/server/src/main/java/org/opensearch/action/admin/indices/stats/CommonStatsFlags.java b/server/src/main/java/org/opensearch/action/admin/indices/stats/CommonStatsFlags.java index a7d9f95b80f7b..4d108f8d78a69 100644 --- a/server/src/main/java/org/opensearch/action/admin/indices/stats/CommonStatsFlags.java +++ b/server/src/main/java/org/opensearch/action/admin/indices/stats/CommonStatsFlags.java @@ -34,6 +34,7 @@ import org.opensearch.Version; import org.opensearch.common.annotation.PublicApi; +import org.opensearch.common.cache.CacheType; import org.opensearch.core.common.Strings; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; @@ -42,6 +43,7 @@ import java.io.IOException; import java.util.Collections; import java.util.EnumSet; +import 
java.util.Set; /** * Common Stats Flags for OpenSearch @@ -62,6 +64,9 @@ public class CommonStatsFlags implements Writeable, Cloneable { private boolean includeUnloadedSegments = false; private boolean includeAllShardIndexingPressureTrackers = false; private boolean includeOnlyTopIndexingPressureMetrics = false; + // Used for metric CACHE_STATS, to determine which caches to report stats for + private EnumSet includeCaches = EnumSet.noneOf(CacheType.class); + private String[] levels = new String[0]; /** * @param flags flags to set. If no flags are supplied, default flags will be set. @@ -91,6 +96,10 @@ public CommonStatsFlags(StreamInput in) throws IOException { includeUnloadedSegments = in.readBoolean(); includeAllShardIndexingPressureTrackers = in.readBoolean(); includeOnlyTopIndexingPressureMetrics = in.readBoolean(); + if (in.getVersion().onOrAfter(Version.V_2_14_0)) { + includeCaches = in.readEnumSet(CacheType.class); + levels = in.readStringArray(); + } } @Override @@ -111,6 +120,10 @@ public void writeTo(StreamOutput out) throws IOException { out.writeBoolean(includeUnloadedSegments); out.writeBoolean(includeAllShardIndexingPressureTrackers); out.writeBoolean(includeOnlyTopIndexingPressureMetrics); + if (out.getVersion().onOrAfter(Version.V_2_14_0)) { + out.writeEnumSet(includeCaches); + out.writeStringArrayNullable(levels); + } } /** @@ -125,6 +138,8 @@ public CommonStatsFlags all() { includeUnloadedSegments = false; includeAllShardIndexingPressureTrackers = false; includeOnlyTopIndexingPressureMetrics = false; + includeCaches = EnumSet.noneOf(CacheType.class); + levels = new String[0]; return this; } @@ -140,6 +155,8 @@ public CommonStatsFlags clear() { includeUnloadedSegments = false; includeAllShardIndexingPressureTrackers = false; includeOnlyTopIndexingPressureMetrics = false; + includeCaches = EnumSet.noneOf(CacheType.class); + levels = new String[0]; return this; } @@ -151,6 +168,14 @@ public Flag[] getFlags() { return flags.toArray(new Flag[0]); } + 
public Set getIncludeCaches() { + return includeCaches; + } + + public String[] getLevels() { + return levels; + } + /** * Sets specific search group stats to retrieve the stats for. Mainly affects search * when enabled. @@ -206,6 +231,21 @@ public CommonStatsFlags includeOnlyTopIndexingPressureMetrics(boolean includeOnl return this; } + public CommonStatsFlags includeCacheType(CacheType cacheType) { + includeCaches.add(cacheType); + return this; + } + + public CommonStatsFlags includeAllCacheTypes() { + includeCaches = EnumSet.allOf(CacheType.class); + return this; + } + + public CommonStatsFlags setLevels(String[] inputLevels) { + levels = inputLevels; + return this; + } + public boolean includeUnloadedSegments() { return this.includeUnloadedSegments; } diff --git a/server/src/main/java/org/opensearch/action/bulk/BulkRequest.java b/server/src/main/java/org/opensearch/action/bulk/BulkRequest.java index 47abd0337fcf9..7614206cd226f 100644 --- a/server/src/main/java/org/opensearch/action/bulk/BulkRequest.java +++ b/server/src/main/java/org/opensearch/action/bulk/BulkRequest.java @@ -34,6 +34,7 @@ import org.apache.lucene.util.Accountable; import org.apache.lucene.util.RamUsageEstimator; +import org.opensearch.Version; import org.opensearch.action.ActionRequest; import org.opensearch.action.ActionRequestValidationException; import org.opensearch.action.CompositeIndicesRequest; @@ -80,7 +81,6 @@ public class BulkRequest extends ActionRequest implements CompositeIndicesReques private static final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(BulkRequest.class); private static final int REQUEST_OVERHEAD = 50; - /** * Requests that are part of this request. 
It is only possible to add things that are both {@link ActionRequest}s and * {@link WriteRequest}s to this but java doesn't support syntax to declare that everything in the array has both types so we declare @@ -96,6 +96,7 @@ public class BulkRequest extends ActionRequest implements CompositeIndicesReques private String globalRouting; private String globalIndex; private Boolean globalRequireAlias; + private int batchSize = 1; private long sizeInBytes = 0; @@ -107,6 +108,9 @@ public BulkRequest(StreamInput in) throws IOException { requests.addAll(in.readList(i -> DocWriteRequest.readDocumentRequest(null, i))); refreshPolicy = RefreshPolicy.readFrom(in); timeout = in.readTimeValue(); + if (in.getVersion().onOrAfter(Version.V_2_14_0)) { + batchSize = in.readInt(); + } } public BulkRequest(@Nullable String globalIndex) { @@ -346,6 +350,27 @@ public final BulkRequest timeout(TimeValue timeout) { return this; } + /** + * Set batch size + * @param size batch size from input + * @return {@link BulkRequest} + */ + public BulkRequest batchSize(int size) { + if (size < 1) { + throw new IllegalArgumentException("batch_size must be greater than 0"); + } + this.batchSize = size; + return this; + } + + /** + * Get batch size + * @return batch size + */ + public int batchSize() { + return this.batchSize; + } + /** * Note for internal callers (NOT high level rest client), * the global parameter setting is ignored when used with: @@ -453,6 +478,9 @@ public void writeTo(StreamOutput out) throws IOException { out.writeCollection(requests, DocWriteRequest::writeDocumentRequest); refreshPolicy.writeTo(out); out.writeTimeValue(timeout); + if (out.getVersion().onOrAfter(Version.V_2_14_0)) { + out.writeInt(batchSize); + } } @Override diff --git a/server/src/main/java/org/opensearch/action/bulk/TransportBulkAction.java b/server/src/main/java/org/opensearch/action/bulk/TransportBulkAction.java index 4a9b07c12821d..19ffb12859183 100644 --- 
a/server/src/main/java/org/opensearch/action/bulk/TransportBulkAction.java +++ b/server/src/main/java/org/opensearch/action/bulk/TransportBulkAction.java @@ -923,7 +923,8 @@ public boolean isForceExecution() { } }, bulkRequestModifier::markItemAsDropped, - executorName + executorName, + original ); } diff --git a/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java b/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java index 0520a4a7aecec..9bf4a4b1e18f1 100644 --- a/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java +++ b/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java @@ -425,8 +425,10 @@ public final void executeNextPhase(SearchPhase currentPhase, SearchPhase nextPha currentPhase.getName() ); } - onPhaseFailure(currentPhase, "Partial shards failure (" + discrepancy + " shards unavailable)", null); - return; + if (!request.indicesOptions().ignoreUnavailable()) { + onPhaseFailure(currentPhase, "Partial shards failure (" + discrepancy + " shards unavailable)", null); + return; + } } } if (logger.isTraceEnabled()) { diff --git a/server/src/main/java/org/opensearch/action/search/SearchRequest.java b/server/src/main/java/org/opensearch/action/search/SearchRequest.java index 3b8a6937815aa..4d3bb868b779a 100644 --- a/server/src/main/java/org/opensearch/action/search/SearchRequest.java +++ b/server/src/main/java/org/opensearch/action/search/SearchRequest.java @@ -40,6 +40,7 @@ import org.opensearch.action.support.IndicesOptions; import org.opensearch.common.Nullable; import org.opensearch.common.annotation.PublicApi; +import org.opensearch.common.io.stream.BytesStreamOutput; import org.opensearch.common.unit.TimeValue; import org.opensearch.core.common.Strings; import org.opensearch.core.common.io.stream.StreamInput; @@ -161,6 +162,18 @@ public SearchRequest(String[] indices, SearchSourceBuilder source) { this.source = source; } + /** + * Deep clone a 
SearchRequest + * + * @return a copy of the current SearchRequest + */ + public SearchRequest deepCopy() throws IOException { + BytesStreamOutput out = new BytesStreamOutput(); + this.writeTo(out); + StreamInput in = out.bytes().streamInput(); + return new SearchRequest(in); + } + /** * Creates a new sub-search request starting from the original search request that is provided. * For internal use only, allows to fork a search request into multiple search requests that will be executed independently. @@ -497,7 +510,7 @@ public PointInTimeBuilder pointInTimeBuilder() { } /** - * The tye of search to execute. + * The type of search to execute. */ public SearchType searchType() { return searchType; diff --git a/server/src/main/java/org/opensearch/action/search/TransportSearchAction.java b/server/src/main/java/org/opensearch/action/search/TransportSearchAction.java index 65cfd35489033..143b01af3f62f 100644 --- a/server/src/main/java/org/opensearch/action/search/TransportSearchAction.java +++ b/server/src/main/java/org/opensearch/action/search/TransportSearchAction.java @@ -457,7 +457,7 @@ private void executeRequest( PipelinedRequest searchRequest; ActionListener listener; try { - searchRequest = searchPipelineService.resolvePipeline(originalSearchRequest); + searchRequest = searchPipelineService.resolvePipeline(originalSearchRequest, indexNameExpressionResolver); listener = searchRequest.transformResponseListener(updatedListener); } catch (Exception e) { updatedListener.onFailure(e); diff --git a/server/src/main/java/org/opensearch/cluster/ClusterModule.java b/server/src/main/java/org/opensearch/cluster/ClusterModule.java index b846d382db89d..aa9101090b6d5 100644 --- a/server/src/main/java/org/opensearch/cluster/ClusterModule.java +++ b/server/src/main/java/org/opensearch/cluster/ClusterModule.java @@ -93,6 +93,7 @@ import org.opensearch.core.common.io.stream.Writeable.Reader; import org.opensearch.core.xcontent.NamedXContentRegistry; import 
org.opensearch.gateway.GatewayAllocator; +import org.opensearch.gateway.ShardsBatchGatewayAllocator; import org.opensearch.ingest.IngestMetadata; import org.opensearch.persistent.PersistentTasksCustomMetadata; import org.opensearch.persistent.PersistentTasksNodeService; @@ -153,7 +154,13 @@ public ClusterModule( this.shardsAllocator = createShardsAllocator(settings, clusterService.getClusterSettings(), clusterPlugins); this.clusterService = clusterService; this.indexNameExpressionResolver = new IndexNameExpressionResolver(threadContext); - this.allocationService = new AllocationService(allocationDeciders, shardsAllocator, clusterInfoService, snapshotsInfoService); + this.allocationService = new AllocationService( + allocationDeciders, + shardsAllocator, + clusterInfoService, + snapshotsInfoService, + settings + ); } public static List getNamedWriteables() { @@ -423,6 +430,7 @@ public AllocationService getAllocationService() { @Override protected void configure() { bind(GatewayAllocator.class).asEagerSingleton(); + bind(ShardsBatchGatewayAllocator.class).asEagerSingleton(); bind(AllocationService.class).toInstance(allocationService); bind(ClusterService.class).toInstance(clusterService); bind(NodeConnectionsService.class).asEagerSingleton(); @@ -442,10 +450,10 @@ protected void configure() { bind(ShardsAllocator.class).toInstance(shardsAllocator); } - public void setExistingShardsAllocators(GatewayAllocator gatewayAllocator) { + public void setExistingShardsAllocators(GatewayAllocator gatewayAllocator, ShardsBatchGatewayAllocator shardsBatchGatewayAllocator) { final Map existingShardsAllocators = new HashMap<>(); existingShardsAllocators.put(GatewayAllocator.ALLOCATOR_NAME, gatewayAllocator); - + existingShardsAllocators.put(ShardsBatchGatewayAllocator.ALLOCATOR_NAME, shardsBatchGatewayAllocator); for (ClusterPlugin clusterPlugin : clusterPlugins) { for (Map.Entry existingShardsAllocatorEntry : clusterPlugin.getExistingShardsAllocators() .entrySet()) { diff --git 
a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java index 5d896e392e6bc..5475470b81b93 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java @@ -31,6 +31,7 @@ package org.opensearch.cluster.coordination; +import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.Version; import org.opensearch.cluster.ClusterState; @@ -57,6 +58,7 @@ import java.util.Collections; import java.util.HashMap; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Optional; import java.util.Set; @@ -66,6 +68,7 @@ import static org.opensearch.cluster.decommission.DecommissionHelper.nodeCommissioned; import static org.opensearch.gateway.GatewayService.STATE_NOT_RECOVERED_BLOCK; import static org.opensearch.node.remotestore.RemoteStoreNodeService.CompatibilityMode; +import static org.opensearch.node.remotestore.RemoteStoreNodeService.CompatibilityMode.MIXED; import static org.opensearch.node.remotestore.RemoteStoreNodeService.CompatibilityMode.STRICT; import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING; @@ -78,7 +81,7 @@ public class JoinTaskExecutor implements ClusterStateTaskExecutor remoteDN = existingNodes.stream().filter(DiscoveryNode::isRemoteStoreNode).findFirst(); remoteDN.ifPresent(discoveryNode -> ensureRemoteStoreNodesCompatibility(joiningNode, discoveryNode)); diff --git a/server/src/main/java/org/opensearch/cluster/metadata/IndexNameExpressionResolver.java b/server/src/main/java/org/opensearch/cluster/metadata/IndexNameExpressionResolver.java index 9a3b569a7ac3d..24ff83d638d4b 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/IndexNameExpressionResolver.java +++ 
b/server/src/main/java/org/opensearch/cluster/metadata/IndexNameExpressionResolver.java @@ -380,7 +380,13 @@ private void checkSystemIndexAccess(Context context, Metadata metadata, Set shardsWithState(ShardRoutingState state) { return shards; } + /** + * Returns a {@link List} of shards that match the provided {@link Predicate} + * + * @param predicate {@link Predicate} to apply + * @return a {@link List} of shards that satisfy the given {@link Predicate}; empty if none match + */ + public List shardsMatchingPredicate(Predicate predicate) { + List shards = new ArrayList<>(); + for (IndexShardRoutingTable shardRoutingTable : this) { + shards.addAll(shardRoutingTable.shardsMatchingPredicate(predicate)); + } + return shards; + } + public int shardsMatchingPredicateCount(Predicate predicate) { int count = 0; for (IndexShardRoutingTable shardRoutingTable : this) { diff --git a/server/src/main/java/org/opensearch/cluster/routing/IndexShardRoutingTable.java b/server/src/main/java/org/opensearch/cluster/routing/IndexShardRoutingTable.java index 2c250f6a5d86e..fd8cbea42c12f 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/IndexShardRoutingTable.java +++ b/server/src/main/java/org/opensearch/cluster/routing/IndexShardRoutingTable.java @@ -904,6 +904,22 @@ public List shardsWithState(ShardRoutingState state) { return shards; } + /** + * Returns a {@link List} of shards that match the provided {@link Predicate} + * + * @param predicate {@link Predicate} to apply + * @return a {@link List} of shards that satisfy the given {@link Predicate}; empty if none match + */ + public List shardsMatchingPredicate(Predicate predicate) { + List shards = new ArrayList<>(); + for (ShardRouting shardEntry : this) { + if (predicate.test(shardEntry)) { + shards.add(shardEntry); + } + } + return shards; + } + public int shardsMatchingPredicateCount(Predicate predicate) { int count = 0; for (ShardRouting shardEntry : this) { diff --git a/server/src/main/java/org/opensearch/cluster/routing/RoutingNode.java
b/server/src/main/java/org/opensearch/cluster/routing/RoutingNode.java index 15ec41d5c3fbb..24c3077960444 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/RoutingNode.java +++ b/server/src/main/java/org/opensearch/cluster/routing/RoutingNode.java @@ -204,6 +204,10 @@ public int size() { return shards.size(); } + public Collection getInitializingShards() { + return initializingShards; + } + /** * Add a new shard to this node * @param shard Shard to create on this Node diff --git a/server/src/main/java/org/opensearch/cluster/routing/RoutingNodes.java b/server/src/main/java/org/opensearch/cluster/routing/RoutingNodes.java index 938a603c459c9..ab455f52c4195 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/RoutingNodes.java +++ b/server/src/main/java/org/opensearch/cluster/routing/RoutingNodes.java @@ -67,6 +67,8 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +import static org.opensearch.node.remotestore.RemoteStoreNodeService.isMigratingToRemoteStore; + /** * {@link RoutingNodes} represents a copy the routing information contained in the {@link ClusterState cluster state}. * It can be either initialized as mutable or immutable (see {@link #RoutingNodes(ClusterState, boolean)}), allowing @@ -418,6 +420,20 @@ public ShardRouting activeReplicaWithOldestVersion(ShardId shardId) { .orElse(null); } + /** + * Returns one active replica shard on a remote node for the given shard id or null if + * no such replica is found. + *

+ * Since we aim to continue moving forward during remote store migration, replicas already migrated to remote nodes + * are preferred for primary promotion + */ + public ShardRouting activeReplicaOnRemoteNode(ShardId shardId) { + return assignedShards(shardId).stream().filter(shr -> !shr.primary() && shr.active()).filter((shr) -> { + RoutingNode nd = node(shr.currentNodeId()); + return (nd != null && nd.node().isRemoteStoreNode()); + }).findFirst().orElse(null); + } + /** * Returns true iff all replicas are active for the given shard routing. Otherwise false */ @@ -735,11 +751,17 @@ private void unassignPrimaryAndPromoteActiveReplicaIfExists( RoutingChangesObserver routingChangesObserver ) { assert failedShard.primary(); - ShardRouting activeReplica; - if (metadata.isSegmentReplicationEnabled(failedShard.getIndexName())) { - activeReplica = activeReplicaWithOldestVersion(failedShard.shardId()); - } else { - activeReplica = activeReplicaWithHighestVersion(failedShard.shardId()); + ShardRouting activeReplica = null; + if (isMigratingToRemoteStore(metadata)) { + // we might not find any replica on remote node + activeReplica = activeReplicaOnRemoteNode(failedShard.shardId()); + } + if (activeReplica == null) { + if (metadata.isSegmentReplicationEnabled(failedShard.getIndexName())) { + activeReplica = activeReplicaWithOldestVersion(failedShard.shardId()); + } else { + activeReplica = activeReplicaWithHighestVersion(failedShard.shardId()); + } } if (activeReplica == null) { moveToUnassigned(failedShard, unassignedInfo); diff --git a/server/src/main/java/org/opensearch/cluster/routing/allocation/AllocationService.java b/server/src/main/java/org/opensearch/cluster/routing/allocation/AllocationService.java index a5e2175f05c51..71e562253bf58 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/allocation/AllocationService.java +++ b/server/src/main/java/org/opensearch/cluster/routing/allocation/AllocationService.java @@ -35,6 +35,7 @@ import 
org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; +import org.opensearch.Version; import org.opensearch.cluster.ClusterInfoService; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.RestoreInProgress; @@ -54,8 +55,10 @@ import org.opensearch.cluster.routing.allocation.command.AllocationCommands; import org.opensearch.cluster.routing.allocation.decider.AllocationDeciders; import org.opensearch.cluster.routing.allocation.decider.Decision; +import org.opensearch.common.settings.Settings; import org.opensearch.gateway.GatewayAllocator; import org.opensearch.gateway.PriorityComparator; +import org.opensearch.gateway.ShardsBatchGatewayAllocator; import org.opensearch.snapshots.SnapshotsInfoService; import java.util.ArrayList; @@ -73,6 +76,7 @@ import static java.util.Collections.emptyList; import static java.util.Collections.singletonList; import static org.opensearch.cluster.routing.UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING; +import static org.opensearch.cluster.routing.allocation.ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_BATCH_MODE; /** * This service manages the node allocation of a cluster. 
For this reason the @@ -87,6 +91,7 @@ public class AllocationService { private static final Logger logger = LogManager.getLogger(AllocationService.class); private final AllocationDeciders allocationDeciders; + private final Settings settings; private Map existingShardsAllocators; private final ShardsAllocator shardsAllocator; private final ClusterInfoService clusterInfoService; @@ -109,11 +114,23 @@ public AllocationService( ShardsAllocator shardsAllocator, ClusterInfoService clusterInfoService, SnapshotsInfoService snapshotsInfoService + ) { + this(allocationDeciders, shardsAllocator, clusterInfoService, snapshotsInfoService, Settings.EMPTY); + } + + public AllocationService( + AllocationDeciders allocationDeciders, + ShardsAllocator shardsAllocator, + ClusterInfoService clusterInfoService, + SnapshotsInfoService snapshotsInfoService, + Settings settings + ) { this.allocationDeciders = allocationDeciders; this.shardsAllocator = shardsAllocator; this.clusterInfoService = clusterInfoService; this.snapshotsInfoService = snapshotsInfoService; + this.settings = settings; } /** @@ -548,6 +565,22 @@ private void allocateExistingUnassignedShards(RoutingAllocation allocation) { existingShardsAllocator.beforeAllocation(allocation); } + /* + Use batch mode only if the setting is enabled, every node is on or after 2.14.0 and no custom allocator is set for Allocation service + */ + boolean batchModeEnabled = EXISTING_SHARDS_ALLOCATOR_BATCH_MODE.get(settings); + if (batchModeEnabled + && allocation.nodes().getMinNodeVersion().onOrAfter(Version.V_2_14_0) + && existingShardsAllocators.size() == 2) { + /* + If we do not have any custom allocator set then we will be using ShardsBatchGatewayAllocator + Currently AllocationService will not run any custom Allocator that implements allocateAllUnassignedShards + */ + allocateAllUnassignedShards(allocation); + return; + } + logger.debug("Falling back to single shard assignment since batch mode is disabled or multiple custom allocators are set"); // batch mode defaults to false, so this is the normal path: keep it below WARN + final 
RoutingNodes.UnassignedShards.UnassignedIterator primaryIterator = allocation.routingNodes().unassigned().iterator(); while (primaryIterator.hasNext()) { final ShardRouting shardRouting = primaryIterator.next(); @@ -569,6 +602,14 @@ private void allocateExistingUnassignedShards(RoutingAllocation allocation) { } } + private void allocateAllUnassignedShards(RoutingAllocation allocation) { + ExistingShardsAllocator allocator = existingShardsAllocators.get(ShardsBatchGatewayAllocator.ALLOCATOR_NAME); + allocator.allocateAllUnassignedShards(allocation, true); + allocator.afterPrimariesBeforeReplicas(allocation); + // Replicas Assignment + allocator.allocateAllUnassignedShards(allocation, false); + } + private void disassociateDeadNodes(RoutingAllocation allocation) { for (Iterator it = allocation.routingNodes().mutableIterator(); it.hasNext();) { RoutingNode node = it.next(); diff --git a/server/src/main/java/org/opensearch/cluster/routing/allocation/ExistingShardsAllocator.java b/server/src/main/java/org/opensearch/cluster/routing/allocation/ExistingShardsAllocator.java index f1889cdf780d4..fb2a37237f8b6 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/allocation/ExistingShardsAllocator.java +++ b/server/src/main/java/org/opensearch/cluster/routing/allocation/ExistingShardsAllocator.java @@ -39,12 +39,13 @@ import org.opensearch.common.Nullable; import org.opensearch.common.settings.Setting; import org.opensearch.gateway.GatewayAllocator; +import org.opensearch.gateway.ShardsBatchGatewayAllocator; import java.util.List; /** * Searches for, and allocates, shards for which there is an existing on-disk copy somewhere in the cluster. The default implementation is - * {@link GatewayAllocator}, but plugins can supply their own implementations too. + * {@link GatewayAllocator} and {@link ShardsBatchGatewayAllocator}, but plugins can supply their own implementations too. 
* * @opensearch.internal */ @@ -60,6 +61,26 @@ public interface ExistingShardsAllocator { Setting.Property.PrivateIndex ); + /** + * Boolean setting to enable/disable batch allocation of unassigned shards already existing on disk. + * This will allow sending all unassigned shards to the {@link ExistingShardsAllocator} so that it can decide their allocation + * in one or more passes. + * + * Enable this setting if your {@link ExistingShardsAllocator} implements the + * {@link ExistingShardsAllocator#allocateAllUnassignedShards(RoutingAllocation, boolean)} method. + * The default implementation of this method is not optimized and assigns shards one by one. + * + * If no plugin overrides {@link ExistingShardsAllocator} then the default implementation is used, i.e., + * {@link ShardsBatchGatewayAllocator}. + * + * This setting is experimental at this point. + */ + Setting EXISTING_SHARDS_ALLOCATOR_BATCH_MODE = Setting.boolSetting( + "cluster.allocator.existing_shards_allocator.batch_enabled", + false, + Setting.Property.NodeScope + ); + /** * Called before starting a round of allocation, allowing the allocator to invalidate some caches if appropriate. */ @@ -80,6 +101,23 @@ void allocateUnassigned( UnassignedAllocationHandler unassignedAllocationHandler ); + /** + * Allocate all unassigned shards in the given {@link RoutingAllocation} for which this {@link ExistingShardsAllocator} is responsible. + * Default implementation calls {@link #allocateUnassigned(ShardRouting, RoutingAllocation, UnassignedAllocationHandler)} for each unassigned shard + * and is kept here for backward compatibility. 
+ * + * Allocation service will currently run the default implementation of it implemented by {@link ShardsBatchGatewayAllocator} + */ + default void allocateAllUnassignedShards(RoutingAllocation allocation, boolean primary) { + RoutingNodes.UnassignedShards.UnassignedIterator iterator = allocation.routingNodes().unassigned().iterator(); + while (iterator.hasNext()) { + ShardRouting shardRouting = iterator.next(); + if (shardRouting.primary() == primary) { + allocateUnassigned(shardRouting, allocation, iterator); + } + } + } + /** * Returns an explanation for a single unassigned shard. */ diff --git a/server/src/main/java/org/opensearch/cluster/routing/allocation/IndexMetadataUpdater.java b/server/src/main/java/org/opensearch/cluster/routing/allocation/IndexMetadataUpdater.java index 7fc78b05880f3..ddcccd597e894 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/allocation/IndexMetadataUpdater.java +++ b/server/src/main/java/org/opensearch/cluster/routing/allocation/IndexMetadataUpdater.java @@ -32,10 +32,12 @@ package org.opensearch.cluster.routing.allocation; +import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.cluster.routing.IndexShardRoutingTable; import org.opensearch.cluster.routing.RecoverySource; import org.opensearch.cluster.routing.RoutingChangesObserver; @@ -45,6 +47,7 @@ import org.opensearch.common.util.set.Sets; import org.opensearch.core.index.Index; import org.opensearch.core.index.shard.ShardId; +import org.opensearch.index.remote.RemoteMigrationIndexMetadataUpdater; import java.util.Collections; import java.util.Comparator; @@ -67,14 +70,15 @@ * @opensearch.internal */ public class IndexMetadataUpdater extends RoutingChangesObserver.AbstractRoutingChangesObserver { + private final 
Logger logger = LogManager.getLogger(IndexMetadataUpdater.class); private final Map shardChanges = new HashMap<>(); + private boolean ongoingRemoteStoreMigration = false; @Override public void shardInitialized(ShardRouting unassignedShard, ShardRouting initializedShard) { assert initializedShard.isRelocationTarget() == false : "shardInitialized is not called on relocation target: " + initializedShard; if (initializedShard.primary()) { increasePrimaryTerm(initializedShard.shardId()); - Updates updates = changes(initializedShard.shardId()); assert updates.initializedPrimary == null : "Primary cannot be initialized more than once in same allocation round: " + "(previous: " @@ -113,6 +117,12 @@ public void shardFailed(ShardRouting failedShard, UnassignedInfo unassignedInfo) } increasePrimaryTerm(failedShard.shardId()); } + + // Track change through shardChanges Map regardless of above-mentioned conditions + // To be used to update index metadata while computing new cluster state + if (ongoingRemoteStoreMigration) { + changes(failedShard.shardId()); + } } @Override @@ -120,20 +130,34 @@ public void relocationCompleted(ShardRouting removedRelocationSource) { removeAllocationId(removedRelocationSource); } + /** + * Adds the target {@link ShardRouting} to the tracking updates set. + * Used to track started relocations while applying changes to the new {@link ClusterState} + */ + @Override + public void relocationStarted(ShardRouting startedShard, ShardRouting targetRelocatingShard) { + // Store change in shardChanges Map regardless of above-mentioned conditions + // To be used to update index metadata while computing new cluster state + if (ongoingRemoteStoreMigration) { + changes(targetRelocatingShard.shardId()); + } + } + /** * Updates the current {@link Metadata} based on the changes of this RoutingChangesObserver. 
Specifically * we update {@link IndexMetadata#getInSyncAllocationIds()} and {@link IndexMetadata#primaryTerm(int)} based on * the changes made during this allocation. + *
+ * Manipulates index settings or index metadata during an ongoing remote store migration * * @param oldMetadata {@link Metadata} object from before the routing nodes was changed. * @param newRoutingTable {@link RoutingTable} object after routing changes were applied. * @return adapted {@link Metadata}, potentially the original one if no change was needed. */ - public Metadata applyChanges(Metadata oldMetadata, RoutingTable newRoutingTable) { + public Metadata applyChanges(Metadata oldMetadata, RoutingTable newRoutingTable, DiscoveryNodes discoveryNodes) { Map>> changesGroupedByIndex = shardChanges.entrySet() .stream() .collect(Collectors.groupingBy(e -> e.getKey().getIndex())); - Metadata.Builder metadataBuilder = null; for (Map.Entry>> indexChanges : changesGroupedByIndex.entrySet()) { Index index = indexChanges.getKey(); @@ -144,6 +168,17 @@ public Metadata applyChanges(Metadata oldMetadata, RoutingTable newRoutingTable) Updates updates = shardEntry.getValue(); indexMetadataBuilder = updateInSyncAllocations(newRoutingTable, oldIndexMetadata, indexMetadataBuilder, shardId, updates); indexMetadataBuilder = updatePrimaryTerm(oldIndexMetadata, indexMetadataBuilder, shardId, updates); + if (ongoingRemoteStoreMigration) { + RemoteMigrationIndexMetadataUpdater migrationImdUpdater = new RemoteMigrationIndexMetadataUpdater( + discoveryNodes, + newRoutingTable, + oldIndexMetadata, + oldMetadata.settings(), + logger + ); + migrationImdUpdater.maybeUpdateRemoteStorePathStrategy(indexMetadataBuilder, index.getName()); + migrationImdUpdater.maybeAddRemoteIndexSettings(indexMetadataBuilder, index.getName()); + } } if (indexMetadataBuilder != null) { @@ -369,6 +404,10 @@ private void increasePrimaryTerm(ShardId shardId) { changes(shardId).increaseTerm = true; } + public void setOngoingRemoteStoreMigration(boolean ongoingRemoteStoreMigration) { + this.ongoingRemoteStoreMigration = ongoingRemoteStoreMigration; + } + private static class Updates { private boolean increaseTerm; // 
whether primary term should be increased private Set addedAllocationIds = new HashSet<>(); // allocation ids that should be added to the in-sync set diff --git a/server/src/main/java/org/opensearch/cluster/routing/allocation/RoutingAllocation.java b/server/src/main/java/org/opensearch/cluster/routing/allocation/RoutingAllocation.java index bf2db57128517..fd789774f6f4f 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/allocation/RoutingAllocation.java +++ b/server/src/main/java/org/opensearch/cluster/routing/allocation/RoutingAllocation.java @@ -55,6 +55,7 @@ import static java.util.Collections.emptySet; import static java.util.Collections.unmodifiableSet; +import static org.opensearch.node.remotestore.RemoteStoreNodeService.isMigratingToRemoteStore; /** * The {@link RoutingAllocation} keep the state of the current allocation @@ -125,6 +126,9 @@ public RoutingAllocation( this.clusterInfo = clusterInfo; this.shardSizeInfo = shardSizeInfo; this.currentNanoTime = currentNanoTime; + if (isMigratingToRemoteStore(metadata)) { + indexMetadataUpdater.setOngoingRemoteStoreMigration(true); + } } /** returns the nano time captured at the beginning of the allocation. 
used to make sure all time based decisions are aligned */ @@ -267,7 +271,7 @@ public RoutingChangesObserver changes() { * Returns updated {@link Metadata} based on the changes that were made to the routing nodes */ public Metadata updateMetadataWithRoutingChanges(RoutingTable newRoutingTable) { - return indexMetadataUpdater.applyChanges(metadata, newRoutingTable); + return indexMetadataUpdater.applyChanges(metadata, newRoutingTable, nodes()); } /** diff --git a/server/src/main/java/org/opensearch/cluster/routing/allocation/decider/RemoteStoreMigrationAllocationDecider.java b/server/src/main/java/org/opensearch/cluster/routing/allocation/decider/RemoteStoreMigrationAllocationDecider.java index 7d40aacb71e25..4fc5fff805663 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/allocation/decider/RemoteStoreMigrationAllocationDecider.java +++ b/server/src/main/java/org/opensearch/cluster/routing/allocation/decider/RemoteStoreMigrationAllocationDecider.java @@ -95,32 +95,38 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing ); } - if (migrationDirection.equals(Direction.REMOTE_STORE) == false) { - // docrep migration direction is currently not supported + IndexMetadata indexMetadata = allocation.metadata().getIndexSafe(shardRouting.index()); + boolean remoteSettingsBackedIndex = IndexMetadata.INDEX_REMOTE_STORE_ENABLED_SETTING.get(indexMetadata.getSettings()); + + if (migrationDirection.equals(Direction.NONE)) { + // remote backed indices on docrep nodes and non remote backed indices on remote nodes are not allowed + boolean isNoDecision = remoteSettingsBackedIndex ^ targetNode.isRemoteStoreNode(); + String reason = String.format(Locale.ROOT, " for %sremote store backed index", remoteSettingsBackedIndex ? "" : "non "); return allocation.decision( - Decision.YES, + isNoDecision ? 
Decision.NO : Decision.YES, NAME, - getDecisionDetails(true, shardRouting, targetNode, " for non remote_store direction") + getDecisionDetails(!isNoDecision, shardRouting, targetNode, reason) ); - } - - // check for remote store backed indices - IndexMetadata indexMetadata = allocation.metadata().getIndexSafe(shardRouting.index()); - boolean remoteStoreBackedIndex = IndexMetadata.INDEX_REMOTE_STORE_ENABLED_SETTING.get(indexMetadata.getSettings()); - if (remoteStoreBackedIndex && targetNode.isRemoteStoreNode() == false) { - // allocations and relocations must be to a remote node - String reason = String.format( - Locale.ROOT, - " because a remote store backed index's shard copy can only be %s to a remote node", - ((shardRouting.assignedToNode() == false) ? "allocated" : "relocated") - ); - return allocation.decision(Decision.NO, NAME, getDecisionDetails(false, shardRouting, targetNode, reason)); - } + } else if (migrationDirection.equals(Direction.DOCREP)) { + // docrep migration direction is currently not supported + return allocation.decision(Decision.YES, NAME, getDecisionDetails(true, shardRouting, targetNode, " for DOCREP direction")); + } else { + // check for remote store backed indices + if (remoteSettingsBackedIndex && targetNode.isRemoteStoreNode() == false) { + // allocations and relocations must be to a remote node + String reason = String.format( + Locale.ROOT, + " because a remote store backed index's shard copy can only be %s to a remote node", + ((shardRouting.assignedToNode() == false) ? 
"allocated" : "relocated") + ); + return allocation.decision(Decision.NO, NAME, getDecisionDetails(false, shardRouting, targetNode, reason)); + } - if (shardRouting.primary()) { - return primaryShardDecision(shardRouting, targetNode, allocation); + if (shardRouting.primary()) { + return primaryShardDecision(shardRouting, targetNode, allocation); + } + return replicaShardDecision(shardRouting, targetNode, allocation); } - return replicaShardDecision(shardRouting, targetNode, allocation); } // handle scenarios for allocation of a new shard's primary copy diff --git a/server/src/main/java/org/opensearch/common/blobstore/BlobStore.java b/server/src/main/java/org/opensearch/common/blobstore/BlobStore.java index 0f6646d37f950..8ce8ec8e01abe 100644 --- a/server/src/main/java/org/opensearch/common/blobstore/BlobStore.java +++ b/server/src/main/java/org/opensearch/common/blobstore/BlobStore.java @@ -75,6 +75,7 @@ default void reload(RepositoryMetadata repositoryMetadata) {} * Metrics for BlobStore interactions */ enum Metric { + GENERIC_STATS("generic_stats"), REQUEST_SUCCESS("request_success_total"), REQUEST_FAILURE("request_failures_total"), REQUEST_LATENCY("request_time_in_millis"), diff --git a/server/src/main/java/org/opensearch/common/blobstore/stream/write/WritePriority.java b/server/src/main/java/org/opensearch/common/blobstore/stream/write/WritePriority.java index 3f341c878c3c7..4e8db0a3a8c69 100644 --- a/server/src/main/java/org/opensearch/common/blobstore/stream/write/WritePriority.java +++ b/server/src/main/java/org/opensearch/common/blobstore/stream/write/WritePriority.java @@ -14,7 +14,12 @@ * @opensearch.internal */ public enum WritePriority { + // Used for segment transfers during refresh, flush or merges NORMAL, + // Used for transfer of translog or ckp files. HIGH, - URGENT + // Used for transfer of remote cluster state + URGENT, + // All other background transfers such as in snapshot recovery, recovery from local store or index etc. 
+ LOW } diff --git a/server/src/main/java/org/opensearch/common/blobstore/transfer/RemoteTransferContainer.java b/server/src/main/java/org/opensearch/common/blobstore/transfer/RemoteTransferContainer.java index cd2ef22327ebb..cbd1852202d1c 100644 --- a/server/src/main/java/org/opensearch/common/blobstore/transfer/RemoteTransferContainer.java +++ b/server/src/main/java/org/opensearch/common/blobstore/transfer/RemoteTransferContainer.java @@ -52,7 +52,7 @@ public class RemoteTransferContainer implements Closeable { private final String remoteFileName; private final boolean failTransferIfFileExists; private final WritePriority writePriority; - private final long expectedChecksum; + private final Long expectedChecksum; private final OffsetRangeInputStreamSupplier offsetRangeInputStreamSupplier; private final boolean isRemoteDataIntegritySupported; private final AtomicBoolean readBlock = new AtomicBoolean(); @@ -79,7 +79,7 @@ public RemoteTransferContainer( boolean failTransferIfFileExists, WritePriority writePriority, OffsetRangeInputStreamSupplier offsetRangeInputStreamSupplier, - long expectedChecksum, + Long expectedChecksum, boolean isRemoteDataIntegritySupported ) { this( @@ -115,7 +115,7 @@ public RemoteTransferContainer( boolean failTransferIfFileExists, WritePriority writePriority, OffsetRangeInputStreamSupplier offsetRangeInputStreamSupplier, - long expectedChecksum, + Long expectedChecksum, boolean isRemoteDataIntegritySupported, Map metadata ) { @@ -230,7 +230,7 @@ private LocalStreamSupplier getMultipartStreamSupplier( } private boolean isRemoteDataIntegrityCheckPossible() { - return isRemoteDataIntegritySupported; + return isRemoteDataIntegritySupported && Objects.nonNull(expectedChecksum); } private void finalizeUpload(boolean uploadSuccessful) throws IOException { @@ -238,7 +238,7 @@ private void finalizeUpload(boolean uploadSuccessful) throws IOException { return; } - if (uploadSuccessful) { + if (uploadSuccessful && Objects.nonNull(expectedChecksum)) { 
long actualChecksum = getActualChecksum(); if (actualChecksum != expectedChecksum) { throw new CorruptIndexException( diff --git a/server/src/main/java/org/opensearch/common/cache/CacheType.java b/server/src/main/java/org/opensearch/common/cache/CacheType.java index c5aeb7cd1fa40..eee6204ac5412 100644 --- a/server/src/main/java/org/opensearch/common/cache/CacheType.java +++ b/server/src/main/java/org/opensearch/common/cache/CacheType.java @@ -10,20 +10,52 @@ import org.opensearch.common.annotation.ExperimentalApi; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + /** * Cache types available within OpenSearch. */ @ExperimentalApi public enum CacheType { - INDICES_REQUEST_CACHE("indices.requests.cache"); + INDICES_REQUEST_CACHE("indices.requests.cache", "request_cache"); private final String settingPrefix; + private final String value; // The value displayed for this cache type in stats API responses + + private static final Map valuesMap; + static { + Map values = new HashMap<>(); + for (CacheType cacheType : values()) { + values.put(cacheType.value, cacheType); + } + valuesMap = Collections.unmodifiableMap(values); + } - CacheType(String settingPrefix) { + CacheType(String settingPrefix, String representation) { this.settingPrefix = settingPrefix; + this.value = representation; } public String getSettingPrefix() { return settingPrefix; } + + public String getValue() { + return value; + } + + public static CacheType getByValue(String value) { + CacheType result = valuesMap.get(value); + if (result == null) { + throw new IllegalArgumentException("No CacheType with value = " + value); + } + return result; + } + + public static Set allValues() { + return valuesMap.keySet(); + } } diff --git a/server/src/main/java/org/opensearch/common/cache/ICache.java b/server/src/main/java/org/opensearch/common/cache/ICache.java index 8d8964abf0829..f5dd644db6d6b 100644 --- 
a/server/src/main/java/org/opensearch/common/cache/ICache.java +++ b/server/src/main/java/org/opensearch/common/cache/ICache.java @@ -45,7 +45,13 @@ public interface ICache extends Closeable { void refresh(); - ImmutableCacheStatsHolder stats(); + // Return total stats only + default ImmutableCacheStatsHolder stats() { + return stats(null); + } + + // Return stats aggregated by the provided levels. If levels is null or an empty array, return total stats only. + ImmutableCacheStatsHolder stats(String[] levels); /** * Factory to create objects. diff --git a/server/src/main/java/org/opensearch/common/cache/service/CacheService.java b/server/src/main/java/org/opensearch/common/cache/service/CacheService.java index b6710e5e4b424..01da78ecec52e 100644 --- a/server/src/main/java/org/opensearch/common/cache/service/CacheService.java +++ b/server/src/main/java/org/opensearch/common/cache/service/CacheService.java @@ -8,10 +8,12 @@ package org.opensearch.common.cache.service; +import org.opensearch.action.admin.indices.stats.CommonStatsFlags; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.common.cache.CacheType; import org.opensearch.common.cache.ICache; import org.opensearch.common.cache.settings.CacheSettings; +import org.opensearch.common.cache.stats.ImmutableCacheStatsHolder; import org.opensearch.common.cache.store.OpenSearchOnHeapCache; import org.opensearch.common.cache.store.config.CacheConfig; import org.opensearch.common.settings.Setting; @@ -20,6 +22,8 @@ import java.util.HashMap; import java.util.Map; +import java.util.SortedMap; +import java.util.TreeMap; /** * Service responsible to create caches. 
@@ -62,4 +66,12 @@ public ICache createCache(CacheConfig config, CacheType cache cacheTypeMap.put(cacheType, iCache); return iCache; } + + public NodeCacheStats stats(CommonStatsFlags flags) { + final SortedMap statsMap = new TreeMap<>(); + for (CacheType type : cacheTypeMap.keySet()) { + statsMap.put(type, cacheTypeMap.get(type).stats(flags.getLevels())); + } + return new NodeCacheStats(statsMap, flags); + } } diff --git a/server/src/main/java/org/opensearch/common/cache/service/NodeCacheStats.java b/server/src/main/java/org/opensearch/common/cache/service/NodeCacheStats.java new file mode 100644 index 0000000000000..07c75eab34194 --- /dev/null +++ b/server/src/main/java/org/opensearch/common/cache/service/NodeCacheStats.java @@ -0,0 +1,80 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.common.cache.service; + +import org.opensearch.action.admin.indices.stats.CommonStatsFlags; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.common.cache.CacheType; +import org.opensearch.common.cache.stats.ImmutableCacheStatsHolder; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.common.io.stream.Writeable; +import org.opensearch.core.xcontent.ToXContentFragment; +import org.opensearch.core.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.Objects; +import java.util.SortedMap; + +/** + * A class creating XContent responses to cache stats API requests. 
+ * + * @opensearch.experimental + */ +@ExperimentalApi +public class NodeCacheStats implements ToXContentFragment, Writeable { + // Use SortedMap to force consistent ordering of caches in API responses + private final SortedMap statsByCache; + private final CommonStatsFlags flags; + + public NodeCacheStats(SortedMap statsByCache, CommonStatsFlags flags) { + this.statsByCache = statsByCache; + this.flags = flags; + } + + public NodeCacheStats(StreamInput in) throws IOException { + this.flags = new CommonStatsFlags(in); + this.statsByCache = in.readOrderedMap(i -> i.readEnum(CacheType.class), ImmutableCacheStatsHolder::new); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + flags.writeTo(out); + out.writeMap(statsByCache, StreamOutput::writeEnum, (o, immutableCacheStatsHolder) -> immutableCacheStatsHolder.writeTo(o)); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + for (CacheType type : statsByCache.keySet()) { + if (flags.getIncludeCaches().contains(type)) { + builder.startObject(type.getValue()); + statsByCache.get(type).toXContent(builder, params); + builder.endObject(); + } + } + return builder; + } + + @Override + public boolean equals(Object o) { + if (o == null) { + return false; + } + if (o.getClass() != NodeCacheStats.class) { + return false; + } + NodeCacheStats other = (NodeCacheStats) o; + return statsByCache.equals(other.statsByCache) && flags.getIncludeCaches().equals(other.flags.getIncludeCaches()); + } + + @Override + public int hashCode() { + return Objects.hash(statsByCache, flags.getIncludeCaches()); // hash exactly the components equals() compares, so equal instances share a hash code + } +} diff --git a/server/src/main/java/org/opensearch/common/cache/stats/CacheStats.java b/server/src/main/java/org/opensearch/common/cache/stats/CacheStats.java index b0cb66b56b70d..93fa1ff7fcddf 100644 --- a/server/src/main/java/org/opensearch/common/cache/stats/CacheStats.java +++ b/server/src/main/java/org/opensearch/common/cache/stats/CacheStats.java @@ -20,9 +20,9 
@@ public class CacheStats { CounterMetric misses; CounterMetric evictions; CounterMetric sizeInBytes; - CounterMetric entries; + CounterMetric items; - public CacheStats(long hits, long misses, long evictions, long sizeInBytes, long entries) { + public CacheStats(long hits, long misses, long evictions, long sizeInBytes, long items) { this.hits = new CounterMetric(); this.hits.inc(hits); this.misses = new CounterMetric(); @@ -31,8 +31,8 @@ public CacheStats(long hits, long misses, long evictions, long sizeInBytes, long this.evictions.inc(evictions); this.sizeInBytes = new CounterMetric(); this.sizeInBytes.inc(sizeInBytes); - this.entries = new CounterMetric(); - this.entries.inc(entries); + this.items = new CounterMetric(); + this.items.inc(items); } public CacheStats() { @@ -44,33 +44,33 @@ private void internalAdd(long otherHits, long otherMisses, long otherEvictions, this.misses.inc(otherMisses); this.evictions.inc(otherEvictions); this.sizeInBytes.inc(otherSizeInBytes); - this.entries.inc(otherEntries); + this.items.inc(otherEntries); } public void add(CacheStats other) { if (other == null) { return; } - internalAdd(other.getHits(), other.getMisses(), other.getEvictions(), other.getSizeInBytes(), other.getEntries()); + internalAdd(other.getHits(), other.getMisses(), other.getEvictions(), other.getSizeInBytes(), other.getItems()); } public void add(ImmutableCacheStats snapshot) { if (snapshot == null) { return; } - internalAdd(snapshot.getHits(), snapshot.getMisses(), snapshot.getEvictions(), snapshot.getSizeInBytes(), snapshot.getEntries()); + internalAdd(snapshot.getHits(), snapshot.getMisses(), snapshot.getEvictions(), snapshot.getSizeInBytes(), snapshot.getItems()); } public void subtract(ImmutableCacheStats other) { if (other == null) { return; } - internalAdd(-other.getHits(), -other.getMisses(), -other.getEvictions(), -other.getSizeInBytes(), -other.getEntries()); + internalAdd(-other.getHits(), -other.getMisses(), -other.getEvictions(), 
-other.getSizeInBytes(), -other.getItems()); } @Override public int hashCode() { - return Objects.hash(hits.count(), misses.count(), evictions.count(), sizeInBytes.count(), entries.count()); + return Objects.hash(hits.count(), misses.count(), evictions.count(), sizeInBytes.count(), items.count()); } public void incrementHits() { @@ -93,12 +93,12 @@ public void decrementSizeInBytes(long amount) { sizeInBytes.dec(amount); } - public void incrementEntries() { - entries.inc(); + public void incrementItems() { + items.inc(); } - public void decrementEntries() { - entries.dec(); + public void decrementItems() { + items.dec(); } public long getHits() { @@ -117,16 +117,16 @@ public long getSizeInBytes() { return sizeInBytes.count(); } - public long getEntries() { - return entries.count(); + public long getItems() { + return items.count(); } public void resetSizeAndEntries() { sizeInBytes = new CounterMetric(); - entries = new CounterMetric(); + items = new CounterMetric(); } public ImmutableCacheStats immutableSnapshot() { - return new ImmutableCacheStats(hits.count(), misses.count(), evictions.count(), sizeInBytes.count(), entries.count()); + return new ImmutableCacheStats(hits.count(), misses.count(), evictions.count(), sizeInBytes.count(), items.count()); } } diff --git a/server/src/main/java/org/opensearch/common/cache/stats/CacheStatsHolder.java b/server/src/main/java/org/opensearch/common/cache/stats/CacheStatsHolder.java index a8b7c27ef9e79..27cb7679efb0c 100644 --- a/server/src/main/java/org/opensearch/common/cache/stats/CacheStatsHolder.java +++ b/server/src/main/java/org/opensearch/common/cache/stats/CacheStatsHolder.java @@ -8,288 +8,31 @@ package org.opensearch.common.cache.stats; -import java.util.Collections; -import java.util.HashMap; import java.util.List; -import java.util.Map; -import java.util.TreeMap; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.locks.Lock; -import java.util.concurrent.locks.ReentrantLock; -import 
java.util.function.Consumer; /** - * A class ICache implementations use to internally keep track of their stats across multiple dimensions. - * Not intended to be exposed outside the cache; for this, caches use getImmutableCacheStatsHolder() to create an immutable - * copy of the current state of the stats. - * Currently, in the IRC, the stats tracked in a CacheStatsHolder will not appear for empty shards that have had no cache - * operations done on them yet. This might be changed in the future, by exposing a method to add empty nodes to the - * tree in CacheStatsHolder in the ICache interface. - * - * @opensearch.experimental + * An interface extended by DefaultCacheStatsHolder and NoopCacheStatsHolder. */ -public class CacheStatsHolder { - - // The list of permitted dimensions. Should be ordered from "outermost" to "innermost", as you would like to - // aggregate them in an API response. - private final List dimensionNames; - // A tree structure based on dimension values, which stores stats values in its leaf nodes. - // Non-leaf nodes have stats matching the sum of their children. - // We use a tree structure, rather than a map with concatenated keys, to save on memory usage. If there are many leaf - // nodes that share a parent, that parent's dimension value will only be stored once, not many times. - private final Node statsRoot; - // To avoid sync problems, obtain a lock before creating or removing nodes in the stats tree. - // No lock is needed to edit stats on existing nodes. - private final Lock lock = new ReentrantLock(); - - public CacheStatsHolder(List dimensionNames) { - this.dimensionNames = Collections.unmodifiableList(dimensionNames); - this.statsRoot = new Node("", true); // The root node has the empty string as its dimension value - } - - public List getDimensionNames() { - return dimensionNames; - } - - // For all these increment functions, the dimensions list comes from the key, and contains all dimensions present in dimensionNames. 
- // The order has to match the order given in dimensionNames. - public void incrementHits(List dimensionValues) { - internalIncrement(dimensionValues, Node::incrementHits, true); - } - - public void incrementMisses(List dimensionValues) { - internalIncrement(dimensionValues, Node::incrementMisses, true); - } - - public void incrementEvictions(List dimensionValues) { - internalIncrement(dimensionValues, Node::incrementEvictions, true); - } - - public void incrementSizeInBytes(List dimensionValues, long amountBytes) { - internalIncrement(dimensionValues, (node) -> node.incrementSizeInBytes(amountBytes), true); - } - - // For decrements, we should not create nodes if they are absent. This protects us from erroneously decrementing values for keys - // which have been entirely deleted, for example in an async removal listener. - public void decrementSizeInBytes(List dimensionValues, long amountBytes) { - internalIncrement(dimensionValues, (node) -> node.decrementSizeInBytes(amountBytes), false); - } - - public void incrementEntries(List dimensionValues) { - internalIncrement(dimensionValues, Node::incrementEntries, true); - } - - public void decrementEntries(List dimensionValues) { - internalIncrement(dimensionValues, Node::decrementEntries, false); - } - - /** - * Reset number of entries and memory size when all keys leave the cache, but don't reset hit/miss/eviction numbers. - * This is in line with the behavior of the existing API when caches are cleared. - */ - public void reset() { - resetHelper(statsRoot); - } - - private void resetHelper(Node current) { - current.resetSizeAndEntries(); - for (Node child : current.children.values()) { - resetHelper(child); - } - } - - public long count() { - // Include this here so caches don't have to create an entire CacheStats object to run count(). 
- return statsRoot.getEntries(); - } - - private void internalIncrement(List dimensionValues, Consumer adder, boolean createNodesIfAbsent) { - assert dimensionValues.size() == dimensionNames.size(); - // First try to increment without creating nodes - boolean didIncrement = internalIncrementHelper(dimensionValues, statsRoot, 0, adder, false); - // If we failed to increment, because nodes had to be created, obtain the lock and run again while creating nodes if needed - if (!didIncrement && createNodesIfAbsent) { - try { - lock.lock(); - internalIncrementHelper(dimensionValues, statsRoot, 0, adder, true); - } finally { - lock.unlock(); - } - } - } - - /** - * Use the incrementer function to increment/decrement a value in the stats for a set of dimensions. - * If createNodesIfAbsent is true, and there is no stats for this set of dimensions, create one. - * Returns true if the increment was applied, false if not. - */ - private boolean internalIncrementHelper( - List dimensionValues, - Node node, - int depth, // Pass in the depth to avoid having to slice the list for each node. - Consumer adder, - boolean createNodesIfAbsent - ) { - if (depth == dimensionValues.size()) { - // This is the leaf node we are trying to reach - adder.accept(node); - return true; - } - - Node child = node.getChild(dimensionValues.get(depth)); - if (child == null) { - if (createNodesIfAbsent) { - boolean createMapInChild = depth < dimensionValues.size() - 1; - child = node.createChild(dimensionValues.get(depth), createMapInChild); - } else { - return false; - } - } - if (internalIncrementHelper(dimensionValues, child, depth + 1, adder, createNodesIfAbsent)) { - // Function returns true if the next node down was incremented - adder.accept(node); - return true; - } - return false; - } - - /** - * Produce an immutable version of these stats. 
- */ - public ImmutableCacheStatsHolder getImmutableCacheStatsHolder() { - return new ImmutableCacheStatsHolder(statsRoot.snapshot(), dimensionNames); - } - - public void removeDimensions(List dimensionValues) { - assert dimensionValues.size() == dimensionNames.size() : "Must specify a value for every dimension when removing from StatsHolder"; - // As we are removing nodes from the tree, obtain the lock - lock.lock(); - try { - removeDimensionsHelper(dimensionValues, statsRoot, 0); - } finally { - lock.unlock(); - } - } - - // Returns a CacheStatsCounterSnapshot object for the stats to decrement if the removal happened, null otherwise. - private ImmutableCacheStats removeDimensionsHelper(List dimensionValues, Node node, int depth) { - if (depth == dimensionValues.size()) { - // Pass up a snapshot of the original stats to avoid issues when the original is decremented by other fn invocations - return node.getImmutableStats(); - } - Node child = node.getChild(dimensionValues.get(depth)); - if (child == null) { - return null; - } - ImmutableCacheStats statsToDecrement = removeDimensionsHelper(dimensionValues, child, depth + 1); - if (statsToDecrement != null) { - // The removal took place, decrement values and remove this node from its parent if it's now empty - node.decrementBySnapshot(statsToDecrement); - if (child.getChildren().isEmpty()) { - node.children.remove(child.getDimensionValue()); - } - } - return statsToDecrement; - } - - // pkg-private for testing - Node getStatsRoot() { - return statsRoot; - } - - static class Node { - private final String dimensionValue; - // Map from dimensionValue to the DimensionNode for that dimension value. - final Map children; - // The stats for this node. If a leaf node, corresponds to the stats for this combination of dimensions; if not, - // contains the sum of its children's stats. 
- private CacheStats stats; - - // Used for leaf nodes to avoid allocating many unnecessary maps - private static final Map EMPTY_CHILDREN_MAP = new HashMap<>(); - - Node(String dimensionValue, boolean createChildrenMap) { - this.dimensionValue = dimensionValue; - if (createChildrenMap) { - this.children = new ConcurrentHashMap<>(); - } else { - this.children = EMPTY_CHILDREN_MAP; - } - this.stats = new CacheStats(); - } - - public String getDimensionValue() { - return dimensionValue; - } - - protected Map getChildren() { - // We can safely iterate over ConcurrentHashMap without worrying about thread issues. - return children; - } - - // Functions for modifying internal CacheStatsCounter without callers having to be aware of CacheStatsCounter - - void incrementHits() { - this.stats.incrementHits(); - } - - void incrementMisses() { - this.stats.incrementMisses(); - } - - void incrementEvictions() { - this.stats.incrementEvictions(); - } - - void incrementSizeInBytes(long amountBytes) { - this.stats.incrementSizeInBytes(amountBytes); - } +public interface CacheStatsHolder { + void incrementHits(List dimensionValues); - void decrementSizeInBytes(long amountBytes) { - this.stats.decrementSizeInBytes(amountBytes); - } + void incrementMisses(List dimensionValues); - void incrementEntries() { - this.stats.incrementEntries(); - } + void incrementEvictions(List dimensionValues); - void decrementEntries() { - this.stats.decrementEntries(); - } + void incrementSizeInBytes(List dimensionValues, long amountBytes); - long getEntries() { - return this.stats.getEntries(); - } + void decrementSizeInBytes(List dimensionValues, long amountBytes); - ImmutableCacheStats getImmutableStats() { - return this.stats.immutableSnapshot(); - } + void incrementItems(List dimensionValues); - void decrementBySnapshot(ImmutableCacheStats snapshot) { - this.stats.subtract(snapshot); - } + void decrementItems(List dimensionValues); - void resetSizeAndEntries() { - this.stats.resetSizeAndEntries(); - 
} + void reset(); - Node getChild(String dimensionValue) { - return children.get(dimensionValue); - } + long count(); - Node createChild(String dimensionValue, boolean createMapInChild) { - return children.computeIfAbsent(dimensionValue, (key) -> new Node(dimensionValue, createMapInChild)); - } + void removeDimensions(List dimensionValues); - ImmutableCacheStatsHolder.Node snapshot() { - TreeMap snapshotChildren = null; - if (!children.isEmpty()) { - snapshotChildren = new TreeMap<>(); - for (Node child : children.values()) { - snapshotChildren.put(child.getDimensionValue(), child.snapshot()); - } - } - return new ImmutableCacheStatsHolder.Node(dimensionValue, snapshotChildren, getImmutableStats()); - } - } + ImmutableCacheStatsHolder getImmutableCacheStatsHolder(String[] levels); } diff --git a/server/src/main/java/org/opensearch/common/cache/stats/DefaultCacheStatsHolder.java b/server/src/main/java/org/opensearch/common/cache/stats/DefaultCacheStatsHolder.java new file mode 100644 index 0000000000000..ea92c8e81b8f0 --- /dev/null +++ b/server/src/main/java/org/opensearch/common/cache/stats/DefaultCacheStatsHolder.java @@ -0,0 +1,314 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.common.cache.stats; + +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; +import java.util.function.Consumer; + +/** + * A class ICache implementations use to internally keep track of their stats across multiple dimensions. 
+ * Not intended to be exposed outside the cache; for this, caches use getImmutableCacheStatsHolder() to create an immutable + * copy of the current state of the stats. + * Currently, in the IRC, the stats tracked in a CacheStatsHolder will not appear for empty shards that have had no cache + * operations done on them yet. This might be changed in the future, by exposing a method to add empty nodes to the + * tree in CacheStatsHolder in the ICache interface. + * + * @opensearch.experimental + */ +public class DefaultCacheStatsHolder implements CacheStatsHolder { + + // The list of permitted dimensions. Should be ordered from "outermost" to "innermost", as you would like to + // aggregate them in an API response. + protected final List dimensionNames; + // A tree structure based on dimension values, which stores stats values in its leaf nodes. + // Non-leaf nodes have stats matching the sum of their children. + // We use a tree structure, rather than a map with concatenated keys, to save on memory usage. If there are many leaf + // nodes that share a parent, that parent's dimension value will only be stored once, not many times. + private final Node statsRoot; + // To avoid sync problems, obtain a lock before creating or removing nodes in the stats tree. + // No lock is needed to edit stats on existing nodes. + private final Lock lock = new ReentrantLock(); + // The name of the cache type using these stats + private final String storeName; + + /** + * @param dimensionNames the permitted dimensions, ordered from outermost to innermost. The list is wrapped in an + * unmodifiable view rather than copied. + * @param storeName the name of the cache type using these stats + */ + public DefaultCacheStatsHolder(List dimensionNames, String storeName) { + this.dimensionNames = Collections.unmodifiableList(dimensionNames); + this.storeName = storeName; + this.statsRoot = new Node("", true); // The root node has the empty string as its dimension value + } + + public List getDimensionNames() { + return dimensionNames; + } + + // For all these increment functions, the dimensions list comes from the key, and contains all dimensions present in dimensionNames.
+ // The order has to match the order given in dimensionNames. + @Override + public void incrementHits(List dimensionValues) { + internalIncrement(dimensionValues, Node::incrementHits, true); + } + + @Override + public void incrementMisses(List dimensionValues) { + internalIncrement(dimensionValues, Node::incrementMisses, true); + } + + @Override + public void incrementEvictions(List dimensionValues) { + internalIncrement(dimensionValues, Node::incrementEvictions, true); + } + + @Override + public void incrementSizeInBytes(List dimensionValues, long amountBytes) { + internalIncrement(dimensionValues, (node) -> node.incrementSizeInBytes(amountBytes), true); + } + + // For decrements, we should not create nodes if they are absent. This protects us from erroneously decrementing values for keys + // which have been entirely deleted, for example in an async removal listener. + @Override + public void decrementSizeInBytes(List dimensionValues, long amountBytes) { + internalIncrement(dimensionValues, (node) -> node.decrementSizeInBytes(amountBytes), false); + } + + @Override + public void incrementItems(List dimensionValues) { + internalIncrement(dimensionValues, Node::incrementItems, true); + } + + @Override + public void decrementItems(List dimensionValues) { + internalIncrement(dimensionValues, Node::decrementItems, false); + } + + /** + * Reset number of entries and memory size when all keys leave the cache, but don't reset hit/miss/eviction numbers. + * This is in line with the behavior of the existing API when caches are cleared. + */ + @Override + public void reset() { + resetHelper(statsRoot); + } + + // Recursively resets the size and item counts on the given node and on all of its descendants + private void resetHelper(Node current) { + current.resetSizeAndEntries(); + for (Node child : current.children.values()) { + resetHelper(child); + } + } + + @Override + public long count() { + // Include this here so caches don't have to create an entire CacheStats object to run count(). 
+ return statsRoot.getEntries(); + } + + /** + * Apply adder to the node for dimensionValues and to each of its ancestors up to the root. + * A lock-free attempt is made first. Only if that attempt finds a node missing, and createNodesIfAbsent is true, + * is the lock taken and the update retried with node creation enabled. + */ + protected void internalIncrement(List dimensionValues, Consumer adder, boolean createNodesIfAbsent) { + assert dimensionValues.size() == dimensionNames.size(); + // First try to increment without creating nodes + boolean didIncrement = internalIncrementHelper(dimensionValues, statsRoot, 0, adder, false); + // If we failed to increment, because nodes had to be created, obtain the lock and run again while creating nodes if needed + if (!didIncrement && createNodesIfAbsent) { + // Acquire the lock before entering the try block, so unlock() in finally only runs once lock() has succeeded + lock.lock(); + try { + internalIncrementHelper(dimensionValues, statsRoot, 0, adder, true); + } finally { + lock.unlock(); + } + } + } + + /** + * Use the incrementer function to increment/decrement a value in the stats for a set of dimensions. + * If createNodesIfAbsent is true, and there are no stats for this set of dimensions, create them. + * Returns true if the increment was applied, false if not. + */ + private boolean internalIncrementHelper( + List dimensionValues, + Node node, + int depth, // Pass in the depth to avoid having to slice the list for each node. + Consumer adder, + boolean createNodesIfAbsent + ) { + if (depth == dimensionValues.size()) { + // This is the leaf node we are trying to reach + adder.accept(node); + return true; + } + + Node child = node.getChild(dimensionValues.get(depth)); + if (child == null) { + if (createNodesIfAbsent) { + boolean createMapInChild = depth < dimensionValues.size() - 1; + child = node.createChild(dimensionValues.get(depth), createMapInChild); + } else { + return false; + } + } + if (internalIncrementHelper(dimensionValues, child, depth + 1, adder, createNodesIfAbsent)) { + // Function returns true if the next node down was incremented + adder.accept(node); + return true; + } + return false; + } + + /** + * Produce an immutable version of these stats, aggregated according to levels. + * If levels is null, do not aggregate and return an immutable version of the original tree. 
+ */ + @Override + public ImmutableCacheStatsHolder getImmutableCacheStatsHolder(String[] levels) { + // NOTE(review): a null levels array is replaced with an empty array before it reaches the ImmutableCacheStatsHolder + // constructor, so that constructor never receives null from here - confirm the behavior for null matches the Javadoc. + String[] nonNullLevels = Objects.requireNonNullElseGet(levels, () -> new String[0]); + return new ImmutableCacheStatsHolder(this.statsRoot, nonNullLevels, dimensionNames, storeName); + } + + @Override + public void removeDimensions(List dimensionValues) { + assert dimensionValues.size() == dimensionNames.size() : "Must specify a value for every dimension when removing from StatsHolder"; + // As we are removing nodes from the tree, obtain the lock + lock.lock(); + try { + removeDimensionsHelper(dimensionValues, statsRoot, 0); + } finally { + lock.unlock(); + } + } + + // Returns an ImmutableCacheStats snapshot of the stats to decrement if the removal happened, null otherwise. + private ImmutableCacheStats removeDimensionsHelper(List dimensionValues, Node node, int depth) { + if (depth == dimensionValues.size()) { + // Pass up a snapshot of the original stats to avoid issues when the original is decremented by other fn invocations + return node.getImmutableStats(); + } + Node child = node.getChild(dimensionValues.get(depth)); + if (child == null) { + return null; + } + ImmutableCacheStats statsToDecrement = removeDimensionsHelper(dimensionValues, child, depth + 1); + if (statsToDecrement != null) { + // The removal took place, decrement values and remove this node from its parent if it's now empty + node.decrementBySnapshot(statsToDecrement); + if (child.getChildren().isEmpty()) { + node.children.remove(child.getDimensionValue()); + } + } + return statsToDecrement; + } + + // pkg-private for testing + Node getStatsRoot() { + return statsRoot; + } + + /** + * Nodes that make up the tree in the stats holder. + */ + protected static class Node { + private final String dimensionValue; + // Map from dimensionValue to the child Node for that dimension value. + final Map children; + // The stats for this node.
If a leaf node, corresponds to the stats for this combination of dimensions; if not, + // contains the sum of its children's stats. + private CacheStats stats; + + // Used for leaf nodes to avoid allocating many unnecessary maps. + // This is a single shared instance, and isAtLowestLevel() relies on its identity. + // NOTE(review): it is a mutable HashMap, so nothing must ever add children to a leaf node. + private static final Map EMPTY_CHILDREN_MAP = new HashMap<>(); + + Node(String dimensionValue, boolean createChildrenMap) { + this.dimensionValue = dimensionValue; + if (createChildrenMap) { + this.children = new ConcurrentHashMap<>(); + } else { + this.children = EMPTY_CHILDREN_MAP; + } + this.stats = new CacheStats(); + } + + public String getDimensionValue() { + return dimensionValue; + } + + protected Map getChildren() { + // We can safely iterate over ConcurrentHashMap without worrying about thread issues. + return children; + } + + // Functions for modifying the internal CacheStats without callers having to be aware of CacheStats + + public void incrementHits() { + this.stats.incrementHits(); + } + + public void incrementMisses() { + this.stats.incrementMisses(); + } + + public void incrementEvictions() { + this.stats.incrementEvictions(); + } + + public void incrementSizeInBytes(long amountBytes) { + this.stats.incrementSizeInBytes(amountBytes); + } + + public void decrementSizeInBytes(long amountBytes) { + this.stats.decrementSizeInBytes(amountBytes); + } + + void incrementItems() { + this.stats.incrementItems(); + } + + void decrementItems() { + this.stats.decrementItems(); + } + + // Returns the item count of this node's stats; the method name still uses the earlier "entries" terminology + long getEntries() { + return this.stats.getItems(); + } + + ImmutableCacheStats getImmutableStats() { + return this.stats.immutableSnapshot(); + } + + void decrementBySnapshot(ImmutableCacheStats snapshot) { + this.stats.subtract(snapshot); + } + + void resetSizeAndEntries() { + this.stats.resetSizeAndEntries(); + } + + Node getChild(String dimensionValue) { + return children.get(dimensionValue); + } + + Node createChild(String dimensionValue, boolean createMapInChild) { + return children.computeIfAbsent(dimensionValue, (key) -> new 
Node(dimensionValue, createMapInChild)); + } + + /** + * Return whether this is a leaf node which is at the lowest level of the tree. + * Does not return true if this is a node at a higher level whose children are still being constructed. + * @return if this is a leaf node at the lowest level + */ + public boolean isAtLowestLevel() { + // Compare by reference (identity) to the shared empty children map, rather than calling isEmpty(), to ensure we + // don't get false positives for nodes which are in the process of having children added + return children == EMPTY_CHILDREN_MAP; + } + } +} diff --git a/server/src/main/java/org/opensearch/common/cache/stats/ImmutableCacheStats.java b/server/src/main/java/org/opensearch/common/cache/stats/ImmutableCacheStats.java index 7549490fd6b74..dbd78a2584f9c 100644 --- a/server/src/main/java/org/opensearch/common/cache/stats/ImmutableCacheStats.java +++ b/server/src/main/java/org/opensearch/common/cache/stats/ImmutableCacheStats.java @@ -12,6 +12,9 @@ import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.common.io.stream.Writeable; +import org.opensearch.core.common.unit.ByteSizeValue; +import org.opensearch.core.xcontent.ToXContent; +import org.opensearch.core.xcontent.XContentBuilder; import java.io.IOException; import java.util.Objects; @@ -22,19 +25,19 @@ * @opensearch.experimental */ @ExperimentalApi -public class ImmutableCacheStats implements Writeable { // TODO: Make this extend ToXContent (in API PR) +public class ImmutableCacheStats implements Writeable, ToXContent { private final long hits; private final long misses; private final long evictions; private final long sizeInBytes; - private final long entries; + private final long items; - public ImmutableCacheStats(long hits, long misses, long evictions, long sizeInBytes, long entries) { + public ImmutableCacheStats(long hits, long misses, long evictions, long sizeInBytes, long items) { this.hits = hits; this.misses = misses; 
this.evictions = evictions; this.sizeInBytes = sizeInBytes; - this.entries = entries; + this.items = items; } public ImmutableCacheStats(StreamInput in) throws IOException { @@ -47,7 +50,7 @@ public static ImmutableCacheStats addSnapshots(ImmutableCacheStats s1, Immutable s1.misses + s2.misses, s1.evictions + s2.evictions, s1.sizeInBytes + s2.sizeInBytes, - s1.entries + s2.entries + s1.items + s2.items ); } @@ -67,8 +70,8 @@ public long getSizeInBytes() { return sizeInBytes; } - public long getEntries() { - return entries; + public long getItems() { + return items; } @Override @@ -77,7 +80,7 @@ public void writeTo(StreamOutput out) throws IOException { out.writeVLong(misses); out.writeVLong(evictions); out.writeVLong(sizeInBytes); - out.writeVLong(entries); + out.writeVLong(items); } @Override @@ -93,11 +96,34 @@ public boolean equals(Object o) { && (misses == other.misses) && (evictions == other.evictions) && (sizeInBytes == other.sizeInBytes) - && (entries == other.entries); + && (items == other.items); } @Override public int hashCode() { - return Objects.hash(hits, misses, evictions, sizeInBytes, entries); + return Objects.hash(hits, misses, evictions, sizeInBytes, items); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + // We don't write the header in CacheStatsResponse's toXContent, because it doesn't know the name of aggregation it's part of + builder.humanReadableField(Fields.SIZE_IN_BYTES, Fields.SIZE, new ByteSizeValue(sizeInBytes)); + builder.field(Fields.EVICTIONS, evictions); + builder.field(Fields.HIT_COUNT, hits); + builder.field(Fields.MISS_COUNT, misses); + builder.field(Fields.ITEM_COUNT, items); + return builder; + } + + /** + * Field names used to write the values in this object to XContent. 
+ */ + public static final class Fields { + public static final String SIZE = "size"; + public static final String SIZE_IN_BYTES = "size_in_bytes"; + public static final String EVICTIONS = "evictions"; + public static final String HIT_COUNT = "hit_count"; + public static final String MISS_COUNT = "miss_count"; + public static final String ITEM_COUNT = "item_count"; } } diff --git a/server/src/main/java/org/opensearch/common/cache/stats/ImmutableCacheStatsHolder.java b/server/src/main/java/org/opensearch/common/cache/stats/ImmutableCacheStatsHolder.java index 12e325046d83b..92383626236b8 100644 --- a/server/src/main/java/org/opensearch/common/cache/stats/ImmutableCacheStatsHolder.java +++ b/server/src/main/java/org/opensearch/common/cache/stats/ImmutableCacheStatsHolder.java @@ -9,11 +9,21 @@ package org.opensearch.common.cache.stats; import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.common.io.stream.Writeable; +import org.opensearch.core.xcontent.ToXContent; +import org.opensearch.core.xcontent.XContentBuilder; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; -import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Objects; +import java.util.SortedMap; +import java.util.Stack; import java.util.TreeMap; /** @@ -23,15 +33,102 @@ */ @ExperimentalApi -public class ImmutableCacheStatsHolder { // TODO: extends Writeable, ToXContent - // An immutable snapshot of a stats within a CacheStatsHolder, containing all the stats maintained by the cache. +public class ImmutableCacheStatsHolder implements Writeable, ToXContent { + // Root node of immutable snapshot of stats within a CacheStatsHolder, containing all the stats maintained by the cache. // Pkg-private for testing. 
final Node statsRoot; + // The dimension names for each level in this tree. final List dimensionNames; + // The name of the cache type producing these stats. Returned in API response. + final String storeName; + public static String STORE_NAME_FIELD = "store_name"; - public ImmutableCacheStatsHolder(Node statsRoot, List dimensionNames) { - this.statsRoot = statsRoot; - this.dimensionNames = dimensionNames; + // Values used for serializing/deserializing the tree. + private static final String SERIALIZATION_CHILDREN_OPEN_BRACKET = "<"; + private static final String SERIALIZATION_CHILDREN_CLOSE_BRACKET = ">"; + private static final String SERIALIZATION_BEGIN_NODE = "_"; + private static final String SERIALIZATION_DONE = "end"; + + ImmutableCacheStatsHolder( + DefaultCacheStatsHolder.Node originalStatsRoot, + String[] levels, + List originalDimensionNames, + String storeName + ) { + // Aggregate from the original CacheStatsHolder according to the levels passed in. + // The dimension names for this immutable snapshot should reflect the levels we aggregate in the snapshot + this.dimensionNames = filterLevels(levels, originalDimensionNames); + this.storeName = storeName; + this.statsRoot = aggregateByLevels(originalStatsRoot, originalDimensionNames); + makeNodeUnmodifiable(statsRoot); + } + + public ImmutableCacheStatsHolder(StreamInput in) throws IOException { + this.dimensionNames = List.of(in.readStringArray()); + this.storeName = in.readString(); + this.statsRoot = deserializeTree(in); + makeNodeUnmodifiable(statsRoot); + } + + public void writeTo(StreamOutput out) throws IOException { + out.writeStringArray(dimensionNames.toArray(new String[0])); + out.writeString(storeName); + writeNode(statsRoot, out); + out.writeString(SERIALIZATION_DONE); + } + + private void writeNode(Node node, StreamOutput out) throws IOException { + out.writeString(SERIALIZATION_BEGIN_NODE); + out.writeString(node.dimensionValue); + out.writeBoolean(node.children.isEmpty()); // Write whether 
this is a leaf node + node.stats.writeTo(out); + + out.writeString(SERIALIZATION_CHILDREN_OPEN_BRACKET); + for (Map.Entry entry : node.children.entrySet()) { + out.writeString(entry.getKey()); + writeNode(entry.getValue(), out); + } + out.writeString(SERIALIZATION_CHILDREN_CLOSE_BRACKET); + } + + private Node deserializeTree(StreamInput in) throws IOException { + final Stack stack = new Stack<>(); + in.readString(); // Read and discard SERIALIZATION_BEGIN_NODE for the root node + Node statsRoot = readSingleNode(in); + Node current = statsRoot; + stack.push(statsRoot); + String nextSymbol = in.readString(); + while (!nextSymbol.equals(SERIALIZATION_DONE)) { + switch (nextSymbol) { + case SERIALIZATION_CHILDREN_OPEN_BRACKET: + stack.push(current); + break; + case SERIALIZATION_CHILDREN_CLOSE_BRACKET: + stack.pop(); + break; + case SERIALIZATION_BEGIN_NODE: + current = readSingleNode(in); + stack.peek().children.put(current.dimensionValue, current); + } + nextSymbol = in.readString(); + } + return statsRoot; + } + + private Node readSingleNode(StreamInput in) throws IOException { + String dimensionValue = in.readString(); + boolean isLeafNode = in.readBoolean(); + ImmutableCacheStats stats = new ImmutableCacheStats(in); + return new Node(dimensionValue, isLeafNode, stats); + } + + private void makeNodeUnmodifiable(Node node) { + if (!node.children.isEmpty()) { + node.children = Collections.unmodifiableSortedMap(node.children); + } + for (Node child : node.children.values()) { + makeNodeUnmodifiable(child); + } } public ImmutableCacheStats getTotalStats() { @@ -54,8 +151,8 @@ public long getTotalSizeInBytes() { return getTotalStats().getSizeInBytes(); } - public long getTotalEntries() { - return getTotalStats().getEntries(); + public long getTotalItems() { + return getTotalStats().getItems(); } public ImmutableCacheStats getStatsForDimensionValues(List dimensionValues) { @@ -69,23 +166,179 @@ public ImmutableCacheStats getStatsForDimensionValues(List dimensionValu 
return current.stats; } - // A similar class to CacheStatsHolder.Node, which uses an ordered TreeMap and holds immutable CacheStatsSnapshot as its stats. + /** + * Builds and returns a new tree holding the stats of the original tree aggregated by this holder's dimensionNames. + * The new tree only has levels for dimensions contained in dimensionNames; other levels are skipped and same-valued nodes under one parent are summed. + * dimensionNames must be assigned before this is called and be ordered like originalDimensionNames, as in the output of filterLevels(). + */ + Node aggregateByLevels(DefaultCacheStatsHolder.Node originalStatsRoot, List originalDimensionNames) { + Node newRoot = new Node("", false, originalStatsRoot.getImmutableStats()); + for (DefaultCacheStatsHolder.Node child : originalStatsRoot.children.values()) { + aggregateByLevelsHelper(newRoot, child, originalDimensionNames, 0); + } + return newRoot; + } + + /** + * Copies one node of the original tree, and recursively its descendants, into the new tree. Because several original nodes may + * collapse into a single new node with the same dimension value, the children map of a parent cannot be completed before the parent exists. + * For this reason, in this function we have to build the new tree top down rather than bottom up. + * We use private methods allowing us to add children to/increment the stats for an existing node. + * This should be ok because the resulting tree is unmodifiable after creation in the constructor. + * + * @param allDimensions the list of all dimensions present in the original CacheStatsHolder which produced + * the CacheStatsHolder.Node object we are traversing; depth is the index in this list of the level of currentInOriginalTree. + */ + private void aggregateByLevelsHelper( + Node parentInNewTree, + DefaultCacheStatsHolder.Node currentInOriginalTree, + List allDimensions, + int depth + ) { + if (dimensionNames.contains(allDimensions.get(depth))) { + // If this node is in a level we want to aggregate, create a new dimension node with the same value and stats, and connect it to + // the last parent node in the new tree. If it already exists, increment it instead.
+ String dimensionValue = currentInOriginalTree.getDimensionValue(); + Node nodeInNewTree = parentInNewTree.children.get(dimensionValue); + if (nodeInNewTree == null) { + // Create new node with stats matching the node from the original tree + int indexOfLastLevel = allDimensions.indexOf(dimensionNames.get(dimensionNames.size() - 1)); + boolean isLeafNode = depth == indexOfLastLevel; // If this is the last level we aggregate, the new node should be a leaf + // node + nodeInNewTree = new Node(dimensionValue, isLeafNode, currentInOriginalTree.getImmutableStats()); + parentInNewTree.addChild(dimensionValue, nodeInNewTree); + } else { + // Otherwise increment existing stats + nodeInNewTree.incrementStats(currentInOriginalTree.getImmutableStats()); + } + // Finally set the parent node to be this node for the next callers of this function + parentInNewTree = nodeInNewTree; + } + + for (Map.Entry childEntry : currentInOriginalTree.children.entrySet()) { + DefaultCacheStatsHolder.Node child = childEntry.getValue(); + aggregateByLevelsHelper(parentInNewTree, child, allDimensions, depth + 1); + } + } + + /** + * Filters out levels that aren't in dimensionNames, and orders the resulting list to match the order in dimensionNames. + * Unrecognized levels are ignored. 
+ */ + private List filterLevels(String[] levels, List originalDimensionNames) { + if (levels == null) { + return originalDimensionNames; + } + List levelsList = Arrays.asList(levels); + List filtered = new ArrayList<>(); + for (String dimensionName : originalDimensionNames) { + if (levelsList.contains(dimensionName)) { + filtered.add(dimensionName); + } + } + return filtered; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + // Always show total stats, regardless of levels + getTotalStats().toXContent(builder, params); + + List filteredLevels = filterLevels(getLevels(params), dimensionNames); + assert filteredLevels.equals(dimensionNames); + if (!filteredLevels.isEmpty()) { + // Depth -1 corresponds to the dummy root node + toXContentForLevels(-1, statsRoot, builder, params); + } + + // Also add the store name for the cache that produced the stats + builder.field(STORE_NAME_FIELD, storeName); + return builder; + } + + private void toXContentForLevels(int depth, Node current, XContentBuilder builder, Params params) throws IOException { + if (depth >= 0) { + builder.startObject(current.dimensionValue); + } + + if (depth == dimensionNames.size() - 1) { + // This is a leaf node + current.getStats().toXContent(builder, params); + } else { + builder.startObject(dimensionNames.get(depth + 1)); + for (Node nextNode : current.children.values()) { + toXContentForLevels(depth + 1, nextNode, builder, params); + } + builder.endObject(); + } + + if (depth >= 0) { + builder.endObject(); + } + } + + private String[] getLevels(Params params) { + String levels = params.param("level"); + if (levels == null) { + return null; + } + return levels.split(","); + } + + @Override + public boolean equals(Object o) { + if (o == null || o.getClass() != ImmutableCacheStatsHolder.class) { + return false; + } + ImmutableCacheStatsHolder other = (ImmutableCacheStatsHolder) o; + if (!dimensionNames.equals(other.dimensionNames) || 
!storeName.equals(other.storeName)) { + return false; + } + return equalsHelper(statsRoot, other.getStatsRoot()); + } + + private boolean equalsHelper(Node thisNode, Node otherNode) { + // A node without a counterpart, or with different stats, makes the two trees unequal. + if (otherNode == null || !thisNode.getStats().equals(otherNode.getStats())) { + return false; + } + // Child keys must match in both directions; iterating only this node's children would miss extra children of otherNode. + if (!thisNode.children.keySet().equals(otherNode.children.keySet())) { + return false; + } + for (String childValue : thisNode.children.keySet()) { + if (!equalsHelper(thisNode.children.get(childValue), otherNode.children.get(childValue))) { + return false; + } + } + return true; + } + + @Override + public int hashCode() { + // Hashing the total stats (held by the root node) and the dimension names is sufficient and stays consistent with equals() + return Objects.hash(statsRoot.stats, dimensionNames); + } + + // A similar class to CacheStatsHolder.Node, which uses a SortedMap and holds immutable CacheStatsSnapshot as its stats. static class Node { private final String dimensionValue; - final Map children; // Map from dimensionValue to the Node for that dimension value + // Map from dimensionValue to the Node for that dimension value. Not final so we can set it to be unmodifiable before we are done in + // the constructor. + SortedMap children; // The stats for this node. If a leaf node, corresponds to the stats for this combination of dimensions; if not, // contains the sum of its children's stats.
- private final ImmutableCacheStats stats; - private static final Map EMPTY_CHILDREN_MAP = new HashMap<>(); + private ImmutableCacheStats stats; + private static final SortedMap EMPTY_CHILDREN_MAP = Collections.unmodifiableSortedMap(new TreeMap<>()); - Node(String dimensionValue, TreeMap snapshotChildren, ImmutableCacheStats stats) { + private Node(String dimensionValue, boolean isLeafNode, ImmutableCacheStats stats) { this.dimensionValue = dimensionValue; this.stats = stats; - if (snapshotChildren == null) { + if (isLeafNode) { this.children = EMPTY_CHILDREN_MAP; } else { - this.children = Collections.unmodifiableMap(snapshotChildren); + this.children = new TreeMap<>(); } } @@ -100,12 +353,18 @@ public ImmutableCacheStats getStats() { public String getDimensionValue() { return dimensionValue; } + + private void addChild(String dimensionValue, Node child) { + this.children.putIfAbsent(dimensionValue, child); + } + + private void incrementStats(ImmutableCacheStats toIncrement) { + stats = ImmutableCacheStats.addSnapshots(stats, toIncrement); + } } // pkg-private for testing Node getStatsRoot() { return statsRoot; } - - // TODO (in API PR): Produce XContent based on aggregateByLevels() } diff --git a/server/src/main/java/org/opensearch/common/cache/stats/NoopCacheStatsHolder.java b/server/src/main/java/org/opensearch/common/cache/stats/NoopCacheStatsHolder.java new file mode 100644 index 0000000000000..9cb69a3a0a365 --- /dev/null +++ b/server/src/main/java/org/opensearch/common/cache/stats/NoopCacheStatsHolder.java @@ -0,0 +1,69 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.common.cache.stats; + +import java.util.List; + +/** + * A dummy version of CacheStatsHolder, which cache implementations use when FeatureFlags.PLUGGABLE_CACHES is false. 
+ * Returns all-zero stats when calling getImmutableCacheStatsHolder(). Always returns 0 for count(). + * A singleton instance is used for memory purposes. + */ +public class NoopCacheStatsHolder implements CacheStatsHolder { + private static final String dummyStoreName = "noop_store"; + private static final NoopCacheStatsHolder singletonInstance = new NoopCacheStatsHolder(); + private static final ImmutableCacheStatsHolder immutableCacheStatsHolder; + static { + DefaultCacheStatsHolder.Node dummyNode = new DefaultCacheStatsHolder.Node("", false); + immutableCacheStatsHolder = new ImmutableCacheStatsHolder(dummyNode, new String[0], List.of(), dummyStoreName); + } + + private NoopCacheStatsHolder() {} + + public static NoopCacheStatsHolder getInstance() { + return singletonInstance; + } + + @Override + public void incrementHits(List dimensionValues) {} + + @Override + public void incrementMisses(List dimensionValues) {} + + @Override + public void incrementEvictions(List dimensionValues) {} + + @Override + public void incrementSizeInBytes(List dimensionValues, long amountBytes) {} + + @Override + public void decrementSizeInBytes(List dimensionValues, long amountBytes) {} + + @Override + public void incrementItems(List dimensionValues) {} + + @Override + public void decrementItems(List dimensionValues) {} + + @Override + public void reset() {} + + @Override + public long count() { + return 0; + } + + @Override + public void removeDimensions(List dimensionValues) {} + + @Override + public ImmutableCacheStatsHolder getImmutableCacheStatsHolder(String[] levels) { + return immutableCacheStatsHolder; + } +} diff --git a/server/src/main/java/org/opensearch/common/cache/store/OpenSearchOnHeapCache.java b/server/src/main/java/org/opensearch/common/cache/store/OpenSearchOnHeapCache.java index 29e5667c9f27d..569653bec2a3d 100644 --- a/server/src/main/java/org/opensearch/common/cache/store/OpenSearchOnHeapCache.java +++ 
b/server/src/main/java/org/opensearch/common/cache/store/OpenSearchOnHeapCache.java @@ -19,7 +19,9 @@ import org.opensearch.common.cache.RemovalReason; import org.opensearch.common.cache.settings.CacheSettings; import org.opensearch.common.cache.stats.CacheStatsHolder; +import org.opensearch.common.cache.stats.DefaultCacheStatsHolder; import org.opensearch.common.cache.stats.ImmutableCacheStatsHolder; +import org.opensearch.common.cache.stats.NoopCacheStatsHolder; import org.opensearch.common.cache.store.builders.ICacheBuilder; import org.opensearch.common.cache.store.config.CacheConfig; import org.opensearch.common.cache.store.settings.OpenSearchOnHeapCacheSettings; @@ -51,6 +53,7 @@ public class OpenSearchOnHeapCache implements ICache, RemovalListene private final RemovalListener, V> removalListener; private final List dimensionNames; private final ToLongBiFunction, V> weigher; + private final boolean statsTrackingEnabled; public OpenSearchOnHeapCache(Builder builder) { CacheBuilder, V> cacheBuilder = CacheBuilder., V>builder() @@ -62,7 +65,12 @@ public OpenSearchOnHeapCache(Builder builder) { } cache = cacheBuilder.build(); this.dimensionNames = Objects.requireNonNull(builder.dimensionNames, "Dimension names can't be null"); - this.cacheStatsHolder = new CacheStatsHolder(dimensionNames); + this.statsTrackingEnabled = builder.getStatsTrackingEnabled(); + if (statsTrackingEnabled) { + this.cacheStatsHolder = new DefaultCacheStatsHolder(dimensionNames, OpenSearchOnHeapCacheFactory.NAME); + } else { + this.cacheStatsHolder = NoopCacheStatsHolder.getInstance(); + } this.removalListener = builder.getRemovalListener(); this.weigher = builder.getWeigher(); } @@ -81,7 +89,7 @@ public V get(ICacheKey key) { @Override public void put(ICacheKey key, V value) { cache.put(key, value); - cacheStatsHolder.incrementEntries(key.dimensions); + cacheStatsHolder.incrementItems(key.dimensions); cacheStatsHolder.incrementSizeInBytes(key.dimensions, weigher.applyAsLong(key, value)); } 
@@ -92,7 +100,7 @@ public V computeIfAbsent(ICacheKey key, LoadAwareCacheLoader, V> cacheStatsHolder.incrementHits(key.dimensions); } else { cacheStatsHolder.incrementMisses(key.dimensions); - cacheStatsHolder.incrementEntries(key.dimensions); + cacheStatsHolder.incrementItems(key.dimensions); cacheStatsHolder.incrementSizeInBytes(key.dimensions, cache.getWeigher().applyAsLong(key, value)); } return value; @@ -121,7 +129,7 @@ public Iterable> keys() { @Override public long count() { - return cacheStatsHolder.count(); + return cache.count(); } @Override @@ -133,14 +141,14 @@ public void refresh() { public void close() {} @Override - public ImmutableCacheStatsHolder stats() { - return cacheStatsHolder.getImmutableCacheStatsHolder(); + public ImmutableCacheStatsHolder stats(String[] levels) { + return cacheStatsHolder.getImmutableCacheStatsHolder(levels); } @Override public void onRemoval(RemovalNotification, V> notification) { removalListener.onRemoval(notification); - cacheStatsHolder.decrementEntries(notification.getKey().dimensions); + cacheStatsHolder.decrementItems(notification.getKey().dimensions); cacheStatsHolder.decrementSizeInBytes( notification.getKey().dimensions, cache.getWeigher().applyAsLong(notification.getKey(), notification.getValue()) @@ -163,7 +171,9 @@ public static class OpenSearchOnHeapCacheFactory implements Factory { public ICache create(CacheConfig config, CacheType cacheType, Map cacheFactories) { Map> settingList = OpenSearchOnHeapCacheSettings.getSettingListForCacheType(cacheType); Settings settings = config.getSettings(); + boolean statsTrackingEnabled = statsTrackingEnabled(config.getSettings(), config.getStatsTrackingEnabled()); ICacheBuilder builder = new Builder().setDimensionNames(config.getDimensionNames()) + .setStatsTrackingEnabled(statsTrackingEnabled) .setMaximumWeightInBytes(((ByteSizeValue) settingList.get(MAXIMUM_SIZE_IN_BYTES_KEY).get(settings)).getBytes()) .setExpireAfterAccess(((TimeValue) 
settingList.get(EXPIRE_AFTER_ACCESS_KEY).get(settings))) .setWeigher(config.getWeigher()) @@ -184,6 +194,11 @@ public ICache create(CacheConfig config, CacheType cacheType, public String getCacheName() { return NAME; } + + private boolean statsTrackingEnabled(Settings settings, boolean statsTrackingEnabledConfig) { + // Don't track stats when pluggable caching is off, or when explicitly set to false in the CacheConfig + return FeatureFlags.PLUGGABLE_CACHE_SETTING.get(settings) && statsTrackingEnabledConfig; + } } /** diff --git a/server/src/main/java/org/opensearch/common/cache/store/builders/ICacheBuilder.java b/server/src/main/java/org/opensearch/common/cache/store/builders/ICacheBuilder.java index ac90fcc85ffef..a308d1db88258 100644 --- a/server/src/main/java/org/opensearch/common/cache/store/builders/ICacheBuilder.java +++ b/server/src/main/java/org/opensearch/common/cache/store/builders/ICacheBuilder.java @@ -37,6 +37,8 @@ public abstract class ICacheBuilder { private RemovalListener, V> removalListener; + private boolean statsTrackingEnabled = true; + public ICacheBuilder() {} public ICacheBuilder setMaximumWeightInBytes(long sizeInBytes) { @@ -64,6 +66,11 @@ public ICacheBuilder setRemovalListener(RemovalListener, V> r return this; } + public ICacheBuilder setStatsTrackingEnabled(boolean statsTrackingEnabled) { + this.statsTrackingEnabled = statsTrackingEnabled; + return this; + } + public long getMaxWeightInBytes() { return maxWeightInBytes; } @@ -84,5 +91,9 @@ public Settings getSettings() { return settings; } + public boolean getStatsTrackingEnabled() { + return statsTrackingEnabled; + } + public abstract ICache build(); } diff --git a/server/src/main/java/org/opensearch/common/cache/store/config/CacheConfig.java b/server/src/main/java/org/opensearch/common/cache/store/config/CacheConfig.java index 15cbdbd021d71..0c54ac57a9b18 100644 --- a/server/src/main/java/org/opensearch/common/cache/store/config/CacheConfig.java +++ 
b/server/src/main/java/org/opensearch/common/cache/store/config/CacheConfig.java @@ -68,6 +68,8 @@ public class CacheConfig { private final ClusterSettings clusterSettings; + private final boolean statsTrackingEnabled; + private CacheConfig(Builder builder) { this.keyType = builder.keyType; this.valueType = builder.valueType; @@ -81,6 +83,7 @@ private CacheConfig(Builder builder) { this.maxSizeInBytes = builder.maxSizeInBytes; this.expireAfterAccess = builder.expireAfterAccess; this.clusterSettings = builder.clusterSettings; + this.statsTrackingEnabled = builder.statsTrackingEnabled; } public Class getKeyType() { @@ -131,6 +134,10 @@ public ClusterSettings getClusterSettings() { return clusterSettings; } + public boolean getStatsTrackingEnabled() { + return statsTrackingEnabled; + } + /** * Builder class to build Cache config related parameters. * @param Type of key. @@ -155,6 +162,7 @@ public static class Builder { private TimeValue expireAfterAccess; private ClusterSettings clusterSettings; + private boolean statsTrackingEnabled = true; public Builder() {} @@ -218,6 +226,11 @@ public Builder setClusterSettings(ClusterSettings clusterSettings) { return this; } + public Builder setStatsTrackingEnabled(boolean statsTrackingEnabled) { + this.statsTrackingEnabled = statsTrackingEnabled; + return this; + } + public CacheConfig build() { return new CacheConfig<>(this); } diff --git a/server/src/main/java/org/opensearch/common/metrics/OperationMetrics.java b/server/src/main/java/org/opensearch/common/metrics/OperationMetrics.java index 97fbbc2ce5cde..71c4a29f0f610 100644 --- a/server/src/main/java/org/opensearch/common/metrics/OperationMetrics.java +++ b/server/src/main/java/org/opensearch/common/metrics/OperationMetrics.java @@ -37,6 +37,14 @@ public void before() { current.incrementAndGet(); } + /** + * Invoke before the given operation begins in multiple items at the same time. 
+ * @param n number of items + */ + public void beforeN(int n) { + current.addAndGet(n); + } + /** * Invoked upon completion (success or failure) of the given operation * @param currentTime elapsed time of the operation @@ -46,6 +54,18 @@ public void after(long currentTime) { time.inc(currentTime); } + /** + * Invoked upon completion (success or failure) of the given operation for multiple items; lowers current by n and records currentTime once per item. + * @param n number of items completed + * @param currentTime elapsed time of the operation + */ + public void afterN(int n, long currentTime) { + current.addAndGet(-n); + for (int i = 0; i < n; ++i) { + time.inc(currentTime); + } + } + /** * Invoked upon failure of the operation. */ @@ -53,6 +73,16 @@ public void failed() { failed.inc(); } + /** + * Invoked upon failure of the operation on multiple items; adds {@code n} to the failure counter in a single update. + * @param n number of items for which the operation failed; values below 1 leave the counter unchanged. + */ + public void failedN(int n) { + if (n > 0) { + failed.inc(n); + } + } + public void add(OperationMetrics other) { // Don't try copying over current, since in-flight requests will be linked to the existing metrics instance.
failed.inc(other.failed.count()); diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index dab0f6bcf1c85..4a5a45eb1a17a 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -64,6 +64,7 @@ import org.opensearch.cluster.routing.OperationRouting; import org.opensearch.cluster.routing.allocation.AwarenessReplicaBalance; import org.opensearch.cluster.routing.allocation.DiskThresholdSettings; +import org.opensearch.cluster.routing.allocation.ExistingShardsAllocator; import org.opensearch.cluster.routing.allocation.allocator.BalancedShardsAllocator; import org.opensearch.cluster.routing.allocation.decider.AwarenessAllocationDecider; import org.opensearch.cluster.routing.allocation.decider.ClusterRebalanceAllocationDecider; @@ -102,6 +103,7 @@ import org.opensearch.gateway.DanglingIndicesState; import org.opensearch.gateway.GatewayService; import org.opensearch.gateway.PersistedClusterStateService; +import org.opensearch.gateway.ShardsBatchGatewayAllocator; import org.opensearch.gateway.remote.RemoteClusterStateService; import org.opensearch.http.HttpTransportSettings; import org.opensearch.index.IndexModule; @@ -268,6 +270,7 @@ public void apply(Settings value, Settings current, Settings previous) { DanglingIndicesState.AUTO_IMPORT_DANGLING_INDICES_SETTING, EnableAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ENABLE_SETTING, EnableAllocationDecider.CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING, + ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_BATCH_MODE, FilterAllocationDecider.CLUSTER_ROUTING_INCLUDE_GROUP_SETTING, FilterAllocationDecider.CLUSTER_ROUTING_EXCLUDE_GROUP_SETTING, FilterAllocationDecider.CLUSTER_ROUTING_REQUIRE_GROUP_SETTING, @@ -330,6 +333,7 @@ public void apply(Settings value, Settings current, Settings previous) { 
GatewayService.RECOVER_AFTER_MASTER_NODES_SETTING, GatewayService.RECOVER_AFTER_NODES_SETTING, GatewayService.RECOVER_AFTER_TIME_SETTING, + ShardsBatchGatewayAllocator.GATEWAY_ALLOCATOR_BATCH_SIZE, PersistedClusterStateService.SLOW_WRITE_LOGGING_THRESHOLD, NetworkModule.HTTP_DEFAULT_TYPE_SETTING, NetworkModule.TRANSPORT_DEFAULT_TYPE_SETTING, @@ -493,6 +497,8 @@ public void apply(Settings value, Settings current, Settings previous) { IndicesFieldDataCache.INDICES_FIELDDATA_CACHE_SIZE_KEY, IndicesRequestCache.INDICES_CACHE_QUERY_SIZE, IndicesRequestCache.INDICES_CACHE_QUERY_EXPIRE, + IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_INTERVAL_SETTING, + IndicesRequestCache.INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING, HunspellService.HUNSPELL_LAZY_LOAD, HunspellService.HUNSPELL_IGNORE_CASE, HunspellService.HUNSPELL_DICTIONARY_OPTIONS, @@ -732,7 +738,8 @@ public void apply(Settings value, Settings current, Settings previous) { RemoteStoreSettings.CLUSTER_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING, RemoteStoreSettings.CLUSTER_REMOTE_TRANSLOG_TRANSFER_TIMEOUT_SETTING, RemoteStoreSettings.CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING, - RemoteStoreSettings.CLUSTER_REMOTE_STORE_PATH_HASH_ALGORITHM_SETTING + RemoteStoreSettings.CLUSTER_REMOTE_STORE_PATH_HASH_ALGORITHM_SETTING, + RemoteStoreSettings.CLUSTER_REMOTE_MAX_TRANSLOG_READERS ) ) ); diff --git a/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java b/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java index c6c312d6b6eea..980c432774f6e 100644 --- a/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java @@ -198,6 +198,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings { EngineConfig.INDEX_CODEC_SETTING, EngineConfig.INDEX_CODEC_COMPRESSION_LEVEL_SETTING, EngineConfig.INDEX_OPTIMIZE_AUTO_GENERATED_IDS, + EngineConfig.INDEX_USE_COMPOUND_FILE, 
IndexMetadata.SETTING_WAIT_FOR_ACTIVE_SHARDS, IndexSettings.DEFAULT_PIPELINE, IndexSettings.FINAL_PIPELINE, diff --git a/server/src/main/java/org/opensearch/common/util/FeatureFlags.java b/server/src/main/java/org/opensearch/common/util/FeatureFlags.java index bdfce72d106d3..a72583607ede0 100644 --- a/server/src/main/java/org/opensearch/common/util/FeatureFlags.java +++ b/server/src/main/java/org/opensearch/common/util/FeatureFlags.java @@ -81,7 +81,7 @@ public class FeatureFlags { public static final Setting DATETIME_FORMATTER_CACHING_SETTING = Setting.boolSetting( DATETIME_FORMATTER_CACHING, - true, + false, Property.NodeScope ); diff --git a/server/src/main/java/org/opensearch/common/util/IndexUtils.java b/server/src/main/java/org/opensearch/common/util/IndexUtils.java new file mode 100644 index 0000000000000..b4f049617c4fc --- /dev/null +++ b/server/src/main/java/org/opensearch/common/util/IndexUtils.java @@ -0,0 +1,128 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.common.util; + +import org.opensearch.action.support.IndicesOptions; +import org.opensearch.cluster.metadata.IndexNameExpressionResolver; +import org.opensearch.common.regex.Regex; +import org.opensearch.index.IndexNotFoundException; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * Common Utility methods for Indices. + * + * @opensearch.internal + */ +public class IndexUtils { + + /** + * Filters out list of available indices based on the list of selected indices. 
+ * + * @param availableIndices list of available indices + * @param selectedIndices list of selected indices + * @param indicesOptions ignore indices flag + * @return filtered out indices + */ + public static List filterIndices(List availableIndices, String[] selectedIndices, IndicesOptions indicesOptions) { + if (IndexNameExpressionResolver.isAllIndices(Arrays.asList(selectedIndices))) { + return availableIndices; + } + + // Move the exclusions to end of list to ensure they are processed + // after explicitly selected indices are chosen. + final List excludesAtEndSelectedIndices = Stream.concat( + Arrays.stream(selectedIndices).filter(s -> s.isEmpty() || s.charAt(0) != '-'), + Arrays.stream(selectedIndices).filter(s -> !s.isEmpty() && s.charAt(0) == '-') + ).collect(Collectors.toUnmodifiableList()); + + Set result = null; + for (int i = 0; i < excludesAtEndSelectedIndices.size(); i++) { + String indexOrPattern = excludesAtEndSelectedIndices.get(i); + boolean add = true; + if (!indexOrPattern.isEmpty()) { + if (availableIndices.contains(indexOrPattern)) { + if (result == null) { + result = new HashSet<>(); + } + result.add(indexOrPattern); + continue; + } + if (indexOrPattern.charAt(0) == '+') { + add = true; + indexOrPattern = indexOrPattern.substring(1); + // if its the first, add empty set + if (i == 0) { + result = new HashSet<>(); + } + } else if (indexOrPattern.charAt(0) == '-') { + // If the first index pattern is an exclusion, then all patterns are exclusions due to the + // reordering logic above. In this case, the request is interpreted as "include all indexes except + // those matching the exclusions" so we add all indices here and then remove the ones that match the exclusion patterns. 
+ if (i == 0) { + result = new HashSet<>(availableIndices); + } + add = false; + indexOrPattern = indexOrPattern.substring(1); + } + } + if (indexOrPattern.isEmpty() || !Regex.isSimpleMatchPattern(indexOrPattern)) { + if (!availableIndices.contains(indexOrPattern)) { + if (!indicesOptions.ignoreUnavailable()) { + throw new IndexNotFoundException(indexOrPattern); + } else { + if (result == null) { + // add all the previous ones... + result = new HashSet<>(availableIndices.subList(0, i)); + } + } + } else { + if (result != null) { + if (add) { + result.add(indexOrPattern); + } else { + result.remove(indexOrPattern); + } + } + } + continue; + } + if (result == null) { + // add all the previous ones... + result = new HashSet<>(availableIndices.subList(0, i)); + } + boolean found = false; + for (String index : availableIndices) { + if (Regex.simpleMatch(indexOrPattern, index)) { + found = true; + if (add) { + result.add(index); + } else { + result.remove(index); + } + } + } + if (!found && !indicesOptions.allowNoIndices()) { + throw new IndexNotFoundException(indexOrPattern); + } + } + if (result == null) { + return Collections.unmodifiableList(new ArrayList<>(Arrays.asList(selectedIndices))); + } + return Collections.unmodifiableList(new ArrayList<>(result)); + } + +} diff --git a/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java index 9b2bd06a88e2e..998122d9e5c43 100644 --- a/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java +++ b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java @@ -106,8 +106,9 @@ private void parseToken(StringBuilder path, String currentFieldName) throws IOEx // skip } else if (this.parser.currentToken() == Token.START_OBJECT) { parseToken(path, currentFieldName); - int dotIndex = path.lastIndexOf(DOT_SYMBOL); - if (dotIndex != -1) { + int dotIndex = 
path.lastIndexOf(DOT_SYMBOL, path.length()); + + if (dotIndex != -1 && path.length() > currentFieldName.length()) { path.setLength(path.length() - currentFieldName.length() - 1); } } else { @@ -117,8 +118,8 @@ private void parseToken(StringBuilder path, String currentFieldName) throws IOEx parseValue(parsedFields); this.valueList.add(parsedFields.toString()); this.valueAndPathList.add(path + EQUAL_SYMBOL + parsedFields); - int dotIndex = path.lastIndexOf(DOT_SYMBOL); - if (dotIndex != -1) { + int dotIndex = path.lastIndexOf(DOT_SYMBOL, path.length()); + if (dotIndex != -1 && path.length() > currentFieldName.length()) { path.setLength(path.length() - currentFieldName.length() - 1); } } diff --git a/server/src/main/java/org/opensearch/gateway/BaseGatewayShardAllocator.java b/server/src/main/java/org/opensearch/gateway/BaseGatewayShardAllocator.java index e0831293fc7e1..eed5de65258fc 100644 --- a/server/src/main/java/org/opensearch/gateway/BaseGatewayShardAllocator.java +++ b/server/src/main/java/org/opensearch/gateway/BaseGatewayShardAllocator.java @@ -135,6 +135,8 @@ private void executeDecision( } } + public void allocateUnassignedBatch(String batchId, RoutingAllocation allocation) {} + protected long getExpectedShardSize(ShardRouting shardRouting, RoutingAllocation allocation) { if (shardRouting.primary()) { if (shardRouting.recoverySource().getType() == RecoverySource.Type.SNAPSHOT) { diff --git a/server/src/main/java/org/opensearch/gateway/ReplicaShardBatchAllocator.java b/server/src/main/java/org/opensearch/gateway/ReplicaShardBatchAllocator.java index 3459f1591b633..be7867b7823f6 100644 --- a/server/src/main/java/org/opensearch/gateway/ReplicaShardBatchAllocator.java +++ b/server/src/main/java/org/opensearch/gateway/ReplicaShardBatchAllocator.java @@ -56,7 +56,7 @@ public void processExistingRecoveries(RoutingAllocation allocation, List GATEWAY_ALLOCATOR_BATCH_SIZE = Setting.longSetting( + "cluster.allocator.gateway.batch_size", + DEFAULT_SHARD_BATCH_SIZE, + 
1, + 10000, + Setting.Property.NodeScope + ); + + private final RerouteService rerouteService; + private final PrimaryShardBatchAllocator primaryShardBatchAllocator; + private final ReplicaShardBatchAllocator replicaShardBatchAllocator; + private Set lastSeenEphemeralIds = Collections.emptySet(); + + // visible for testing + protected final ConcurrentMap batchIdToStartedShardBatch = ConcurrentCollections.newConcurrentMap(); + + // visible for testing + protected final ConcurrentMap batchIdToStoreShardBatch = ConcurrentCollections.newConcurrentMap(); + private final TransportNodesListGatewayStartedShardsBatch batchStartedAction; + private final TransportNodesListShardStoreMetadataBatch batchStoreAction; + + @Inject + public ShardsBatchGatewayAllocator( + RerouteService rerouteService, + TransportNodesListGatewayStartedShardsBatch batchStartedAction, + TransportNodesListShardStoreMetadataBatch batchStoreAction, + Settings settings + ) { + this.rerouteService = rerouteService; + this.primaryShardBatchAllocator = new InternalPrimaryBatchShardAllocator(); + this.replicaShardBatchAllocator = new InternalReplicaBatchShardAllocator(); + this.batchStartedAction = batchStartedAction; + this.batchStoreAction = batchStoreAction; + this.maxBatchSize = GATEWAY_ALLOCATOR_BATCH_SIZE.get(settings); + } + + @Override + public void cleanCaches() { + Stream.of(batchIdToStartedShardBatch, batchIdToStoreShardBatch).forEach(b -> { + Releasables.close(b.values().stream().map(shardsBatch -> shardsBatch.asyncBatch).collect(Collectors.toList())); + b.clear(); + }); + } + + // for tests + protected ShardsBatchGatewayAllocator() { + this.rerouteService = null; + this.batchStartedAction = null; + this.primaryShardBatchAllocator = null; + this.batchStoreAction = null; + this.replicaShardBatchAllocator = null; + this.maxBatchSize = DEFAULT_SHARD_BATCH_SIZE; + } + + // for tests + + @Override + public int getNumberOfInFlightFetches() { + int count = 0; + for (ShardsBatch batch : 
batchIdToStartedShardBatch.values()) { + count += (batch.getNumberOfInFlightFetches() * batch.getBatchedShards().size()); + } + for (ShardsBatch batch : batchIdToStoreShardBatch.values()) { + count += (batch.getNumberOfInFlightFetches() * batch.getBatchedShards().size()); + } + + return count; + } + + @Override + public void applyStartedShards(final List startedShards, final RoutingAllocation allocation) { + for (ShardRouting startedShard : startedShards) { + safelyRemoveShardFromBothBatch(startedShard); + } + } + + @Override + public void applyFailedShards(final List failedShards, final RoutingAllocation allocation) { + for (FailedShard failedShard : failedShards) { + safelyRemoveShardFromBothBatch(failedShard.getRoutingEntry()); + } + } + + @Override + public void beforeAllocation(final RoutingAllocation allocation) { + assert primaryShardBatchAllocator != null; + assert replicaShardBatchAllocator != null; + ensureAsyncFetchStorePrimaryRecency(allocation); + } + + @Override + public void afterPrimariesBeforeReplicas(RoutingAllocation allocation) { + assert replicaShardBatchAllocator != null; + List> storedShardBatches = batchIdToStoreShardBatch.values() + .stream() + .map(ShardsBatch::getBatchedShardRoutings) + .collect(Collectors.toList()); + if (allocation.routingNodes().hasInactiveShards()) { + // cancel existing recoveries if we have a better match + replicaShardBatchAllocator.processExistingRecoveries(allocation, storedShardBatches); + } + } + + @Override + public void allocateUnassigned( + ShardRouting shardRouting, + RoutingAllocation allocation, + UnassignedAllocationHandler unassignedAllocationHandler + ) { + throw new UnsupportedOperationException("ShardsBatchGatewayAllocator does not support allocating unassigned shards"); + } + + @Override + public void allocateAllUnassignedShards(final RoutingAllocation allocation, boolean primary) { + + assert primaryShardBatchAllocator != null; + assert replicaShardBatchAllocator != null; + 
innerAllocateUnassignedBatch(allocation, primaryShardBatchAllocator, replicaShardBatchAllocator, primary); + } + + protected void innerAllocateUnassignedBatch( + RoutingAllocation allocation, + PrimaryShardBatchAllocator primaryBatchShardAllocator, + ReplicaShardBatchAllocator replicaBatchShardAllocator, + boolean primary + ) { + // create batches for unassigned shards + Set batchesToAssign = createAndUpdateBatches(allocation, primary); + if (batchesToAssign.isEmpty()) { + return; + } + if (primary) { + batchIdToStartedShardBatch.values() + .stream() + .filter(batch -> batchesToAssign.contains(batch.batchId)) + .forEach( + shardsBatch -> primaryBatchShardAllocator.allocateUnassignedBatch(shardsBatch.getBatchedShardRoutings(), allocation) + ); + } else { + batchIdToStoreShardBatch.values() + .stream() + .filter(batch -> batchesToAssign.contains(batch.batchId)) + .forEach(batch -> replicaBatchShardAllocator.allocateUnassignedBatch(batch.getBatchedShardRoutings(), allocation)); + } + } + + // visible for testing + protected Set createAndUpdateBatches(RoutingAllocation allocation, boolean primary) { + Set batchesToBeAssigned = new HashSet<>(); + RoutingNodes.UnassignedShards unassigned = allocation.routingNodes().unassigned(); + ConcurrentMap currentBatches = primary ? 
batchIdToStartedShardBatch : batchIdToStoreShardBatch; + // get all batched shards + Map currentBatchedShards = new HashMap<>(); + for (Map.Entry batchEntry : currentBatches.entrySet()) { + batchEntry.getValue().getBatchedShards().forEach(shardId -> currentBatchedShards.put(shardId, batchEntry.getKey())); + } + + Set newShardsToBatch = Sets.newHashSet(); + Set batchedShardsToAssign = Sets.newHashSet(); + // add all unassigned shards to the batch if they are not already in a batch + unassigned.forEach(shardRouting -> { + if ((currentBatchedShards.containsKey(shardRouting.shardId()) == false) && (shardRouting.primary() == primary)) { + assert shardRouting.unassigned(); + newShardsToBatch.add(shardRouting); + } + // if shard is already batched update to latest shardRouting information in the batches + // Replica shard assignment can be cancelled if we get a better match. These ShardRouting objects also + // store other information like relocating node, targetRelocatingShard etc. And it can be updated after + // batches are created. If we don't update the ShardRouting object, stale data would be passed from the + // batch. This stale data can end up creating a same decision which has already been taken, and we'll see + // failure in executeDecision of BaseGatewayShardAllocator. Previous non-batch mode flow also used to + // pass ShardRouting object directly from unassignedIterator, so we're following the same behaviour. 
+ else if (shardRouting.primary() == primary) { + String batchId = currentBatchedShards.get(shardRouting.shardId()); + batchesToBeAssigned.add(batchId); + currentBatches.get(batchId).batchInfo.get(shardRouting.shardId()).setShardRouting(shardRouting); + batchedShardsToAssign.add(shardRouting.shardId()); + } + }); + + allocation.routingNodes().forEach(routingNode -> routingNode.getInitializingShards().forEach(shardRouting -> { + if (currentBatchedShards.containsKey(shardRouting.shardId()) && shardRouting.primary() == primary) { + batchedShardsToAssign.add(shardRouting.shardId()); + // Set updated shard routing in batch if it already exists + String batchId = currentBatchedShards.get(shardRouting.shardId()); + currentBatches.get(batchId).batchInfo.get(shardRouting.shardId()).setShardRouting(shardRouting); + } + })); + + refreshShardBatches(currentBatches, batchedShardsToAssign, primary); + + Iterator iterator = newShardsToBatch.iterator(); + assert maxBatchSize > 0 : "Shards batch size must be greater than 0"; + + long batchSize = maxBatchSize; + Map perBatchShards = new HashMap<>(); + while (iterator.hasNext()) { + ShardRouting currentShard = iterator.next(); + ShardEntry shardEntry = new ShardEntry( + new ShardAttributes( + IndexMetadata.INDEX_DATA_PATH_SETTING.get(allocation.metadata().index(currentShard.index()).getSettings()) + ), + currentShard + ); + perBatchShards.put(currentShard.shardId(), shardEntry); + batchSize--; + iterator.remove(); + // add to batch if batch size full or last shard in unassigned list + if (batchSize == 0 || iterator.hasNext() == false) { + String batchUUId = UUIDs.base64UUID(); + ShardsBatch shardsBatch = new ShardsBatch(batchUUId, perBatchShards, primary); + // add the batch to list of current batches + addBatch(shardsBatch, primary); + batchesToBeAssigned.add(batchUUId); + perBatchShards.clear(); + batchSize = maxBatchSize; + } + } + return batchesToBeAssigned; + } + + private void refreshShardBatches( + ConcurrentMap 
currentBatches, + Set batchedShardsToAssign, + boolean primary + ) { + // cleanup shard from batches if they are not present in unassigned list from allocation object. This is + // needed as AllocationService.reroute can also be called directly by API flows for example DeleteIndices. + // So, as part of calling reroute, those shards will be removed from allocation object. It'll handle the + // scenarios where shards can be removed from unassigned list without "start" or "failed" event. + for (Map.Entry batchEntry : currentBatches.entrySet()) { + Iterator shardIdIterator = batchEntry.getValue().getBatchedShards().iterator(); + while (shardIdIterator.hasNext()) { + ShardId shardId = shardIdIterator.next(); + if (batchedShardsToAssign.contains(shardId) == false) { + shardIdIterator.remove(); + batchEntry.getValue().clearShardFromCache(shardId); + } + } + ConcurrentMap batches = primary ? batchIdToStartedShardBatch : batchIdToStoreShardBatch; + deleteBatchIfEmpty(batches, batchEntry.getValue().getBatchId()); + } + } + + private void addBatch(ShardsBatch shardsBatch, boolean primary) { + ConcurrentMap batches = primary ? batchIdToStartedShardBatch : batchIdToStoreShardBatch; + if (batches.containsKey(shardsBatch.getBatchId())) { + throw new IllegalStateException("Batch already exists. BatchId = " + shardsBatch.getBatchId()); + } + batches.put(shardsBatch.getBatchId(), shardsBatch); + } + + /** + * Safely remove a shard from the appropriate batch depending on if it is primary or replica + * If the shard is not in a batch, this is a no-op. + * Cleans the batch if it is empty after removing the shard. 
+ * This method should be called when removing the shard from the batch instead {@link ShardsBatch#removeFromBatch(ShardRouting)} + * so that we can clean up the batch if it is empty and release the fetching resources + * + * @param shardRouting shard to be removed + * @param primary from which batch shard needs to be removed + */ + protected void safelyRemoveShardFromBatch(ShardRouting shardRouting, boolean primary) { + String batchId = primary ? getBatchId(shardRouting, true) : getBatchId(shardRouting, false); + if (batchId == null) { + logger.debug("Shard[{}] is not batched", shardRouting); + return; + } + ConcurrentMap batches = primary ? batchIdToStartedShardBatch : batchIdToStoreShardBatch; + ShardsBatch batch = batches.get(batchId); + batch.removeFromBatch(shardRouting); + deleteBatchIfEmpty(batches, batchId); + } + + /** + * Safely remove shard from both the batches irrespective of its primary or replica, + * For the corresponding shardId. The method intends to clean up the batch if it is empty + * after removing the shard + * @param shardRouting shard to remove + */ + protected void safelyRemoveShardFromBothBatch(ShardRouting shardRouting) { + safelyRemoveShardFromBatch(shardRouting, true); + safelyRemoveShardFromBatch(shardRouting, false); + } + + private void deleteBatchIfEmpty(ConcurrentMap batches, String batchId) { + if (batches.containsKey(batchId)) { + ShardsBatch batch = batches.get(batchId); + if (batch.getBatchedShards().isEmpty()) { + Releasables.close(batch.getAsyncFetcher()); + batches.remove(batchId); + } + } + } + + protected String getBatchId(ShardRouting shardRouting, boolean primary) { + ConcurrentMap batches = primary ? 
batchIdToStartedShardBatch : batchIdToStoreShardBatch; + + return batches.entrySet() + .stream() + .filter(entry -> entry.getValue().getBatchedShards().contains(shardRouting.shardId())) + .findFirst() + .map(Map.Entry::getKey) + .orElse(null); + } + + @Override + public AllocateUnassignedDecision explainUnassignedShardAllocation(ShardRouting unassignedShard, RoutingAllocation routingAllocation) { + assert unassignedShard.unassigned(); + assert routingAllocation.debugDecision(); + if (getBatchId(unassignedShard, unassignedShard.primary()) == null) { + createAndUpdateBatches(routingAllocation, unassignedShard.primary()); + } + assert getBatchId(unassignedShard, unassignedShard.primary()) != null; + if (unassignedShard.primary()) { + assert primaryShardBatchAllocator != null; + return primaryShardBatchAllocator.makeAllocationDecision(unassignedShard, routingAllocation, logger); + } else { + assert replicaShardBatchAllocator != null; + return replicaShardBatchAllocator.makeAllocationDecision(unassignedShard, routingAllocation, logger); + } + } + + /** + * Clear the fetched data for the primary to ensure we do not cancel recoveries based on excessively stale data. + */ + private void ensureAsyncFetchStorePrimaryRecency(RoutingAllocation allocation) { + DiscoveryNodes nodes = allocation.nodes(); + if (hasNewNodes(nodes)) { + final Set newEphemeralIds = StreamSupport.stream(Spliterators.spliterator(nodes.getDataNodes().entrySet(), 0), false) + .map(node -> node.getValue().getEphemeralId()) + .collect(Collectors.toSet()); + // Invalidate the cache if a data node has been added to the cluster. This ensures that we do not cancel a recovery if a node + // drops out, we fetch the shard data, then some indexing happens and then the node rejoins the cluster again. There are other + // ways we could decide to cancel a recovery based on stale data (e.g. 
changing allocation filters or a primary failure) but + // making the wrong decision here is not catastrophic so we only need to cover the common case. + + logger.trace( + () -> new ParameterizedMessage( + "new nodes {} found, clearing primary async-fetch-store cache", + Sets.difference(newEphemeralIds, lastSeenEphemeralIds) + ) + ); + batchIdToStoreShardBatch.values().forEach(batch -> clearCacheForBatchPrimary(batch, allocation)); + + // recalc to also (lazily) clear out old nodes. + this.lastSeenEphemeralIds = newEphemeralIds; + } + } + + private static void clearCacheForBatchPrimary(ShardsBatch batch, RoutingAllocation allocation) { + // We need to clear the cache for the primary shard to ensure we do not cancel recoveries based on excessively + // stale data. We do this by clearing the cache of nodes for all the active primaries of replicas in the current batch. + // Although this flow can be optimized by only clearing the cache for the primary shard but currently + // when we want to fetch data we do for complete node, for doing this a new fetch flow will also handle just + // fetching the data for a single shard on the node and fill that up in our cache + // Opened issue #13352 - to track the improvement + List primaries = batch.getBatchedShards() + .stream() + .map(allocation.routingNodes()::activePrimary) + .filter(Objects::nonNull) + .collect(Collectors.toList()); + AsyncShardBatchFetch fetch = batch.getAsyncFetcher(); + primaries.forEach(shardRouting -> fetch.clearCacheForNode(shardRouting.currentNodeId())); + } + + private boolean hasNewNodes(DiscoveryNodes nodes) { + for (final DiscoveryNode node : nodes.getDataNodes().values()) { + if (lastSeenEphemeralIds.contains(node.getEphemeralId()) == false) { + return true; + } + } + return false; + } + + class InternalBatchAsyncFetch extends AsyncShardBatchFetch { + InternalBatchAsyncFetch( + Logger logger, + String type, + Map map, + AsyncShardFetch.Lister, T> action, + String batchUUId, + Class clazz, + V 
emptyShardResponse, + Predicate emptyShardResponsePredicate, + ShardBatchResponseFactory responseFactory + ) { + super(logger, type, map, action, batchUUId, clazz, emptyShardResponse, emptyShardResponsePredicate, responseFactory); + } + + @Override + protected void reroute(String reroutingKey, String reason) { + logger.trace("{} scheduling reroute for {}", reroutingKey, reason); + assert rerouteService != null; + rerouteService.reroute( + "async_shard_batch_fetch", + Priority.HIGH, + ActionListener.wrap( + r -> logger.trace("{} scheduled reroute completed for {}", reroutingKey, reason), + e -> logger.debug(new ParameterizedMessage("{} scheduled reroute failed for {}", reroutingKey, reason), e) + ) + ); + } + } + + class InternalPrimaryBatchShardAllocator extends PrimaryShardBatchAllocator { + + @Override + @SuppressWarnings("unchecked") + protected AsyncShardFetch.FetchResult fetchData( + List eligibleShards, + List inEligibleShards, + RoutingAllocation allocation + ) { + return (AsyncShardFetch.FetchResult< + TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShardsBatch>) fetchDataAndCleanIneligibleShards( + eligibleShards, + inEligibleShards, + allocation + ); + } + + } + + class InternalReplicaBatchShardAllocator extends ReplicaShardBatchAllocator { + @Override + @SuppressWarnings("unchecked") + protected AsyncShardFetch.FetchResult fetchData( + List eligibleShards, + List inEligibleShards, + RoutingAllocation allocation + ) { + return (AsyncShardFetch.FetchResult< + TransportNodesListShardStoreMetadataBatch.NodeStoreFilesMetadataBatch>) fetchDataAndCleanIneligibleShards( + eligibleShards, + inEligibleShards, + allocation + ); + } + + @Override + protected boolean hasInitiatedFetching(ShardRouting shard) { + String batchId = getBatchId(shard, shard.primary()); + return batchId != null; + } + } + + AsyncShardFetch.FetchResult fetchDataAndCleanIneligibleShards( + List eligibleShards, + List inEligibleShards, + RoutingAllocation allocation + ) { + // 
get batch id for anyone given shard. We are assuming all shards will have same batchId + ShardRouting shardRouting = eligibleShards.iterator().hasNext() ? eligibleShards.iterator().next() : null; + shardRouting = shardRouting == null && inEligibleShards.iterator().hasNext() ? inEligibleShards.iterator().next() : shardRouting; + if (shardRouting == null) { + return new AsyncShardFetch.FetchResult<>(null, Collections.emptyMap()); + } + String batchId = getBatchId(shardRouting, shardRouting.primary()); + if (batchId == null) { + logger.debug("Shard {} has no batch id", shardRouting); + throw new IllegalStateException("Shard " + shardRouting + " has no batch id. Shard should batched before fetching"); + } + ConcurrentMap batches = shardRouting.primary() ? batchIdToStartedShardBatch : batchIdToStoreShardBatch; + if (batches.containsKey(batchId) == false) { + logger.debug("Batch {} has no shards batch", batchId); + throw new IllegalStateException("Batch " + batchId + " has no shards batch"); + } + + ShardsBatch shardsBatch = batches.get(batchId); + // remove in eligible shards which allocator is not responsible for + inEligibleShards.forEach(sr -> safelyRemoveShardFromBatch(sr, sr.primary())); + + if (shardsBatch.getBatchedShards().isEmpty() && eligibleShards.isEmpty()) { + logger.debug("Batch {} is empty", batchId); + return new AsyncShardFetch.FetchResult<>(null, Collections.emptyMap()); + } + Map> shardToIgnoreNodes = new HashMap<>(); + for (ShardId shardId : shardsBatch.asyncBatch.shardAttributesMap.keySet()) { + shardToIgnoreNodes.put(shardId, allocation.getIgnoreNodes(shardId)); + } + AsyncShardBatchFetch asyncFetcher = shardsBatch.getAsyncFetcher(); + AsyncShardFetch.FetchResult fetchResult = asyncFetcher.fetchData( + allocation.nodes(), + shardToIgnoreNodes + ); + if (fetchResult.hasData()) { + fetchResult.processAllocation(allocation); + } + + return fetchResult; + } + + /** + * Holds information about a batch of shards to be allocated. 
+ * Async fetcher is used to fetch the data for the batch. + * <p>
+ * Visible for testing + */ + public class ShardsBatch { + private final String batchId; + private final boolean primary; + + private final InternalBatchAsyncFetch asyncBatch; + + private final Map batchInfo; + + public ShardsBatch(String batchId, Map shardsWithInfo, boolean primary) { + this.batchId = batchId; + this.batchInfo = new HashMap<>(shardsWithInfo); + // create a ShardId -> customDataPath map for async fetch + Map shardIdsMap = batchInfo.entrySet() + .stream() + .collect(Collectors.toMap(Map.Entry::getKey, entry -> entry.getValue().getShardAttributes())); + this.primary = primary; + if (this.primary) { + asyncBatch = new InternalBatchAsyncFetch<>( + logger, + "batch_shards_started", + shardIdsMap, + batchStartedAction, + batchId, + GatewayStartedShard.class, + new GatewayStartedShard(null, false, null, null), + GatewayStartedShard::isEmpty, + new ShardBatchResponseFactory<>(true) + ); + } else { + asyncBatch = new InternalBatchAsyncFetch<>( + logger, + "batch_shards_store", + shardIdsMap, + batchStoreAction, + batchId, + NodeStoreFilesMetadata.class, + new NodeStoreFilesMetadata(new StoreFilesMetadata(null, Store.MetadataSnapshot.EMPTY, Collections.emptyList()), null), + NodeStoreFilesMetadata::isEmpty, + new ShardBatchResponseFactory<>(false) + ); + } + } + + protected void removeShard(ShardId shardId) { + this.batchInfo.remove(shardId); + } + + private TransportNodesListShardStoreMetadataBatch.NodeStoreFilesMetadata buildEmptyReplicaShardResponse() { + return new TransportNodesListShardStoreMetadataBatch.NodeStoreFilesMetadata( + new TransportNodesListShardStoreMetadataHelper.StoreFilesMetadata( + null, + Store.MetadataSnapshot.EMPTY, + Collections.emptyList() + ), + null + ); + } + + private void removeFromBatch(ShardRouting shard) { + removeShard(shard.shardId()); + clearShardFromCache(shard.shardId()); + // assert that fetcher and shards are the same as batched shards + assert batchInfo.size() == asyncBatch.shardAttributesMap.size() : "Shards size 
is not equal to fetcher size"; + } + + private void clearShardFromCache(ShardId shardId) { + asyncBatch.clearShard(shardId); + } + + public List getBatchedShardRoutings() { + return batchInfo.values().stream().map(ShardEntry::getShardRouting).collect(Collectors.toList()); + } + + public Set getBatchedShards() { + return batchInfo.keySet(); + } + + public String getBatchId() { + return batchId; + } + + public AsyncShardBatchFetch getAsyncFetcher() { + return asyncBatch; + } + + public int getNumberOfInFlightFetches() { + return asyncBatch.getNumberOfInFlightFetches(); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o instanceof ShardsBatch == false) { + return false; + } + ShardsBatch shardsBatch = (ShardsBatch) o; + return batchId.equals(shardsBatch.getBatchId()) && batchInfo.keySet().equals(shardsBatch.getBatchedShards()); + } + + @Override + public int hashCode() { + return Objects.hash(batchId); + } + + @Override + public String toString() { + return "batchId: " + batchId; + } + + } + + /** + * Holds information about a shard to be allocated in a batch. 
+ */ + static class ShardEntry { + + private final ShardAttributes shardAttributes; + + private ShardRouting shardRouting; + + public ShardEntry(ShardAttributes shardAttributes, ShardRouting shardRouting) { + this.shardAttributes = shardAttributes; + this.shardRouting = shardRouting; + } + + public ShardRouting getShardRouting() { + return shardRouting; + } + + public ShardAttributes getShardAttributes() { + return shardAttributes; + } + + public ShardEntry setShardRouting(ShardRouting shardRouting) { + this.shardRouting = shardRouting; + return this; + } + } + + public int getNumberOfStartedShardBatches() { + return batchIdToStartedShardBatch.size(); + } + + public int getNumberOfStoreShardBatches() { + return batchIdToStoreShardBatch.size(); + } +} diff --git a/server/src/main/java/org/opensearch/gateway/remote/IndexMetadataUploadListener.java b/server/src/main/java/org/opensearch/gateway/remote/IndexMetadataUploadListener.java new file mode 100644 index 0000000000000..7d5fe8140e1e1 --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/IndexMetadataUploadListener.java @@ -0,0 +1,58 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote; + +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.core.action.ActionListener; +import org.opensearch.threadpool.ThreadPool; + +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.concurrent.ExecutorService; + +/** + * Hook for running code that needs to be executed before the upload of index metadata. Here we have introduced a hook + * for index creation (also triggered after enabling the remote cluster statement for the first time). The listener + * is intended to be run in parallel and async with the index metadata upload.
+ * + * @opensearch.internal + */ +public abstract class IndexMetadataUploadListener { + + private final ExecutorService executorService; + + public IndexMetadataUploadListener(ThreadPool threadPool, String threadPoolName) { + Objects.requireNonNull(threadPool); + Objects.requireNonNull(threadPoolName); + assert ThreadPool.THREAD_POOL_TYPES.containsKey(threadPoolName) && ThreadPool.Names.SAME.equals(threadPoolName) == false; + this.executorService = threadPool.executor(threadPoolName); + } + + /** + * Runs before the new index upload of index metadata (or first time upload). The caller is expected to trigger + * onSuccess or onFailure of the {@code ActionListener}. + * + * @param indexMetadataList list of index metadata of new indexes (or first time index metadata upload). + * @param actionListener listener to be invoked on success or failure. + */ + public final void onUpload( + List indexMetadataList, + Map prevIndexMetadataByName, + ActionListener actionListener + ) { + executorService.execute(() -> doOnUpload(indexMetadataList, prevIndexMetadataByName, actionListener)); + } + + protected abstract void doOnUpload( + List indexMetadataList, + Map prevIndexMetadataByName, + ActionListener actionListener + ); +} diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java index c892b475d71da..eaf607564185c 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java @@ -160,6 +160,7 @@ public class RemoteClusterStateService implements Closeable { private final Settings settings; private final LongSupplier relativeTimeNanosSupplier; private final ThreadPool threadpool; + private final List indexMetadataUploadListeners; private BlobStoreRepository blobStoreRepository; private BlobStoreTransferService blobStoreTransferService; 
private volatile TimeValue slowWriteLoggingThreshold; @@ -177,6 +178,7 @@ public class RemoteClusterStateService implements Closeable { // ToXContent Params with gateway mode. // We are using gateway context mode to persist all custom metadata. public static final ToXContent.Params FORMAT_PARAMS; + static { Map params = new HashMap<>(1); params.put(Metadata.CONTEXT_MODE_PARAM, Metadata.CONTEXT_MODE_GATEWAY); @@ -189,7 +191,8 @@ public RemoteClusterStateService( Settings settings, ClusterSettings clusterSettings, LongSupplier relativeTimeNanosSupplier, - ThreadPool threadPool + ThreadPool threadPool, + List indexMetadataUploadListeners ) { assert isRemoteStoreClusterStateEnabled(settings) : "Remote cluster state is not enabled"; this.nodeId = nodeId; @@ -206,6 +209,7 @@ public RemoteClusterStateService( clusterSettings.addSettingsUpdateConsumer(GLOBAL_METADATA_UPLOAD_TIMEOUT_SETTING, this::setGlobalMetadataUploadTimeout); clusterSettings.addSettingsUpdateConsumer(METADATA_MANIFEST_UPLOAD_TIMEOUT_SETTING, this::setMetadataManifestUploadTimeout); this.remoteStateStats = new RemotePersistenceStats(); + this.indexMetadataUploadListeners = indexMetadataUploadListeners; } private BlobStoreTransferService getBlobStoreTransferService() { @@ -233,10 +237,12 @@ public ClusterMetadataManifest writeFullMetadata(ClusterState clusterState, Stri // Write globalMetadata String globalMetadataFile = writeGlobalMetadata(clusterState); + List toUpload = new ArrayList<>(clusterState.metadata().indices().values()); // any validations before/after upload ? 
final List allUploadedIndexMetadata = writeIndexMetadataParallel( clusterState, - new ArrayList<>(clusterState.metadata().indices().values()) + toUpload, + Collections.emptyMap() ); final ClusterMetadataManifest manifest = uploadManifest( clusterState, @@ -301,9 +307,9 @@ public ClusterMetadataManifest writeIncrementalMetadata( } // Write Index Metadata - final Map previousStateIndexMetadataVersionByName = new HashMap<>(); + final Map previousStateIndexMetadataByName = new HashMap<>(); for (final IndexMetadata indexMetadata : previousClusterState.metadata().indices().values()) { - previousStateIndexMetadataVersionByName.put(indexMetadata.getIndex().getName(), indexMetadata.getVersion()); + previousStateIndexMetadataByName.put(indexMetadata.getIndex().getName(), indexMetadata); } int numIndicesUpdated = 0; @@ -313,9 +319,12 @@ public ClusterMetadataManifest writeIncrementalMetadata( .collect(Collectors.toMap(UploadedIndexMetadata::getIndexName, Function.identity())); List toUpload = new ArrayList<>(); - + // We prepare a map that contains the previous index metadata for the indexes for which version has changed. + Map prevIndexMetadataByName = new HashMap<>(); for (final IndexMetadata indexMetadata : clusterState.metadata().indices().values()) { - final Long previousVersion = previousStateIndexMetadataVersionByName.get(indexMetadata.getIndex().getName()); + String indexName = indexMetadata.getIndex().getName(); + final IndexMetadata prevIndexMetadata = previousStateIndexMetadataByName.get(indexName); + Long previousVersion = prevIndexMetadata != null ? 
prevIndexMetadata.getVersion() : null; if (previousVersion == null || indexMetadata.getVersion() != previousVersion) { logger.debug( "updating metadata for [{}], changing version from [{}] to [{}]", @@ -325,18 +334,19 @@ public ClusterMetadataManifest writeIncrementalMetadata( ); numIndicesUpdated++; toUpload.add(indexMetadata); + prevIndexMetadataByName.put(indexName, prevIndexMetadata); } else { numIndicesUnchanged++; } - previousStateIndexMetadataVersionByName.remove(indexMetadata.getIndex().getName()); + previousStateIndexMetadataByName.remove(indexMetadata.getIndex().getName()); } - List uploadedIndexMetadataList = writeIndexMetadataParallel(clusterState, toUpload); + List uploadedIndexMetadataList = writeIndexMetadataParallel(clusterState, toUpload, prevIndexMetadataByName); uploadedIndexMetadataList.forEach( uploadedIndexMetadata -> allUploadedIndexMetadata.put(uploadedIndexMetadata.getIndexName(), uploadedIndexMetadata) ); - for (String removedIndexName : previousStateIndexMetadataVersionByName.keySet()) { + for (String removedIndexName : previousStateIndexMetadataByName.keySet()) { allUploadedIndexMetadata.remove(removedIndexName); } final ClusterMetadataManifest manifest = uploadManifest( @@ -436,13 +446,18 @@ private String writeGlobalMetadata(ClusterState clusterState) throws IOException * Uploads provided IndexMetadata's to remote store in parallel. The call is blocking so the method waits for upload to finish and then return. 
* * @param clusterState current ClusterState - * @param toUpload list of IndexMetadata to upload + * @param toUpload list of IndexMetadata to upload * @return {@code List} list of IndexMetadata uploaded to remote */ - private List writeIndexMetadataParallel(ClusterState clusterState, List toUpload) - throws IOException { - List exceptionList = Collections.synchronizedList(new ArrayList<>(toUpload.size())); - final CountDownLatch latch = new CountDownLatch(toUpload.size()); + private List writeIndexMetadataParallel( + ClusterState clusterState, + List toUpload, + Map prevIndexMetadataByName + ) throws IOException { + assert Objects.nonNull(indexMetadataUploadListeners) : "indexMetadataUploadListeners can not be null"; + int latchCount = toUpload.size() + indexMetadataUploadListeners.size(); + List exceptionList = Collections.synchronizedList(new ArrayList<>(latchCount)); + final CountDownLatch latch = new CountDownLatch(latchCount); List result = new ArrayList<>(toUpload.size()); LatchedActionListener latchedActionListener = new LatchedActionListener<>( @@ -467,6 +482,8 @@ private List writeIndexMetadataParallel(ClusterState clus writeIndexMetadataAsync(clusterState, indexMetadata, latchedActionListener); } + invokeIndexMetadataUploadListeners(toUpload, prevIndexMetadataByName, latch, exceptionList); + try { if (latch.await(getIndexMetadataUploadTimeout().millis(), TimeUnit.MILLISECONDS) == false) { RemoteStateTransferException ex = new RemoteStateTransferException( @@ -506,11 +523,68 @@ private List writeIndexMetadataParallel(ClusterState clus return result; } + /** + * Invokes the index metadata upload listener but does not wait for the execution to complete. 
+ */ + private void invokeIndexMetadataUploadListeners( + List updatedIndexMetadataList, + Map prevIndexMetadataByName, + CountDownLatch latch, + List exceptionList + ) { + for (IndexMetadataUploadListener listener : indexMetadataUploadListeners) { + String listenerName = listener.getClass().getSimpleName(); + listener.onUpload( + updatedIndexMetadataList, + prevIndexMetadataByName, + getIndexMetadataUploadActionListener(updatedIndexMetadataList, prevIndexMetadataByName, latch, exceptionList, listenerName) + ); + } + + } + + private ActionListener getIndexMetadataUploadActionListener( + List newIndexMetadataList, + Map prevIndexMetadataByName, + CountDownLatch latch, + List exceptionList, + String listenerName + ) { + long startTime = System.nanoTime(); + return new LatchedActionListener<>( + ActionListener.wrap( + ignored -> logger.trace( + new ParameterizedMessage( + "listener={} : Invoked successfully with indexMetadataList={} prevIndexMetadataList={} tookTimeNs={}", + listenerName, + newIndexMetadataList, + prevIndexMetadataByName.values(), + (System.nanoTime() - startTime) + ) + ), + ex -> { + logger.error( + new ParameterizedMessage( + "listener={} : Exception during invocation with indexMetadataList={} prevIndexMetadataList={} tookTimeNs={}", + listenerName, + newIndexMetadataList, + prevIndexMetadataByName.values(), + (System.nanoTime() - startTime) + ), + ex + ); + exceptionList.add(ex); + } + ), + latch + ); + } + /** * Allows async Upload of IndexMetadata to remote * - * @param clusterState current ClusterState - * @param indexMetadata {@link IndexMetadata} to upload + * @param clusterState current ClusterState + * @param indexMetadata {@link IndexMetadata} to upload * @param latchedActionListener listener to respond back on after upload finishes */ private void writeIndexMetadataAsync( @@ -659,16 +733,6 @@ private void writeMetadataManifest(String clusterName, String clusterUUID, Clust ); } - private String fetchPreviousClusterUUID(String clusterName, 
String clusterUUID) { - final Optional latestManifest = getLatestClusterMetadataManifest(clusterName, clusterUUID); - if (!latestManifest.isPresent()) { - final String previousClusterUUID = getLastKnownUUIDFromRemote(clusterName); - assert !clusterUUID.equals(previousClusterUUID) : "Last cluster UUID is same current cluster UUID"; - return previousClusterUUID; - } - return latestManifest.get().getPreviousClusterUUID(); - } - private BlobContainer indexMetadataContainer(String clusterName, String clusterUUID, String indexUUID) { // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/index/ftqsCnn9TgOX return blobStoreRepository.blobStore() @@ -737,7 +801,7 @@ static String getManifestFileName(long term, long version, boolean committed) { (committed ? "C" : "P"), // C for committed and P for published RemoteStoreUtils.invertLong(System.currentTimeMillis()), String.valueOf(MANIFEST_CURRENT_CODEC_VERSION) // Keep the codec version at last place only, during read we reads last place to - // determine codec version. + // determine codec version. ); } @@ -750,7 +814,7 @@ static String indexMetadataFileName(IndexMetadata indexMetadata) { RemoteStoreUtils.invertLong(indexMetadata.getVersion()), RemoteStoreUtils.invertLong(System.currentTimeMillis()), String.valueOf(INDEX_METADATA_CURRENT_CODEC_VERSION) // Keep the codec version at last place only, during read we reads last - // place to determine codec version. + // place to determine codec version. 
); } @@ -772,8 +836,8 @@ private BlobPath getManifestFolderPath(String clusterName, String clusterUUID) { /** * Fetch latest index metadata from remote cluster state * - * @param clusterUUID uuid of cluster state to refer to in remote - * @param clusterName name of the cluster + * @param clusterUUID uuid of cluster state to refer to in remote + * @param clusterName name of the cluster * @param clusterMetadataManifest manifest file of cluster * @return {@code Map} latest IndexUUID to IndexMetadata map */ @@ -795,8 +859,8 @@ private Map getIndexMetadataMap( /** * Fetch index metadata from remote cluster state * - * @param clusterUUID uuid of cluster state to refer to in remote - * @param clusterName name of the cluster + * @param clusterUUID uuid of cluster state to refer to in remote + * @param clusterName name of the cluster * @param uploadedIndexMetadata {@link UploadedIndexMetadata} contains details about remote location of index metadata * @return {@link IndexMetadata} */ @@ -825,7 +889,6 @@ private IndexMetadata getIndexMetadata(String clusterName, String clusterUUID, U * @return {@link IndexMetadata} */ public ClusterState getLatestClusterState(String clusterName, String clusterUUID) { - start(); Optional clusterMetadataManifest = getLatestClusterMetadataManifest(clusterName, clusterUUID); if (clusterMetadataManifest.isEmpty()) { throw new IllegalStateException( @@ -989,6 +1052,7 @@ private List createClusterChain(final Map trimClusterUUIDs( @@ -1050,7 +1114,7 @@ private boolean isValidClusterUUID(ClusterMetadataManifest manifest) { * * @param clusterUUID uuid of cluster state to refer to in remote * @param clusterName name of the cluster - * @param limit max no of files to fetch + * @param limit max no of files to fetch * @return all manifest file names */ private List getManifestFileNames(String clusterName, String clusterUUID, int limit) throws IllegalStateException { @@ -1123,7 +1187,7 @@ private int getManifestCodecVersion(String fileName) { if 
(splitName.length == SPLITED_MANIFEST_FILE_LENGTH) { return Integer.parseInt(splitName[splitName.length - 1]); // Last value would be codec version. } else if (splitName.length < SPLITED_MANIFEST_FILE_LENGTH) { // Where codec is not part of file name, i.e. default codec version 0 - // is used. + // is used. return ClusterMetadataManifest.CODEC_V0; } else { throw new IllegalArgumentException("Manifest file name is corrupted"); @@ -1141,7 +1205,7 @@ public void writeMetadataFailed() { /** * Exception for Remote state transfer. */ - static class RemoteStateTransferException extends RuntimeException { + public static class RemoteStateTransferException extends RuntimeException { public RemoteStateTransferException(String errorDesc) { super(errorDesc); @@ -1155,7 +1219,7 @@ public RemoteStateTransferException(String errorDesc, Throwable cause) { /** * Purges all remote cluster state against provided cluster UUIDs * - * @param clusterName name of the cluster + * @param clusterName name of the cluster * @param clusterUUIDs clusteUUIDs for which the remote state needs to be purged */ void deleteStaleUUIDsClusterMetadata(String clusterName, List clusterUUIDs) { @@ -1188,8 +1252,8 @@ public void onFailure(Exception e) { /** * Deletes older than last {@code versionsToRetain} manifests. 
Also cleans up unreferenced IndexMetadata associated with older manifests * - * @param clusterName name of the cluster - * @param clusterUUID uuid of cluster state to refer to in remote + * @param clusterName name of the cluster + * @param clusterUUID uuid of cluster state to refer to in remote * @param manifestsToRetain no of latest manifest files to keep in remote */ // package private for testing @@ -1308,7 +1372,8 @@ private void deleteStalePaths(String clusterName, String clusterUUID, List remoteCustomData = indexMetadata.getCustomData(IndexMetadata.REMOTE_STORE_CUSTOM_KEY); - assert remoteCustomData == null || remoteCustomData.containsKey(PathType.NAME); - if (remoteCustomData != null && remoteCustomData.containsKey(PathType.NAME)) { - PathType pathType = PathType.parseString(remoteCustomData.get(PathType.NAME)); - String hashAlgoStr = remoteCustomData.get(PathHashAlgorithm.NAME); - PathHashAlgorithm hashAlgorithm = Objects.nonNull(hashAlgoStr) ? PathHashAlgorithm.parseString(hashAlgoStr) : null; - return new RemoteStorePathStrategy(pathType, hashAlgorithm); - } - return new RemoteStorePathStrategy(PathType.FIXED); - } - public RemoteStorePathStrategy getRemoteStorePathStrategy() { return remoteStorePathStrategy; } diff --git a/server/src/main/java/org/opensearch/index/cache/request/ShardRequestCache.java b/server/src/main/java/org/opensearch/index/cache/request/ShardRequestCache.java index bb35a09ccab46..c5c49f6dd5dd4 100644 --- a/server/src/main/java/org/opensearch/index/cache/request/ShardRequestCache.java +++ b/server/src/main/java/org/opensearch/index/cache/request/ShardRequestCache.java @@ -32,6 +32,8 @@ package org.opensearch.index.cache.request; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.apache.lucene.util.Accountable; import org.opensearch.common.annotation.PublicApi; import org.opensearch.common.metrics.CounterMetric; @@ -45,13 +47,14 @@ @PublicApi(since = "1.0.0") public final class 
ShardRequestCache { + private static final Logger logger = LogManager.getLogger(ShardRequestCache.class); final CounterMetric evictionsMetric = new CounterMetric(); final CounterMetric totalMetric = new CounterMetric(); final CounterMetric hitCount = new CounterMetric(); final CounterMetric missCount = new CounterMetric(); public RequestCacheStats stats() { - return new RequestCacheStats(totalMetric.count(), evictionsMetric.count(), hitCount.count(), missCount.count()); + return new RequestCacheStats(Math.max(0, totalMetric.count()), evictionsMetric.count(), hitCount.count(), missCount.count()); } public void onHit() { @@ -62,21 +65,37 @@ public void onMiss() { missCount.inc(); } - public void onCached(Accountable key, BytesReference value) { - totalMetric.inc(key.ramBytesUsed() + value.ramBytesUsed()); + // Size-based variants: the key's RAM usage is passed in directly. These are the ones used now that IndicesRequestCache works with ICacheKey. + public void onCached(long keyRamBytesUsed, BytesReference value) { + totalMetric.inc(keyRamBytesUsed + value.ramBytesUsed()); } - public void onRemoval(Accountable key, BytesReference value, boolean evicted) { + public void onRemoval(long keyRamBytesUsed, BytesReference value, boolean evicted) { if (evicted) { evictionsMetric.inc(); } - long dec = 0; - if (key != null) { - dec += key.ramBytesUsed(); - } + long dec = keyRamBytesUsed; if (value != null) { dec += value.ramBytesUsed(); } totalMetric.dec(dec); + if (totalMetric.count() < 0) { + totalMetric.inc(dec); + logger.warn( + "Ignoring the operation to deduct memory: {} from RequestStats memory_size metric as it will " + + "go negative. Current memory: {}. This is a bug.", + dec, + totalMetric.count() + ); + } + } + + // Legacy variants that take an Accountable key. Still functional but no longer used. 
+ public void onCached(Accountable key, BytesReference value) { + totalMetric.inc(key.ramBytesUsed() + value.ramBytesUsed()); + } + + public void onRemoval(Accountable key, BytesReference value, boolean evicted) { + onRemoval(key == null ? 0L : key.ramBytesUsed(), value, evicted); /* a null key contributes 0 bytes, as it did before this change */ } } diff --git a/server/src/main/java/org/opensearch/index/engine/EngineConfig.java b/server/src/main/java/org/opensearch/index/engine/EngineConfig.java index 8106b65bddeec..fe2ce470cda03 100644 --- a/server/src/main/java/org/opensearch/index/engine/EngineConfig.java +++ b/server/src/main/java/org/opensearch/index/engine/EngineConfig.java @@ -236,6 +236,12 @@ private static void doValidateCodecSettings(final String codec) { Property.Dynamic ); + public static final Setting INDEX_USE_COMPOUND_FILE = Setting.boolSetting( + "index.use_compound_file", + true, + Property.IndexScope + ); + private final TranslogConfig translogConfig; private final TranslogFactory translogFactory; @@ -494,6 +500,10 @@ public boolean isReadOnlyReplica() { return indexSettings.isSegRepEnabledOrRemoteNode() && isReadOnlyReplica; } + public boolean useCompoundFile() { + return indexSettings.getValue(INDEX_USE_COMPOUND_FILE); + } + /** * Returns the underlying startedPrimarySupplier. * @return the primary mode supplier. 
diff --git a/server/src/main/java/org/opensearch/index/engine/InternalEngine.java b/server/src/main/java/org/opensearch/index/engine/InternalEngine.java index 7bacec22fc850..59165b936aec8 100644 --- a/server/src/main/java/org/opensearch/index/engine/InternalEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/InternalEngine.java @@ -2341,7 +2341,7 @@ private IndexWriterConfig getIndexWriterConfig() { iwc.setSimilarity(engineConfig.getSimilarity()); iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().getMbFrac()); iwc.setCodec(engineConfig.getCodec()); - iwc.setUseCompoundFile(true); // always use compound on flush - reduces # of file-handles on refresh + iwc.setUseCompoundFile(engineConfig.useCompoundFile()); if (config().getIndexSort() != null) { iwc.setIndexSort(config().getIndexSort()); } diff --git a/server/src/main/java/org/opensearch/index/mapper/BooleanFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/BooleanFieldMapper.java index 3c7925809415a..b4cf585c1329d 100644 --- a/server/src/main/java/org/opensearch/index/mapper/BooleanFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/BooleanFieldMapper.java @@ -37,8 +37,11 @@ import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.StoredField; import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BoostQuery; +import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; -import org.apache.lucene.search.TermRangeQuery; +import org.apache.lucene.search.TermQuery; import org.apache.lucene.util.BytesRef; import org.opensearch.common.Booleans; import org.opensearch.common.Nullable; @@ -55,8 +58,10 @@ import java.time.ZoneId; import java.util.Arrays; import java.util.Collections; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.function.Supplier; /** @@ -175,6 +180,10 @@ 
public BooleanFieldType(String name, boolean searchable) { this(name, searchable, false, true, false, Collections.emptyMap()); } + public BooleanFieldType(String name, boolean searchable, boolean hasDocValues) { + this(name, searchable, false, hasDocValues, false, Collections.emptyMap()); + } + @Override public String typeName() { return CONTENT_TYPE; @@ -257,16 +266,81 @@ public DocValueFormat docValueFormat(@Nullable String format, ZoneId timeZone) { return DocValueFormat.BOOLEAN; } + @Override + public Query termQuery(Object value, QueryShardContext context) { + failIfNotIndexedAndNoDocValues(); + if (!isSearchable()) { + return SortedNumericDocValuesField.newSlowExactQuery(name(), Values.TRUE.bytesEquals(indexedValueForSearch(value)) ? 1 : 0); + } + Query query = new TermQuery(new Term(name(), indexedValueForSearch(value))); + if (boost() != 1f) { + query = new BoostQuery(query, boost()); + } + return query; + } + + @Override + public Query termsQuery(List values, QueryShardContext context) { + failIfNotIndexedAndNoDocValues(); + int distinct = 0; + Set distinctValues = new HashSet<>(values); + for (Object value : distinctValues) { + if (Values.TRUE.equals(indexedValueForSearch(value))) { + distinct |= 2; + } else if (Values.FALSE.equals(indexedValueForSearch(value))) { + distinct |= 1; + } + if (distinct == 3) { + return this.existsQuery(context); + } + } + switch (distinct) { + case 1: + return termQuery("false", context); + case 2: + return termQuery("true", context); + } + + return new MatchNoDocsQuery("Values did not contain True or False"); + } + @Override public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, QueryShardContext context) { - failIfNotIndexed(); - return new TermRangeQuery( - name(), - lowerTerm == null ? null : indexedValueForSearch(lowerTerm), - upperTerm == null ? 
null : indexedValueForSearch(upperTerm), - includeLower, - includeUpper - ); + failIfNotIndexedAndNoDocValues(); + if (lowerTerm == null) { + lowerTerm = false; + includeLower = true; + + } + if (upperTerm == null) { + upperTerm = true; + includeUpper = true; + + } + + lowerTerm = indexedValueForSearch(lowerTerm); + upperTerm = indexedValueForSearch(upperTerm); + + if (lowerTerm.equals(upperTerm)) { /* compare by value, not by reference identity */ + if (!includeLower || !includeUpper) { + return new MatchNoDocsQuery(); + } + return termQuery(lowerTerm.equals(Values.TRUE), context); + } + + if (lowerTerm.equals(Values.TRUE)) { + return new MatchNoDocsQuery(); + } + if (!includeLower && !includeUpper) { + return new MatchNoDocsQuery(); + } else if (!includeLower) { + return termQuery(true, context); + } else if (!includeUpper) { + return termQuery(false, context); + } else { + return this.existsQuery(context); + } + } } diff --git a/server/src/main/java/org/opensearch/index/mapper/DateFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/DateFieldMapper.java index d98e6ea6af83d..b7ee3bb8ca3e3 100644 --- a/server/src/main/java/org/opensearch/index/mapper/DateFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/DateFieldMapper.java @@ -457,22 +457,30 @@ public Query rangeQuery( @Nullable DateMathParser forcedDateParser, QueryShardContext context ) { - failIfNotIndexed(); + failIfNotIndexedAndNoDocValues(); if (relation == ShapeRelation.DISJOINT) { throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] does not support DISJOINT ranges"); } DateMathParser parser = forcedDateParser == null ? 
dateMathParser : forcedDateParser; return dateRangeQuery(lowerTerm, upperTerm, includeLower, includeUpper, timeZone, parser, context, resolution, (l, u) -> { - Query query = LongPoint.newRangeQuery(name(), l, u); - if (hasDocValues()) { + if (isSearchable() && hasDocValues()) { + Query query = LongPoint.newRangeQuery(name(), l, u); Query dvQuery = SortedNumericDocValuesField.newSlowRangeQuery(name(), l, u); query = new IndexOrDocValuesQuery(query, dvQuery); if (context.indexSortedOnField(name())) { query = new IndexSortSortedNumericDocValuesRangeQuery(name(), l, u, query); } + return query; + } + if (hasDocValues()) { + Query query = SortedNumericDocValuesField.newSlowRangeQuery(name(), l, u); + if (context.indexSortedOnField(name())) { + query = new IndexSortSortedNumericDocValuesRangeQuery(name(), l, u, query); + } + return query; } - return query; + return LongPoint.newRangeQuery(name(), l, u); }); } @@ -543,6 +551,7 @@ public static long parseToLong( @Override public Query distanceFeatureQuery(Object origin, String pivot, float boost, QueryShardContext context) { + failIfNotIndexedAndNoDocValues(); long originLong = parseToLong(origin, true, null, null, context::nowInMillis); TimeValue pivotTime = TimeValue.parseTimeValue(pivot, "distance_feature.pivot"); return resolution.distanceFeatureQuery(name(), boost, originLong, pivotTime); @@ -559,6 +568,10 @@ public Relation isFieldWithinQuery( DateMathParser dateParser, QueryRewriteContext context ) throws IOException { + // if we have only doc_values enabled we do not look at the BKD so we return an INTERSECTS by default + if (isSearchable() == false && hasDocValues()) { + return Relation.INTERSECTS; + } if (dateParser == null) { dateParser = this.dateMathParser; } diff --git a/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java b/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java index 23bb4cea17a20..fe90f24b0f544 100644 --- 
a/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java +++ b/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java @@ -11,6 +11,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.action.admin.cluster.remotestore.restore.RestoreRemoteStoreRequest; +import org.opensearch.action.support.IndicesOptions; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.ClusterStateUpdateTask; import org.opensearch.cluster.block.ClusterBlocks; @@ -48,6 +49,7 @@ import java.util.stream.Collectors; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_STORE_ENABLED; +import static org.opensearch.common.util.IndexUtils.filterIndices; import static org.opensearch.repositories.blobstore.BlobStoreRepository.SYSTEM_REPOSITORY_SETTING; /** @@ -158,7 +160,12 @@ public RemoteRestoreResult restore( throw new IllegalStateException("Unable to restore remote index metadata", e); } } else { - for (String indexName : indexNames) { + List filteredIndices = filterIndices( + List.of(currentState.metadata().getConcreteAllIndices()), + indexNames, + IndicesOptions.fromOptions(true, true, true, true) + ); + for (String indexName : filteredIndices) { IndexMetadata indexMetadata = currentState.metadata().index(indexName); if (indexMetadata == null) { logger.warn("Index restore is not supported for non-existent index. 
Skipping: {}", indexName); diff --git a/server/src/main/java/org/opensearch/index/remote/RemoteIndexPath.java b/server/src/main/java/org/opensearch/index/remote/RemoteIndexPath.java new file mode 100644 index 0000000000000..89b642b79df86 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/remote/RemoteIndexPath.java @@ -0,0 +1,166 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.remote; + +import org.apache.logging.log4j.message.ParameterizedMessage; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.common.blobstore.BlobPath; +import org.opensearch.core.xcontent.ToXContentFragment; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.index.remote.RemoteStoreEnums.DataCategory; +import org.opensearch.index.remote.RemoteStoreEnums.DataType; +import org.opensearch.index.remote.RemoteStoreEnums.PathHashAlgorithm; +import org.opensearch.index.remote.RemoteStoreEnums.PathType; +import org.opensearch.index.remote.RemoteStorePathStrategy.PathInput; + +import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.stream.Collectors; + +import static org.opensearch.index.remote.RemoteStoreEnums.DataCategory.SEGMENTS; +import static org.opensearch.index.remote.RemoteStoreEnums.DataCategory.TRANSLOG; +import static org.opensearch.index.remote.RemoteStoreEnums.DataType.DATA; +import static org.opensearch.index.remote.RemoteStoreEnums.DataType.METADATA; +import static org.opensearch.index.remote.RemoteStorePathStrategy.isCompatible; + +/** + * Remote index path information. 
+ * + * @opensearch.internal + */ +@ExperimentalApi +public class RemoteIndexPath implements ToXContentFragment { + + public static final Map> TRANSLOG_PATH = Map.of(TRANSLOG, List.of(DATA, METADATA)); + public static final Map> SEGMENT_PATH = Map.of(SEGMENTS, List.of(DataType.values())); + public static final Map> COMBINED_PATH; + + static { + Map> combinedPath = new HashMap<>(); + combinedPath.putAll(TRANSLOG_PATH); + combinedPath.putAll(SEGMENT_PATH); + COMBINED_PATH = Collections.unmodifiableMap(combinedPath); + } + public static final String DEFAULT_VERSION = "1"; + public static final String DIR = "remote-index-path"; + public static final String FILE_NAME_FORMAT = "remote_path_%s"; + static final String KEY_VERSION = "version"; + static final String KEY_INDEX_UUID = "index_uuid"; + static final String KEY_SHARD_COUNT = "shard_count"; + static final String KEY_PATH_CREATION_MAP = "path_creation_map"; + static final String KEY_PATHS = "paths"; + + private final String version; + private final String indexUUID; + private final int shardCount; + private final Iterable basePath; + private final PathType pathType; + private final PathHashAlgorithm pathHashAlgorithm; + + /** + * This keeps the map of paths that would be present in the content of the index path file. For eg - It is possible + * that segment and translog repository can be different. For this use case, we have either segment or translog as the + * key, and list of data, metadata, and lock_files (only for segment) as the value. 
+ */ + private final Map> pathCreationMap; + + public RemoteIndexPath( + String indexUUID, + int shardCount, + Iterable basePath, + PathType pathType, + PathHashAlgorithm pathHashAlgorithm, + Map> pathCreationMap + ) { + if (Objects.isNull(pathCreationMap) + || Objects.isNull(pathType) + || isCompatible(pathType, pathHashAlgorithm) == false + || shardCount < 1 + || Objects.isNull(basePath) + || pathCreationMap.isEmpty() + || pathCreationMap.keySet().stream().anyMatch(k -> pathCreationMap.get(k).isEmpty())) { + ParameterizedMessage parameterizedMessage = new ParameterizedMessage( + "Invalid input in RemoteIndexPath constructor indexUUID={} shardCount={} basePath={} pathType={}" + + " pathHashAlgorithm={} pathCreationMap={}", + indexUUID, + shardCount, + basePath, + pathType, + pathHashAlgorithm, + pathCreationMap + ); + throw new IllegalArgumentException(parameterizedMessage.getFormattedMessage()); + } + boolean validMap = pathCreationMap.keySet() + .stream() + .allMatch(k -> pathCreationMap.get(k).stream().allMatch(k::isSupportedDataType)); + if (validMap == false) { + throw new IllegalArgumentException( + new ParameterizedMessage("pathCreationMap={} is having illegal combination of category and type", pathCreationMap) + .getFormattedMessage() + ); + } + this.version = DEFAULT_VERSION; + this.indexUUID = indexUUID; + this.shardCount = shardCount; + this.basePath = basePath; + this.pathType = pathType; + this.pathHashAlgorithm = pathHashAlgorithm; + this.pathCreationMap = pathCreationMap; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.field(KEY_VERSION, version); + builder.field(KEY_INDEX_UUID, indexUUID); + builder.field(KEY_SHARD_COUNT, shardCount); + builder.field(PathType.NAME, pathType.name()); + if (Objects.nonNull(pathHashAlgorithm)) { + builder.field(PathHashAlgorithm.NAME, pathHashAlgorithm.name()); + } + + Map> pathMap = new HashMap<>(); + for (Map.Entry> entry : 
pathCreationMap.entrySet()) { + pathMap.put(entry.getKey().getName(), entry.getValue().stream().map(DataType::getName).collect(Collectors.toList())); + } + builder.field(KEY_PATH_CREATION_MAP); + builder.map(pathMap); + builder.startArray(KEY_PATHS); + for (Map.Entry> entry : pathCreationMap.entrySet()) { + DataCategory dataCategory = entry.getKey(); + for (DataType type : entry.getValue()) { + for (int shardNo = 0; shardNo < shardCount; shardNo++) { + PathInput pathInput = PathInput.builder() + .basePath(new BlobPath().add(basePath)) + .indexUUID(indexUUID) + .shardId(Integer.toString(shardNo)) + .dataCategory(dataCategory) + .dataType(type) + .build(); + builder.value(pathType.path(pathInput, pathHashAlgorithm).buildAsString()); + } + } + } + builder.endArray(); + return builder; + } + + public static RemoteIndexPath fromXContent(XContentParser ignored) { + throw new UnsupportedOperationException("RemoteIndexPath.fromXContent() is not supported"); + } + + String getVersion() { + return version; + } +} diff --git a/server/src/main/java/org/opensearch/index/remote/RemoteIndexPathUploader.java b/server/src/main/java/org/opensearch/index/remote/RemoteIndexPathUploader.java new file mode 100644 index 0000000000000..d736a82d57a7c --- /dev/null +++ b/server/src/main/java/org/opensearch/index/remote/RemoteIndexPathUploader.java @@ -0,0 +1,306 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.remote; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.message.ParameterizedMessage; +import org.opensearch.action.LatchedActionListener; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.UUIDs; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.common.blobstore.BlobContainer; +import org.opensearch.common.blobstore.BlobPath; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.core.action.ActionListener; +import org.opensearch.core.index.Index; +import org.opensearch.gateway.remote.IndexMetadataUploadListener; +import org.opensearch.gateway.remote.RemoteClusterStateService.RemoteStateTransferException; +import org.opensearch.index.remote.RemoteStoreEnums.PathType; +import org.opensearch.node.Node; +import org.opensearch.node.remotestore.RemoteStoreNodeAttribute; +import org.opensearch.repositories.RepositoriesService; +import org.opensearch.repositories.Repository; +import org.opensearch.repositories.blobstore.BlobStoreRepository; +import org.opensearch.repositories.blobstore.ConfigBlobStoreFormat; +import org.opensearch.threadpool.ThreadPool; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Objects; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; +import java.util.stream.Collectors; + +import static org.opensearch.gateway.remote.RemoteClusterStateService.INDEX_METADATA_UPLOAD_TIMEOUT_SETTING; +import static org.opensearch.index.remote.RemoteIndexPath.COMBINED_PATH; +import static org.opensearch.index.remote.RemoteIndexPath.SEGMENT_PATH; +import static 
org.opensearch.index.remote.RemoteIndexPath.TRANSLOG_PATH;
+import static org.opensearch.index.remote.RemoteStoreUtils.determineRemoteStorePathStrategy;
+import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.isRemoteDataAttributePresent;
+import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.isRemoteStoreClusterStateEnabled;
+
+/**
+ * Uploads the remote store path for all possible combinations of {@link org.opensearch.index.remote.RemoteStoreEnums.DataCategory}
+ * and {@link org.opensearch.index.remote.RemoteStoreEnums.DataType} for each shard of an index.
+ *
+ * @opensearch.internal
+ */
+@ExperimentalApi
+public class RemoteIndexPathUploader extends IndexMetadataUploadListener {
+
+    /** Separator placed between the components of an uploaded path file name, see {@link #generateFileName}. */
+    public static final String DELIMITER = "#";
+    /** Blob format used to write {@link RemoteIndexPath} objects into the repository. */
+    public static final ConfigBlobStoreFormat<RemoteIndexPath> REMOTE_INDEX_PATH_FORMAT = new ConfigBlobStoreFormat<>(
+        RemoteIndexPath.FILE_NAME_FORMAT
+    );
+
+    private static final String TIMEOUT_EXCEPTION_MSG = "Timed out waiting while uploading remote index path file for indexes=%s";
+    private static final String UPLOAD_EXCEPTION_MSG = "Exception occurred while uploading remote index paths for indexes=%s";
+    static final String TRANSLOG_REPO_NAME_KEY = Node.NODE_ATTRIBUTES.getKey()
+        + RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY;
+    static final String SEGMENT_REPO_NAME_KEY = Node.NODE_ATTRIBUTES.getKey()
+        + RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY;
+
+    private static final Logger logger = LogManager.getLogger(RemoteIndexPathUploader.class);
+
+    private final Settings settings;
+    private final boolean isRemoteDataAttributePresent;
+    private final boolean isTranslogSegmentRepoSame;
+    private final Supplier<RepositoriesService> repositoriesService;
+    private volatile TimeValue indexMetadataUploadTimeout;
+
+    // Both repositories are resolved in start(); they stay null when the remote data attributes are not present.
+    private BlobStoreRepository translogRepository;
+    private BlobStoreRepository segmentRepository;
+
+    /**
+     * Creates the uploader and registers a consumer so that updates to {@code INDEX_METADATA_UPLOAD_TIMEOUT_SETTING}
+     * are picked up dynamically.
+     *
+     * @param threadPool          thread pool handed to the parent listener; uploads are run on the GENERIC pool
+     * @param settings            node settings, must not be null
+     * @param repositoriesService supplier of the repositories service, must not be null; it is only dereferenced in {@link #start()}
+     * @param clusterSettings     cluster settings used to read and track the upload timeout, must not be null
+     */
+    public RemoteIndexPathUploader(
+        ThreadPool threadPool,
+        Settings settings,
+        Supplier<RepositoriesService> repositoriesService,
+        ClusterSettings clusterSettings
+    ) {
+        super(threadPool, ThreadPool.Names.GENERIC);
+        this.settings = Objects.requireNonNull(settings);
+        this.repositoriesService = Objects.requireNonNull(repositoriesService);
+        isRemoteDataAttributePresent = isRemoteDataAttributePresent(settings);
+        // If the remote data attributes are not present, then there is no effect of translog and segment being same or different or null.
+        isTranslogSegmentRepoSame = isTranslogSegmentRepoSame();
+        Objects.requireNonNull(clusterSettings);
+        indexMetadataUploadTimeout = clusterSettings.get(INDEX_METADATA_UPLOAD_TIMEOUT_SETTING);
+        clusterSettings.addSettingsUpdateConsumer(INDEX_METADATA_UPLOAD_TIMEOUT_SETTING, this::setIndexMetadataUploadTimeout);
+    }
+
+    /**
+     * Uploads the index path file(s) for every index in {@code indexMetadataList} that needs one (see
+     * {@link #requiresPathUpload}), then blocks until all uploads have completed or the configured upload timeout
+     * has elapsed, and finally reports the outcome to {@code actionListener}.
+     *
+     * @param indexMetadataList       index metadata that is being uploaded
+     * @param prevIndexMetadataByName previous index metadata keyed by index name; an index without an entry is treated as new
+     * @param actionListener          notified with {@code null} on success, or with a {@link RemoteStateTransferException}
+     *                                on timeout, interruption or upload failure
+     */
+    @Override
+    protected void doOnUpload(
+        List<IndexMetadata> indexMetadataList,
+        Map<String, IndexMetadata> prevIndexMetadataByName,
+        ActionListener<Void> actionListener
+    ) {
+        if (isRemoteDataAttributePresent == false) {
+            logger.trace("Skipping beforeNewIndexUpload as there are no remote indexes");
+            actionListener.onResponse(null);
+            return;
+        }
+
+        long startTime = System.nanoTime();
+        boolean success = false;
+        List<IndexMetadata> eligibleList = indexMetadataList.stream()
+            .filter(idxMd -> requiresPathUpload(idxMd, prevIndexMetadataByName.get(idxMd.getIndex().getName())))
+            .collect(Collectors.toList());
+        String indexNames = eligibleList.stream().map(IndexMetadata::getIndex).map(Index::toString).collect(Collectors.joining(","));
+        // One upload per index when translog and segment share a repository, otherwise one upload per repository.
+        int latchCount = eligibleList.size() * (isTranslogSegmentRepoSame ? 1 : 2);
+        CountDownLatch latch = new CountDownLatch(latchCount);
+        List<Exception> exceptionList = Collections.synchronizedList(new ArrayList<>(latchCount));
+        try {
+            for (IndexMetadata indexMetadata : eligibleList) {
+                writeIndexPathAsync(indexMetadata, latch, exceptionList);
+            }
+
+            logger.trace(new ParameterizedMessage("Remote index path upload started for {}", indexNames));
+
+            try {
+                if (latch.await(indexMetadataUploadTimeout.millis(), TimeUnit.MILLISECONDS) == false) {
+                    RemoteStateTransferException ex = new RemoteStateTransferException(
+                        String.format(Locale.ROOT, TIMEOUT_EXCEPTION_MSG, indexNames)
+                    );
+                    exceptionList.forEach(ex::addSuppressed);
+                    actionListener.onFailure(ex);
+                    return;
+                }
+            } catch (InterruptedException exception) {
+                // Restore the interrupt flag so that the owner of this thread can still observe the interruption.
+                Thread.currentThread().interrupt();
+                exceptionList.forEach(exception::addSuppressed);
+                RemoteStateTransferException ex = new RemoteStateTransferException(
+                    String.format(Locale.ROOT, TIMEOUT_EXCEPTION_MSG, indexNames),
+                    exception
+                );
+                actionListener.onFailure(ex);
+                return;
+            }
+            if (exceptionList.size() > 0) {
+                RemoteStateTransferException ex = new RemoteStateTransferException(
+                    String.format(Locale.ROOT, UPLOAD_EXCEPTION_MSG, indexNames)
+                );
+                exceptionList.forEach(ex::addSuppressed);
+                actionListener.onFailure(ex);
+                return;
+            }
+            success = true;
+            actionListener.onResponse(null);
+        } catch (Exception exception) {
+            RemoteStateTransferException ex = new RemoteStateTransferException(
+                String.format(Locale.ROOT, UPLOAD_EXCEPTION_MSG, indexNames),
+                exception
+            );
+            exceptionList.forEach(ex::addSuppressed);
+            actionListener.onFailure(ex);
+        } finally {
+            long tookTimeNs = System.nanoTime() - startTime;
+            logger.trace(new ParameterizedMessage("executed beforeNewIndexUpload status={} tookTimeNs={}", success, tookTimeNs));
+        }
+
+    }
+
+    /**
+     * Triggers the upload(s) for a single index. The number of uploads started here must match the per-index factor
+     * used to size the latch in {@link #doOnUpload}.
+     */
+    private void writeIndexPathAsync(IndexMetadata idxMD, CountDownLatch latch, List<Exception> exceptionList) {
+        if (isTranslogSegmentRepoSame) {
+            // If the repositories are same, then we need to upload a single file containing paths for both translog and segments.
+            writePathToRemoteStore(idxMD, translogRepository, latch, exceptionList, COMBINED_PATH);
+        } else {
+            // If the repositories are different, then we need to upload one file per segment and translog containing their individual
+            // paths.
+            writePathToRemoteStore(idxMD, translogRepository, latch, exceptionList, TRANSLOG_PATH);
+            writePathToRemoteStore(idxMD, segmentRepository, latch, exceptionList, SEGMENT_PATH);
+        }
+    }
+
+    /**
+     * Builds the {@link RemoteIndexPath} for the given index and writes it to {@code repository}. Completion, whether
+     * successful or not, is reported through a latched listener that counts down {@code latch} and records failures in
+     * {@code exceptionList}.
+     */
+    private void writePathToRemoteStore(
+        IndexMetadata idxMD,
+        BlobStoreRepository repository,
+        CountDownLatch latch,
+        List<Exception> exceptionList,
+        Map<RemoteStoreEnums.DataCategory, List<RemoteStoreEnums.DataType>> pathCreationMap
+    ) {
+        Map<String, String> remoteCustomData = idxMD.getCustomData(IndexMetadata.REMOTE_STORE_CUSTOM_KEY);
+        PathType pathType = PathType.valueOf(remoteCustomData.get(PathType.NAME));
+        RemoteStoreEnums.PathHashAlgorithm hashAlgorithm = RemoteStoreEnums.PathHashAlgorithm.valueOf(
+            remoteCustomData.get(RemoteStoreEnums.PathHashAlgorithm.NAME)
+        );
+        String indexUUID = idxMD.getIndexUUID();
+        int shardCount = idxMD.getNumberOfShards();
+        BlobPath basePath = repository.basePath();
+        BlobContainer blobContainer = repository.blobStore().blobContainer(basePath.add(RemoteIndexPath.DIR));
+        ActionListener<Void> actionListener = getUploadPathLatchedActionListener(idxMD, latch, exceptionList, pathCreationMap);
+        try {
+            RemoteIndexPath remoteIndexPath = new RemoteIndexPath(
+                indexUUID,
+                shardCount,
+                basePath,
+                pathType,
+                hashAlgorithm,
+                pathCreationMap
+            );
+            String fileName = generateFileName(indexUUID, idxMD.getVersion(), remoteIndexPath.getVersion());
+            REMOTE_INDEX_PATH_FORMAT.writeAsyncWithUrgentPriority(remoteIndexPath, blobContainer, fileName, actionListener);
+        } catch (IOException ioException) {
+            RemoteStateTransferException ex = new RemoteStateTransferException(
+                String.format(Locale.ROOT, UPLOAD_EXCEPTION_MSG, List.of(idxMD.getIndex().getName())),
+                ioException
+            );
+            // Route the failure through the latched listener so that the latch is still counted down.
+            actionListener.onFailure(ex);
+        }
+    }
+
+    /**
+     * Looks up the repository whose name is stored under {@code repoSetting} in the node settings.
+     * The configuration and type checks are assertions only, so they are not enforced when assertions are disabled.
+     */
+    private Repository validateAndGetRepository(String repoSetting) {
+        final String repo = settings.get(repoSetting);
+        assert repo != null : "Remote " + repoSetting + " repository is not configured";
+        final Repository repository = repositoriesService.get().repository(repo);
+        assert repository instanceof BlobStoreRepository : "Repository should be instance of BlobStoreRepository";
+        return repository;
+    }
+
+    /**
+     * Resolves the translog and segment repositories. This is a no-op when the remote store data attributes are not present.
+     */
+    public void start() {
+        assert isRemoteStoreClusterStateEnabled(settings) == true : "Remote cluster state is not enabled";
+        if (isRemoteDataAttributePresent == false) {
+            // If remote store data attributes are not present then we skip this.
+            return;
+        }
+        translogRepository = (BlobStoreRepository) validateAndGetRepository(TRANSLOG_REPO_NAME_KEY);
+        segmentRepository = (BlobStoreRepository) validateAndGetRepository(SEGMENT_REPO_NAME_KEY);
+    }
+
+    /**
+     * Returns true when the translog and segment repository names configured in the node settings are equal,
+     * which includes the case where both are absent.
+     */
+    private boolean isTranslogSegmentRepoSame() {
+        // TODO - The current comparison checks the repository name. But it is also possible that the repository are same
+        // by attributes, but different by name. We need to handle this.
+        String translogRepoName = settings.get(TRANSLOG_REPO_NAME_KEY);
+        String segmentRepoName = settings.get(SEGMENT_REPO_NAME_KEY);
+        return Objects.equals(translogRepoName, segmentRepoName);
+    }
+
+    /**
+     * Creates the listener passed to the asynchronous write: it logs the outcome, records any failure in
+     * {@code exceptionList}, and is wrapped in a {@link LatchedActionListener} bound to {@code latch}.
+     */
+    private LatchedActionListener<Void> getUploadPathLatchedActionListener(
+        IndexMetadata indexMetadata,
+        CountDownLatch latch,
+        List<Exception> exceptionList,
+        Map<RemoteStoreEnums.DataCategory, List<RemoteStoreEnums.DataType>> pathCreationMap
+    ) {
+        return new LatchedActionListener<>(
+            ActionListener.wrap(
+                resp -> logger.trace(
+                    new ParameterizedMessage("Index path uploaded for {} indexMetadata={}", pathCreationMap, indexMetadata)
+                ),
+                ex -> {
+                    logger.error(
+                        new ParameterizedMessage(
+                            "Exception during Index path upload for {} indexMetadata={}",
+                            pathCreationMap,
+                            indexMetadata
+                        ),
+                        ex
+                    );
+                    exceptionList.add(ex);
+                }
+            ),
+            latch
+        );
+    }
+
+    /**
+     * This method checks if the index metadata has attributes that calls for uploading the index path for remote store
+     * uploads. It returns true when the current path type is {@code HASHED_PREFIX} and the previous metadata is either
+     * absent or had a different path type.
+     */
+    private boolean requiresPathUpload(IndexMetadata indexMetadata, IndexMetadata prevIndexMetadata) {
+        PathType pathType = determineRemoteStorePathStrategy(indexMetadata).getType();
+        PathType prevPathType = Objects.nonNull(prevIndexMetadata) ? determineRemoteStorePathStrategy(prevIndexMetadata).getType() : null;
+        // If previous metadata is null or previous path type is not hashed_prefix, and along with new path type being
+        // hashed_prefix, then this can mean any of the following -
+        // 1. This is creation of remote index with hashed_prefix
+        // 2. We are enabling cluster state for the very first time with multiple indexes having hashed_prefix path type.
+        // 3. A docrep index is being migrated to being remote store index.
+        return pathType == PathType.HASHED_PREFIX && (Objects.isNull(prevPathType) || prevPathType != PathType.HASHED_PREFIX);
+    }
+
+    /** Consumer for dynamic updates of the index metadata upload timeout setting. */
+    private void setIndexMetadataUploadTimeout(TimeValue newIndexMetadataUploadTimeout) {
+        this.indexMetadataUploadTimeout = newIndexMetadataUploadTimeout;
+    }
+
+    /**
+     * Creates a file name by combining index uuid, index metadata version and file version. # has been chosen as the
+     * delimiter since it does not collide with any possible letters in file name. The random base64 uuid is added to
+     * ensure that the file does not get overwritten. We do check if translog and segment repo are same by name, but
+     * it is possible that a user configures same repo by different name for translog and segment in which case, this
+     * will lead to file not being overwritten.
+     */
+    private String generateFileName(String indexUUID, long indexMetadataVersion, String fileVersion) {
+        return String.join(DELIMITER, indexUUID, Long.toString(indexMetadataVersion), fileVersion, UUIDs.randomBase64UUID());
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/remote/RemoteMigrationIndexMetadataUpdater.java b/server/src/main/java/org/opensearch/index/remote/RemoteMigrationIndexMetadataUpdater.java new file mode 100644 index 0000000000000..761fa20ea64e5 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/remote/RemoteMigrationIndexMetadataUpdater.java @@ -0,0 +1,181 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.remote; + +import org.apache.logging.log4j.Logger; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.routing.IndexRoutingTable; +import org.opensearch.cluster.routing.RoutingTable; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.cluster.routing.ShardRoutingState; +import org.opensearch.common.settings.Settings; +import org.opensearch.index.remote.RemoteStoreEnums.PathType; +import org.opensearch.indices.replication.common.ReplicationType; + +import java.util.List; +import java.util.Map; +import java.util.Objects; + +import static org.opensearch.cluster.metadata.IndexMetadata.REMOTE_STORE_CUSTOM_KEY; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_SEGMENT_STORE_REPOSITORY; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_STORE_ENABLED; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_TRANSLOG_STORE_REPOSITORY; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REPLICATION_TYPE; +import static 
org.opensearch.index.remote.RemoteStoreUtils.determineRemoteStorePathStrategyDuringMigration;
+import static org.opensearch.index.remote.RemoteStoreUtils.getRemoteStoreRepoName;
+import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY;
+import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY;
+
+/**
+ * Utils for checking and mutating cluster state during remote migration
+ *
+ * @opensearch.internal
+ */
+public class RemoteMigrationIndexMetadataUpdater {
+    private final DiscoveryNodes discoveryNodes;
+    private final RoutingTable routingTable;
+    private final Settings clusterSettings;
+    private final IndexMetadata indexMetadata;
+    private final Logger logger;
+
+    /**
+     * @param discoveryNodes  current discovery nodes from the cluster state
+     * @param routingTable    current routing table from the cluster state
+     * @param indexMetadata   current metadata of the index that may be updated
+     * @param clusterSettings current cluster settings
+     * @param logger          logger used for all messages emitted by this updater
+     */
+    public RemoteMigrationIndexMetadataUpdater(
+        DiscoveryNodes discoveryNodes,
+        RoutingTable routingTable,
+        IndexMetadata indexMetadata,
+        Settings clusterSettings,
+        Logger logger
+
+    ) {
+        this.discoveryNodes = discoveryNodes;
+        this.routingTable = routingTable;
+        this.clusterSettings = clusterSettings;
+        this.indexMetadata = indexMetadata;
+        this.logger = logger;
+    }
+
+    /**
+     * During docrep to remote store migration, applies the following remote store based index settings
+     * once all shards of an index have moved over to remote store enabled nodes
+     * (see {@link #updateRemoteStoreSettings}):
+     * <ul>
+     *   <li>{@link IndexMetadata#SETTING_REMOTE_STORE_ENABLED} is set to {@code true}</li>
+     *   <li>{@link IndexMetadata#SETTING_REPLICATION_TYPE} is set to {@link ReplicationType#SEGMENT}</li>
+     *   <li>{@link IndexMetadata#SETTING_REMOTE_SEGMENT_STORE_REPOSITORY} is set to the segment repository name</li>
+     *   <li>{@link IndexMetadata#SETTING_REMOTE_TRANSLOG_STORE_REPOSITORY} is set to the translog repository name</li>
+     * </ul>
+     * The settings version on the builder is updated as well. The remote store path custom metadata is not touched
+     * here; it is added by {@link #maybeUpdateRemoteStorePathStrategy}.
+     *
+     * @param indexMetadataBuilder Mutated {@link IndexMetadata.Builder} having the previous state updates
+     * @param index                index name
+     */
+    public void maybeAddRemoteIndexSettings(IndexMetadata.Builder indexMetadataBuilder, String index) {
+        Settings currentIndexSettings = indexMetadata.getSettings();
+        if (needsRemoteIndexSettingsUpdate(routingTable.indicesRouting().get(index), discoveryNodes, currentIndexSettings)) {
+            logger.info(
+                "Index {} does not have remote store based index settings but all primary shards and STARTED replica shards have moved to remote enabled nodes. Applying remote store settings to the index",
+                index
+            );
+            Map<String, String> remoteRepoNames = getRemoteStoreRepoName(discoveryNodes);
+            String segmentRepoName = remoteRepoNames.get(REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY);
+            String tlogRepoName = remoteRepoNames.get(REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY);
+            assert Objects.nonNull(segmentRepoName) && Objects.nonNull(tlogRepoName) : "Remote repo names cannot be null";
+            Settings.Builder indexSettingsBuilder = Settings.builder().put(currentIndexSettings);
+            updateRemoteStoreSettings(indexSettingsBuilder, segmentRepoName, tlogRepoName);
+            indexMetadataBuilder.settings(indexSettingsBuilder);
+            // NOTE(review): the new settings version is derived from indexMetadata.getVersion(), i.e. the overall
+            // metadata version, rather than from a settings-specific version - confirm this is intended.
+            indexMetadataBuilder.settingsVersion(1 + indexMetadata.getVersion());
+        } else {
+            logger.debug("Index {} does not satisfy criteria for applying remote store settings", index);
+        }
+    }
+
+    /**
+     * Returns true iff the index does not yet have the remote store settings and all the below conditions are true:
+     * - All primary shards are in {@link ShardRoutingState#STARTED} state and are in remote store enabled nodes
+     * - No replica shard in {@link ShardRoutingState#RELOCATING} state
+     * - All {@link ShardRoutingState#STARTED} replica shards are in remote store enabled nodes
+     *
+     * @param indexRoutingTable    current {@link IndexRoutingTable} from cluster state
+     * @param discoveryNodes       set of discovery nodes from cluster state
+     * @param currentIndexSettings current index {@link Settings} from cluster state
+     * @return true or false depending on the met conditions
+     */
+    private boolean needsRemoteIndexSettingsUpdate(
+        IndexRoutingTable indexRoutingTable,
+        DiscoveryNodes discoveryNodes,
+        Settings currentIndexSettings
+    ) {
+        assert currentIndexSettings != null : "IndexMetadata for a shard cannot be null";
+        if (indexHasRemoteStoreSettings(currentIndexSettings) == false) {
+            boolean allPrimariesStartedAndOnRemote = indexRoutingTable.shardsMatchingPredicate(ShardRouting::primary)
+                .stream()
+                .allMatch(shardRouting -> shardRouting.started() && discoveryNodes.get(shardRouting.currentNodeId()).isRemoteStoreNode());
+            List<ShardRouting> replicaShards = indexRoutingTable.shardsMatchingPredicate(shardRouting -> shardRouting.primary() == false);
+            boolean noRelocatingReplicas = replicaShards.stream().noneMatch(ShardRouting::relocating);
+            boolean allStartedReplicasOnRemote = replicaShards.stream()
+                .filter(ShardRouting::started)
+                .allMatch(shardRouting -> discoveryNodes.get(shardRouting.currentNodeId()).isRemoteStoreNode());
+            return allPrimariesStartedAndOnRemote && noRelocatingReplicas && allStartedReplicasOnRemote;
+        }
+        return false;
+    }
+
+    /**
+     * Updates the remote store path strategy metadata for the index when it is migrating to remote.
+     * This is run during state change of each shard copy when the cluster is in `MIXED` mode and the direction of migration is `REMOTE_STORE`
+     * Should not interfere with docrep functionality even if the index is in docrep nodes since this metadata
+     * is not used anywhere in the docrep flow
+     * Checks are in place to make this execution no-op if the index metadata is already present.
+     *
+     * @param indexMetadataBuilder Mutated {@link IndexMetadata.Builder} having the previous state updates
+     * @param index                index name
+     */
+    public void maybeUpdateRemoteStorePathStrategy(IndexMetadata.Builder indexMetadataBuilder, String index) {
+        if (indexHasRemotePathMetadata(indexMetadata) == false) {
+            logger.info("Adding remote store path strategy for index [{}] during migration", index);
+            indexMetadataBuilder.putCustom(
+                REMOTE_STORE_CUSTOM_KEY,
+                determineRemoteStorePathStrategyDuringMigration(clusterSettings, discoveryNodes)
+            );
+        } else {
+            logger.debug("Index {} already has remote store path strategy", index);
+        }
+    }
+
+    /**
+     * Returns true if the index has both the remote store settings (see {@link #indexHasRemoteStoreSettings})
+     * and the remote store path custom metadata (see {@link #indexHasRemotePathMetadata}).
+     */
+    public static boolean indexHasAllRemoteStoreRelatedMetadata(IndexMetadata indexMetadata) {
+        return indexHasRemoteStoreSettings(indexMetadata.getSettings()) && indexHasRemotePathMetadata(indexMetadata);
+    }
+
+    /**
+     * Checks that the current index settings have:
+     * - index.remote_store.enabled present
+     * - index.remote_store.segment.repository present
+     * - index.remote_store.translog.repository present
+     * - index.replication.type == SEGMENT
+     * Only the replication type is compared by value; for the other three settings only their existence is checked.
+     *
+     * @param indexSettings Current index settings
+     * @return true if all above conditions match. false otherwise
+     */
+    public static boolean indexHasRemoteStoreSettings(Settings indexSettings) {
+        return IndexMetadata.INDEX_REMOTE_STORE_ENABLED_SETTING.exists(indexSettings)
+            && IndexMetadata.INDEX_REMOTE_TRANSLOG_REPOSITORY_SETTING.exists(indexSettings)
+            && IndexMetadata.INDEX_REMOTE_SEGMENT_STORE_REPOSITORY_SETTING.exists(indexSettings)
+            && IndexMetadata.INDEX_REPLICATION_TYPE_SETTING.get(indexSettings) == ReplicationType.SEGMENT;
+    }
+
+    /**
+     * Checks whether the current index metadata customs has the {@link IndexMetadata#REMOTE_STORE_CUSTOM_KEY} key.
+     * If it does, checks if the path_type sub-key is present
+     *
+     * @param indexMetadata Current index metadata
+     * @return true if all above conditions match. false otherwise
+     */
+    public static boolean indexHasRemotePathMetadata(IndexMetadata indexMetadata) {
+        Map<String, String> customMetadata = indexMetadata.getCustomData(REMOTE_STORE_CUSTOM_KEY);
+        return Objects.nonNull(customMetadata) && Objects.nonNull(customMetadata.get(PathType.NAME));
+    }
+
+    /**
+     * Puts the remote store related index settings into the given builder: remote store enabled, segment
+     * replication, and the given segment and translog repository names.
+     *
+     * @param settingsBuilder    builder that is mutated in place
+     * @param segmentRepository  name of the remote segment store repository
+     * @param translogRepository name of the remote translog store repository
+     */
+    public static void updateRemoteStoreSettings(Settings.Builder settingsBuilder, String segmentRepository, String translogRepository) {
+        settingsBuilder.put(SETTING_REMOTE_STORE_ENABLED, true)
+            .put(SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT)
+            .put(SETTING_REMOTE_SEGMENT_STORE_REPOSITORY, segmentRepository)
+            .put(SETTING_REMOTE_TRANSLOG_STORE_REPOSITORY, translogRepository);
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/remote/RemoteStorePathStrategy.java b/server/src/main/java/org/opensearch/index/remote/RemoteStorePathStrategy.java index 775f8fe19e4ef..c58f6c3faac84 100644 --- a/server/src/main/java/org/opensearch/index/remote/RemoteStorePathStrategy.java +++ b/server/src/main/java/org/opensearch/index/remote/RemoteStorePathStrategy.java @@ -10,6 +10,7 @@ import org.apache.logging.log4j.message.ParameterizedMessage; import org.opensearch.common.Nullable; +import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.common.annotation.PublicApi; import org.opensearch.common.blobstore.BlobPath; import org.opensearch.index.remote.RemoteStoreEnums.DataCategory; @@ -25,6 +26,7 @@ * @opensearch.internal */ @PublicApi(since = "2.14.0") +@ExperimentalApi public class RemoteStorePathStrategy { private final PathType type; @@ -74,6 +76,7 @@ public BlobPath generatePath(PathInput pathInput) { * @opensearch.internal */ @PublicApi(since = "2.14.0") + @ExperimentalApi public static class PathInput { private final BlobPath basePath; private final String indexUUID; @@ -122,6 +125,7 @@ public static Builder builder() { * @opensearch.internal */ @PublicApi(since = "2.14.0") + @ExperimentalApi public static class Builder { private BlobPath basePath; private 
String indexUUID; diff --git a/server/src/main/java/org/opensearch/index/remote/RemoteStorePathStrategyResolver.java b/server/src/main/java/org/opensearch/index/remote/RemoteStorePathStrategyResolver.java index a33f7522daaae..178de406ed681 100644 --- a/server/src/main/java/org/opensearch/index/remote/RemoteStorePathStrategyResolver.java +++ b/server/src/main/java/org/opensearch/index/remote/RemoteStorePathStrategyResolver.java @@ -9,6 +9,7 @@ package org.opensearch.index.remote; import org.opensearch.Version; +import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.remote.RemoteStoreEnums.PathHashAlgorithm; import org.opensearch.index.remote.RemoteStoreEnums.PathType; import org.opensearch.indices.RemoteStoreSettings; @@ -20,6 +21,7 @@ * * @opensearch.internal */ +@ExperimentalApi public class RemoteStorePathStrategyResolver { private final RemoteStoreSettings remoteStoreSettings; @@ -34,7 +36,7 @@ public RemoteStorePathStrategy get() { PathType pathType; PathHashAlgorithm pathHashAlgorithm; // Min node version check ensures that we are enabling the new prefix type only when all the nodes understand it. - pathType = Version.CURRENT.compareTo(minNodeVersionSupplier.get()) <= 0 ? remoteStoreSettings.getPathType() : PathType.FIXED; + pathType = Version.V_2_14_0.compareTo(minNodeVersionSupplier.get()) <= 0 ? remoteStoreSettings.getPathType() : PathType.FIXED; // If the path type is fixed, hash algorithm is not applicable. pathHashAlgorithm = pathType == PathType.FIXED ? 
null : remoteStoreSettings.getPathHashAlgorithm(); return new RemoteStorePathStrategy(pathType, pathHashAlgorithm); diff --git a/server/src/main/java/org/opensearch/index/remote/RemoteStoreUtils.java b/server/src/main/java/org/opensearch/index/remote/RemoteStoreUtils.java index 4d1d98334c3c4..27b1b88034573 100644 --- a/server/src/main/java/org/opensearch/index/remote/RemoteStoreUtils.java +++ b/server/src/main/java/org/opensearch/index/remote/RemoteStoreUtils.java @@ -8,7 +8,16 @@ package org.opensearch.index.remote; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.Version; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.common.collect.Tuple; +import org.opensearch.common.settings.Settings; +import org.opensearch.node.remotestore.RemoteStoreNodeAttribute; import java.nio.ByteBuffer; import java.util.Arrays; @@ -17,14 +26,20 @@ import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Objects; +import java.util.Optional; import java.util.function.Function; +import static org.opensearch.indices.RemoteStoreSettings.CLUSTER_REMOTE_STORE_PATH_HASH_ALGORITHM_SETTING; +import static org.opensearch.indices.RemoteStoreSettings.CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING; + /** * Utils for remote store * * @opensearch.internal */ public class RemoteStoreUtils { + private static final Logger logger = LogManager.getLogger(RemoteStoreUtils.class); public static final int LONG_MAX_LENGTH = String.valueOf(Long.MAX_VALUE).length(); /** @@ -146,4 +161,67 @@ static String longToCompositeBase64AndBinaryEncoding(long value, int len) { assert base64DecimalValue >= 0 && base64DecimalValue < 64; return URL_BASE64_CHARSET[base64DecimalValue] + binaryPart; } + + /** + * Determines the remote store path strategy by reading the custom 
data map in IndexMetadata class.
+     * Falls back to a {@link RemoteStoreEnums.PathType#FIXED} strategy when the index has no remote store custom data.
+     */
+    public static RemoteStorePathStrategy determineRemoteStorePathStrategy(IndexMetadata indexMetadata) {
+        Map<String, String> remoteCustomData = indexMetadata.getCustomData(IndexMetadata.REMOTE_STORE_CUSTOM_KEY);
+        // Custom data, when present, is expected to always carry the path type; the runtime check below keeps the
+        // FIXED fallback in place when assertions are disabled.
+        assert remoteCustomData == null || remoteCustomData.containsKey(RemoteStoreEnums.PathType.NAME);
+        if (remoteCustomData != null && remoteCustomData.containsKey(RemoteStoreEnums.PathType.NAME)) {
+            RemoteStoreEnums.PathType pathType = RemoteStoreEnums.PathType.parseString(
+                remoteCustomData.get(RemoteStoreEnums.PathType.NAME)
+            );
+            String hashAlgoStr = remoteCustomData.get(RemoteStoreEnums.PathHashAlgorithm.NAME);
+            RemoteStoreEnums.PathHashAlgorithm hashAlgorithm = Objects.nonNull(hashAlgoStr)
+                ? RemoteStoreEnums.PathHashAlgorithm.parseString(hashAlgoStr)
+                : null;
+            return new RemoteStorePathStrategy(pathType, hashAlgorithm);
+        }
+        return new RemoteStorePathStrategy(RemoteStoreEnums.PathType.FIXED);
+    }
+
+    /**
+     * Generates the remote store path type information to be added to custom data of index metadata during migration.
+     * The path type configured in the cluster settings is only used when the minimum node version in the cluster is
+     * at least {@link Version#CURRENT}; otherwise {@link RemoteStoreEnums.PathType#FIXED} is used. The hash algorithm
+     * entry is only added for non-FIXED path types.
+     *
+     * @param clusterSettings Current Cluster settings from {@link ClusterState}
+     * @param discoveryNodes  Current {@link DiscoveryNodes} from the cluster state
+     * @return {@link Map} to be added as custom data in index metadata
+     */
+    public static Map<String, String> determineRemoteStorePathStrategyDuringMigration(
+        Settings clusterSettings,
+        DiscoveryNodes discoveryNodes
+    ) {
+        Version minNodeVersion = discoveryNodes.getMinNodeVersion();
+        // NOTE(review): this gates on Version.CURRENT, while RemoteStorePathStrategyResolver gates on a fixed
+        // release version (Version.V_2_14_0) - confirm whether the two checks are meant to differ.
+        RemoteStoreEnums.PathType pathType = Version.CURRENT.compareTo(minNodeVersion) <= 0
+            ? CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING.get(clusterSettings)
+            : RemoteStoreEnums.PathType.FIXED;
+        RemoteStoreEnums.PathHashAlgorithm pathHashAlgorithm = pathType == RemoteStoreEnums.PathType.FIXED
+            ? null
+            : CLUSTER_REMOTE_STORE_PATH_HASH_ALGORITHM_SETTING.get(clusterSettings);
+        Map<String, String> remoteCustomData = new HashMap<>();
+        remoteCustomData.put(RemoteStoreEnums.PathType.NAME, pathType.name());
+        if (Objects.nonNull(pathHashAlgorithm)) {
+            remoteCustomData.put(RemoteStoreEnums.PathHashAlgorithm.NAME, pathHashAlgorithm.name());
+        }
+        return remoteCustomData;
+    }
+
+    /**
+     * Fetches segment and translog repository names from remote store node attributes.
+     * The names are read from the first remote store node found in {@code discoveryNodes}.
+     * Returns an empty {@link HashMap} if the cluster does not contain any remote nodes.
+     * <p>
+     * Callers need to null-check the values looked up from the returned map when the {@link DiscoveryNodes}
+     * object does not have any remote nodes.
+     *
+     * @param discoveryNodes Current set of {@link DiscoveryNodes} in the cluster
+     * @return {@link Map} of data repository node attributes keys and their values
+     */
+    public static Map<String, String> getRemoteStoreRepoName(DiscoveryNodes discoveryNodes) {
+        Optional<DiscoveryNode> remoteNode = discoveryNodes.getNodes()
+            .values()
+            .stream()
+            .filter(DiscoveryNode::isRemoteStoreNode)
+            .findFirst();
+        return remoteNode.map(RemoteStoreNodeAttribute::getDataRepoNames).orElseGet(HashMap::new);
+    }
 }
diff --git a/server/src/main/java/org/opensearch/index/seqno/ReplicationTracker.java b/server/src/main/java/org/opensearch/index/seqno/ReplicationTracker.java index 39b1cc130d6a5..6697991aef90b 100644 --- a/server/src/main/java/org/opensearch/index/seqno/ReplicationTracker.java +++ b/server/src/main/java/org/opensearch/index/seqno/ReplicationTracker.java @@ -1106,12 +1106,11 @@ private ReplicationGroup calculateReplicationGroup() { } else { newVersion = replicationGroup.getVersion() + 1; } - assert indexSettings.isRemoteTranslogStoreEnabled() - // Handle migration cases. Ignore assertion if any of the shard copies in the replication group is assigned to a remote node - || (replicationGroup != null - && replicationGroup.getReplicationTargets() - .stream() - .anyMatch(shardRouting -> isShardOnRemoteEnabledNode.apply(shardRouting.currentNodeId()))) + assert newVersion == 0 || indexSettings.isRemoteTranslogStoreEnabled() + // Handle migration cases. 
Ignore assertion if any of the shard copies in the replication group is assigned to a remote node + || replicationGroup.getReplicationTargets() + .stream() + .anyMatch(shardRouting -> isShardOnRemoteEnabledNode.apply(shardRouting.currentNodeId())) || checkpoints.entrySet().stream().filter(e -> e.getValue().tracked).allMatch(e -> e.getValue().replicated) : "In absence of remote translog store, all tracked shards must have replication mode as LOGICAL_REPLICATION"; diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index 26dbbbcdee7c0..18d4a2ca6d639 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -436,7 +436,7 @@ public IndexShard( logger.debug("state: [CREATED]"); this.checkIndexOnStartup = indexSettings.getValue(IndexSettings.INDEX_CHECK_ON_STARTUP); - this.translogConfig = new TranslogConfig(shardId, shardPath().resolveTranslog(), indexSettings, bigArrays, nodeId); + this.translogConfig = new TranslogConfig(shardId, shardPath().resolveTranslog(), indexSettings, bigArrays, nodeId, seedRemote); final String aId = shardRouting.allocationId().getId(); final long primaryTerm = indexSettings.getIndexMetadata().primaryTerm(shardId.id()); this.pendingPrimaryTerm = primaryTerm; @@ -2118,15 +2118,16 @@ public boolean isRemoteSegmentStoreInSync() { return false; } - public void waitForRemoteStoreSync() { + public void waitForRemoteStoreSync() throws IOException { waitForRemoteStoreSync(() -> {}); } /* Blocks the calling thread, waiting for the remote store to get synced till internal Remote Upload Timeout Calls onProgress on seeing an increased file count on remote + Throws IOException if the remote store is not synced within the timeout */ - public void waitForRemoteStoreSync(Runnable onProgress) { + public void waitForRemoteStoreSync(Runnable onProgress) throws IOException { assert 
indexSettings.isAssignedOnRemoteNode(); RemoteSegmentStoreDirectory directory = getRemoteDirectory(); int segmentUploadeCount = 0; @@ -2138,7 +2139,7 @@ public void waitForRemoteStoreSync(Runnable onProgress) { while (System.nanoTime() - startNanos < getRecoverySettings().internalRemoteUploadTimeout().nanos()) { try { if (isRemoteSegmentStoreInSync()) { - break; + return; } else { if (directory.getSegmentsUploadedToRemoteStore().size() > segmentUploadeCount) { onProgress.run(); @@ -2156,6 +2157,11 @@ public void waitForRemoteStoreSync(Runnable onProgress) { return; } } + throw new IOException( + "Failed to upload to remote segment store within remote upload timeout of " + + getRecoverySettings().internalRemoteUploadTimeout().getMinutes() + + " minutes" + ); } public void preRecovery() { @@ -2911,7 +2917,7 @@ public void restoreFromRepository(Repository repository, ActionListener * * @return {@code true} if the engine should be flushed */ - boolean shouldPeriodicallyFlush() { + public boolean shouldPeriodicallyFlush() { final Engine engine = getEngineOrNull(); if (engine != null) { try { @@ -4487,6 +4493,7 @@ public Durability getTranslogDurability() { /** * Schedules a flush or translog generation roll if needed but will not schedule more than one concurrently. The operation will be * executed asynchronously on the flush thread pool. 
+ * Can also schedule a flush if decided by translog manager */ public void afterWriteOperation() { if (shouldPeriodicallyFlush() || shouldRollTranslogGeneration()) { @@ -4993,7 +5000,8 @@ public void syncTranslogFilesFromRemoteTranslog() throws IOException { shardPath().resolveTranslog(), indexSettings.getRemoteStorePathStrategy(), remoteStoreSettings, - logger + logger, + shouldSeedRemoteStore() ); } diff --git a/server/src/main/java/org/opensearch/index/shard/ReleasableRetryableRefreshListener.java b/server/src/main/java/org/opensearch/index/shard/ReleasableRetryableRefreshListener.java index 757275932c5f1..80daefc4482fc 100644 --- a/server/src/main/java/org/opensearch/index/shard/ReleasableRetryableRefreshListener.java +++ b/server/src/main/java/org/opensearch/index/shard/ReleasableRetryableRefreshListener.java @@ -13,6 +13,7 @@ import org.opensearch.common.lease.Releasable; import org.opensearch.common.lease.Releasables; import org.opensearch.common.unit.TimeValue; +import org.opensearch.core.concurrency.OpenSearchRejectedExecutionException; import org.opensearch.threadpool.ThreadPool; import java.io.IOException; @@ -129,6 +130,12 @@ private void scheduleRetry(TimeValue interval, String retryThreadPoolName, boole ); scheduled = true; getLogger().info("Scheduled retry with didRefresh={}", didRefresh); + } catch (OpenSearchRejectedExecutionException e) { + if (e.isExecutorShutdown()) { + getLogger().info("Scheduling retry with didRefresh={} failed due to executor shut down", didRefresh); + } else { + throw e; + } } finally { if (scheduled == false) { retryScheduled.set(false); diff --git a/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java b/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java index 351aec6e3af6c..bfb841307af49 100644 --- a/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java +++ b/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java @@ 
-437,10 +437,14 @@ private void uploadNewSegments( batchUploadListener.onFailure(ex); }); statsListener.beforeUpload(src); - remoteDirectory.copyFrom(storeDirectory, src, IOContext.DEFAULT, aggregatedListener); + remoteDirectory.copyFrom(storeDirectory, src, IOContext.DEFAULT, aggregatedListener, isLowPriorityUpload()); } } + private boolean isLowPriorityUpload() { + return isLocalOrSnapshotRecovery(); + } + /** * Whether to upload a file or not depending on whether file is in excluded list or has been already uploaded. * diff --git a/server/src/main/java/org/opensearch/index/shard/StoreRecovery.java b/server/src/main/java/org/opensearch/index/shard/StoreRecovery.java index f5e342d28fde1..8d689e8769728 100644 --- a/server/src/main/java/org/opensearch/index/shard/StoreRecovery.java +++ b/server/src/main/java/org/opensearch/index/shard/StoreRecovery.java @@ -193,13 +193,6 @@ void recoverFromLocalShards( indexShard.getEngine().forceMerge(false, -1, false, false, false, UUIDs.randomBase64UUID()); if (indexShard.isRemoteTranslogEnabled() && indexShard.shardRouting.primary()) { indexShard.waitForRemoteStoreSync(); - if (indexShard.isRemoteSegmentStoreInSync() == false) { - throw new IndexShardRecoveryException( - indexShard.shardId(), - "failed to upload to remote", - new IOException("Failed to upload to remote segment store") - ); - } } return true; } catch (IOException ex) { @@ -436,10 +429,6 @@ void recoverFromSnapshotAndRemoteStore( indexShard.finalizeRecovery(); if (indexShard.isRemoteTranslogEnabled() && indexShard.shardRouting.primary()) { indexShard.waitForRemoteStoreSync(); - if (indexShard.isRemoteSegmentStoreInSync() == false) { - listener.onFailure(new IndexShardRestoreFailedException(shardId, "Failed to upload to remote segment store")); - return; - } } indexShard.postRecovery("restore done"); @@ -661,6 +650,14 @@ private void internalRecoverFromStore(IndexShard indexShard) throws IndexShardRe indexShard.recoveryState().getIndex().setFileDetailsComplete(); } 
indexShard.openEngineAndRecoverFromTranslog(); + if (indexShard.shouldSeedRemoteStore()) { + indexShard.getThreadPool().executor(ThreadPool.Names.GENERIC).execute(() -> { + logger.info("Attempting to seed Remote Store via local recovery for {}", indexShard.shardId()); + indexShard.refresh("remote store migration"); + }); + indexShard.waitForRemoteStoreSync(); + logger.info("Remote Store is now seeded via local recovery for {}", indexShard.shardId()); + } indexShard.getEngine().fillSeqNoGaps(indexShard.getPendingPrimaryTerm()); indexShard.finalizeRecovery(); indexShard.postRecovery("post recovery from shard_store"); @@ -722,10 +719,6 @@ private void restore( indexShard.finalizeRecovery(); if (indexShard.isRemoteTranslogEnabled() && indexShard.shardRouting.primary()) { indexShard.waitForRemoteStoreSync(); - if (indexShard.isRemoteSegmentStoreInSync() == false) { - listener.onFailure(new IndexShardRestoreFailedException(shardId, "Failed to upload to remote segment store")); - return; - } } indexShard.postRecovery("restore done"); listener.onResponse(true); diff --git a/server/src/main/java/org/opensearch/index/store/RemoteDirectory.java b/server/src/main/java/org/opensearch/index/store/RemoteDirectory.java index 345583bbbd1be..ab76150f8f83d 100644 --- a/server/src/main/java/org/opensearch/index/store/RemoteDirectory.java +++ b/server/src/main/java/org/opensearch/index/store/RemoteDirectory.java @@ -29,6 +29,7 @@ import org.opensearch.common.blobstore.transfer.stream.OffsetRangeIndexInputStream; import org.opensearch.common.blobstore.transfer.stream.OffsetRangeInputStream; import org.opensearch.core.action.ActionListener; +import org.opensearch.core.common.unit.ByteSizeUnit; import org.opensearch.index.store.exception.ChecksumCombinationException; import java.io.FileNotFoundException; @@ -323,11 +324,12 @@ public boolean copyFrom( String remoteFileName, IOContext context, Runnable postUploadRunner, - ActionListener listener + ActionListener listener, + boolean 
lowPriorityUpload ) { if (blobContainer instanceof AsyncMultiStreamBlobContainer) { try { - uploadBlob(from, src, remoteFileName, context, postUploadRunner, listener); + uploadBlob(from, src, remoteFileName, context, postUploadRunner, listener, lowPriorityUpload); } catch (Exception e) { listener.onFailure(e); } @@ -342,7 +344,8 @@ private void uploadBlob( String remoteFileName, IOContext ioContext, Runnable postUploadRunner, - ActionListener listener + ActionListener listener, + boolean lowPriorityUpload ) throws Exception { long expectedChecksum = calculateChecksumOfChecksum(from, src); long contentLength; @@ -353,12 +356,13 @@ private void uploadBlob( if (getBlobContainer() instanceof AsyncMultiStreamBlobContainer) { remoteIntegrityEnabled = ((AsyncMultiStreamBlobContainer) getBlobContainer()).remoteIntegrityCheckSupported(); } + lowPriorityUpload = lowPriorityUpload || contentLength > ByteSizeUnit.GB.toBytes(15); RemoteTransferContainer remoteTransferContainer = new RemoteTransferContainer( src, remoteFileName, contentLength, true, - WritePriority.NORMAL, + lowPriorityUpload ? 
WritePriority.LOW : WritePriority.NORMAL, (size, position) -> uploadRateLimiter.apply(new OffsetRangeIndexInputStream(from.openInput(src, ioContext), size, position)), expectedChecksum, remoteIntegrityEnabled diff --git a/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java b/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java index ec1163fe91b6c..8c0ecb4cc783a 100644 --- a/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java +++ b/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java @@ -453,7 +453,7 @@ public IndexInput openInput(String name, IOContext context) throws IOException { * @param context IOContext to be used to open IndexInput of file during remote upload * @param listener Listener to handle upload callback events */ - public void copyFrom(Directory from, String src, IOContext context, ActionListener listener) { + public void copyFrom(Directory from, String src, IOContext context, ActionListener listener, boolean lowPriorityUpload) { try { final String remoteFileName = getNewRemoteSegmentFilename(src); boolean uploaded = remoteDataDirectory.copyFrom(from, src, remoteFileName, context, () -> { @@ -462,7 +462,7 @@ public void copyFrom(Directory from, String src, IOContext context, ActionListen } catch (IOException e) { throw new RuntimeException("Exception in segment postUpload for file " + src, e); } - }, listener); + }, listener, lowPriorityUpload); if (uploaded == false) { copyFrom(from, src, src, context); listener.onResponse(null); diff --git a/server/src/main/java/org/opensearch/index/translog/InternalTranslogManager.java b/server/src/main/java/org/opensearch/index/translog/InternalTranslogManager.java index a22c538286a88..e2210217672ef 100644 --- a/server/src/main/java/org/opensearch/index/translog/InternalTranslogManager.java +++ b/server/src/main/java/org/opensearch/index/translog/InternalTranslogManager.java @@ -437,6 +437,12 @@ 
public String getTranslogUUID() { * @return if the translog should be flushed */ public boolean shouldPeriodicallyFlush(long localCheckpointOfLastCommit, long flushThreshold) { + /* + * This can trigger flush depending upon translog's implementation + */ + if (translog.shouldFlush()) { + return true; + } // This is the minimum seqNo that is referred in translog and considered for calculating translog size long minTranslogRefSeqNo = translog.getMinUnreferencedSeqNoInSegments(localCheckpointOfLastCommit + 1); final long minReferencedTranslogGeneration = translog.getMinGenerationForSeqNo(minTranslogRefSeqNo).translogFileGeneration; diff --git a/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java b/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java index 69600c7f4066c..3ad8eef616a7a 100644 --- a/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java +++ b/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java @@ -123,7 +123,7 @@ public RemoteFsTranslog( ckpAsMetadata ); try { - download(translogTransferManager, location, logger); + download(translogTransferManager, location, logger, config.shouldSeedRemote()); Checkpoint checkpoint = readCheckpoint(location); logger.info("Downloaded data from remote translog till maxSeqNo = {}", checkpoint.maxSeqNo); this.readers.addAll(recoverFromFiles(checkpoint)); @@ -171,7 +171,8 @@ public static void download( Path location, RemoteStorePathStrategy pathStrategy, RemoteStoreSettings remoteStoreSettings, - Logger logger + Logger logger, + boolean seedRemote ) throws IOException { assert repository instanceof BlobStoreRepository : String.format( Locale.ROOT, @@ -192,11 +193,12 @@ public static void download( pathStrategy, remoteStoreSettings ); - RemoteFsTranslog.download(translogTransferManager, location, logger); + RemoteFsTranslog.download(translogTransferManager, location, logger, seedRemote); logger.trace(remoteTranslogTransferTracker.toString()); } - 
static void download(TranslogTransferManager translogTransferManager, Path location, Logger logger) throws IOException { + static void download(TranslogTransferManager translogTransferManager, Path location, Logger logger, boolean seedRemote) + throws IOException { /* In Primary to Primary relocation , there can be concurrent upload and download of translog. While translog files are getting downloaded by new primary, it might hence be deleted by the primary @@ -209,7 +211,7 @@ static void download(TranslogTransferManager translogTransferManager, Path locat boolean success = false; long startTimeMs = System.currentTimeMillis(); try { - downloadOnce(translogTransferManager, location, logger); + downloadOnce(translogTransferManager, location, logger, seedRemote); success = true; return; } catch (FileNotFoundException | NoSuchFileException e) { @@ -223,7 +225,8 @@ static void download(TranslogTransferManager translogTransferManager, Path locat throw ex; } - private static void downloadOnce(TranslogTransferManager translogTransferManager, Path location, Logger logger) throws IOException { + private static void downloadOnce(TranslogTransferManager translogTransferManager, Path location, Logger logger, boolean seedRemote) + throws IOException { logger.debug("Downloading translog files from remote"); RemoteTranslogTransferTracker statsTracker = translogTransferManager.getRemoteTranslogTransferTracker(); long prevDownloadBytesSucceeded = statsTracker.getDownloadBytesSucceeded(); @@ -265,7 +268,9 @@ private static void downloadOnce(TranslogTransferManager translogTransferManager logger.debug("No translog files found on remote, checking local filesystem for cleanup"); if (FileSystemUtils.exists(location.resolve(CHECKPOINT_FILE_NAME))) { final Checkpoint checkpoint = readCheckpoint(location); - if (isEmptyTranslog(checkpoint) == false) { + if (seedRemote) { + logger.debug("Remote migration ongoing. 
Retaining the translog on local, skipping clean-up"); + } else if (isEmptyTranslog(checkpoint) == false) { logger.debug("Translog files exist on local without any metadata in remote, cleaning up these files"); // Creating empty translog will cleanup the older un-referenced tranlog files, we don't have to explicitly delete Translog.createEmptyTranslog(location, translogTransferManager.getShardId(), checkpoint); @@ -688,4 +693,15 @@ public long getMinUnreferencedSeqNoInSegments(long minUnrefCheckpointInLastCommi int availablePermits() { return syncPermit.availablePermits(); } + + /** + * Checks whether or not the shard should be flushed based on translog files. + * This checks if number of translog files breaches the threshold count determined by + * {@code cluster.remote_store.translog.max_readers} setting + * @return {@code true} if the shard should be flushed + */ + @Override + protected boolean shouldFlush() { + return readers.size() >= translogTransferManager.getMaxRemoteTranslogReadersSettings(); + } } diff --git a/server/src/main/java/org/opensearch/index/translog/Translog.java b/server/src/main/java/org/opensearch/index/translog/Translog.java index c653605f8fa10..842e9c77d2350 100644 --- a/server/src/main/java/org/opensearch/index/translog/Translog.java +++ b/server/src/main/java/org/opensearch/index/translog/Translog.java @@ -2082,4 +2082,13 @@ public static String createEmptyTranslog( public long getMinUnreferencedSeqNoInSegments(long minUnrefCheckpointInLastCommit) { return minUnrefCheckpointInLastCommit; } + + /** + * Checks whether or not the shard should be flushed based on translog files. 
+ * each translog type can have its own decider + * @return {@code true} if the shard should be flushed + */ + protected boolean shouldFlush() { + return false; + } } diff --git a/server/src/main/java/org/opensearch/index/translog/TranslogConfig.java b/server/src/main/java/org/opensearch/index/translog/TranslogConfig.java index 2f00773075d41..f720f041b287c 100644 --- a/server/src/main/java/org/opensearch/index/translog/TranslogConfig.java +++ b/server/src/main/java/org/opensearch/index/translog/TranslogConfig.java @@ -59,6 +59,7 @@ public final class TranslogConfig { private final Path translogPath; private final ByteSizeValue bufferSize; private final String nodeId; + private final boolean seedRemote; /** * Creates a new TranslogConfig instance @@ -66,9 +67,17 @@ public final class TranslogConfig { * @param translogPath the path to use for the transaction log files * @param indexSettings the index settings used to set internal variables * @param bigArrays a bigArrays instance used for temporarily allocating write operations + * @param seedRemote boolean denoting whether remote store needs to be seeded as part of remote migration */ - public TranslogConfig(ShardId shardId, Path translogPath, IndexSettings indexSettings, BigArrays bigArrays, String nodeId) { - this(shardId, translogPath, indexSettings, bigArrays, DEFAULT_BUFFER_SIZE, nodeId); + public TranslogConfig( + ShardId shardId, + Path translogPath, + IndexSettings indexSettings, + BigArrays bigArrays, + String nodeId, + boolean seedRemote + ) { + this(shardId, translogPath, indexSettings, bigArrays, DEFAULT_BUFFER_SIZE, nodeId, seedRemote); } TranslogConfig( @@ -77,7 +86,8 @@ public TranslogConfig(ShardId shardId, Path translogPath, IndexSettings indexSet IndexSettings indexSettings, BigArrays bigArrays, ByteSizeValue bufferSize, - String nodeId + String nodeId, + boolean seedRemote ) { this.bufferSize = bufferSize; this.indexSettings = indexSettings; @@ -85,6 +95,7 @@ public TranslogConfig(ShardId shardId, 
Path translogPath, IndexSettings indexSet this.translogPath = translogPath; this.bigArrays = bigArrays; this.nodeId = nodeId; + this.seedRemote = seedRemote; } /** @@ -125,4 +136,8 @@ public ByteSizeValue getBufferSize() { public String getNodeId() { return nodeId; } + + public boolean shouldSeedRemote() { + return seedRemote; + } } diff --git a/server/src/main/java/org/opensearch/index/translog/TruncateTranslogAction.java b/server/src/main/java/org/opensearch/index/translog/TruncateTranslogAction.java index 25fcdc614172a..cf6c9087777fe 100644 --- a/server/src/main/java/org/opensearch/index/translog/TruncateTranslogAction.java +++ b/server/src/main/java/org/opensearch/index/translog/TruncateTranslogAction.java @@ -195,7 +195,8 @@ private boolean isTranslogClean(ShardPath shardPath, ClusterState clusterState, translogPath, indexSettings, BigArrays.NON_RECYCLING_INSTANCE, - "" + "", + false ); long primaryTerm = indexSettings.getIndexMetadata().primaryTerm(shardPath.getShardId().id()); // We open translog to check for corruption, do not clean anything. 
diff --git a/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java b/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java index 164263b62add2..a1b7ce3041cf5 100644 --- a/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java +++ b/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java @@ -693,4 +693,8 @@ public void onFailure(Exception e) { throw e; } } + + public int getMaxRemoteTranslogReadersSettings() { + return this.remoteStoreSettings.getMaxRemoteTranslogReaders(); + } } diff --git a/server/src/main/java/org/opensearch/indices/AbstractIndexShardCacheEntity.java b/server/src/main/java/org/opensearch/indices/AbstractIndexShardCacheEntity.java index bb1201cb910a9..6b4c53654d871 100644 --- a/server/src/main/java/org/opensearch/indices/AbstractIndexShardCacheEntity.java +++ b/server/src/main/java/org/opensearch/indices/AbstractIndexShardCacheEntity.java @@ -32,6 +32,7 @@ package org.opensearch.indices; +import org.opensearch.common.cache.ICacheKey; import org.opensearch.common.cache.RemovalNotification; import org.opensearch.common.cache.RemovalReason; import org.opensearch.core.common.bytes.BytesReference; @@ -51,8 +52,8 @@ abstract class AbstractIndexShardCacheEntity implements IndicesRequestCache.Cach protected abstract ShardRequestCache stats(); @Override - public final void onCached(IndicesRequestCache.Key key, BytesReference value) { - stats().onCached(key, value); + public final void onCached(ICacheKey key, BytesReference value) { + stats().onCached(getRamBytesUsedInKey(key), value); } @Override @@ -66,7 +67,19 @@ public final void onMiss() { } @Override - public final void onRemoval(RemovalNotification notification) { - stats().onRemoval(notification.getKey(), notification.getValue(), notification.getRemovalReason() == RemovalReason.EVICTED); + public final void onRemoval(RemovalNotification, BytesReference> 
notification) { + stats().onRemoval( + getRamBytesUsedInKey(notification.getKey()), + notification.getValue(), + notification.getRemovalReason() == RemovalReason.EVICTED + ); + } + + private long getRamBytesUsedInKey(ICacheKey key) { + long innerKeyRamBytesUsed = 0; + if (key.key != null) { + innerKeyRamBytesUsed = key.key.ramBytesUsed(); + } + return key.ramBytesUsed(innerKeyRamBytesUsed); } } diff --git a/server/src/main/java/org/opensearch/indices/IndicesRequestCache.java b/server/src/main/java/org/opensearch/indices/IndicesRequestCache.java index eab772cda3213..44af83bb839c1 100644 --- a/server/src/main/java/org/opensearch/indices/IndicesRequestCache.java +++ b/server/src/main/java/org/opensearch/indices/IndicesRequestCache.java @@ -47,11 +47,13 @@ import org.opensearch.common.cache.LoadAwareCacheLoader; import org.opensearch.common.cache.RemovalListener; import org.opensearch.common.cache.RemovalNotification; +import org.opensearch.common.cache.RemovalReason; import org.opensearch.common.cache.policy.CachedQueryResult; import org.opensearch.common.cache.serializer.BytesReferenceSerializer; import org.opensearch.common.cache.service.CacheService; import org.opensearch.common.cache.stats.ImmutableCacheStatsHolder; import org.opensearch.common.cache.store.config.CacheConfig; +import org.opensearch.common.collect.Tuple; import org.opensearch.common.lease.Releasable; import org.opensearch.common.lucene.index.OpenSearchDirectoryReader; import org.opensearch.common.settings.Setting; @@ -111,6 +113,10 @@ public final class IndicesRequestCache implements RemovalListener INDEX_CACHE_REQUEST_ENABLED_SETTING = Setting.boolSetting( "index.requests.cache.enable", true, @@ -127,15 +133,16 @@ public final class IndicesRequestCache implements RemovalListener INDICES_REQUEST_CACHE_CLEAN_INTERVAL_SETTING = Setting.positiveTimeSetting( - "indices.requests.cache.cleanup.interval", + public static final Setting INDICES_REQUEST_CACHE_CLEANUP_INTERVAL_SETTING = 
Setting.positiveTimeSetting( + INDICES_REQUEST_CACHE_CLEANUP_INTERVAL_SETTING_KEY, INDICES_CACHE_CLEAN_INTERVAL_SETTING, Property.NodeScope ); public static final Setting INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING = new Setting<>( - "indices.requests.cache.cleanup.staleness_threshold", + INDICES_REQUEST_CACHE_CLEANUP_STALENESS_THRESHOLD_SETTING_KEY, "0%", IndicesRequestCache::validateStalenessSetting, + Property.Dynamic, Property.NodeScope ); @@ -145,6 +152,7 @@ public final class IndicesRequestCache implements RemovalListener cache; + private final ClusterService clusterService; private final Function> cacheEntityLookup; // pkg-private for testing final IndicesRequestCacheCleanupManager cacheCleanupManager; @@ -166,10 +174,13 @@ public final class IndicesRequestCache implements RemovalListener, BytesReference> weigher = (k, v) -> k.ramBytesUsed(k.key.ramBytesUsed()) + v.ramBytesUsed(); this.cacheCleanupManager = new IndicesRequestCacheCleanupManager( threadPool, - INDICES_REQUEST_CACHE_CLEAN_INTERVAL_SETTING.get(settings), + INDICES_REQUEST_CACHE_CLEANUP_INTERVAL_SETTING.get(settings), getStalenessThreshold(settings) ); this.cacheEntityLookup = cacheEntityFunction; + this.clusterService = clusterService; + this.clusterService.getClusterSettings() + .addSettingsUpdateConsumer(INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING, this::setStalenessThreshold); this.cache = cacheService.createCache( new CacheConfig.Builder().setSettings(settings) .setWeigher(weigher) @@ -195,6 +206,11 @@ public final class IndicesRequestCache implements RemovalListener, BytesReference> notification) { // In case this event happens for an old shard, we can safely ignore this as we don't keep track for old // shards as part of request cache. - // Pass a new removal notification containing Key rather than ICacheKey to the CacheEntity for backwards compatibility. 
Key key = notification.getKey().key; - RemovalNotification newNotification = new RemovalNotification<>( - key, - notification.getValue(), - notification.getRemovalReason() - ); - - cacheEntityLookup.apply(key.shardId).ifPresent(entity -> entity.onRemoval(newNotification)); - cacheCleanupManager.updateCleanupKeyToCountMapOnCacheEviction( - new CleanupKey(cacheEntityLookup.apply(key.shardId).orElse(null), key.readerCacheKeyId) - ); + IndicesService.IndexShardCacheEntity indexShardCacheEntity = (IndicesService.IndexShardCacheEntity) cacheEntityLookup.apply( + key.shardId + ).orElse(null); + if (indexShardCacheEntity != null) { + // Here we match the hashcode to avoid scenario where we deduct stats of older IndexShard(with same + // shardId) from current IndexShard. + if (key.indexShardHashCode == System.identityHashCode(indexShardCacheEntity.getCacheIdentity())) { + indexShardCacheEntity.onRemoval(notification); + } + } + CleanupKey cleanupKey = new CleanupKey(indexShardCacheEntity, key.readerCacheKeyId); + cacheCleanupManager.updateStaleCountOnEntryRemoval(cleanupKey, notification); } private ICacheKey getICacheKey(Key key) { @@ -259,7 +281,8 @@ BytesReference getOrCompute( .getReaderCacheHelper(); String readerCacheKeyId = delegatingCacheHelper.getDelegatingCacheKey().getId(); assert readerCacheKeyId != null; - final Key key = new Key(((IndexShard) cacheEntity.getCacheIdentity()).shardId(), cacheKey, readerCacheKeyId); + IndexShard indexShard = ((IndexShard) cacheEntity.getCacheIdentity()); + final Key key = new Key(indexShard.shardId(), cacheKey, readerCacheKeyId, System.identityHashCode(indexShard)); Loader cacheLoader = new Loader(cacheEntity, loader); BytesReference value = cache.computeIfAbsent(getICacheKey(key), cacheLoader); if (cacheLoader.isLoaded()) { @@ -272,7 +295,7 @@ BytesReference getOrCompute( OpenSearchDirectoryReader.addReaderCloseListener(reader, cleanupKey); } } - cacheCleanupManager.updateCleanupKeyToCountMapOnCacheInsertion(cleanupKey); + 
cacheCleanupManager.updateStaleCountOnCacheInsert(cleanupKey); } else { cacheEntity.onHit(); } @@ -292,7 +315,8 @@ void invalidate(IndicesService.IndexShardCacheEntity cacheEntity, DirectoryReade IndexReader.CacheHelper cacheHelper = ((OpenSearchDirectoryReader) reader).getDelegatingCacheHelper(); readerCacheKeyId = ((OpenSearchDirectoryReader.DelegatingCacheHelper) cacheHelper).getDelegatingCacheKey().getId(); } - cache.invalidate(getICacheKey(new Key(((IndexShard) cacheEntity.getCacheIdentity()).shardId(), cacheKey, readerCacheKeyId))); + IndexShard indexShard = (IndexShard) cacheEntity.getCacheIdentity(); + cache.invalidate(getICacheKey(new Key(indexShard.shardId(), cacheKey, readerCacheKeyId, System.identityHashCode(indexShard)))); } /** @@ -318,7 +342,7 @@ public boolean isLoaded() { @Override public BytesReference load(ICacheKey key) throws Exception { BytesReference value = loader.get(); - entity.onCached(key.key, value); + entity.onCached(key, value); loaded = true; return value; } @@ -332,7 +356,7 @@ interface CacheEntity extends Accountable { /** * Called after the value was loaded. */ - void onCached(Key key, BytesReference value); + void onCached(ICacheKey key, BytesReference value); /** * Returns true iff the resource behind this entity is still open ie. @@ -359,7 +383,7 @@ interface CacheEntity extends Accountable { /** * Called when this entity instance is removed */ - void onRemoval(RemovalNotification notification); + void onRemoval(RemovalNotification, BytesReference> notification); } @@ -370,19 +394,25 @@ interface CacheEntity extends Accountable { */ static class Key implements Accountable, Writeable { public final ShardId shardId; // use as identity equality + public final int indexShardHashCode; // While ShardId is usually sufficient to uniquely identify an + // indexShard but in case where the same indexShard is deleted and reallocated on same node, we need the + // hashcode(default) to identify the older indexShard but with same shardId. 
public final String readerCacheKeyId; public final BytesReference value; - Key(ShardId shardId, BytesReference value, String readerCacheKeyId) { + Key(ShardId shardId, BytesReference value, String readerCacheKeyId, int indexShardHashCode) { this.shardId = shardId; this.value = value; this.readerCacheKeyId = Objects.requireNonNull(readerCacheKeyId); + this.indexShardHashCode = indexShardHashCode; } Key(StreamInput in) throws IOException { this.shardId = in.readOptionalWriteable(ShardId::new); this.readerCacheKeyId = in.readOptionalString(); this.value = in.readBytesReference(); + this.indexShardHashCode = in.readInt(); // We are serializing/de-serializing this as we need to store the + // key as part of tiered/disk cache. The key is not passed between nodes at this point. } @Override @@ -404,6 +434,7 @@ public boolean equals(Object o) { if (!Objects.equals(readerCacheKeyId, key.readerCacheKeyId)) return false; if (!shardId.equals(key.shardId)) return false; if (!value.equals(key.value)) return false; + if (indexShardHashCode != key.indexShardHashCode) return false; return true; } @@ -412,6 +443,7 @@ public int hashCode() { int result = shardId.hashCode(); result = 31 * result + readerCacheKeyId.hashCode(); result = 31 * result + value.hashCode(); + result = 31 * result + indexShardHashCode; return result; } @@ -420,6 +452,8 @@ public void writeTo(StreamOutput out) throws IOException { out.writeOptionalWriteable(shardId); out.writeOptionalString(readerCacheKeyId); out.writeBytesReference(value); + out.writeInt(indexShardHashCode); // We are serializing/de-serializing this as we need to store the + // key as part of tiered/disk cache. The key is not passed between nodes at this point. 
} } @@ -474,7 +508,7 @@ class IndicesRequestCacheCleanupManager implements Closeable { private final Set keysToClean; private final ConcurrentMap> cleanupKeyToCountMap; private final AtomicInteger staleKeysCount; - private final double stalenessThreshold; + private volatile double stalenessThreshold; private final IndicesRequestCacheCleaner cacheCleaner; IndicesRequestCacheCleanupManager(ThreadPool threadpool, TimeValue cleanInterval, double stalenessThreshold) { @@ -486,6 +520,18 @@ class IndicesRequestCacheCleanupManager implements Closeable { threadpool.schedule(cacheCleaner, cleanInterval, ThreadPool.Names.SAME); } + void updateStalenessThreshold(double stalenessThreshold) { + double oldStalenessThreshold = this.stalenessThreshold; + this.stalenessThreshold = stalenessThreshold; + if (logger.isDebugEnabled()) { + logger.debug( + "Staleness threshold for indices request cache changed to {} from {}", + this.stalenessThreshold, + oldStalenessThreshold + ); + } + } + /** * Enqueue cleanup key. * @@ -508,8 +554,8 @@ void enqueueCleanupKey(CleanupKey cleanupKey) { * * @param cleanupKey the CleanupKey to be updated in the map */ - private void updateCleanupKeyToCountMapOnCacheInsertion(CleanupKey cleanupKey) { - if (stalenessThreshold == 0.0 || cleanupKey.entity == null) { + private void updateStaleCountOnCacheInsert(CleanupKey cleanupKey) { + if (cleanupKey.entity == null) { return; } IndexShard indexShard = (IndexShard) cleanupKey.entity.getCacheIdentity(); @@ -524,8 +570,32 @@ private void updateCleanupKeyToCountMapOnCacheInsertion(CleanupKey cleanupKey) { cleanupKeyToCountMap.computeIfAbsent(shardId, k -> new HashMap<>()).merge(cleanupKey.readerCacheKeyId, 1, Integer::sum); } - private void updateCleanupKeyToCountMapOnCacheEviction(CleanupKey cleanupKey) { - if (stalenessThreshold == 0.0 || cleanupKey.entity == null) { + /** + * Handles the eviction of a cache entry. + * + *

<p>This method is called when an entry is evicted from the cache. + * We consider all removal notifications except those with the reason REPLACED. + * {@link #incrementStaleKeysCount} would have removed the entries from the map and incremented the {@link #staleKeysCount}. + * Hence we decrement {@link #staleKeysCount} if we do not find the shardId or readerCacheKeyId in the map. + * We skip decrementing staleKeysCount if we find both the shardId and readerCacheKeyId in the map, since the entry would not yet have been accounted for in staleKeysCount. + * + * @param cleanupKey the CleanupKey that has been evicted from the cache + * @param notification RemovalNotification of the cache entry evicted + */ + private void updateStaleCountOnEntryRemoval( + CleanupKey cleanupKey, + RemovalNotification, BytesReference> notification + ) { + if (notification.getRemovalReason() == RemovalReason.REPLACED) { + // The reason of the notification is REPLACED when a cache entry's value is updated. Since replacing an entry + // does not affect the staleness count, we skip such notifications. 
+ return; + } + if (cleanupKey.entity == null) { + // entity will only be null when the shard is closed/deleted + // we would have accounted this in staleKeysCount when the closing/deletion of shard would have closed the associated + // readers + staleKeysCount.decrementAndGet(); return; } IndexShard indexShard = (IndexShard) cleanupKey.entity.getCacheIdentity(); @@ -535,15 +605,33 @@ private void updateCleanupKeyToCountMapOnCacheEviction(CleanupKey cleanupKey) { } ShardId shardId = indexShard.shardId(); - cleanupKeyToCountMap.computeIfPresent(shardId, (shard, keyCountMap) -> { - keyCountMap.computeIfPresent(cleanupKey.readerCacheKeyId, (key, currentValue) -> { - // decrement the stale key count + cleanupKeyToCountMap.compute(shardId, (key, readerCacheKeyMap) -> { + if (readerCacheKeyMap == null || !readerCacheKeyMap.containsKey(cleanupKey.readerCacheKeyId)) { + // If ShardId is not present or readerCacheKeyId is not present + // it should have already been accounted for and hence been removed from this map + // so decrement staleKeysCount staleKeysCount.decrementAndGet(); - int newValue = currentValue - 1; - // Remove the key if the new value is zero by returning null; otherwise, update with the new value. - return newValue == 0 ? null : newValue; - }); - return keyCountMap; + // Return the current map + return readerCacheKeyMap; + } else { + // If it is in the map, it is not stale yet. 
+ // Proceed to adjust the count for the readerCacheKeyId in the map + // but do not decrement the staleKeysCount + Integer count = readerCacheKeyMap.get(cleanupKey.readerCacheKeyId); + // this should never be null + assert (count != null && count >= 0); + // Reduce the count by 1 + int newCount = count - 1; + if (newCount > 0) { + // Update the map with the new count + readerCacheKeyMap.put(cleanupKey.readerCacheKeyId, newCount); + } else { + // Remove the readerCacheKeyId entry if new count is zero + readerCacheKeyMap.remove(cleanupKey.readerCacheKeyId); + } + // If after modification, the readerCacheKeyMap is empty, we return null to remove the ShardId entry + return readerCacheKeyMap.isEmpty() ? null : readerCacheKeyMap; + } }); } @@ -551,14 +639,14 @@ private void updateCleanupKeyToCountMapOnCacheEviction(CleanupKey cleanupKey) { * Updates the count of stale keys in the cache. * This method is called when a CleanupKey is added to the keysToClean set. * - * It increments the staleKeysCount by the count of the CleanupKey in the cleanupKeyToCountMap. + *

<p>It increments the staleKeysCount by the count of the CleanupKey in the cleanupKeyToCountMap. * If the CleanupKey's readerCacheKeyId is null or the CleanupKey's entity is not open, it increments the staleKeysCount * by the total count of keys associated with the CleanupKey's ShardId in the cleanupKeyToCountMap and removes the ShardId from the map. * * @param cleanupKey the CleanupKey that has been marked for cleanup */ private void incrementStaleKeysCount(CleanupKey cleanupKey) { - if (stalenessThreshold == 0.0 || cleanupKey.entity == null) { + if (cleanupKey.entity == null) { return; } IndexShard indexShard = (IndexShard) cleanupKey.entity.getCacheIdentity(); @@ -569,7 +657,7 @@ private void incrementStaleKeysCount(CleanupKey cleanupKey) { ShardId shardId = indexShard.shardId(); // Using computeIfPresent to atomically operate on the countMap for a given shardId - cleanupKeyToCountMap.computeIfPresent(shardId, (key, countMap) -> { + cleanupKeyToCountMap.computeIfPresent(shardId, (currentShardId, countMap) -> { if (cleanupKey.readerCacheKeyId == null) { // Aggregate and add to staleKeysCount atomically if readerCacheKeyId is null int totalSum = countMap.values().stream().mapToInt(Integer::intValue).sum(); @@ -578,18 +666,19 @@ private void incrementStaleKeysCount(CleanupKey cleanupKey) { return null; } else { // Update staleKeysCount based on specific readerCacheKeyId, then remove it from the countMap - countMap.computeIfPresent(cleanupKey.readerCacheKeyId, (k, v) -> { - staleKeysCount.addAndGet(v); + countMap.computeIfPresent(cleanupKey.readerCacheKeyId, (readerCacheKey, count) -> { + staleKeysCount.addAndGet(count); // Return null to remove the key after updating staleKeysCount return null; }); - // Check if countMap is empty after removal to decide if we need to remove the shardId entry if (countMap.isEmpty()) { - return null; // Returning null removes the entry for shardId + // Returning null removes the entry for shardId + return null; } } - return countMap; // 
Return the modified countMap to keep the mapping + // Return the modified countMap to retain updates + return countMap; }); } @@ -627,15 +716,16 @@ private synchronized void cleanCache(double stalenessThreshold) { // Contains CleanupKey objects with open shard but invalidated readerCacheKeyId. final Set cleanupKeysFromOutdatedReaders = new HashSet<>(); // Contains CleanupKey objects of a closed shard. - final Set cleanupKeysFromClosedShards = new HashSet<>(); + final Set> cleanupKeysFromClosedShards = new HashSet<>(); for (Iterator iterator = keysToClean.iterator(); iterator.hasNext();) { CleanupKey cleanupKey = iterator.next(); iterator.remove(); if (cleanupKey.readerCacheKeyId == null || !cleanupKey.entity.isOpen()) { // null indicates full cleanup, as does a closed shard - ShardId shardId = ((IndexShard) cleanupKey.entity.getCacheIdentity()).shardId(); - cleanupKeysFromClosedShards.add(shardId); + IndexShard indexShard = (IndexShard) cleanupKey.entity.getCacheIdentity(); + // Add both shardId and indexShardHashCode to uniquely identify an indexShard. + cleanupKeysFromClosedShards.add(new Tuple<>(indexShard.shardId(), indexShard.hashCode())); } else { cleanupKeysFromOutdatedReaders.add(cleanupKey); } @@ -649,14 +739,22 @@ private synchronized void cleanCache(double stalenessThreshold) { for (Iterator> iterator = cache.keys().iterator(); iterator.hasNext();) { ICacheKey key = iterator.next(); - if (cleanupKeysFromClosedShards.contains(key.key.shardId)) { + Key delegatingKey = key.key; + if (cleanupKeysFromClosedShards.contains(new Tuple<>(delegatingKey.shardId, delegatingKey.indexShardHashCode))) { // Since the shard is closed, the cache should drop stats for this shard. 
dimensionListsToDrop.add(key.dimensions); iterator.remove(); } else { - CleanupKey cleanupKey = new CleanupKey(cacheEntityLookup.apply(key.key.shardId).orElse(null), key.key.readerCacheKeyId); - if (cleanupKeysFromOutdatedReaders.contains(cleanupKey)) { + CacheEntity cacheEntity = cacheEntityLookup.apply(delegatingKey.shardId).orElse(null); + if (cacheEntity == null) { + // If cache entity is null, it means that index or shard got deleted/closed meanwhile. + // So we will delete this key. iterator.remove(); + } else { + CleanupKey cleanupKey = new CleanupKey(cacheEntity, delegatingKey.readerCacheKeyId); + if (cleanupKeysFromOutdatedReaders.contains(cleanupKey)) { + iterator.remove(); + } } } } @@ -715,6 +813,11 @@ public void close() { this.cacheCleaner.close(); } + // for testing + ConcurrentMap> getCleanupKeyToCountMap() { + return cleanupKeyToCountMap; + } + private final class IndicesRequestCacheCleaner implements Runnable, Releasable { private final IndicesRequestCacheCleanupManager cacheCleanupManager; @@ -756,18 +859,11 @@ long count() { return cache.count(); } - /** - * Returns the current size in bytes of the cache - */ - long getSizeInBytes() { - return cache.stats().getTotalSizeInBytes(); - } - /** * Returns the current cache stats. Pkg-private for testing. 
*/ - ImmutableCacheStatsHolder stats() { - return cache.stats(); + ImmutableCacheStatsHolder stats(String[] levels) { + return cache.stats(levels); } int numRegisteredCloseListeners() { // for testing diff --git a/server/src/main/java/org/opensearch/indices/IndicesService.java b/server/src/main/java/org/opensearch/indices/IndicesService.java index 0187a9fb3b8ba..251be8a990055 100644 --- a/server/src/main/java/org/opensearch/indices/IndicesService.java +++ b/server/src/main/java/org/opensearch/indices/IndicesService.java @@ -216,10 +216,11 @@ public class IndicesService extends AbstractLifecycleComponent IndicesClusterStateService.AllocatedIndices, IndexService.ShardStoreDeleter { private static final Logger logger = LogManager.getLogger(IndicesService.class); + public static final String INDICES_CACHE_CLEANUP_INTERVAL_SETTING_KEY = "indices.cache.cleanup_interval"; public static final String INDICES_SHARDS_CLOSED_TIMEOUT = "indices.shards_closed_timeout"; public static final Setting INDICES_CACHE_CLEAN_INTERVAL_SETTING = Setting.positiveTimeSetting( - "indices.cache.cleanup_interval", + INDICES_CACHE_CLEANUP_INTERVAL_SETTING_KEY, TimeValue.timeValueMinutes(1), Property.NodeScope ); @@ -273,8 +274,8 @@ public class IndicesService extends AbstractLifecycleComponent */ public static final Setting CLUSTER_MINIMUM_INDEX_REFRESH_INTERVAL_SETTING = Setting.timeSetting( "cluster.minimum.index.refresh_interval", - IndexSettings.MINIMUM_REFRESH_INTERVAL, - IndexSettings.MINIMUM_REFRESH_INTERVAL, + TimeValue.ZERO, + TimeValue.ZERO, new ClusterMinimumRefreshIntervalValidator(), Property.NodeScope, Property.Dynamic @@ -2009,6 +2010,9 @@ public Iterator> settings() { * @param defaultRefreshInterval value of cluster default index refresh interval setting */ private static void validateRefreshIntervalSettings(TimeValue minimumRefreshInterval, TimeValue defaultRefreshInterval) { + if (defaultRefreshInterval.millis() < 0) { + return; + } if 
(minimumRefreshInterval.compareTo(defaultRefreshInterval) > 0) { throw new IllegalArgumentException( "cluster minimum index refresh interval [" diff --git a/server/src/main/java/org/opensearch/indices/RemoteStoreSettings.java b/server/src/main/java/org/opensearch/indices/RemoteStoreSettings.java index e0a9f7a9e05c1..0bd4c7aedfc03 100644 --- a/server/src/main/java/org/opensearch/indices/RemoteStoreSettings.java +++ b/server/src/main/java/org/opensearch/indices/RemoteStoreSettings.java @@ -94,11 +94,23 @@ public class RemoteStoreSettings { Property.Dynamic ); + /** + * Controls the maximum referenced remote translog files. If breached the shard will be flushed. + */ + public static final Setting CLUSTER_REMOTE_MAX_TRANSLOG_READERS = Setting.intSetting( + "cluster.remote_store.translog.max_readers", + 1000, + 100, + Property.Dynamic, + Property.NodeScope + ); + private volatile TimeValue clusterRemoteTranslogBufferInterval; private volatile int minRemoteSegmentMetadataFiles; private volatile TimeValue clusterRemoteTranslogTransferTimeout; private volatile RemoteStoreEnums.PathType pathType; private volatile RemoteStoreEnums.PathHashAlgorithm pathHashAlgorithm; + private volatile int maxRemoteTranslogReaders; public RemoteStoreSettings(Settings settings, ClusterSettings clusterSettings) { clusterRemoteTranslogBufferInterval = CLUSTER_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING.get(settings); @@ -124,6 +136,9 @@ public RemoteStoreSettings(Settings settings, ClusterSettings clusterSettings) { pathHashAlgorithm = clusterSettings.get(CLUSTER_REMOTE_STORE_PATH_HASH_ALGORITHM_SETTING); clusterSettings.addSettingsUpdateConsumer(CLUSTER_REMOTE_STORE_PATH_HASH_ALGORITHM_SETTING, this::setPathHashAlgorithm); + + maxRemoteTranslogReaders = CLUSTER_REMOTE_MAX_TRANSLOG_READERS.get(settings); + clusterSettings.addSettingsUpdateConsumer(CLUSTER_REMOTE_MAX_TRANSLOG_READERS, this::setMaxRemoteTranslogReaders); } public TimeValue getClusterRemoteTranslogBufferInterval() { @@ -167,4 +182,12 
@@ private void setPathType(RemoteStoreEnums.PathType pathType) { private void setPathHashAlgorithm(RemoteStoreEnums.PathHashAlgorithm pathHashAlgorithm) { this.pathHashAlgorithm = pathHashAlgorithm; } + + public int getMaxRemoteTranslogReaders() { + return maxRemoteTranslogReaders; + } + + private void setMaxRemoteTranslogReaders(int maxRemoteTranslogReaders) { + this.maxRemoteTranslogReaders = maxRemoteTranslogReaders; + } } diff --git a/server/src/main/java/org/opensearch/indices/store/TransportNodesListShardStoreMetadataBatch.java b/server/src/main/java/org/opensearch/indices/store/TransportNodesListShardStoreMetadataBatch.java index 85d5bff4677ef..22b03539cca74 100644 --- a/server/src/main/java/org/opensearch/indices/store/TransportNodesListShardStoreMetadataBatch.java +++ b/server/src/main/java/org/opensearch/indices/store/TransportNodesListShardStoreMetadataBatch.java @@ -276,7 +276,7 @@ public void writeTo(StreamOutput out) throws IOException { } } - boolean isEmpty(NodeStoreFilesMetadata response) { + public static boolean isEmpty(NodeStoreFilesMetadata response) { return response.storeFilesMetadata() == null || response.storeFilesMetadata().isEmpty() && response.getStoreFileFetchException() == null; } @@ -329,7 +329,13 @@ public static class NodeStoreFilesMetadataBatch extends BaseNodeResponse { protected NodeStoreFilesMetadataBatch(StreamInput in) throws IOException { super(in); - this.nodeStoreFilesMetadataBatch = in.readMap(ShardId::new, NodeStoreFilesMetadata::new); + this.nodeStoreFilesMetadataBatch = in.readMap(ShardId::new, i -> { + if (i.readBoolean()) { + return new NodeStoreFilesMetadata(i); + } else { + return null; + } + }); } public NodeStoreFilesMetadataBatch(DiscoveryNode node, Map nodeStoreFilesMetadataBatch) { @@ -344,7 +350,14 @@ public Map getNodeStoreFilesMetadataBatch() { @Override public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); - out.writeMap(nodeStoreFilesMetadataBatch, (o, k) -> k.writeTo(o), (o, v) 
-> v.writeTo(o)); + out.writeMap(nodeStoreFilesMetadataBatch, (o, k) -> k.writeTo(o), (o, v) -> { + if (v != null) { + o.writeBoolean(true); + v.writeTo(o); + } else { + o.writeBoolean(false); + } + }); } } diff --git a/server/src/main/java/org/opensearch/ingest/CompoundProcessor.java b/server/src/main/java/org/opensearch/ingest/CompoundProcessor.java index a5f4870029e87..64d71691bf818 100644 --- a/server/src/main/java/org/opensearch/ingest/CompoundProcessor.java +++ b/server/src/main/java/org/opensearch/ingest/CompoundProcessor.java @@ -39,10 +39,13 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; import java.util.function.BiConsumer; +import java.util.function.Consumer; import java.util.function.LongSupplier; import java.util.stream.Collectors; @@ -150,6 +153,108 @@ public void execute(IngestDocument ingestDocument, BiConsumer ingestDocumentWrappers, Consumer> handler) { + innerBatchExecute(0, ingestDocumentWrappers, handler); + } + + /** + * Internal logic to process documents with current processor. 
+ * + * @param currentProcessor index of processor to process batched documents + * @param ingestDocumentWrappers batched documents to be processed + * @param handler callback function + */ + void innerBatchExecute( + int currentProcessor, + List ingestDocumentWrappers, + Consumer> handler + ) { + if (currentProcessor == processorsWithMetrics.size()) { + handler.accept(ingestDocumentWrappers); + return; + } + Tuple processorWithMetric = processorsWithMetrics.get(currentProcessor); + final Processor processor = processorWithMetric.v1(); + final OperationMetrics metric = processorWithMetric.v2(); + final long startTimeInNanos = relativeTimeProvider.getAsLong(); + int size = ingestDocumentWrappers.size(); + metric.beforeN(size); + // Use synchronization to ensure batches are processed by processors in sequential order + AtomicInteger counter = new AtomicInteger(size); + List allResults = Collections.synchronizedList(new ArrayList<>()); + Map slotToWrapperMap = createSlotIngestDocumentWrapperMap(ingestDocumentWrappers); + processor.batchExecute(ingestDocumentWrappers, results -> { + if (results.isEmpty()) return; + allResults.addAll(results); + // counter equals to 0 means all documents are processed and called back. 
+ if (counter.addAndGet(-results.size()) == 0) { + long ingestTimeInMillis = TimeUnit.NANOSECONDS.toMillis(relativeTimeProvider.getAsLong() - startTimeInNanos); + metric.afterN(allResults.size(), ingestTimeInMillis); + + List documentsDropped = new ArrayList<>(); + List documentsWithException = new ArrayList<>(); + List documentsToContinue = new ArrayList<>(); + int totalFailed = 0; + // iterate all results to categorize them to: to continue, to drop, with exception + for (IngestDocumentWrapper resultDocumentWrapper : allResults) { + IngestDocumentWrapper originalDocumentWrapper = slotToWrapperMap.get(resultDocumentWrapper.getSlot()); + if (resultDocumentWrapper.getException() != null) { + ++totalFailed; + if (ignoreFailure) { + documentsToContinue.add(originalDocumentWrapper); + } else { + IngestProcessorException compoundProcessorException = newCompoundProcessorException( + resultDocumentWrapper.getException(), + processor, + originalDocumentWrapper.getIngestDocument() + ); + documentsWithException.add( + new IngestDocumentWrapper( + resultDocumentWrapper.getSlot(), + originalDocumentWrapper.getIngestDocument(), + compoundProcessorException + ) + ); + } + } else { + if (resultDocumentWrapper.getIngestDocument() == null) { + documentsDropped.add(resultDocumentWrapper); + } else { + documentsToContinue.add(resultDocumentWrapper); + } + } + } + if (totalFailed > 0) { + metric.failedN(totalFailed); + } + if (!documentsDropped.isEmpty()) { + handler.accept(documentsDropped); + } + if (!documentsToContinue.isEmpty()) { + innerBatchExecute(currentProcessor + 1, documentsToContinue, handler); + } + if (!documentsWithException.isEmpty()) { + if (onFailureProcessors.isEmpty()) { + handler.accept(documentsWithException); + } else { + documentsWithException.forEach( + doc -> executeOnFailureAsync( + 0, + doc.getIngestDocument(), + (IngestProcessorException) doc.getException(), + (result, ex) -> { + handler.accept(Collections.singletonList(new 
IngestDocumentWrapper(doc.getSlot(), result, ex))); + } + ) + ); + } + } + } + assert counter.get() >= 0; + }); + } + void innerExecute(int currentProcessor, IngestDocument ingestDocument, BiConsumer handler) { if (currentProcessor == processorsWithMetrics.size()) { handler.accept(ingestDocument, null); @@ -266,4 +371,12 @@ static IngestProcessorException newCompoundProcessorException(Exception e, Proce return exception; } + private Map createSlotIngestDocumentWrapperMap(List ingestDocumentWrappers) { + Map slotIngestDocumentWrapperMap = new HashMap<>(); + for (IngestDocumentWrapper ingestDocumentWrapper : ingestDocumentWrappers) { + slotIngestDocumentWrapperMap.put(ingestDocumentWrapper.getSlot(), ingestDocumentWrapper); + } + return slotIngestDocumentWrapperMap; + } + } diff --git a/server/src/main/java/org/opensearch/ingest/IngestDocumentWrapper.java b/server/src/main/java/org/opensearch/ingest/IngestDocumentWrapper.java new file mode 100644 index 0000000000000..6fb9f245f4996 --- /dev/null +++ b/server/src/main/java/org/opensearch/ingest/IngestDocumentWrapper.java @@ -0,0 +1,42 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.ingest; + +/** + * A IngestDocument wrapper including the slot of the IngestDocument in original IndexRequests. + * It also stores the exception happened during ingest process of the document. 
+ */ +public final class IngestDocumentWrapper { + private final int slot; + private IngestDocument ingestDocument; + private Exception exception; + + public IngestDocumentWrapper(int slot, IngestDocument ingestDocument, Exception ex) { + this.slot = slot; + this.ingestDocument = ingestDocument; + this.exception = ex; + } + + public int getSlot() { + return this.slot; + } + + public IngestDocument getIngestDocument() { + return this.ingestDocument; + } + + public Exception getException() { + return this.exception; + } + + public void update(IngestDocument result, Exception ex) { + this.ingestDocument = result; + this.exception = ex; + } +} diff --git a/server/src/main/java/org/opensearch/ingest/IngestService.java b/server/src/main/java/org/opensearch/ingest/IngestService.java index 2d4439e86461b..ab8e823199447 100644 --- a/server/src/main/java/org/opensearch/ingest/IngestService.java +++ b/server/src/main/java/org/opensearch/ingest/IngestService.java @@ -39,6 +39,7 @@ import org.opensearch.OpenSearchParseException; import org.opensearch.ResourceNotFoundException; import org.opensearch.action.DocWriteRequest; +import org.opensearch.action.bulk.BulkRequest; import org.opensearch.action.bulk.TransportBulkAction; import org.opensearch.action.index.IndexRequest; import org.opensearch.action.ingest.DeletePipelineRequest; @@ -93,6 +94,7 @@ import java.util.function.BiConsumer; import java.util.function.Consumer; import java.util.function.IntConsumer; +import java.util.stream.Collectors; /** * Holder class for several ingest related services. 
@@ -511,9 +513,9 @@ public void executeBulkRequest( BiConsumer onFailure, BiConsumer onCompletion, IntConsumer onDropped, - String executorName + String executorName, + BulkRequest originalBulkRequest ) { - threadPool.executor(executorName).execute(new AbstractRunnable() { @Override @@ -523,6 +525,12 @@ public void onFailure(Exception e) { @Override protected void doRun() { + int batchSize = originalBulkRequest.batchSize(); + if (shouldExecuteBulkRequestInBatch(originalBulkRequest.requests().size(), batchSize)) { + runBulkRequestInBatch(numberOfActionRequests, actionRequests, onFailure, onCompletion, onDropped, originalBulkRequest); + return; + } + final Thread originalThread = Thread.currentThread(); final AtomicInteger counter = new AtomicInteger(numberOfActionRequests); int i = 0; @@ -536,7 +544,6 @@ protected void doRun() { i++; continue; } - final String pipelineId = indexRequest.getPipeline(); indexRequest.setPipeline(NOOP_PIPELINE_NAME); final String finalPipelineId = indexRequest.getFinalPipeline(); @@ -571,13 +578,286 @@ protected void doRun() { onCompletion, originalThread ); - i++; } } }); } + private void runBulkRequestInBatch( + int numberOfActionRequests, + Iterable> actionRequests, + BiConsumer onFailure, + BiConsumer onCompletion, + IntConsumer onDropped, + BulkRequest originalBulkRequest + ) { + + final Thread originalThread = Thread.currentThread(); + final AtomicInteger counter = new AtomicInteger(numberOfActionRequests); + int i = 0; + List indexRequestWrappers = new ArrayList<>(); + for (DocWriteRequest actionRequest : actionRequests) { + IndexRequest indexRequest = TransportBulkAction.getIndexWriteRequest(actionRequest); + if (indexRequest == null) { + if (counter.decrementAndGet() == 0) { + onCompletion.accept(originalThread, null); + } + assert counter.get() >= 0; + i++; + continue; + } + + final String pipelineId = indexRequest.getPipeline(); + indexRequest.setPipeline(NOOP_PIPELINE_NAME); + final String finalPipelineId = 
indexRequest.getFinalPipeline(); + indexRequest.setFinalPipeline(NOOP_PIPELINE_NAME); + boolean hasFinalPipeline = true; + final List pipelines; + if (IngestService.NOOP_PIPELINE_NAME.equals(pipelineId) == false + && IngestService.NOOP_PIPELINE_NAME.equals(finalPipelineId) == false) { + pipelines = Arrays.asList(pipelineId, finalPipelineId); + } else if (IngestService.NOOP_PIPELINE_NAME.equals(pipelineId) == false) { + pipelines = Collections.singletonList(pipelineId); + hasFinalPipeline = false; + } else if (IngestService.NOOP_PIPELINE_NAME.equals(finalPipelineId) == false) { + pipelines = Collections.singletonList(finalPipelineId); + } else { + if (counter.decrementAndGet() == 0) { + onCompletion.accept(originalThread, null); + } + assert counter.get() >= 0; + i++; + continue; + } + + indexRequestWrappers.add(new IndexRequestWrapper(i, indexRequest, pipelines, hasFinalPipeline)); + i++; + } + + int batchSize = originalBulkRequest.batchSize(); + List> batches = prepareBatches(batchSize, indexRequestWrappers); + logger.debug("batchSize: {}, batches: {}", batchSize, batches.size()); + + for (List batch : batches) { + executePipelinesInBatchRequests( + batch.stream().map(IndexRequestWrapper::getSlot).collect(Collectors.toList()), + batch.get(0).getPipelines().iterator(), + batch.get(0).isHasFinalPipeline(), + batch.stream().map(IndexRequestWrapper::getIndexRequest).collect(Collectors.toList()), + onDropped, + onFailure, + counter, + onCompletion, + originalThread + ); + } + } + + private boolean shouldExecuteBulkRequestInBatch(int documentSize, int batchSize) { + return documentSize > 1 && batchSize > 1; + } + + /** + * IndexRequests are grouped by unique (index + pipeline_ids) before batching. + * Only IndexRequests in the same group could be batched. It's to ensure batched documents always + * flow through the same pipeline together. + * + * An IndexRequest could be preprocessed by at most two pipelines: default_pipeline and final_pipeline. 
+ * A final_pipeline is configured on index level. The default_pipeline for a IndexRequest in a _bulk API + * could come from three places: + * 1. bound with index + * 2. a request parameter of _bulk API + * 3. a parameter of an IndexRequest. + */ + static List> prepareBatches(int batchSize, List indexRequestWrappers) { + final Map> indexRequestsPerIndexAndPipelines = new HashMap<>(); + for (IndexRequestWrapper indexRequestWrapper : indexRequestWrappers) { + // IndexRequests are grouped by their index + pipeline ids + List indexAndPipelineIds = new ArrayList<>(); + String index = indexRequestWrapper.getIndexRequest().index(); + List pipelines = indexRequestWrapper.getPipelines(); + indexAndPipelineIds.add(index); + indexAndPipelineIds.addAll(pipelines); + int hashCode = indexAndPipelineIds.hashCode(); + indexRequestsPerIndexAndPipelines.putIfAbsent(hashCode, new ArrayList<>()); + indexRequestsPerIndexAndPipelines.get(hashCode).add(indexRequestWrapper); + } + List> batchedIndexRequests = new ArrayList<>(); + for (Map.Entry> indexRequestsPerKey : indexRequestsPerIndexAndPipelines.entrySet()) { + for (int i = 0; i < indexRequestsPerKey.getValue().size(); i += batchSize) { + batchedIndexRequests.add( + new ArrayList<>( + indexRequestsPerKey.getValue().subList(i, i + Math.min(batchSize, indexRequestsPerKey.getValue().size() - i)) + ) + ); + } + } + return batchedIndexRequests; + } + + /* visible for testing */ + static final class IndexRequestWrapper { + private final int slot; + private final IndexRequest indexRequest; + private final List pipelines; + private final boolean hasFinalPipeline; + + IndexRequestWrapper(int slot, IndexRequest indexRequest, List pipelines, boolean hasFinalPipeline) { + this.slot = slot; + this.indexRequest = indexRequest; + this.pipelines = pipelines; + this.hasFinalPipeline = hasFinalPipeline; + } + + public int getSlot() { + return slot; + } + + public IndexRequest getIndexRequest() { + return indexRequest; + } + + public List 
getPipelines() { + return pipelines; + } + + public boolean isHasFinalPipeline() { + return hasFinalPipeline; + } + } + + private void executePipelinesInBatchRequests( + final List slots, + final Iterator pipelineIterator, + final boolean hasFinalPipeline, + final List indexRequests, + final IntConsumer onDropped, + final BiConsumer onFailure, + final AtomicInteger counter, + final BiConsumer onCompletion, + final Thread originalThread + ) { + if (indexRequests.size() == 1) { + executePipelines( + slots.get(0), + pipelineIterator, + hasFinalPipeline, + indexRequests.get(0), + onDropped, + onFailure, + counter, + onCompletion, + originalThread + ); + return; + } + while (pipelineIterator.hasNext()) { + final String pipelineId = pipelineIterator.next(); + try { + PipelineHolder holder = pipelines.get(pipelineId); + if (holder == null) { + throw new IllegalArgumentException("pipeline with id [" + pipelineId + "] does not exist"); + } + Pipeline pipeline = holder.pipeline; + String originalIndex = indexRequests.get(0).indices()[0]; + Map slotIndexRequestMap = createSlotIndexRequestMap(slots, indexRequests); + innerBatchExecute(slots, indexRequests, pipeline, onDropped, results -> { + for (int i = 0; i < results.size(); ++i) { + if (results.get(i).getException() != null) { + IndexRequest indexRequest = slotIndexRequestMap.get(results.get(i).getSlot()); + logger.debug( + () -> new ParameterizedMessage( + "failed to execute pipeline [{}] for document [{}/{}]", + pipelineId, + indexRequest.index(), + indexRequest.id() + ), + results.get(i).getException() + ); + onFailure.accept(slots.get(i), results.get(i).getException()); + } + } + + Iterator newPipelineIterator = pipelineIterator; + boolean newHasFinalPipeline = hasFinalPipeline; + // indexRequests are grouped for the same index and same pipelines + String newIndex = indexRequests.get(0).indices()[0]; + + // handle index change case + if (Objects.equals(originalIndex, newIndex) == false) { + if (hasFinalPipeline && 
pipelineIterator.hasNext() == false) { + totalMetrics.failed(); + for (int slot : slots) { + onFailure.accept( + slot, + new IllegalStateException("final pipeline [" + pipelineId + "] can't change the target index") + ); + } + } else { + // Drain old it so it's not looped over + pipelineIterator.forEachRemaining($ -> {}); + for (IndexRequest indexRequest : indexRequests) { + indexRequest.isPipelineResolved(false); + resolvePipelines(null, indexRequest, state.metadata()); + if (IngestService.NOOP_PIPELINE_NAME.equals(indexRequest.getFinalPipeline()) == false) { + newPipelineIterator = Collections.singleton(indexRequest.getFinalPipeline()).iterator(); + newHasFinalPipeline = true; + } else { + newPipelineIterator = Collections.emptyIterator(); + } + } + } + } + + if (newPipelineIterator.hasNext()) { + executePipelinesInBatchRequests( + slots, + newPipelineIterator, + newHasFinalPipeline, + indexRequests, + onDropped, + onFailure, + counter, + onCompletion, + originalThread + ); + } else { + if (counter.addAndGet(-results.size()) == 0) { + onCompletion.accept(originalThread, null); + } + assert counter.get() >= 0; + } + }); + } catch (Exception e) { + StringBuilder documentLogBuilder = new StringBuilder(); + for (int i = 0; i < indexRequests.size(); ++i) { + IndexRequest indexRequest = indexRequests.get(i); + documentLogBuilder.append(indexRequest.index()); + documentLogBuilder.append("/"); + documentLogBuilder.append(indexRequest.id()); + if (i < indexRequests.size() - 1) { + documentLogBuilder.append(", "); + } + onFailure.accept(slots.get(i), e); + } + logger.debug( + () -> new ParameterizedMessage( + "failed to execute pipeline [{}] for documents [{}]", + pipelineId, + documentLogBuilder.toString() + ), + e + ); + if (counter.addAndGet(-indexRequests.size()) == 0) { + onCompletion.accept(originalThread, null); + } + assert counter.get() >= 0; + break; + } + } + } + private void executePipelines( final int slot, final Iterator it, @@ -761,28 +1041,73 @@ private 
void innerExecute( itemDroppedHandler.accept(slot); handler.accept(null); } else { - Map metadataMap = ingestDocument.extractMetadata(); - // it's fine to set all metadata fields all the time, as ingest document holds their starting values - // before ingestion, which might also get modified during ingestion. - indexRequest.index((String) metadataMap.get(IngestDocument.Metadata.INDEX)); - indexRequest.id((String) metadataMap.get(IngestDocument.Metadata.ID)); - indexRequest.routing((String) metadataMap.get(IngestDocument.Metadata.ROUTING)); - indexRequest.version(((Number) metadataMap.get(IngestDocument.Metadata.VERSION)).longValue()); - if (metadataMap.get(IngestDocument.Metadata.VERSION_TYPE) != null) { - indexRequest.versionType(VersionType.fromString((String) metadataMap.get(IngestDocument.Metadata.VERSION_TYPE))); - } - if (metadataMap.get(IngestDocument.Metadata.IF_SEQ_NO) != null) { - indexRequest.setIfSeqNo(((Number) metadataMap.get(IngestDocument.Metadata.IF_SEQ_NO)).longValue()); - } - if (metadataMap.get(IngestDocument.Metadata.IF_PRIMARY_TERM) != null) { - indexRequest.setIfPrimaryTerm(((Number) metadataMap.get(IngestDocument.Metadata.IF_PRIMARY_TERM)).longValue()); - } - indexRequest.source(ingestDocument.getSourceAndMetadata(), indexRequest.getContentType()); + updateIndexRequestWithIngestDocument(indexRequest, ingestDocument); handler.accept(null); } }); } + private void innerBatchExecute( + List slots, + List indexRequests, + Pipeline pipeline, + IntConsumer itemDroppedHandler, + Consumer> handler + ) { + if (pipeline.getProcessors().isEmpty()) { + handler.accept(null); + return; + } + + int size = indexRequests.size(); + long startTimeInNanos = System.nanoTime(); + // the pipeline specific stat holder may not exist and that is fine: + // (e.g. 
the pipeline may have been removed while we're ingesting a document + totalMetrics.beforeN(size); + List ingestDocumentWrappers = new ArrayList<>(); + Map slotToindexRequestMap = new HashMap<>(); + for (int i = 0; i < slots.size(); ++i) { + slotToindexRequestMap.put(slots.get(i), indexRequests.get(i)); + ingestDocumentWrappers.add(toIngestDocumentWrapper(slots.get(i), indexRequests.get(i))); + } + AtomicInteger counter = new AtomicInteger(size); + List allResults = Collections.synchronizedList(new ArrayList<>()); + pipeline.batchExecute(ingestDocumentWrappers, results -> { + if (results.isEmpty()) return; + allResults.addAll(results); + if (counter.addAndGet(-results.size()) == 0) { + long ingestTimeInMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTimeInNanos); + totalMetrics.afterN(size, ingestTimeInMillis); + List succeeded = new ArrayList<>(); + List dropped = new ArrayList<>(); + List exceptions = new ArrayList<>(); + for (IngestDocumentWrapper result : allResults) { + if (result.getException() != null) { + exceptions.add(result); + } else if (result.getIngestDocument() == null) { + dropped.add(result); + } else { + succeeded.add(result); + } + } + if (!exceptions.isEmpty()) { + totalMetrics.failedN(exceptions.size()); + } else if (!dropped.isEmpty()) { + dropped.forEach(t -> itemDroppedHandler.accept(t.getSlot())); + } else { + for (IngestDocumentWrapper ingestDocumentWrapper : succeeded) { + updateIndexRequestWithIngestDocument( + slotToindexRequestMap.get(ingestDocumentWrapper.getSlot()), + ingestDocumentWrapper.getIngestDocument() + ); + } + } + handler.accept(allResults); + } + assert counter.get() >= 0; + }); + } + @Override public void applyClusterState(final ClusterChangedEvent event) { state = event.state(); @@ -969,4 +1294,46 @@ static class PipelineHolder { } } + public static void updateIndexRequestWithIngestDocument(IndexRequest indexRequest, IngestDocument ingestDocument) { + Map metadataMap = ingestDocument.extractMetadata(); + 
// it's fine to set all metadata fields all the time, as ingest document holds their starting values + // before ingestion, which might also get modified during ingestion. + indexRequest.index((String) metadataMap.get(IngestDocument.Metadata.INDEX)); + indexRequest.id((String) metadataMap.get(IngestDocument.Metadata.ID)); + indexRequest.routing((String) metadataMap.get(IngestDocument.Metadata.ROUTING)); + indexRequest.version(((Number) metadataMap.get(IngestDocument.Metadata.VERSION)).longValue()); + if (metadataMap.get(IngestDocument.Metadata.VERSION_TYPE) != null) { + indexRequest.versionType(VersionType.fromString((String) metadataMap.get(IngestDocument.Metadata.VERSION_TYPE))); + } + if (metadataMap.get(IngestDocument.Metadata.IF_SEQ_NO) != null) { + indexRequest.setIfSeqNo(((Number) metadataMap.get(IngestDocument.Metadata.IF_SEQ_NO)).longValue()); + } + if (metadataMap.get(IngestDocument.Metadata.IF_PRIMARY_TERM) != null) { + indexRequest.setIfPrimaryTerm(((Number) metadataMap.get(IngestDocument.Metadata.IF_PRIMARY_TERM)).longValue()); + } + indexRequest.source(ingestDocument.getSourceAndMetadata(), indexRequest.getContentType()); + } + + static IngestDocument toIngestDocument(IndexRequest indexRequest) { + return new IngestDocument( + indexRequest.index(), + indexRequest.id(), + indexRequest.routing(), + indexRequest.version(), + indexRequest.versionType(), + indexRequest.sourceAsMap() + ); + } + + private static IngestDocumentWrapper toIngestDocumentWrapper(int slot, IndexRequest indexRequest) { + return new IngestDocumentWrapper(slot, toIngestDocument(indexRequest), null); + } + + private static Map createSlotIndexRequestMap(List slots, List indexRequests) { + Map slotIndexRequestMap = new HashMap<>(); + for (int i = 0; i < slots.size(); ++i) { + slotIndexRequestMap.put(slots.get(i), indexRequests.get(i)); + } + return slotIndexRequestMap; + } } diff --git a/server/src/main/java/org/opensearch/ingest/Pipeline.java 
b/server/src/main/java/org/opensearch/ingest/Pipeline.java index 2541cfbf4af77..708416cfca3b7 100644 --- a/server/src/main/java/org/opensearch/ingest/Pipeline.java +++ b/server/src/main/java/org/opensearch/ingest/Pipeline.java @@ -43,6 +43,7 @@ import java.util.Map; import java.util.concurrent.TimeUnit; import java.util.function.BiConsumer; +import java.util.function.Consumer; import java.util.function.LongSupplier; /** @@ -201,4 +202,28 @@ public List flattenAllProcessors() { public OperationMetrics getMetrics() { return metrics; } + + /** + * Modifies the data of batched multiple documents to be indexed based on the processor this pipeline holds + *

+ * If the resulting IngestDocument is {@code null} then this document will be dropped and not indexed, otherwise + * this document will be kept and indexed. The document and any exception that happened during processing are kept in an + * IngestDocumentWrapper and passed to the upper level through the callback. + * + * @param ingestDocumentWrappers a list of wrapped IngestDocument to ingest. + * @param handler callback with IngestDocument result and exception wrapped in IngestDocumentWrapper. + */ + public void batchExecute(List ingestDocumentWrappers, Consumer> handler) { + final long startTimeInNanos = relativeTimeProvider.getAsLong(); + int size = ingestDocumentWrappers.size(); + metrics.beforeN(size); + compoundProcessor.batchExecute(ingestDocumentWrappers, results -> { + long ingestTimeInMillis = TimeUnit.NANOSECONDS.toMillis(relativeTimeProvider.getAsLong() - startTimeInNanos); + metrics.afterN(results.size(), ingestTimeInMillis); + + int failedCount = (int) results.stream().filter(t -> t.getException() != null).count(); + metrics.failedN(failedCount); + handler.accept(results); + }); + } } diff --git a/server/src/main/java/org/opensearch/ingest/Processor.java b/server/src/main/java/org/opensearch/ingest/Processor.java index ecae1c139ea5e..9af1104502047 100644 --- a/server/src/main/java/org/opensearch/ingest/Processor.java +++ b/server/src/main/java/org/opensearch/ingest/Processor.java @@ -33,6 +33,7 @@ package org.opensearch.ingest; import org.opensearch.client.Client; +import org.opensearch.common.util.concurrent.AtomicArray; import org.opensearch.common.util.concurrent.ThreadContext; import org.opensearch.env.Environment; import org.opensearch.index.analysis.AnalysisRegistry; @@ -40,7 +41,10 @@ import org.opensearch.script.ScriptService; import org.opensearch.threadpool.Scheduler; +import java.util.Collections; +import java.util.List; import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; import java.util.function.BiConsumer; import java.util.function.BiFunction; import
java.util.function.Consumer; @@ -81,6 +85,42 @@ default void execute(IngestDocument ingestDocument, BiConsumer ingestDocumentWrappers, Consumer> handler) { + if (ingestDocumentWrappers.isEmpty()) { + handler.accept(Collections.emptyList()); + return; + } + int size = ingestDocumentWrappers.size(); + AtomicInteger counter = new AtomicInteger(size); + AtomicArray results = new AtomicArray<>(size); + for (int i = 0; i < size; ++i) { + innerExecute(i, ingestDocumentWrappers.get(i), results, counter, handler); + } + } + + private void innerExecute( + int slot, + IngestDocumentWrapper ingestDocumentWrapper, + AtomicArray results, + AtomicInteger counter, + Consumer> handler + ) { + execute(ingestDocumentWrapper.getIngestDocument(), (doc, ex) -> { + results.set(slot, new IngestDocumentWrapper(ingestDocumentWrapper.getSlot(), doc, ex)); + if (counter.decrementAndGet() == 0) { + handler.accept(results.asList()); + } + }); + } + /** * Gets the type of a processor */ diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index a33fd71e21896..614f39166ea66 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -136,6 +136,7 @@ import org.opensearch.gateway.GatewayService; import org.opensearch.gateway.MetaStateService; import org.opensearch.gateway.PersistedClusterStateService; +import org.opensearch.gateway.ShardsBatchGatewayAllocator; import org.opensearch.gateway.remote.RemoteClusterStateService; import org.opensearch.http.HttpServerTransport; import org.opensearch.identity.IdentityService; @@ -146,6 +147,7 @@ import org.opensearch.index.analysis.AnalysisRegistry; import org.opensearch.index.engine.EngineFactory; import org.opensearch.index.recovery.RemoteStoreRestoreService; +import org.opensearch.index.remote.RemoteIndexPathUploader; import org.opensearch.index.remote.RemoteStoreStatsTrackerFactory; import 
org.opensearch.index.store.RemoteSegmentStoreDirectoryFactory; import org.opensearch.index.store.remote.filecache.FileCache; @@ -726,17 +728,26 @@ protected Node( threadPool::relativeTimeInMillis ); final RemoteClusterStateService remoteClusterStateService; + final RemoteIndexPathUploader remoteIndexPathUploader; if (isRemoteStoreClusterStateEnabled(settings)) { + remoteIndexPathUploader = new RemoteIndexPathUploader( + threadPool, + settings, + repositoriesServiceReference::get, + clusterService.getClusterSettings() + ); remoteClusterStateService = new RemoteClusterStateService( nodeEnvironment.nodeId(), repositoriesServiceReference::get, settings, clusterService.getClusterSettings(), threadPool::preciseRelativeTimeInNanos, - threadPool + threadPool, + List.of(remoteIndexPathUploader) ); } else { remoteClusterStateService = null; + remoteIndexPathUploader = null; } // collect engine factory providers from plugins @@ -1181,7 +1192,8 @@ protected Node( resourceUsageCollectorService, segmentReplicationStatsTracker, repositoryService, - admissionControlService + admissionControlService, + cacheService ); final SearchService searchService = newSearchService( @@ -1313,6 +1325,7 @@ protected Node( b.bind(SearchRequestSlowLog.class).toInstance(searchRequestSlowLog); b.bind(MetricsRegistry.class).toInstance(metricsRegistry); b.bind(RemoteClusterStateService.class).toProvider(() -> remoteClusterStateService); + b.bind(RemoteIndexPathUploader.class).toProvider(() -> remoteIndexPathUploader); b.bind(PersistedStateRegistry.class).toInstance(persistedStateRegistry); b.bind(SegmentReplicationStatsTracker.class).toInstance(segmentReplicationStatsTracker); b.bind(SearchRequestOperationsCompositeListenerFactory.class).toInstance(searchRequestOperationsCompositeListenerFactory); @@ -1322,9 +1335,12 @@ protected Node( // We allocate copies of existing shards by looking for a viable copy of the shard in the cluster and assigning the shard there. 
// The search for viable copies is triggered by an allocation attempt (i.e. a reroute) and is performed asynchronously. When it // completes we trigger another reroute to try the allocation again. This means there is a circular dependency: the allocation - // service needs access to the existing shards allocators (e.g. the GatewayAllocator) which need to be able to trigger a - // reroute, which needs to call into the allocation service. We close the loop here: - clusterModule.setExistingShardsAllocators(injector.getInstance(GatewayAllocator.class)); + // service needs access to the existing shards allocators (e.g. the GatewayAllocator, ShardsBatchGatewayAllocator) which + // need to be able to trigger a reroute, which needs to call into the allocation service. We close the loop here: + clusterModule.setExistingShardsAllocators( + injector.getInstance(GatewayAllocator.class), + injector.getInstance(ShardsBatchGatewayAllocator.class) + ); List pluginLifecycleComponents = pluginComponents.stream() .filter(p -> p instanceof LifecycleComponent) @@ -1462,6 +1478,10 @@ public Node start() throws NodeValidationException { if (remoteClusterStateService != null) { remoteClusterStateService.start(); } + final RemoteIndexPathUploader remoteIndexPathUploader = injector.getInstance(RemoteIndexPathUploader.class); + if (remoteIndexPathUploader != null) { + remoteIndexPathUploader.start(); + } // Load (and maybe upgrade) the metadata stored on disk final GatewayMetaState gatewayMetaState = injector.getInstance(GatewayMetaState.class); gatewayMetaState.start( diff --git a/server/src/main/java/org/opensearch/node/NodeService.java b/server/src/main/java/org/opensearch/node/NodeService.java index 15cc8f3d20bb3..1eb38ea63ad5a 100644 --- a/server/src/main/java/org/opensearch/node/NodeService.java +++ b/server/src/main/java/org/opensearch/node/NodeService.java @@ -41,6 +41,7 @@ import org.opensearch.cluster.routing.WeightedRoutingStats; import 
org.opensearch.cluster.service.ClusterService; import org.opensearch.common.Nullable; +import org.opensearch.common.cache.service.CacheService; import org.opensearch.common.settings.Settings; import org.opensearch.common.settings.SettingsFilter; import org.opensearch.common.util.io.IOUtils; @@ -99,6 +100,7 @@ public class NodeService implements Closeable { private final RepositoriesService repositoriesService; private final AdmissionControlService admissionControlService; private final SegmentReplicationStatsTracker segmentReplicationStatsTracker; + private final CacheService cacheService; NodeService( Settings settings, @@ -125,7 +127,8 @@ public class NodeService implements Closeable { ResourceUsageCollectorService resourceUsageCollectorService, SegmentReplicationStatsTracker segmentReplicationStatsTracker, RepositoriesService repositoriesService, - AdmissionControlService admissionControlService + AdmissionControlService admissionControlService, + CacheService cacheService ) { this.settings = settings; this.threadPool = threadPool; @@ -154,6 +157,7 @@ public class NodeService implements Closeable { clusterService.addStateApplier(ingestService); clusterService.addStateApplier(searchPipelineService); this.segmentReplicationStatsTracker = segmentReplicationStatsTracker; + this.cacheService = cacheService; } public NodeInfo info( @@ -236,7 +240,8 @@ public NodeStats stats( boolean resourceUsageStats, boolean segmentReplicationTrackerStats, boolean repositoriesStats, - boolean admissionControl + boolean admissionControl, + boolean cacheService ) { // for indices stats we want to include previous allocated shards stats as well (it will // only be applied to the sensible ones to use, like refresh/merge/flush/indexing stats) @@ -268,7 +273,8 @@ public NodeStats stats( searchPipelineStats ? this.searchPipelineService.stats() : null, segmentReplicationTrackerStats ? this.segmentReplicationStatsTracker.getTotalRejectionStats() : null, repositoriesStats ? 
this.repositoriesService.getRepositoriesStats() : null, - admissionControl ? this.admissionControlService.stats() : null + admissionControl ? this.admissionControlService.stats() : null, + cacheService ? this.cacheService.stats(indices) : null ); } diff --git a/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeAttribute.java b/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeAttribute.java index a3bfe1195d8cc..b10ec0d99c3d5 100644 --- a/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeAttribute.java +++ b/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeAttribute.java @@ -18,6 +18,7 @@ import org.opensearch.repositories.blobstore.BlobStoreRepository; import java.util.ArrayList; +import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; @@ -47,6 +48,11 @@ public class RemoteStoreNodeAttribute { public static final String REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX = "remote_store.repository.%s.settings."; private final RepositoriesMetadata repositoriesMetadata; + public static List SUPPORTED_DATA_REPO_NAME_ATTRIBUTES = List.of( + REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, + REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY + ); + /** * Creates a new {@link RemoteStoreNodeAttribute} */ @@ -185,6 +191,30 @@ public RepositoriesMetadata getRepositoriesMetadata() { return this.repositoriesMetadata; } + /** + * Return {@link Map} of all the supported data repo names listed on {@link RemoteStoreNodeAttribute#SUPPORTED_DATA_REPO_NAME_ATTRIBUTES} + * + * @param node Node to fetch attributes from + * @return {@link Map} of all remote store data repo attribute keys and their values + */ + public static Map getDataRepoNames(DiscoveryNode node) { + assert remoteDataAttributesPresent(node.getAttributes()); + Map dataRepoNames = new HashMap<>(); + for (String supportedRepoAttribute : SUPPORTED_DATA_REPO_NAME_ATTRIBUTES) { + 
dataRepoNames.put(supportedRepoAttribute, node.getAttributes().get(supportedRepoAttribute)); + } + return dataRepoNames; + } + + private static boolean remoteDataAttributesPresent(Map nodeAttrs) { + for (String supportedRepoAttributes : SUPPORTED_DATA_REPO_NAME_ATTRIBUTES) { + if (nodeAttrs.get(supportedRepoAttributes) == null || nodeAttrs.get(supportedRepoAttributes).isEmpty()) { + return false; + } + } + return true; + } + @Override public int hashCode() { // The hashCode is generated by computing the hash of all the repositoryMetadata present in diff --git a/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeService.java b/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeService.java index 94b11086ba865..874c9408de6c5 100644 --- a/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeService.java +++ b/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeService.java @@ -11,6 +11,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; +import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.metadata.RepositoriesMetadata; import org.opensearch.cluster.metadata.RepositoryMetadata; import org.opensearch.cluster.node.DiscoveryNode; @@ -226,7 +227,14 @@ public RepositoriesMetadata updateRepositoriesMetadata(DiscoveryNode joiningNode } /** - * To check if the cluster is undergoing remote store migration + * Returns true iff current cluster settings have: + *
+ * - remote_store.compatibility_mode set to mixed + *
+ * - migration.direction set to remote_store + *
+ * false otherwise + * * @param clusterSettings cluster level settings * @return * true For REMOTE_STORE migration direction and MIXED compatibility mode, @@ -238,4 +246,17 @@ public static boolean isMigratingToRemoteStore(ClusterSettings clusterSettings) return (isMixedMode && isRemoteStoreMigrationDirection); } + + /** + * To check if the cluster is undergoing remote store migration using clusterState metadata + * @return + * true For REMOTE_STORE migration direction and MIXED compatibility mode, + * false otherwise + */ + public static boolean isMigratingToRemoteStore(Metadata metadata) { + boolean isMixedMode = REMOTE_STORE_COMPATIBILITY_MODE_SETTING.get(metadata.settings()).equals(CompatibilityMode.MIXED); + boolean isRemoteStoreMigrationDirection = MIGRATION_DIRECTION_SETTING.get(metadata.settings()).equals(Direction.REMOTE_STORE); + + return (isMixedMode && isRemoteStoreMigrationDirection); + } } diff --git a/server/src/main/java/org/opensearch/repositories/blobstore/BaseBlobStoreFormat.java b/server/src/main/java/org/opensearch/repositories/blobstore/BaseBlobStoreFormat.java new file mode 100644 index 0000000000000..262a32fa1e74d --- /dev/null +++ b/server/src/main/java/org/opensearch/repositories/blobstore/BaseBlobStoreFormat.java @@ -0,0 +1,130 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.repositories.blobstore; + +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.store.OutputStreamIndexOutput; +import org.opensearch.common.blobstore.BlobContainer; +import org.opensearch.common.io.stream.BytesStreamOutput; +import org.opensearch.common.lucene.store.IndexOutputOutputStream; +import org.opensearch.common.xcontent.XContentType; +import org.opensearch.core.common.bytes.BytesReference; +import org.opensearch.core.compress.Compressor; +import org.opensearch.core.xcontent.MediaTypeRegistry; +import org.opensearch.core.xcontent.ToXContent; +import org.opensearch.core.xcontent.XContentBuilder; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.Locale; +import java.util.Objects; + +/** + * Provides common methods, variables that can be used by the implementors. + * + * @opensearch.internal + */ +public class BaseBlobStoreFormat { + + private static final int BUFFER_SIZE = 4096; + + private final String blobNameFormat; + + private final boolean skipHeaderFooter; + + /** + * @param blobNameFormat format of the blobname in {@link String#format} format + */ + public BaseBlobStoreFormat(String blobNameFormat, boolean skipHeaderFooter) { + this.blobNameFormat = blobNameFormat; + this.skipHeaderFooter = skipHeaderFooter; + } + + protected String blobName(String name) { + return String.format(Locale.ROOT, blobNameFormat, name); + } + + /** + * Writes blob with resolving the blob name using {@link #blobName} method. + *

+ * The blob will optionally be compressed. + * + * @param obj object to be serialized + * @param blobContainer blob container + * @param name blob name + * @param compressor whether to use compression + * @param params ToXContent params + * @param codec codec used + * @param version version used + */ + protected void write( + final T obj, + final BlobContainer blobContainer, + final String name, + final Compressor compressor, + final ToXContent.Params params, + XContentType xContentType, + String codec, + Integer version + ) throws IOException { + final String blobName = blobName(name); + final BytesReference bytes = serialize(obj, blobName, compressor, params, xContentType, codec, version); + blobContainer.writeBlob(blobName, bytes.streamInput(), bytes.length(), false); + } + + public BytesReference serialize( + final T obj, + final String blobName, + final Compressor compressor, + final ToXContent.Params params, + XContentType xContentType, + String codec, + Integer version + ) throws IOException { + assert skipHeaderFooter || (Objects.nonNull(codec) && Objects.nonNull(version)); + try (BytesStreamOutput outputStream = new BytesStreamOutput()) { + try ( + OutputStreamIndexOutput indexOutput = new OutputStreamIndexOutput( + "ChecksumBlobStoreFormat.writeBlob(blob=\"" + blobName + "\")", + blobName, + outputStream, + BUFFER_SIZE + ) + ) { + if (skipHeaderFooter == false) { + CodecUtil.writeHeader(indexOutput, codec, version); + } + try (OutputStream indexOutputOutputStream = new IndexOutputOutputStream(indexOutput) { + @Override + public void close() { + // this is important since some of the XContentBuilders write bytes on close. + // in order to write the footer we need to prevent closing the actual index output.
+ } + }; + XContentBuilder builder = MediaTypeRegistry.contentBuilder( + xContentType, + compressor.threadLocalOutputStream(indexOutputOutputStream) + ) + ) { + builder.startObject(); + obj.toXContent(builder, params); + builder.endObject(); + } + if (skipHeaderFooter == false) { + CodecUtil.writeFooter(indexOutput); + } + } + return outputStream.bytes(); + } + } + + protected String getBlobNameFormat() { + return blobNameFormat; + } +} diff --git a/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java b/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java index 3e6052a5ef820..e567e1b626c5a 100644 --- a/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java +++ b/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java @@ -38,7 +38,6 @@ import org.apache.lucene.store.ByteBuffersDataInput; import org.apache.lucene.store.ByteBuffersIndexInput; import org.apache.lucene.store.IndexInput; -import org.apache.lucene.store.OutputStreamIndexOutput; import org.apache.lucene.util.BytesRef; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.common.CheckedFunction; @@ -48,26 +47,21 @@ import org.opensearch.common.blobstore.transfer.RemoteTransferContainer; import org.opensearch.common.blobstore.transfer.stream.OffsetRangeIndexInputStream; import org.opensearch.common.io.Streams; -import org.opensearch.common.io.stream.BytesStreamOutput; import org.opensearch.common.lucene.store.ByteArrayIndexInput; -import org.opensearch.common.lucene.store.IndexOutputOutputStream; import org.opensearch.common.xcontent.LoggingDeprecationHandler; import org.opensearch.common.xcontent.XContentHelper; import org.opensearch.common.xcontent.XContentType; import org.opensearch.core.action.ActionListener; import org.opensearch.core.common.bytes.BytesReference; import org.opensearch.core.compress.Compressor; -import 
org.opensearch.core.xcontent.MediaTypeRegistry; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.core.xcontent.ToXContent; -import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.core.xcontent.XContentParser; import org.opensearch.gateway.CorruptStateException; import org.opensearch.index.store.exception.ChecksumCombinationException; import org.opensearch.snapshots.SnapshotInfo; import java.io.IOException; -import java.io.OutputStream; import java.util.Arrays; import java.util.HashMap; import java.util.Locale; @@ -80,7 +74,7 @@ * * @opensearch.internal */ -public final class ChecksumBlobStoreFormat { +public final class ChecksumBlobStoreFormat extends BaseBlobStoreFormat { // Serialization parameters to specify correct context for metadata serialization public static final ToXContent.Params SNAPSHOT_ONLY_FORMAT_PARAMS; @@ -98,12 +92,8 @@ public final class ChecksumBlobStoreFormat { // The format version public static final int VERSION = 1; - private static final int BUFFER_SIZE = 4096; - private final String codec; - private final String blobNameFormat; - private final CheckedFunction reader; /** @@ -112,8 +102,8 @@ public final class ChecksumBlobStoreFormat { * @param reader prototype object that can deserialize T from XContent */ public ChecksumBlobStoreFormat(String codec, String blobNameFormat, CheckedFunction reader) { + super(blobNameFormat, false); this.reader = reader; - this.blobNameFormat = blobNameFormat; this.codec = codec; } @@ -130,7 +120,7 @@ public T read(BlobContainer blobContainer, String name, NamedXContentRegistry na } public String blobName(String name) { - return String.format(Locale.ROOT, blobNameFormat, name); + return String.format(Locale.ROOT, getBlobNameFormat(), name); } public T deserialize(String blobName, NamedXContentRegistry namedXContentRegistry, BytesReference bytes) throws IOException { @@ -170,30 +160,7 @@ public T deserialize(String blobName, NamedXContentRegistry 
namedXContentRegistr * @param compressor whether to use compression */ public void write(final T obj, final BlobContainer blobContainer, final String name, final Compressor compressor) throws IOException { - write(obj, blobContainer, name, compressor, SNAPSHOT_ONLY_FORMAT_PARAMS); - } - - /** - * Writes blob with resolving the blob name using {@link #blobName} method. - *

- * The blob will optionally by compressed. - * - * @param obj object to be serialized - * @param blobContainer blob container - * @param name blob name - * @param compressor whether to use compression - * @param params ToXContent params - */ - public void write( - final T obj, - final BlobContainer blobContainer, - final String name, - final Compressor compressor, - final ToXContent.Params params - ) throws IOException { - final String blobName = blobName(name); - final BytesReference bytes = serialize(obj, blobName, compressor, params); - blobContainer.writeBlob(blobName, bytes.streamInput(), bytes.length(), false); + write(obj, blobContainer, name, compressor, SNAPSHOT_ONLY_FORMAT_PARAMS, XContentType.SMILE, codec, VERSION); } /** @@ -251,7 +218,7 @@ private void writeAsyncWithPriority( final ToXContent.Params params ) throws IOException { if (blobContainer instanceof AsyncMultiStreamBlobContainer == false) { - write(obj, blobContainer, name, compressor, params); + write(obj, blobContainer, name, compressor, params, XContentType.SMILE, codec, VERSION); listener.onResponse(null); return; } @@ -290,35 +257,6 @@ private void writeAsyncWithPriority( public BytesReference serialize(final T obj, final String blobName, final Compressor compressor, final ToXContent.Params params) throws IOException { - try (BytesStreamOutput outputStream = new BytesStreamOutput()) { - try ( - OutputStreamIndexOutput indexOutput = new OutputStreamIndexOutput( - "ChecksumBlobStoreFormat.writeBlob(blob=\"" + blobName + "\")", - blobName, - outputStream, - BUFFER_SIZE - ) - ) { - CodecUtil.writeHeader(indexOutput, codec, VERSION); - try (OutputStream indexOutputOutputStream = new IndexOutputOutputStream(indexOutput) { - @Override - public void close() throws IOException { - // this is important since some of the XContentBuilders write bytes on close. - // in order to write the footer we need to prevent closing the actual index input. 
- } - }; - XContentBuilder builder = MediaTypeRegistry.contentBuilder( - XContentType.SMILE, - compressor.threadLocalOutputStream(indexOutputOutputStream) - ) - ) { - builder.startObject(); - obj.toXContent(builder, params); - builder.endObject(); - } - CodecUtil.writeFooter(indexOutput); - } - return outputStream.bytes(); - } + return serialize(obj, blobName, compressor, params, XContentType.SMILE, codec, VERSION); } } diff --git a/server/src/main/java/org/opensearch/repositories/blobstore/ConfigBlobStoreFormat.java b/server/src/main/java/org/opensearch/repositories/blobstore/ConfigBlobStoreFormat.java new file mode 100644 index 0000000000000..18c718ca2110e --- /dev/null +++ b/server/src/main/java/org/opensearch/repositories/blobstore/ConfigBlobStoreFormat.java @@ -0,0 +1,73 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.repositories.blobstore; + +import org.apache.lucene.store.IndexInput; +import org.opensearch.common.blobstore.AsyncMultiStreamBlobContainer; +import org.opensearch.common.blobstore.BlobContainer; +import org.opensearch.common.blobstore.stream.write.WritePriority; +import org.opensearch.common.blobstore.transfer.RemoteTransferContainer; +import org.opensearch.common.blobstore.transfer.stream.OffsetRangeIndexInputStream; +import org.opensearch.common.lucene.store.ByteArrayIndexInput; +import org.opensearch.common.xcontent.XContentType; +import org.opensearch.core.action.ActionListener; +import org.opensearch.core.common.bytes.BytesReference; +import org.opensearch.core.compress.NoneCompressor; +import org.opensearch.core.xcontent.ToXContent; + +import java.io.IOException; + +/** + * Format for writing short configurations to remote. A read interface does not exist as it is not yet required.
This format + * should be used for writing in-memory data to the remote store where there is no need for a checksum and the client + * library for the remote store has built-in checksum capabilities during both upload and download. This format + * serialises the data as JSON and stores it on the remote store as is. This does not support compression yet (this + * can be changed as required). In comparison to {@link ChecksumBlobStoreFormat}, this format does not add any additional + * metadata (like header and footer) to the content. Hence, this format does not depend on {@code CodecUtil} from + * the Lucene library. + * + * @opensearch.internal + */ +public class ConfigBlobStoreFormat extends BaseBlobStoreFormat { + + /** + * @param blobNameFormat format of the blobname in {@link String#format} format + */ + public ConfigBlobStoreFormat(String blobNameFormat) { + super(blobNameFormat, true); + } + + public void writeAsyncWithUrgentPriority(T obj, BlobContainer blobContainer, String name, ActionListener listener) + throws IOException { + if (blobContainer instanceof AsyncMultiStreamBlobContainer == false) { + write(obj, blobContainer, name, new NoneCompressor(), ToXContent.EMPTY_PARAMS, XContentType.JSON, null, null); + listener.onResponse(null); + return; + } + String blobName = blobName(name); + BytesReference bytes = serialize(obj, blobName, new NoneCompressor(), ToXContent.EMPTY_PARAMS, XContentType.JSON, null, null); + String resourceDescription = "BlobStoreFormat.writeAsyncWithPriority(blob=\"" + blobName + "\")"; + try (IndexInput input = new ByteArrayIndexInput(resourceDescription, BytesReference.toBytes(bytes))) { + try ( + RemoteTransferContainer remoteTransferContainer = new RemoteTransferContainer( + blobName, + blobName, + bytes.length(), + true, + WritePriority.URGENT, + (size, position) -> new OffsetRangeIndexInputStream(input, size, position), + null, + false + ) + ) { + ((AsyncMultiStreamBlobContainer)
blobContainer).asyncBlobUpload(remoteTransferContainer.createWriteContext(), listener); + } + } + } +} diff --git a/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestNodesStatsAction.java b/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestNodesStatsAction.java index 66b9afda06eb6..267bfde576dec 100644 --- a/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestNodesStatsAction.java +++ b/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestNodesStatsAction.java @@ -36,6 +36,7 @@ import org.opensearch.action.admin.indices.stats.CommonStatsFlags; import org.opensearch.action.admin.indices.stats.CommonStatsFlags.Flag; import org.opensearch.client.node.NodeClient; +import org.opensearch.common.cache.CacheType; import org.opensearch.core.common.Strings; import org.opensearch.rest.BaseRestHandler; import org.opensearch.rest.RestRequest; @@ -175,6 +176,25 @@ public RestChannelConsumer prepareRequest(final RestRequest request, final NodeC nodesStatsRequest.indices(flags); } + } else if (metrics.contains("caches")) { + // Extract the list of caches we want to get stats for from the submetrics (which we get from index_metric) + Set cacheMetrics = Strings.tokenizeByCommaToSet(request.param("index_metric", "_all")); + CommonStatsFlags cacheFlags = new CommonStatsFlags(); + cacheFlags.clear(); + if (cacheMetrics.contains("_all")) { + cacheFlags.includeAllCacheTypes(); + } else { + for (String cacheName : cacheMetrics) { + try { + cacheFlags.includeCacheType(CacheType.getByValue(cacheName)); + } catch (IllegalArgumentException e) { + throw new IllegalArgumentException( + unrecognized(request, Set.of(cacheName), CacheType.allValues(), "cache type") + ); + } + } + } + nodesStatsRequest.indices(cacheFlags); } else if (request.hasParam("index_metric")) { throw new IllegalArgumentException( String.format( @@ -209,6 +229,10 @@ public RestChannelConsumer prepareRequest(final RestRequest request, final NodeC 
nodesStatsRequest.indices().includeOnlyTopIndexingPressureMetrics(request.paramAsBoolean("top", false)); } + // If no levels are passed in this results in an empty array. + String[] levels = Strings.splitStringByCommaToArray(request.param("level")); + nodesStatsRequest.indices().setLevels(levels); + return channel -> client.admin().cluster().nodesStats(nodesStatsRequest, new NodesResponseRestListener<>(channel)); } diff --git a/server/src/main/java/org/opensearch/rest/action/document/RestBulkAction.java b/server/src/main/java/org/opensearch/rest/action/document/RestBulkAction.java index b046146707885..0bc4234c9b8b8 100644 --- a/server/src/main/java/org/opensearch/rest/action/document/RestBulkAction.java +++ b/server/src/main/java/org/opensearch/rest/action/document/RestBulkAction.java @@ -97,6 +97,7 @@ public RestChannelConsumer prepareRequest(final RestRequest request, final NodeC Boolean defaultRequireAlias = request.paramAsBoolean(DocWriteRequest.REQUIRE_ALIAS, null); bulkRequest.timeout(request.paramAsTime("timeout", BulkShardRequest.DEFAULT_TIMEOUT)); bulkRequest.setRefreshPolicy(request.param("refresh")); + bulkRequest.batchSize(request.paramAsInt("batch_size", 1)); bulkRequest.add( request.requiredContent(), defaultIndex, diff --git a/server/src/main/java/org/opensearch/rest/action/search/RestSearchAction.java b/server/src/main/java/org/opensearch/rest/action/search/RestSearchAction.java index 80dc34c4d5d68..3a6b45013e892 100644 --- a/server/src/main/java/org/opensearch/rest/action/search/RestSearchAction.java +++ b/server/src/main/java/org/opensearch/rest/action/search/RestSearchAction.java @@ -50,6 +50,7 @@ import org.opensearch.rest.action.RestCancellableNodeClient; import org.opensearch.rest.action.RestStatusToXContentListener; import org.opensearch.search.Scroll; +import org.opensearch.search.SearchService; import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.search.fetch.StoredFieldsContext; import 
org.opensearch.search.fetch.subphase.FetchSourceContext; @@ -235,13 +236,12 @@ private static void parseSearchSource(final SearchSourceBuilder searchSourceBuil searchSourceBuilder.query(queryBuilder); } - int from = request.paramAsInt("from", -1); - if (from != -1) { - searchSourceBuilder.from(from); + if (request.hasParam("from")) { + searchSourceBuilder.from(request.paramAsInt("from", SearchService.DEFAULT_FROM)); } - int size = request.paramAsInt("size", -1); - if (size != -1) { - setSize.accept(size); + + if (request.hasParam("size")) { + setSize.accept(request.paramAsInt("size", SearchService.DEFAULT_SIZE)); } if (request.hasParam("explain")) { diff --git a/server/src/main/java/org/opensearch/search/SearchService.java b/server/src/main/java/org/opensearch/search/SearchService.java index 6b3620e65a271..744d3a19f1593 100644 --- a/server/src/main/java/org/opensearch/search/SearchService.java +++ b/server/src/main/java/org/opensearch/search/SearchService.java @@ -275,7 +275,7 @@ public class SearchService extends AbstractLifecycleComponent implements IndexEv // value 0 means rewrite filters optimization in aggregations will be disabled public static final Setting MAX_AGGREGATION_REWRITE_FILTERS = Setting.intSetting( "search.max_aggregation_rewrite_filters", - 72, + 3000, 0, Property.Dynamic, Property.NodeScope diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/FastFilterRewriteHelper.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/FastFilterRewriteHelper.java index dde748bf0dc07..c8ce39a52f869 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/FastFilterRewriteHelper.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/FastFilterRewriteHelper.java @@ -10,12 +10,13 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.apache.lucene.document.LongPoint; import org.apache.lucene.index.DocValues; import 
org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.PointValues; +import org.apache.lucene.search.CollectionTerminatedException; import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.FieldExistsQuery; import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.MatchAllDocsQuery; @@ -23,24 +24,30 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.Weight; +import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.NumericUtils; +import org.opensearch.common.CheckedRunnable; import org.opensearch.common.Rounding; import org.opensearch.common.lucene.search.function.FunctionScoreQuery; import org.opensearch.index.mapper.DateFieldMapper; import org.opensearch.index.mapper.DocCountFieldMapper; import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.query.DateRangeIncludingNowQuery; +import org.opensearch.search.aggregations.bucket.composite.CompositeAggregator; import org.opensearch.search.aggregations.bucket.composite.CompositeValuesSourceConfig; import org.opensearch.search.aggregations.bucket.composite.RoundingValuesSource; import org.opensearch.search.aggregations.bucket.histogram.LongBounds; import org.opensearch.search.internal.SearchContext; import java.io.IOException; +import java.util.Arrays; import java.util.HashMap; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.OptionalLong; import java.util.function.BiConsumer; +import java.util.function.BiFunction; import java.util.function.Function; import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; @@ -128,9 +135,10 @@ private static long[] getSegmentBounds(final LeafReaderContext context, final St } /** - * This method also acts as a pre-condition check for the optimization + * Gets 
the min and max bounds of the field for the shard search + * Depending on the query part, the bounds are computed differently * - * @return null if the processed query not as expected + * @return null if the processed query not supported by the optimization */ public static long[] getDateHistoAggBounds(final SearchContext context, final String fieldName) throws IOException { final Query cq = unwrapIntoConcreteQuery(context.query()); @@ -167,67 +175,6 @@ private static long[] getBoundsWithRangeQuery(PointRangeQuery prq, String fieldN return null; } - /** - * Creates the date range filters for aggregations using the interval, min/max - * bounds and prepared rounding - */ - private static Weight[] createFilterForAggregations( - final SearchContext context, - final DateFieldMapper.DateFieldType fieldType, - final long interval, - final Rounding.Prepared preparedRounding, - long low, - final long high - ) throws IOException { - // Calculate the number of buckets using range and interval - long roundedLow = preparedRounding.round(fieldType.convertNanosToMillis(low)); - long prevRounded = roundedLow; - int bucketCount = 0; - while (roundedLow <= fieldType.convertNanosToMillis(high)) { - bucketCount++; - int maxNumFilterBuckets = context.maxAggRewriteFilters(); - if (bucketCount > maxNumFilterBuckets) { - logger.debug("Max number of filters reached [{}], skip the fast filter optimization", maxNumFilterBuckets); - return null; - } - // Below rounding is needed as the interval could return in - // non-rounded values for something like calendar month - roundedLow = preparedRounding.round(roundedLow + interval); - if (prevRounded == roundedLow) break; // prevents getting into an infinite loop - prevRounded = roundedLow; - } - - Weight[] filters = null; - if (bucketCount > 0) { - filters = new Weight[bucketCount]; - roundedLow = preparedRounding.round(fieldType.convertNanosToMillis(low)); - - int i = 0; - while (i < bucketCount) { - // Calculate the lower bucket bound - final 
byte[] lower = new byte[8]; - NumericUtils.longToSortableBytes(i == 0 ? low : fieldType.convertRoundedMillisToNanos(roundedLow), lower, 0); - - // Calculate the upper bucket bound - roundedLow = preparedRounding.round(roundedLow + interval); - final byte[] upper = new byte[8]; - NumericUtils.longToSortableBytes(i + 1 == bucketCount ? high : - // Subtract -1 if the minimum is roundedLow as roundedLow itself - // is included in the next bucket - fieldType.convertRoundedMillisToNanos(roundedLow) - 1, upper, 0); - - filters[i++] = context.searcher().createWeight(new PointRangeQuery(fieldType.name(), lower, upper, 1) { - @Override - protected String toString(int dimension, byte[] value) { - return Long.toString(LongPoint.decodeDimension(value, 0)); - } - }, ScoreMode.COMPLETE_NO_SCORES, 1); - } - } - - return filters; - } - /** * Context object for fast filter optimization *

@@ -235,12 +182,24 @@ protected String toString(int dimension, byte[] value) { */ public static class FastFilterContext { private boolean rewriteable = false; - private Weight[] filters = null; - private boolean filtersBuiltAtShardLevel = false; + private boolean rangesBuiltAtShardLevel = false; private AggregationType aggregationType; private final SearchContext context; + private String fieldName; + private long[][] ranges; + + // debug info related fields + public int leaf; + public int inner; + public int segments; + public int optimizedSegments; + + public void setFieldName(String fieldName) { + this.fieldName = fieldName; + } + public FastFilterContext(SearchContext context) { this.context = context; } @@ -262,24 +221,26 @@ public boolean isRewriteable(final Object parent, final int subAggLength) { return rewriteable; } - public void buildFastFilter() throws IOException { - assert filters == null : "Filters should only be built once, but they are already built"; - this.filters = this.aggregationType.buildFastFilter(context); - if (filters != null) { - logger.debug("Fast filter built for shard {}", context.indexShard().shardId()); - filtersBuiltAtShardLevel = true; + public void buildRanges() throws IOException { + assert ranges == null : "Ranges should only be built once at shard level, but they are already built"; + this.ranges = this.aggregationType.buildRanges(context); + if (ranges != null) { + logger.debug("Ranges built for shard {}", context.indexShard().shardId()); + rangesBuiltAtShardLevel = true; } } - /** - * Built filters for a segment - */ - public Weight[] buildFastFilter(LeafReaderContext leaf) throws IOException { - Weight[] filters = this.aggregationType.buildFastFilter(leaf, context); - if (filters != null) { - logger.debug("Fast filter built for shard {} segment {}", context.indexShard().shardId(), leaf.ord); + public long[][] buildRanges(LeafReaderContext leaf) throws IOException { + long[][] ranges = this.aggregationType.buildRanges(leaf, 
context); + if (ranges != null) { + logger.debug("Ranges built for shard {} segment {}", context.indexShard().shardId(), leaf.ord); } - return filters; + return ranges; + } + + private void consumeDebugInfo(DebugInfo debug) { + leaf += debug.leaf; + inner += debug.inner; } } @@ -287,16 +248,11 @@ public Weight[] buildFastFilter(LeafReaderContext leaf) throws IOException { * Different types have different pre-conditions, filter building logic, etc. */ interface AggregationType { - boolean isRewriteable(Object parent, int subAggLength); - Weight[] buildFastFilter(SearchContext ctx) throws IOException; - - Weight[] buildFastFilter(LeafReaderContext leaf, SearchContext ctx) throws IOException; + long[][] buildRanges(SearchContext ctx) throws IOException; - default int getSize() { - return Integer.MAX_VALUE; - } + long[][] buildRanges(LeafReaderContext leaf, SearchContext ctx) throws IOException; } /** @@ -330,20 +286,13 @@ public boolean isRewriteable(Object parent, int subAggLength) { } @Override - public Weight[] buildFastFilter(SearchContext context) throws IOException { + public long[][] buildRanges(SearchContext context) throws IOException { long[] bounds = getDateHistoAggBounds(context, fieldType.name()); logger.debug("Bounds are {} for shard {}", bounds, context.indexShard().shardId()); - return buildFastFilter(context, bounds); + return buildRanges(context, bounds); } - @Override - public Weight[] buildFastFilter(LeafReaderContext leaf, SearchContext context) throws IOException { - long[] bounds = getSegmentBounds(leaf, fieldType.name()); - logger.debug("Bounds are {} for shard {} segment {}", bounds, context.indexShard().shardId(), leaf.ord); - return buildFastFilter(context, bounds); - } - - private Weight[] buildFastFilter(SearchContext context, long[] bounds) throws IOException { + private long[][] buildRanges(SearchContext context, long[] bounds) throws IOException { bounds = processHardBounds(bounds); if (bounds == null) { return null; @@ -360,7 +309,7 @@ 
private Weight[] buildFastFilter(SearchContext context, long[] bounds) throws IO // process the after key of composite agg processAfterKey(bounds, interval); - return FastFilterRewriteHelper.createFilterForAggregations( + return FastFilterRewriteHelper.createRangesFromAgg( context, (DateFieldMapper.DateFieldType) fieldType, interval, @@ -370,6 +319,13 @@ private Weight[] buildFastFilter(SearchContext context, long[] bounds) throws IO ); } + @Override + public long[][] buildRanges(LeafReaderContext leaf, SearchContext context) throws IOException { + long[] bounds = getSegmentBounds(leaf, fieldType.name()); + logger.debug("Bounds are {} for shard {} segment {}", bounds, context.indexShard().shardId(), leaf.ord); + return buildRanges(context, bounds); + } + protected abstract Rounding getRounding(final long low, final long high); protected abstract Rounding.Prepared getRoundingPrepared(); @@ -413,7 +369,7 @@ public static long getBucketOrd(long bucketOrd) { } /** - * Try to get the bucket doc counts from the fast filters for the aggregation + * Try to get the bucket doc counts for the date histogram aggregation *

* Usage: invoked at segment level — in getLeafCollector of aggregator * @@ -424,11 +380,18 @@ public static boolean tryFastFilterAggregation( FastFilterContext fastFilterContext, final BiConsumer incrementDocCount ) throws IOException { - if (fastFilterContext == null) return false; + fastFilterContext.segments++; if (!fastFilterContext.rewriteable) { return false; } + if (ctx.reader().hasDeletions()) return false; + + PointValues values = ctx.reader().getPointValues(fastFilterContext.fieldName); + if (values == null) return false; + // only proceed if every document corresponds to exactly one point + if (values.getDocCount() != values.size()) return false; + NumericDocValues docCountValues = DocValues.getNumeric(ctx.reader(), DocCountFieldMapper.NAME); if (docCountValues.nextDoc() != NO_MORE_DOCS) { logger.debug( @@ -439,61 +402,330 @@ public static boolean tryFastFilterAggregation( return false; } - // if no filters built at shard level (see getDateHistoAggBounds method for possible reasons) - // check if the query is functionally match-all at segment level - if (!fastFilterContext.filtersBuiltAtShardLevel && !segmentMatchAll(fastFilterContext.context, ctx)) { + // even if no ranges built at shard level, we can still perform the optimization + // when functionally match-all at segment level + if (!fastFilterContext.rangesBuiltAtShardLevel && !segmentMatchAll(fastFilterContext.context, ctx)) { return false; } - Weight[] filters = fastFilterContext.filters; - if (filters == null) { + long[][] ranges = fastFilterContext.ranges; + if (ranges == null) { logger.debug( "Shard {} segment {} functionally match all documents. 
Build the fast filter", fastFilterContext.context.indexShard().shardId(), ctx.ord ); - filters = fastFilterContext.buildFastFilter(ctx); - if (filters == null) { + ranges = fastFilterContext.buildRanges(ctx); + if (ranges == null) { return false; } } - final int[] counts = new int[filters.length]; - int i; - for (i = 0; i < filters.length; i++) { - counts[i] = filters[i].count(ctx); - if (counts[i] == -1) { - // Cannot use the optimization if any of the counts - // is -1 indicating the segment might have deleted documents - return false; + final AggregationType aggregationType = fastFilterContext.aggregationType; + assert aggregationType instanceof AbstractDateHistogramAggregationType; + final DateFieldMapper.DateFieldType fieldType = ((AbstractDateHistogramAggregationType) aggregationType).getFieldType(); + int size = Integer.MAX_VALUE; + if (aggregationType instanceof CompositeAggregator.CompositeAggregationType) { + size = ((CompositeAggregator.CompositeAggregationType) aggregationType).getSize(); + } + DebugInfo debugInfo = multiRangesTraverse(values.getPointTree(), ranges, incrementDocCount, fieldType, size); + fastFilterContext.consumeDebugInfo(debugInfo); + + fastFilterContext.optimizedSegments++; + logger.debug("Fast filter optimization applied to shard {} segment {}", fastFilterContext.context.indexShard().shardId(), ctx.ord); + logger.debug("crossed leaf nodes: {}, inner nodes: {}", fastFilterContext.leaf, fastFilterContext.inner); + return true; + } + + private static boolean segmentMatchAll(SearchContext ctx, LeafReaderContext leafCtx) throws IOException { + Weight weight = ctx.searcher().createWeight(ctx.query(), ScoreMode.COMPLETE_NO_SCORES, 1f); + return weight != null && weight.count(leafCtx) == leafCtx.reader().numDocs(); + } + + /** + * Creates the date ranges from date histo aggregations using its interval, + * and min/max boundaries + */ + private static long[][] createRangesFromAgg( + final SearchContext context, + final 
DateFieldMapper.DateFieldType fieldType, + final long interval, + final Rounding.Prepared preparedRounding, + long low, + final long high + ) { + // Calculate the number of buckets using range and interval + long roundedLow = preparedRounding.round(fieldType.convertNanosToMillis(low)); + long prevRounded = roundedLow; + int bucketCount = 0; + while (roundedLow <= fieldType.convertNanosToMillis(high)) { + bucketCount++; + int maxNumFilterBuckets = context.maxAggRewriteFilters(); + if (bucketCount > maxNumFilterBuckets) { + logger.debug("Max number of filters reached [{}], skip the fast filter optimization", maxNumFilterBuckets); + return null; + } + // Below rounding is needed as the interval could return in + // non-rounded values for something like calendar month + roundedLow = preparedRounding.round(roundedLow + interval); + if (prevRounded == roundedLow) break; // prevents getting into an infinite loop + prevRounded = roundedLow; + } + + long[][] ranges = new long[bucketCount][2]; + if (bucketCount > 0) { + roundedLow = preparedRounding.round(fieldType.convertNanosToMillis(low)); + + int i = 0; + while (i < bucketCount) { + // Calculate the lower bucket bound + long lower = i == 0 ? low : fieldType.convertRoundedMillisToNanos(roundedLow); + roundedLow = preparedRounding.round(roundedLow + interval); + + // Subtract -1 if the minimum is roundedLow as roundedLow itself + // is included in the next bucket + long upper = i + 1 == bucketCount ? 
high : fieldType.convertRoundedMillisToNanos(roundedLow) - 1; + + ranges[i][0] = lower; + ranges[i][1] = upper; + i++; + } + } + + return ranges; + } + + /** + * @param maxNumNonZeroRanges the number of non-zero ranges to collect + */ + private static DebugInfo multiRangesTraverse( + final PointValues.PointTree tree, + final long[][] ranges, + final BiConsumer incrementDocCount, + final DateFieldMapper.DateFieldType fieldType, + final int maxNumNonZeroRanges + ) throws IOException { + // ranges are connected and in ascending order + Iterator rangeIter = Arrays.stream(ranges).iterator(); + long[] activeRange = rangeIter.next(); + + // make sure the first range at least crosses the min value of the tree + DebugInfo debugInfo = new DebugInfo(); + if (activeRange[0] > NumericUtils.sortableBytesToLong(tree.getMaxPackedValue(), 0)) { + logger.debug("No ranges match the query, skip the fast filter optimization"); + return debugInfo; + } + while (activeRange[1] < NumericUtils.sortableBytesToLong(tree.getMinPackedValue(), 0)) { + if (!rangeIter.hasNext()) { + logger.debug("No ranges match the query, skip the fast filter optimization"); + return debugInfo; } + activeRange = rangeIter.next(); } - int s = 0; - int size = fastFilterContext.aggregationType.getSize(); - for (i = 0; i < filters.length; i++) { - if (counts[i] > 0) { - long bucketKey = i; // the index of filters is the key for filters aggregation - if (fastFilterContext.aggregationType instanceof AbstractDateHistogramAggregationType) { - final DateFieldMapper.DateFieldType fieldType = - ((AbstractDateHistogramAggregationType) fastFilterContext.aggregationType).getFieldType(); - bucketKey = fieldType.convertNanosToMillis( - NumericUtils.sortableBytesToLong(((PointRangeQuery) filters[i].getQuery()).getLowerPoint(), 0) - ); + RangeCollectorForPointTree collector = new RangeCollectorForPointTree( + incrementDocCount, + fieldType, + rangeIter, + maxNumNonZeroRanges, + activeRange + ); + + final 
ArrayUtil.ByteArrayComparator comparator = ArrayUtil.getUnsignedComparator(8); + PointValues.IntersectVisitor visitor = getIntersectVisitor(collector, comparator); + try { + intersectWithRanges(visitor, tree, collector, debugInfo); + } catch (CollectionTerminatedException e) { + logger.debug("Early terminate since no more range to collect"); + } + collector.finalizePreviousRange(); + + return debugInfo; + } + + private static void intersectWithRanges( + PointValues.IntersectVisitor visitor, + PointValues.PointTree pointTree, + RangeCollectorForPointTree collector, + DebugInfo debug + ) throws IOException { + PointValues.Relation r = visitor.compare(pointTree.getMinPackedValue(), pointTree.getMaxPackedValue()); + + switch (r) { + case CELL_INSIDE_QUERY: + collector.countNode((int) pointTree.size()); + debug.visitInner(); + break; + case CELL_CROSSES_QUERY: + if (pointTree.moveToChild()) { + do { + intersectWithRanges(visitor, pointTree, collector, debug); + } while (pointTree.moveToSibling()); + pointTree.moveToParent(); + } else { + pointTree.visitDocValues(visitor); + debug.visitLeaf(); } - incrementDocCount.accept(bucketKey, counts[i]); - s++; - if (s > size) { - break; + break; + case CELL_OUTSIDE_QUERY: + } + } + + private static PointValues.IntersectVisitor getIntersectVisitor( + RangeCollectorForPointTree collector, + ArrayUtil.ByteArrayComparator comparator + ) { + return new PointValues.IntersectVisitor() { + @Override + public void visit(int docID) throws IOException { + // this branch should be unreachable + throw new UnsupportedOperationException( + "This IntersectVisitor does not perform any actions on a " + "docID=" + docID + " node being visited" + ); + } + + @Override + public void visit(int docID, byte[] packedValue) throws IOException { + visitPoints(packedValue, collector::count); + } + + @Override + public void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOException { + visitPoints(packedValue, () -> { + for (int doc = 
iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) { + collector.count(); + } + }); + } + + private void visitPoints(byte[] packedValue, CheckedRunnable collect) throws IOException { + if (comparator.compare(packedValue, 0, collector.activeRangeAsByteArray[1], 0) > 0) { + // need to move to next range + collector.finalizePreviousRange(); + if (collector.iterateRangeEnd(packedValue, this::compareByteValue)) { + throw new CollectionTerminatedException(); + } + } + + if (pointCompare(collector.activeRangeAsByteArray[0], collector.activeRangeAsByteArray[1], packedValue)) { + collect.run(); + } + } + + private boolean pointCompare(byte[] lower, byte[] upper, byte[] packedValue) { + if (compareByteValue(packedValue, lower) < 0) { + return false; } + return compareByteValue(packedValue, upper) <= 0; + } + + private int compareByteValue(byte[] value1, byte[] value2) { + return comparator.compare(value1, 0, value2, 0); } + + @Override + public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { + byte[] rangeMin = collector.activeRangeAsByteArray[0]; + byte[] rangeMax = collector.activeRangeAsByteArray[1]; + + if (compareByteValue(rangeMax, minPackedValue) < 0) { + collector.finalizePreviousRange(); + if (collector.iterateRangeEnd(minPackedValue, this::compareByteValue)) { + throw new CollectionTerminatedException(); + } + // compare the next range with this node's min max again + // new rangeMin = previous rangeMax + 1 <= min + rangeMax = collector.activeRangeAsByteArray[1]; + } + + if (compareByteValue(rangeMin, minPackedValue) > 0 || compareByteValue(rangeMax, maxPackedValue) < 0) { + return PointValues.Relation.CELL_CROSSES_QUERY; + } else { + return PointValues.Relation.CELL_INSIDE_QUERY; + } + } + }; + } + + private static class RangeCollectorForPointTree { + private final BiConsumer incrementDocCount; + private final DateFieldMapper.DateFieldType fieldType; + private int counter = 0; + + private long[] 
activeRange; + private byte[][] activeRangeAsByteArray; + private final Iterator rangeIter; + + private int visitedRange = 0; + private final int maxNumNonZeroRange; + + public RangeCollectorForPointTree( + BiConsumer incrementDocCount, + DateFieldMapper.DateFieldType fieldType, + Iterator rangeIter, + int maxNumNonZeroRange, + long[] activeRange + ) { + this.incrementDocCount = incrementDocCount; + this.fieldType = fieldType; + this.rangeIter = rangeIter; + this.maxNumNonZeroRange = maxNumNonZeroRange; + this.activeRange = activeRange; + this.activeRangeAsByteArray = activeRangeAsByteArray(); } - logger.debug("Fast filter optimization applied to shard {} segment {}", fastFilterContext.context.indexShard().shardId(), ctx.ord); - return true; + private void count() { + counter++; + } + + private void countNode(int count) { + counter += count; + } + + private void finalizePreviousRange() { + if (counter > 0) { + logger.debug("finalize previous range: {}", activeRange[0]); + logger.debug("counter: {}", counter); + incrementDocCount.accept(fieldType.convertNanosToMillis(activeRange[0]), counter); + counter = 0; + } + } + + /** + * @return true when iterator exhausted or collect enough non-zero ranges + */ + private boolean iterateRangeEnd(byte[] value, BiFunction comparator) { + // the new value may not be contiguous to the previous one + // so try to find the first next range that cross the new value + while (comparator.apply(activeRangeAsByteArray[1], value) < 0) { + if (!rangeIter.hasNext()) { + return true; + } + activeRange = rangeIter.next(); + activeRangeAsByteArray = activeRangeAsByteArray(); + } + visitedRange++; + return visitedRange > maxNumNonZeroRange; + } + + private byte[][] activeRangeAsByteArray() { + byte[] lower = new byte[8]; + byte[] upper = new byte[8]; + NumericUtils.longToSortableBytes(activeRange[0], lower, 0); + NumericUtils.longToSortableBytes(activeRange[1], upper, 0); + return new byte[][] { lower, upper }; + } } - private static boolean 
segmentMatchAll(SearchContext ctx, LeafReaderContext leafCtx) throws IOException { - Weight weight = ctx.searcher().createWeight(ctx.query(), ScoreMode.COMPLETE_NO_SCORES, 1f); - return weight != null && weight.count(leafCtx) == leafCtx.reader().numDocs(); + private static class DebugInfo { + private int leaf = 0; // leaf node visited + private int inner = 0; // inner node visited + + private void visitLeaf() { + leaf++; + } + + private void visitInner() { + inner++; + } } } diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java index b97c814cdf645..3713d8f83990d 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java @@ -87,6 +87,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.function.BiConsumer; import java.util.function.LongUnaryOperator; import java.util.stream.Collectors; @@ -97,7 +98,7 @@ * * @opensearch.internal */ -final class CompositeAggregator extends BucketsAggregator { +public final class CompositeAggregator extends BucketsAggregator { private final int size; private final List sourceNames; private final int[] reverseMuls; @@ -171,14 +172,15 @@ final class CompositeAggregator extends BucketsAggregator { // bucketOrds is used for saving date histogram results bucketOrds = LongKeyedBucketOrds.build(context.bigArrays(), CardinalityUpperBound.ONE); preparedRounding = ((CompositeAggregationType) fastFilterContext.getAggregationType()).getRoundingPrepared(); - fastFilterContext.buildFastFilter(); + fastFilterContext.setFieldName(sourceConfigs[0].fieldType().name()); + fastFilterContext.buildRanges(); } } /** * Currently the filter rewrite is only supported for date histograms */ - private class 
CompositeAggregationType extends FastFilterRewriteHelper.AbstractDateHistogramAggregationType { + public class CompositeAggregationType extends FastFilterRewriteHelper.AbstractDateHistogramAggregationType { private final RoundingValuesSource valuesSource; private long afterKey = -1L; @@ -210,7 +212,6 @@ protected void processAfterKey(long[] bound, long interval) { } } - @Override public int getSize() { return size; } @@ -706,4 +707,14 @@ private static class Entry { this.docIdSet = docIdSet; } } + + @Override + public void collectDebugInfo(BiConsumer add) { + if (fastFilterContext.optimizedSegments > 0) { + add.accept("optimized_segments", fastFilterContext.optimizedSegments); + add.accept("unoptimized_segments", fastFilterContext.segments - fastFilterContext.optimizedSegments); + add.accept("leaf_visited", fastFilterContext.leaf); + add.accept("inner_visited", fastFilterContext.inner); + } + } } diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java index 12aefc540e75c..f326426800909 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java @@ -166,7 +166,8 @@ private AutoDateHistogramAggregator( ) ); if (fastFilterContext.isRewriteable(parent, subAggregators.length)) { - fastFilterContext.buildFastFilter(); + fastFilterContext.setFieldName(valuesSourceConfig.fieldType().name()); + fastFilterContext.buildRanges(); } } @@ -307,6 +308,17 @@ protected final void merge(long[] mergeMap, long newNumBuckets) { } } + @Override + public void collectDebugInfo(BiConsumer add) { + super.collectDebugInfo(add); + if (fastFilterContext.optimizedSegments > 0) { + add.accept("optimized_segments", fastFilterContext.optimizedSegments); + 
add.accept("unoptimized_segments", fastFilterContext.segments - fastFilterContext.optimizedSegments); + add.accept("leaf_visited", fastFilterContext.leaf); + add.accept("inner_visited", fastFilterContext.inner); + } + } + /** * Initially it uses the most fine grained rounding configuration possible * but as more data arrives it rebuckets the data until it "fits" in the diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java index 0e830106c8284..dd4ee9196fd62 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java @@ -126,7 +126,8 @@ class DateHistogramAggregator extends BucketsAggregator implements SizedBucketAg ) ); if (fastFilterContext.isRewriteable(parent, subAggregators.length)) { - fastFilterContext.buildFastFilter(); + fastFilterContext.setFieldName(valuesSourceConfig.fieldType().name()); + fastFilterContext.buildRanges(); } } @@ -255,6 +256,12 @@ public void doClose() { @Override public void collectDebugInfo(BiConsumer add) { add.accept("total_buckets", bucketOrds.size()); + if (fastFilterContext.optimizedSegments > 0) { + add.accept("optimized_segments", fastFilterContext.optimizedSegments); + add.accept("unoptimized_segments", fastFilterContext.segments - fastFilterContext.optimizedSegments); + add.accept("leaf_visited", fastFilterContext.leaf); + add.accept("inner_visited", fastFilterContext.inner); + } } /** diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/MultiTermsAggregationBuilder.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/MultiTermsAggregationBuilder.java index c711b140be192..fb588a04cf93c 100644 --- 
a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/MultiTermsAggregationBuilder.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/MultiTermsAggregationBuilder.java @@ -51,10 +51,15 @@ * },{ * "field": "host" * }], - * "order": {"max-cpu": "desc"} + * "order": [{ + * "max-cpu": "desc" + * },{ + * "max-memory": "desc" + * }] * }, * "aggs": { - * "max-cpu": { "max": { "field": "cpu" } } + * "max-cpu": { "max": { "field": "cpu" } }, + * "max-memory": { "max": { "field": "memory" } } * } * } * } @@ -80,6 +85,9 @@ * "doc_count": 2, * "max-cpu": { * "value": 90.0 + * }, + * "max-memory": { + * "value": 80.0 * } * }, * { @@ -91,6 +99,9 @@ * "doc_count": 2, * "max-cpu": { * "value": 70.0 + * }, + * "max-memory": { + * "value": 90.0 * } * } * ] diff --git a/server/src/main/java/org/opensearch/search/builder/SearchSourceBuilder.java b/server/src/main/java/org/opensearch/search/builder/SearchSourceBuilder.java index 182350c22f697..07248a0719c3a 100644 --- a/server/src/main/java/org/opensearch/search/builder/SearchSourceBuilder.java +++ b/server/src/main/java/org/opensearch/search/builder/SearchSourceBuilder.java @@ -418,7 +418,7 @@ public QueryBuilder postFilter() { */ public SearchSourceBuilder from(int from) { if (from < 0) { - throw new IllegalArgumentException("[from] parameter cannot be negative"); + throw new IllegalArgumentException("[from] parameter cannot be negative, found [" + from + "]"); } this.from = from; return this; @@ -1215,9 +1215,9 @@ public void parseXContent(XContentParser parser, boolean checkTrailingTokens) th currentFieldName = parser.currentName(); } else if (token.isValue()) { if (FROM_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { - from = parser.intValue(); + from(parser.intValue()); } else if (SIZE_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { - size = parser.intValue(); + size(parser.intValue()); } else if (TIMEOUT_FIELD.match(currentFieldName, 
parser.getDeprecationHandler())) { timeout = TimeValue.parseTimeValue(parser.text(), null, TIMEOUT_FIELD.getPreferredName()); } else if (TERMINATE_AFTER_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { diff --git a/server/src/main/java/org/opensearch/search/fetch/FetchContext.java b/server/src/main/java/org/opensearch/search/fetch/FetchContext.java index 780a6f35524ea..5be3733106655 100644 --- a/server/src/main/java/org/opensearch/search/fetch/FetchContext.java +++ b/server/src/main/java/org/opensearch/search/fetch/FetchContext.java @@ -192,10 +192,6 @@ public boolean includeNamedQueriesScore() { return searchContext.includeNamedQueriesScore(); } - public boolean hasInnerHits() { - return searchContext.hasInnerHits(); - } - /** * Configuration for returning inner hits */ @@ -217,10 +213,6 @@ public FetchFieldsContext fetchFieldsContext() { return searchContext.fetchFieldsContext(); } - public boolean hasScriptFields() { - return searchContext.hasScriptFields(); - } - /** * Configuration for script fields */ diff --git a/server/src/main/java/org/opensearch/search/fetch/subphase/InnerHitsContext.java b/server/src/main/java/org/opensearch/search/fetch/subphase/InnerHitsContext.java index fa80bb04c77f5..5855a0b3217f3 100644 --- a/server/src/main/java/org/opensearch/search/fetch/subphase/InnerHitsContext.java +++ b/server/src/main/java/org/opensearch/search/fetch/subphase/InnerHitsContext.java @@ -119,11 +119,6 @@ public String getName() { return name; } - @Override - public boolean hasInnerHits() { - return childInnerHits != null; - } - @Override public InnerHitsContext innerHits() { return childInnerHits; diff --git a/server/src/main/java/org/opensearch/search/fetch/subphase/InnerHitsPhase.java b/server/src/main/java/org/opensearch/search/fetch/subphase/InnerHitsPhase.java index cadad8529da9d..0b07dc35f13bb 100644 --- a/server/src/main/java/org/opensearch/search/fetch/subphase/InnerHitsPhase.java +++ 
b/server/src/main/java/org/opensearch/search/fetch/subphase/InnerHitsPhase.java @@ -64,7 +64,7 @@ public InnerHitsPhase(FetchPhase fetchPhase) { @Override public FetchSubPhaseProcessor getProcessor(FetchContext searchContext) { - if (searchContext.hasInnerHits() == false) { + if (searchContext.innerHits() == null) { return null; } Map innerHits = searchContext.innerHits().getInnerHits(); diff --git a/server/src/main/java/org/opensearch/search/fetch/subphase/ScriptFieldsPhase.java b/server/src/main/java/org/opensearch/search/fetch/subphase/ScriptFieldsPhase.java index bee536dbaf7f6..67d1863050a7b 100644 --- a/server/src/main/java/org/opensearch/search/fetch/subphase/ScriptFieldsPhase.java +++ b/server/src/main/java/org/opensearch/search/fetch/subphase/ScriptFieldsPhase.java @@ -54,7 +54,7 @@ public final class ScriptFieldsPhase implements FetchSubPhase { @Override public FetchSubPhaseProcessor getProcessor(FetchContext context) { - if (context.hasScriptFields() == false) { + if (context.scriptFields() == null) { return null; } List scriptFields = context.scriptFields().fields(); diff --git a/server/src/main/java/org/opensearch/search/internal/SearchContext.java b/server/src/main/java/org/opensearch/search/internal/SearchContext.java index 07f3616bbc138..0c8240d3a8322 100644 --- a/server/src/main/java/org/opensearch/search/internal/SearchContext.java +++ b/server/src/main/java/org/opensearch/search/internal/SearchContext.java @@ -194,10 +194,6 @@ public final void close() { public abstract void highlight(SearchHighlightContext highlight); - public boolean hasInnerHits() { - return innerHitsContext != null; - } - public InnerHitsContext innerHits() { if (innerHitsContext == null) { innerHitsContext = new InnerHitsContext(); diff --git a/server/src/main/java/org/opensearch/search/pipeline/SearchPipelineService.java b/server/src/main/java/org/opensearch/search/pipeline/SearchPipelineService.java index 2175b5d135394..012d6695c042b 100644 --- 
a/server/src/main/java/org/opensearch/search/pipeline/SearchPipelineService.java +++ b/server/src/main/java/org/opensearch/search/pipeline/SearchPipelineService.java @@ -23,6 +23,7 @@ import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.ClusterStateApplier; import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.metadata.IndexNameExpressionResolver; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.service.ClusterManagerTaskKeys; @@ -35,10 +36,12 @@ import org.opensearch.common.xcontent.XContentHelper; import org.opensearch.core.action.ActionListener; import org.opensearch.core.common.io.stream.NamedWriteableRegistry; +import org.opensearch.core.index.Index; import org.opensearch.core.service.ReportingService; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.env.Environment; import org.opensearch.gateway.GatewayService; +import org.opensearch.index.IndexNotFoundException; import org.opensearch.index.IndexSettings; import org.opensearch.index.analysis.AnalysisRegistry; import org.opensearch.ingest.ConfigurationUtils; @@ -62,6 +65,8 @@ /** * The main entry point for search pipelines. Handles CRUD operations and exposes the API to execute search pipelines * against requests and responses. 
+ * + * @opensearch.internal */ public class SearchPipelineService implements ClusterStateApplier, ReportingService { @@ -360,7 +365,7 @@ static ClusterState innerDelete(DeleteSearchPipelineRequest request, ClusterStat return newState.build(); } - public PipelinedRequest resolvePipeline(SearchRequest searchRequest) { + public PipelinedRequest resolvePipeline(SearchRequest searchRequest, IndexNameExpressionResolver indexNameExpressionResolver) { Pipeline pipeline = Pipeline.NO_OP_PIPELINE; if (searchRequest.source() != null && searchRequest.source().searchPipelineSource() != null) { @@ -390,14 +395,27 @@ public PipelinedRequest resolvePipeline(SearchRequest searchRequest) { if (searchRequest.pipeline() != null) { // Named pipeline specified for the request pipelineId = searchRequest.pipeline(); - } else if (state != null && searchRequest.indices() != null && searchRequest.indices().length == 1) { - // Check for index default pipeline - IndexMetadata indexMetadata = state.metadata().index(searchRequest.indices()[0]); - if (indexMetadata != null) { - Settings indexSettings = indexMetadata.getSettings(); - if (IndexSettings.DEFAULT_SEARCH_PIPELINE.exists(indexSettings)) { - pipelineId = IndexSettings.DEFAULT_SEARCH_PIPELINE.get(indexSettings); + } else if (state != null && searchRequest.indices() != null && searchRequest.indices().length != 0) { + try { + // Check for index default pipeline + Index[] concreteIndices = indexNameExpressionResolver.concreteIndices(state, searchRequest); + for (Index index : concreteIndices) { + IndexMetadata indexMetadata = state.metadata().index(index); + if (indexMetadata != null) { + Settings indexSettings = indexMetadata.getSettings(); + if (IndexSettings.DEFAULT_SEARCH_PIPELINE.exists(indexSettings)) { + String currentPipelineId = IndexSettings.DEFAULT_SEARCH_PIPELINE.get(indexSettings); + if (NOOP_PIPELINE_ID.equals(pipelineId)) { + pipelineId = currentPipelineId; + } else if (!pipelineId.equals(currentPipelineId)) { + pipelineId = 
NOOP_PIPELINE_ID; + break; + } + } + } } + } catch (IndexNotFoundException e) { + logger.debug("Default pipeline not applied for {}", (Object) searchRequest.indices()); } } if (NOOP_PIPELINE_ID.equals(pipelineId) == false) { diff --git a/server/src/main/java/org/opensearch/search/query/ConcurrentQueryPhaseSearcher.java b/server/src/main/java/org/opensearch/search/query/ConcurrentQueryPhaseSearcher.java index e22f766d3894c..771ac60dfb5e5 100644 --- a/server/src/main/java/org/opensearch/search/query/ConcurrentQueryPhaseSearcher.java +++ b/server/src/main/java/org/opensearch/search/query/ConcurrentQueryPhaseSearcher.java @@ -23,10 +23,9 @@ import java.io.IOException; import java.util.LinkedList; +import java.util.Objects; import java.util.concurrent.ExecutionException; -import static org.opensearch.search.query.TopDocsCollectorContext.createTopDocsCollectorContext; - /** * The implementation of the {@link QueryPhaseSearcher} which attempts to use concurrent * search of Apache Lucene segments if it has been enabled. 
@@ -46,10 +45,19 @@ protected boolean searchWithCollector( ContextIndexSearcher searcher, Query query, LinkedList collectors, + QueryCollectorContext queryCollectorContext, boolean hasFilterCollector, boolean hasTimeout ) throws IOException { - return searchWithCollectorManager(searchContext, searcher, query, collectors, hasFilterCollector, hasTimeout); + return searchWithCollectorManager( + searchContext, + searcher, + query, + collectors, + queryCollectorContext, + hasFilterCollector, + hasTimeout + ); } private static boolean searchWithCollectorManager( @@ -57,13 +65,12 @@ private static boolean searchWithCollectorManager( ContextIndexSearcher searcher, Query query, LinkedList collectorContexts, + QueryCollectorContext queryCollectorContext, boolean hasFilterCollector, boolean timeoutSet ) throws IOException { - // create the top docs collector last when the other collectors are known - final TopDocsCollectorContext topDocsFactory = createTopDocsCollectorContext(searchContext, hasFilterCollector); - // add the top docs collector, the first collector context in the chain - collectorContexts.addFirst(topDocsFactory); + // add the passed collector, the first collector context in the chain + collectorContexts.addFirst(Objects.requireNonNull(queryCollectorContext)); final QuerySearchResult queryResult = searchContext.queryResult(); final CollectorManager collectorManager; @@ -95,7 +102,10 @@ private static boolean searchWithCollectorManager( queryResult.terminatedEarly(false); } - return topDocsFactory.shouldRescore(); + if (queryCollectorContext instanceof RescoringQueryCollectorContext) { + return ((RescoringQueryCollectorContext) queryCollectorContext).shouldRescore(); + } + return false; } @Override diff --git a/server/src/main/java/org/opensearch/search/query/QueryCollectorContext.java b/server/src/main/java/org/opensearch/search/query/QueryCollectorContext.java index 91762bee2ac08..08b048cf682bb 100644 --- 
a/server/src/main/java/org/opensearch/search/query/QueryCollectorContext.java +++ b/server/src/main/java/org/opensearch/search/query/QueryCollectorContext.java @@ -77,6 +77,29 @@ public ScoreMode scoreMode() { } }; + public static final QueryCollectorContext EMPTY_CONTEXT = new QueryCollectorContext("empty") { + + @Override + Collector create(Collector in) throws IOException { + return EMPTY_COLLECTOR; + } + + @Override + CollectorManager createManager(CollectorManager in) throws IOException { + return new CollectorManager() { + @Override + public Collector newCollector() throws IOException { + return EMPTY_COLLECTOR; + } + + @Override + public ReduceableSearchResult reduce(Collection collectors) throws IOException { + return result -> {}; + } + }; + } + }; + private String profilerName; QueryCollectorContext(String profilerName) { diff --git a/server/src/main/java/org/opensearch/search/query/QueryPhase.java b/server/src/main/java/org/opensearch/search/query/QueryPhase.java index 8f98f0d9efbd4..608649ad22b23 100644 --- a/server/src/main/java/org/opensearch/search/query/QueryPhase.java +++ b/server/src/main/java/org/opensearch/search/query/QueryPhase.java @@ -335,13 +335,12 @@ private static boolean searchWithCollector( ContextIndexSearcher searcher, Query query, LinkedList collectors, + QueryCollectorContext queryCollectorContext, boolean hasFilterCollector, boolean timeoutSet ) throws IOException { - // create the top docs collector last when the other collectors are known - final TopDocsCollectorContext topDocsFactory = createTopDocsCollectorContext(searchContext, hasFilterCollector); - // add the top docs collector, the first collector context in the chain - collectors.addFirst(topDocsFactory); + // add passed collector, the first collector context in the chain + collectors.addFirst(Objects.requireNonNull(queryCollectorContext)); final Collector queryCollector; if (searchContext.getProfilers() != null) { @@ -370,7 +369,10 @@ private static boolean 
searchWithCollector( for (QueryCollectorContext ctx : collectors) { ctx.postProcess(queryResult); } - return topDocsFactory.shouldRescore(); + if (queryCollectorContext instanceof RescoringQueryCollectorContext) { + return ((RescoringQueryCollectorContext) queryCollectorContext).shouldRescore(); + } + return false; } /** @@ -440,7 +442,29 @@ protected boolean searchWithCollector( boolean hasFilterCollector, boolean hasTimeout ) throws IOException { - return QueryPhase.searchWithCollector(searchContext, searcher, query, collectors, hasFilterCollector, hasTimeout); + // create the top docs collector last when the other collectors are known + final TopDocsCollectorContext topDocsFactory = createTopDocsCollectorContext(searchContext, hasFilterCollector); + return searchWithCollector(searchContext, searcher, query, collectors, topDocsFactory, hasFilterCollector, hasTimeout); + } + + protected boolean searchWithCollector( + SearchContext searchContext, + ContextIndexSearcher searcher, + Query query, + LinkedList collectors, + QueryCollectorContext queryCollectorContext, + boolean hasFilterCollector, + boolean hasTimeout + ) throws IOException { + return QueryPhase.searchWithCollector( + searchContext, + searcher, + query, + collectors, + queryCollectorContext, + hasFilterCollector, + hasTimeout + ); } } } diff --git a/server/src/main/java/org/opensearch/search/query/RescoringQueryCollectorContext.java b/server/src/main/java/org/opensearch/search/query/RescoringQueryCollectorContext.java new file mode 100644 index 0000000000000..dc47ee80dba42 --- /dev/null +++ b/server/src/main/java/org/opensearch/search/query/RescoringQueryCollectorContext.java @@ -0,0 +1,27 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.search.query; + +import org.opensearch.common.annotation.PublicApi; + +/** + * Abstraction that allows indication of whether results should be rescored or not based on + * custom logic of exact {@link QueryCollectorContext} implementation. + * + * @opensearch.api + */ +@PublicApi(since = "2.15.0") +public interface RescoringQueryCollectorContext { + + /** + * Indicates if results from the query context should be rescored + * @return true if results must be rescored, false otherwise + */ + boolean shouldRescore(); +} diff --git a/server/src/main/java/org/opensearch/search/query/TopDocsCollectorContext.java b/server/src/main/java/org/opensearch/search/query/TopDocsCollectorContext.java index 65d3948c8401e..f780f6fe32af2 100644 --- a/server/src/main/java/org/opensearch/search/query/TopDocsCollectorContext.java +++ b/server/src/main/java/org/opensearch/search/query/TopDocsCollectorContext.java @@ -95,7 +95,7 @@ * * @opensearch.internal */ -public abstract class TopDocsCollectorContext extends QueryCollectorContext { +public abstract class TopDocsCollectorContext extends QueryCollectorContext implements RescoringQueryCollectorContext { protected final int numHits; TopDocsCollectorContext(String profilerName, int numHits) { diff --git a/server/src/main/java/org/opensearch/snapshots/RestoreService.java b/server/src/main/java/org/opensearch/snapshots/RestoreService.java index e6a6b747c2baf..5883a8a37be71 100644 --- a/server/src/main/java/org/opensearch/snapshots/RestoreService.java +++ b/server/src/main/java/org/opensearch/snapshots/RestoreService.java @@ -125,12 +125,12 @@ import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_VERSION_CREATED; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_VERSION_UPGRADED; import static org.opensearch.common.util.FeatureFlags.SEARCHABLE_SNAPSHOT_EXTENDED_COMPATIBILITY; +import static org.opensearch.common.util.IndexUtils.filterIndices; import static 
org.opensearch.common.util.set.Sets.newHashSet; import static org.opensearch.index.IndexModule.INDEX_STORE_TYPE_SETTING; import static org.opensearch.index.store.remote.directory.RemoteSnapshotDirectory.SEARCHABLE_SNAPSHOT_EXTENDED_COMPATIBILITY_MINIMUM_VERSION; import static org.opensearch.index.store.remote.filecache.FileCache.DATA_TO_FILE_CACHE_SIZE_RATIO_SETTING; import static org.opensearch.node.Node.NODE_SEARCH_CACHE_SIZE_SETTING; -import static org.opensearch.snapshots.SnapshotUtils.filterIndices; /** * Service responsible for restoring snapshots diff --git a/server/src/main/java/org/opensearch/snapshots/SnapshotUtils.java b/server/src/main/java/org/opensearch/snapshots/SnapshotUtils.java index e7338a29cafeb..8cce5e1c98254 100644 --- a/server/src/main/java/org/opensearch/snapshots/SnapshotUtils.java +++ b/server/src/main/java/org/opensearch/snapshots/SnapshotUtils.java @@ -31,24 +31,15 @@ package org.opensearch.snapshots; -import org.opensearch.action.support.IndicesOptions; import org.opensearch.cluster.metadata.IndexMetadata; -import org.opensearch.cluster.metadata.IndexNameExpressionResolver; -import org.opensearch.common.regex.Regex; import org.opensearch.index.IndexModule; -import org.opensearch.index.IndexNotFoundException; import org.opensearch.index.IndexSettings; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; -import java.util.stream.Collectors; -import java.util.stream.Stream; /** * Snapshot utilities @@ -57,102 +48,6 @@ */ public class SnapshotUtils { - /** - * Filters out list of available indices based on the list of selected indices. 
- * - * @param availableIndices list of available indices - * @param selectedIndices list of selected indices - * @param indicesOptions ignore indices flag - * @return filtered out indices - */ - public static List filterIndices(List availableIndices, String[] selectedIndices, IndicesOptions indicesOptions) { - if (IndexNameExpressionResolver.isAllIndices(Arrays.asList(selectedIndices))) { - return availableIndices; - } - - // Move the exclusions to end of list to ensure they are processed - // after explicitly selected indices are chosen. - final List excludesAtEndSelectedIndices = Stream.concat( - Arrays.stream(selectedIndices).filter(s -> s.isEmpty() || s.charAt(0) != '-'), - Arrays.stream(selectedIndices).filter(s -> !s.isEmpty() && s.charAt(0) == '-') - ).collect(Collectors.toUnmodifiableList()); - - Set result = null; - for (int i = 0; i < excludesAtEndSelectedIndices.size(); i++) { - String indexOrPattern = excludesAtEndSelectedIndices.get(i); - boolean add = true; - if (!indexOrPattern.isEmpty()) { - if (availableIndices.contains(indexOrPattern)) { - if (result == null) { - result = new HashSet<>(); - } - result.add(indexOrPattern); - continue; - } - if (indexOrPattern.charAt(0) == '+') { - add = true; - indexOrPattern = indexOrPattern.substring(1); - // if its the first, add empty set - if (i == 0) { - result = new HashSet<>(); - } - } else if (indexOrPattern.charAt(0) == '-') { - // If the first index pattern is an exclusion, then all patterns are exclusions due to the - // reordering logic above. In this case, the request is interpreted as "include all indexes except - // those matching the exclusions" so we add all indices here and then remove the ones that match the exclusion patterns. 
- if (i == 0) { - result = new HashSet<>(availableIndices); - } - add = false; - indexOrPattern = indexOrPattern.substring(1); - } - } - if (indexOrPattern.isEmpty() || !Regex.isSimpleMatchPattern(indexOrPattern)) { - if (!availableIndices.contains(indexOrPattern)) { - if (!indicesOptions.ignoreUnavailable()) { - throw new IndexNotFoundException(indexOrPattern); - } else { - if (result == null) { - // add all the previous ones... - result = new HashSet<>(availableIndices.subList(0, i)); - } - } - } else { - if (result != null) { - if (add) { - result.add(indexOrPattern); - } else { - result.remove(indexOrPattern); - } - } - } - continue; - } - if (result == null) { - // add all the previous ones... - result = new HashSet<>(availableIndices.subList(0, i)); - } - boolean found = false; - for (String index : availableIndices) { - if (Regex.simpleMatch(indexOrPattern, index)) { - found = true; - if (add) { - result.add(index); - } else { - result.remove(index); - } - } - } - if (!found && !indicesOptions.allowNoIndices()) { - throw new IndexNotFoundException(indexOrPattern); - } - } - if (result == null) { - return Collections.unmodifiableList(new ArrayList<>(Arrays.asList(selectedIndices))); - } - return Collections.unmodifiableList(new ArrayList<>(result)); - } - /** * Validates if there are any remote snapshots backing an index * @param metadata index metadata from cluster state diff --git a/server/src/main/java/org/opensearch/snapshots/SnapshotsService.java b/server/src/main/java/org/opensearch/snapshots/SnapshotsService.java index 71918bc73b55a..acc2dc83749cd 100644 --- a/server/src/main/java/org/opensearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/opensearch/snapshots/SnapshotsService.java @@ -131,6 +131,9 @@ import static java.util.Collections.emptySet; import static java.util.Collections.unmodifiableList; import static org.opensearch.cluster.SnapshotsInProgress.completed; +import static org.opensearch.common.util.IndexUtils.filterIndices; 
+import static org.opensearch.node.remotestore.RemoteStoreNodeService.CompatibilityMode; +import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING; import static org.opensearch.repositories.blobstore.BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY; import static org.opensearch.snapshots.SnapshotUtils.validateSnapshotsBackingAnyIndex; @@ -343,6 +346,13 @@ public ClusterState execute(ClusterState currentState) { } boolean remoteStoreIndexShallowCopy = REMOTE_STORE_INDEX_SHALLOW_COPY.get(repository.getMetadata().settings()); + logger.debug("remote_store_index_shallow_copy setting is set as [{}]", remoteStoreIndexShallowCopy); + if (remoteStoreIndexShallowCopy + && clusterService.getClusterSettings().get(REMOTE_STORE_COMPATIBILITY_MODE_SETTING).equals(CompatibilityMode.MIXED)) { + // don't allow shallow snapshots if compatibility mode is not strict + logger.warn("Shallow snapshots are not supported during migration. Falling back to full snapshot."); + remoteStoreIndexShallowCopy = false; + } newEntry = SnapshotsInProgress.startedEntry( new Snapshot(repositoryName, snapshotId), request.includeGlobalState(), @@ -466,11 +476,7 @@ public ClusterState execute(ClusterState currentState) { indicesForSnapshot.add(indexId.getName()); } } - final List matchingIndices = SnapshotUtils.filterIndices( - indicesForSnapshot, - request.indices(), - request.indicesOptions() - ); + final List matchingIndices = filterIndices(indicesForSnapshot, request.indices(), request.indicesOptions()); if (matchingIndices.isEmpty()) { throw new SnapshotException( new Snapshot(repositoryName, sourceSnapshotId), diff --git a/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java b/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java index 1b8b6243aa805..14bfc85abac16 100644 --- a/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java +++ 
b/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java @@ -42,6 +42,12 @@ import org.opensearch.cluster.routing.WeightedRoutingStats; import org.opensearch.cluster.service.ClusterManagerThrottlingStats; import org.opensearch.cluster.service.ClusterStateStats; +import org.opensearch.common.cache.CacheType; +import org.opensearch.common.cache.service.NodeCacheStats; +import org.opensearch.common.cache.stats.CacheStats; +import org.opensearch.common.cache.stats.DefaultCacheStatsHolder; +import org.opensearch.common.cache.stats.DefaultCacheStatsHolderTests; +import org.opensearch.common.cache.stats.ImmutableCacheStatsHolder; import org.opensearch.common.io.stream.BytesStreamOutput; import org.opensearch.common.metrics.OperationStats; import org.opensearch.common.settings.ClusterSettings; @@ -89,6 +95,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.TreeMap; import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.StreamSupport; @@ -577,6 +584,13 @@ public void testSerialization() throws IOException { deserializedAdmissionControllerStats.getRejectionCount().get(AdmissionControlActionType.INDEXING.getType()) ); } + NodeCacheStats nodeCacheStats = nodeStats.getNodeCacheStats(); + NodeCacheStats deserializedNodeCacheStats = deserializedNodeStats.getNodeCacheStats(); + if (nodeCacheStats == null) { + assertNull(deserializedNodeCacheStats); + } else { + assertEquals(nodeCacheStats, deserializedNodeCacheStats); + } } } } @@ -928,6 +942,39 @@ public void apply(String action, AdmissionControlActionType admissionControlActi NodeIndicesStats indicesStats = getNodeIndicesStats(remoteStoreStats); + NodeCacheStats nodeCacheStats = null; + if (frequently()) { + int numIndices = randomIntBetween(1, 10); + int numShardsPerIndex = randomIntBetween(1, 50); + + List dimensionNames = List.of("index", "shard", "tier"); + DefaultCacheStatsHolder statsHolder = new 
DefaultCacheStatsHolder(dimensionNames, "dummyStoreName"); + for (int indexNum = 0; indexNum < numIndices; indexNum++) { + String indexName = "index" + indexNum; + for (int shardNum = 0; shardNum < numShardsPerIndex; shardNum++) { + String shardName = "[" + indexName + "][" + shardNum + "]"; + for (String tierName : new String[] { "dummy_tier_1", "dummy_tier_2" }) { + List dimensionValues = List.of(indexName, shardName, tierName); + CacheStats toIncrement = new CacheStats(randomInt(20), randomInt(20), randomInt(20), randomInt(20), randomInt(20)); + DefaultCacheStatsHolderTests.populateStatsHolderFromStatsValueMap( + statsHolder, + Map.of(dimensionValues, toIncrement) + ); + } + } + } + CommonStatsFlags flags = new CommonStatsFlags(); + for (CacheType cacheType : CacheType.values()) { + if (frequently()) { + flags.includeCacheType(cacheType); + } + } + ImmutableCacheStatsHolder cacheStats = statsHolder.getImmutableCacheStatsHolder(dimensionNames.toArray(new String[0])); + TreeMap cacheStatsMap = new TreeMap<>(); + cacheStatsMap.put(CacheType.INDICES_REQUEST_CACHE, cacheStats); + nodeCacheStats = new NodeCacheStats(cacheStatsMap, flags); + } + // TODO: Only remote_store based aspects of NodeIndicesStats are being tested here. 
// It is possible to test other metrics in NodeIndicesStats as well since it extends Writeable now return new NodeStats( @@ -958,7 +1005,8 @@ public void apply(String action, AdmissionControlActionType admissionControlActi null, segmentReplicationRejectionStats, null, - admissionControlStats + admissionControlStats, + nodeCacheStats ); } diff --git a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIngestTests.java b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIngestTests.java index 141c630b94020..da9156ccdb71a 100644 --- a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIngestTests.java +++ b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIngestTests.java @@ -341,7 +341,8 @@ public void testIngestLocal() throws Exception { failureHandler.capture(), completionHandler.capture(), any(), - eq(Names.WRITE) + eq(Names.WRITE), + eq(bulkRequest) ); completionHandler.getValue().accept(null, exception); assertTrue(failureCalled.get()); @@ -378,7 +379,8 @@ public void testSingleItemBulkActionIngestLocal() throws Exception { failureHandler.capture(), completionHandler.capture(), any(), - eq(Names.WRITE) + eq(Names.WRITE), + any() ); completionHandler.getValue().accept(null, exception); assertTrue(failureCalled.get()); @@ -424,7 +426,8 @@ public void testIngestSystemLocal() throws Exception { failureHandler.capture(), completionHandler.capture(), any(), - eq(Names.SYSTEM_WRITE) + eq(Names.SYSTEM_WRITE), + eq(bulkRequest) ); completionHandler.getValue().accept(null, exception); assertTrue(failureCalled.get()); @@ -455,7 +458,7 @@ public void testIngestForward() throws Exception { action.execute(null, bulkRequest, listener); // should not have executed ingest locally - verify(ingestService, never()).executeBulkRequest(anyInt(), any(), any(), any(), any(), any()); + verify(ingestService, never()).executeBulkRequest(anyInt(), any(), any(), any(), any(), any(), any()); // but instead should have sent to 
a remote node with the transport service ArgumentCaptor node = ArgumentCaptor.forClass(DiscoveryNode.class); verify(transportService).sendRequest(node.capture(), eq(BulkAction.NAME), any(), remoteResponseHandler.capture()); @@ -495,7 +498,7 @@ public void testSingleItemBulkActionIngestForward() throws Exception { singleItemBulkWriteAction.execute(null, indexRequest, listener); // should not have executed ingest locally - verify(ingestService, never()).executeBulkRequest(anyInt(), any(), any(), any(), any(), any()); + verify(ingestService, never()).executeBulkRequest(anyInt(), any(), any(), any(), any(), any(), any()); // but instead should have sent to a remote node with the transport service ArgumentCaptor node = ArgumentCaptor.forClass(DiscoveryNode.class); verify(transportService).sendRequest(node.capture(), eq(BulkAction.NAME), any(), remoteResponseHandler.capture()); @@ -581,7 +584,8 @@ private void validatePipelineWithBulkUpsert(@Nullable String indexRequestIndexNa failureHandler.capture(), completionHandler.capture(), any(), - eq(Names.WRITE) + eq(Names.WRITE), + eq(bulkRequest) ); assertEquals(indexRequest1.getPipeline(), "default_pipeline"); assertEquals(indexRequest2.getPipeline(), "default_pipeline"); @@ -624,7 +628,8 @@ public void testDoExecuteCalledTwiceCorrectly() throws Exception { failureHandler.capture(), completionHandler.capture(), any(), - eq(Names.WRITE) + eq(Names.WRITE), + any() ); completionHandler.getValue().accept(null, exception); assertFalse(action.indexCreated); // still no index yet, the ingest node failed. 
@@ -711,7 +716,8 @@ public void testFindDefaultPipelineFromTemplateMatch() { failureHandler.capture(), completionHandler.capture(), any(), - eq(Names.WRITE) + eq(Names.WRITE), + any() ); } @@ -750,7 +756,8 @@ public void testFindDefaultPipelineFromV2TemplateMatch() { failureHandler.capture(), completionHandler.capture(), any(), - eq(Names.WRITE) + eq(Names.WRITE), + any() ); } @@ -775,7 +782,8 @@ private void validateDefaultPipeline(IndexRequest indexRequest) { failureHandler.capture(), completionHandler.capture(), any(), - eq(Names.WRITE) + eq(Names.WRITE), + any() ); assertEquals(indexRequest.getPipeline(), "default_pipeline"); completionHandler.getValue().accept(null, exception); diff --git a/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java b/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java index 420289d3ff2e5..7dcbf213d6c9d 100644 --- a/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java +++ b/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java @@ -70,6 +70,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.EnumSet; import java.util.HashSet; import java.util.List; import java.util.Set; @@ -442,6 +443,24 @@ public void testShardNotAvailableWithDisallowPartialFailures() { assertEquals(0, searchPhaseExecutionException.getSuppressed().length); } + public void testShardNotAvailableWithIgnoreUnavailable() { + SearchRequest searchRequest = new SearchRequest().allowPartialSearchResults(false) + .indicesOptions(new IndicesOptions(EnumSet.of(IndicesOptions.Option.IGNORE_UNAVAILABLE), IndicesOptions.WildcardStates.NONE)); + AtomicReference exception = new AtomicReference<>(); + ActionListener listener = ActionListener.wrap(response -> {}, exception::set); + int numShards = randomIntBetween(2, 10); + ArraySearchPhaseResults phaseResults = new ArraySearchPhaseResults<>(numShards); 
+ AbstractSearchAsyncAction action = createAction(searchRequest, phaseResults, listener, false, new AtomicLong()); + // skip one to avoid the "all shards failed" failure. + SearchShardIterator skipIterator = new SearchShardIterator(null, null, Collections.emptyList(), null); + skipIterator.resetAndSkip(); + action.skipShard(skipIterator); + + // Validate no exception is thrown + action.executeNextPhase(action, createFetchSearchPhase()); + action.sendSearchResponse(InternalSearchResponse.empty(), phaseResults.results); + } + private static ArraySearchPhaseResults phaseResults( Set contextIds, List> nodeLookups, diff --git a/server/src/test/java/org/opensearch/action/search/SearchRequestTests.java b/server/src/test/java/org/opensearch/action/search/SearchRequestTests.java index 9ee314e77ca7e..40514c526f190 100644 --- a/server/src/test/java/org/opensearch/action/search/SearchRequestTests.java +++ b/server/src/test/java/org/opensearch/action/search/SearchRequestTests.java @@ -46,6 +46,7 @@ import org.opensearch.search.Scroll; import org.opensearch.search.builder.PointInTimeBuilder; import org.opensearch.search.builder.SearchSourceBuilder; +import org.opensearch.search.fetch.subphase.FetchSourceContext; import org.opensearch.search.rescore.QueryRescorerBuilder; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.VersionUtils; @@ -76,6 +77,35 @@ protected SearchRequest createSearchRequest() throws IOException { ); } + public void testClone() throws IOException { + SearchRequest searchRequest = new SearchRequest(); + SearchRequest clonedRequest = searchRequest.deepCopy(); + assertEquals(searchRequest.hashCode(), clonedRequest.hashCode()); + assertNotSame(searchRequest, clonedRequest); + + String[] includes = new String[] { "field1.*" }; + String[] excludes = new String[] { "field2.*" }; + FetchSourceContext fetchSourceContext = new FetchSourceContext(true, includes, excludes); + SearchSourceBuilder source = new 
SearchSourceBuilder().fetchSource(fetchSourceContext); + SearchRequest complexSearchRequest = createSearchRequest().source(source); + complexSearchRequest.requestCache(false); + complexSearchRequest.scroll(new TimeValue(1000)); + SearchRequest clonedComplexRequest = complexSearchRequest.deepCopy(); + assertEquals(complexSearchRequest.hashCode(), clonedComplexRequest.hashCode()); + assertNotSame(complexSearchRequest, clonedComplexRequest); + assertEquals(fetchSourceContext, clonedComplexRequest.source().fetchSource()); + assertNotSame(fetchSourceContext, clonedComplexRequest.source().fetchSource()); + // Change the value of the original includes array and excludes array + includes[0] = "new_field1.*"; + excludes[0] = "new_field2.*"; + // Values in the original fetchSource object should be updated + assertEquals("new_field1.*", complexSearchRequest.source().fetchSource().includes()[0]); + assertEquals("new_field2.*", complexSearchRequest.source().fetchSource().excludes()[0]); + // Values in the cloned fetchSource object should not be updated + assertEquals("field1.*", clonedComplexRequest.source().fetchSource().includes()[0]); + assertEquals("field2.*", clonedComplexRequest.source().fetchSource().excludes()[0]); + } + public void testWithLocalReduction() { expectThrows(NullPointerException.class, () -> SearchRequest.subSearchRequest(null, Strings.EMPTY_ARRAY, "", 0, randomBoolean())); SearchRequest request = new SearchRequest(); diff --git a/server/src/test/java/org/opensearch/action/support/clustermanager/TransportClusterManagerNodeActionTests.java b/server/src/test/java/org/opensearch/action/support/clustermanager/TransportClusterManagerNodeActionTests.java index b3eb2443fa940..35c5c5e605b4d 100644 --- a/server/src/test/java/org/opensearch/action/support/clustermanager/TransportClusterManagerNodeActionTests.java +++ b/server/src/test/java/org/opensearch/action/support/clustermanager/TransportClusterManagerNodeActionTests.java @@ -86,6 +86,8 @@ import 
java.util.concurrent.atomic.AtomicBoolean; import static org.opensearch.common.util.FeatureFlags.REMOTE_STORE_MIGRATION_EXPERIMENTAL; +import static org.opensearch.index.remote.RemoteMigrationIndexMetadataUpdaterTests.createIndexMetadataWithDocrepSettings; +import static org.opensearch.index.remote.RemoteMigrationIndexMetadataUpdaterTests.createIndexMetadataWithRemoteStoreSettings; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING; @@ -791,7 +793,9 @@ public void testDontAllowSwitchingToStrictCompatibilityModeForMixedCluster() { .add(nonRemoteNode2) .localNodeId(nonRemoteNode2.getId()) .build(); - ClusterState sameTypeClusterState = ClusterState.builder(clusterState).nodes(discoveryNodes).build(); + + metadata = createIndexMetadataWithRemoteStoreSettings("test-index"); + ClusterState sameTypeClusterState = ClusterState.builder(clusterState).nodes(discoveryNodes).metadata(metadata).build(); transportClusterUpdateSettingsAction.validateCompatibilityModeSettingRequest(request, sameTypeClusterState); // cluster with only non-remote nodes @@ -801,10 +805,84 @@ public void testDontAllowSwitchingToStrictCompatibilityModeForMixedCluster() { .add(remoteNode2) .localNodeId(remoteNode2.getId()) .build(); - sameTypeClusterState = ClusterState.builder(sameTypeClusterState).nodes(discoveryNodes).build(); + sameTypeClusterState = ClusterState.builder(sameTypeClusterState).nodes(discoveryNodes).metadata(metadata).build(); transportClusterUpdateSettingsAction.validateCompatibilityModeSettingRequest(request, sameTypeClusterState); } + public void testDontAllowSwitchingToStrictCompatibilityModeWithoutRemoteIndexSettings() { + Settings nodeSettings = 
Settings.builder().put(REMOTE_STORE_MIGRATION_EXPERIMENTAL, "true").build(); + FeatureFlags.initializeFeatureFlags(nodeSettings); + Settings currentCompatibilityModeSettings = Settings.builder() + .put(REMOTE_STORE_COMPATIBILITY_MODE_SETTING.getKey(), RemoteStoreNodeService.CompatibilityMode.MIXED) + .build(); + Settings intendedCompatibilityModeSettings = Settings.builder() + .put(REMOTE_STORE_COMPATIBILITY_MODE_SETTING.getKey(), RemoteStoreNodeService.CompatibilityMode.STRICT) + .build(); + ClusterUpdateSettingsRequest request = new ClusterUpdateSettingsRequest(); + request.persistentSettings(intendedCompatibilityModeSettings); + DiscoveryNode remoteNode1 = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + getRemoteStoreNodeAttributes(), + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + DiscoveryNode remoteNode2 = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + getRemoteStoreNodeAttributes(), + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + DiscoveryNodes discoveryNodes = DiscoveryNodes.builder() + .add(remoteNode1) + .localNodeId(remoteNode1.getId()) + .add(remoteNode2) + .localNodeId(remoteNode2.getId()) + .build(); + AllocationService allocationService = new AllocationService( + new AllocationDeciders(Collections.singleton(new MaxRetryAllocationDecider())), + new TestGatewayAllocator(), + new BalancedShardsAllocator(Settings.EMPTY), + EmptyClusterInfoService.INSTANCE, + EmptySnapshotsInfoService.INSTANCE + ); + TransportClusterUpdateSettingsAction transportClusterUpdateSettingsAction = new TransportClusterUpdateSettingsAction( + transportService, + clusterService, + threadPool, + allocationService, + new ActionFilters(Collections.emptySet()), + new IndexNameExpressionResolver(new ThreadContext(Settings.EMPTY)), + clusterService.getClusterSettings() + ); + + Metadata nonRemoteIndexMd = Metadata.builder(createIndexMetadataWithDocrepSettings("test")) + 
.persistentSettings(currentCompatibilityModeSettings) + .build(); + final ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT) + .metadata(nonRemoteIndexMd) + .nodes(discoveryNodes) + .build(); + final SettingsException exception = expectThrows( + SettingsException.class, + () -> transportClusterUpdateSettingsAction.validateCompatibilityModeSettingRequest(request, clusterState) + ); + assertEquals( + "can not switch to STRICT compatibility mode since all indices in the cluster does not have remote store based index settings", + exception.getMessage() + ); + + Metadata remoteIndexMd = Metadata.builder(createIndexMetadataWithRemoteStoreSettings("test")) + .persistentSettings(currentCompatibilityModeSettings) + .build(); + ClusterState clusterStateWithRemoteIndices = ClusterState.builder(ClusterName.DEFAULT) + .metadata(remoteIndexMd) + .nodes(discoveryNodes) + .build(); + transportClusterUpdateSettingsAction.validateCompatibilityModeSettingRequest(request, clusterStateWithRemoteIndices); + } + public void testDontAllowSwitchingCompatibilityModeForClusterWithMultipleVersions() { Settings nodeSettings = Settings.builder().put(REMOTE_STORE_MIGRATION_EXPERIMENTAL, "true").build(); FeatureFlags.initializeFeatureFlags(nodeSettings); @@ -897,7 +975,10 @@ public void testDontAllowSwitchingCompatibilityModeForClusterWithMultipleVersion .localNodeId(discoveryNode2.getId()) .build(); - ClusterState sameVersionClusterState = ClusterState.builder(differentVersionClusterState).nodes(discoveryNodes).build(); + ClusterState sameVersionClusterState = ClusterState.builder(differentVersionClusterState) + .nodes(discoveryNodes) + .metadata(createIndexMetadataWithRemoteStoreSettings("test")) + .build(); transportClusterUpdateSettingsAction.validateCompatibilityModeSettingRequest(request, sameVersionClusterState); } @@ -907,4 +988,5 @@ private Map getRemoteStoreNodeAttributes() { remoteStoreNodeAttributes.put(REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY, 
"my-translog-repo-1"); return remoteStoreNodeAttributes; } + } diff --git a/server/src/test/java/org/opensearch/cluster/ClusterModuleTests.java b/server/src/test/java/org/opensearch/cluster/ClusterModuleTests.java index b30ebaf183084..557e4dc2ca8c5 100644 --- a/server/src/test/java/org/opensearch/cluster/ClusterModuleTests.java +++ b/server/src/test/java/org/opensearch/cluster/ClusterModuleTests.java @@ -73,6 +73,7 @@ import org.opensearch.gateway.GatewayAllocator; import org.opensearch.plugins.ClusterPlugin; import org.opensearch.test.gateway.TestGatewayAllocator; +import org.opensearch.test.gateway.TestShardBatchGatewayAllocator; import java.util.Arrays; import java.util.Collection; @@ -296,7 +297,10 @@ public void testRejectsReservedExistingShardsAllocatorName() { null, threadContext ); - expectThrows(IllegalArgumentException.class, () -> clusterModule.setExistingShardsAllocators(new TestGatewayAllocator())); + expectThrows( + IllegalArgumentException.class, + () -> clusterModule.setExistingShardsAllocators(new TestGatewayAllocator(), new TestShardBatchGatewayAllocator()) + ); } public void testRejectsDuplicateExistingShardsAllocatorName() { @@ -308,7 +312,10 @@ public void testRejectsDuplicateExistingShardsAllocatorName() { null, threadContext ); - expectThrows(IllegalArgumentException.class, () -> clusterModule.setExistingShardsAllocators(new TestGatewayAllocator())); + expectThrows( + IllegalArgumentException.class, + () -> clusterModule.setExistingShardsAllocators(new TestGatewayAllocator(), new TestShardBatchGatewayAllocator()) + ); } private static ClusterPlugin existingShardsAllocatorPlugin(final String allocatorName) { diff --git a/server/src/test/java/org/opensearch/cluster/DiskUsageTests.java b/server/src/test/java/org/opensearch/cluster/DiskUsageTests.java index ff47ec3015697..5539dd26dd52d 100644 --- a/server/src/test/java/org/opensearch/cluster/DiskUsageTests.java +++ b/server/src/test/java/org/opensearch/cluster/DiskUsageTests.java @@ -194,6 +194,7 
@@ public void testFillDiskUsage() { null, null, null, + null, null ), new NodeStats( @@ -224,6 +225,7 @@ public void testFillDiskUsage() { null, null, null, + null, null ), new NodeStats( @@ -254,6 +256,7 @@ public void testFillDiskUsage() { null, null, null, + null, null ) ); @@ -315,6 +318,7 @@ public void testFillDiskUsageSomeInvalidValues() { null, null, null, + null, null ), new NodeStats( @@ -345,6 +349,7 @@ public void testFillDiskUsageSomeInvalidValues() { null, null, null, + null, null ), new NodeStats( @@ -375,6 +380,7 @@ public void testFillDiskUsageSomeInvalidValues() { null, null, null, + null, null ) ); diff --git a/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java b/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java index 5eafe63e63fad..3e343e95f6c4b 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java @@ -76,6 +76,7 @@ import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING; import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING; +import static org.opensearch.test.VersionUtils.allOpenSearchVersions; import static org.opensearch.test.VersionUtils.allVersions; import static org.opensearch.test.VersionUtils.maxCompatibleVersion; import static org.opensearch.test.VersionUtils.randomCompatibleVersion; @@ -885,6 +886,64 @@ public void testUpdatesClusterStateWithMultiNodeClusterAndSameRepository() throw validateRepositoryMetadata(result.resultingState, clusterManagerNode, 2); } + public void testNodeJoinInMixedMode() { + Settings nodeSettings = Settings.builder().put(REMOTE_STORE_MIGRATION_EXPERIMENTAL, "true").build(); + 
FeatureFlags.initializeFeatureFlags(nodeSettings); + + List versions = allOpenSearchVersions(); + assert versions.size() >= 2 : "test requires at least two open search versions"; + Version baseVersion = versions.get(versions.size() - 2); + Version higherVersion = versions.get(versions.size() - 1); + + DiscoveryNode currentNode1 = new DiscoveryNode(UUIDs.base64UUID(), buildNewFakeTransportAddress(), baseVersion); + DiscoveryNode currentNode2 = new DiscoveryNode(UUIDs.base64UUID(), buildNewFakeTransportAddress(), baseVersion); + DiscoveryNodes currentNodes = DiscoveryNodes.builder() + .add(currentNode1) + .localNodeId(currentNode1.getId()) + .add(currentNode2) + .localNodeId(currentNode2.getId()) + .build(); + + Settings mixedModeCompatibilitySettings = Settings.builder() + .put(REMOTE_STORE_COMPATIBILITY_MODE_SETTING.getKey(), RemoteStoreNodeService.CompatibilityMode.MIXED) + .build(); + + Metadata metadata = Metadata.builder().persistentSettings(mixedModeCompatibilitySettings).build(); + + // joining node of a higher version than the current nodes + DiscoveryNode joiningNode1 = new DiscoveryNode( + randomAlphaOfLength(10), + randomAlphaOfLength(10), + buildNewFakeTransportAddress(), + remoteStoreNodeAttributes(SEGMENT_REPO, TRANSLOG_REPO), + Collections.singleton(DiscoveryNodeRole.CLUSTER_MANAGER_ROLE), + higherVersion + ); + final IllegalStateException exception = expectThrows( + IllegalStateException.class, + () -> JoinTaskExecutor.ensureNodesCompatibility(joiningNode1, currentNodes, metadata) + ); + String reason = String.format( + Locale.ROOT, + "remote migration : a node [%s] of higher version [%s] is not allowed to join a cluster with maximum version [%s]", + joiningNode1, + joiningNode1.getVersion(), + currentNodes.getMaxNodeVersion() + ); + assertEquals(reason, exception.getMessage()); + + // joining node of the same version as the current nodes + DiscoveryNode joiningNode2 = new DiscoveryNode( + randomAlphaOfLength(10), + randomAlphaOfLength(10), + 
buildNewFakeTransportAddress(), + remoteStoreNodeAttributes(SEGMENT_REPO, TRANSLOG_REPO), + Collections.singleton(DiscoveryNodeRole.CLUSTER_MANAGER_ROLE), + baseVersion + ); + JoinTaskExecutor.ensureNodesCompatibility(joiningNode2, currentNodes, metadata); + } + private void validateRepositoryMetadata(ClusterState updatedState, DiscoveryNode existingNode, int expectedRepositories) throws Exception { diff --git a/server/src/test/java/org/opensearch/cluster/metadata/IndexNameExpressionResolverTests.java b/server/src/test/java/org/opensearch/cluster/metadata/IndexNameExpressionResolverTests.java index 51696ca599d02..fda2f411b1994 100644 --- a/server/src/test/java/org/opensearch/cluster/metadata/IndexNameExpressionResolverTests.java +++ b/server/src/test/java/org/opensearch/cluster/metadata/IndexNameExpressionResolverTests.java @@ -352,6 +352,25 @@ public void testIndexOptionsWildcardExpansion() { assertEquals(1, results.length); assertThat(results, arrayContainingInAnyOrder(".hidden-closed")); + options = IndicesOptions.fromOptions(false, true, false, true, false, false, false, false); + context = new IndexNameExpressionResolver.Context(state, options, false); + + results = indexNameExpressionResolver.concreteIndexNames(context, "foo*"); + assertEquals(1, results.length); + assertEquals("foo", results[0]); + + try { + options = IndicesOptions.fromOptions(false, true, false, true, false, true, false, false); + context = new IndexNameExpressionResolver.Context(state, options, false); + + results = indexNameExpressionResolver.concreteIndexNames(context, "foo*"); + assertEquals(1, results.length); + assertEquals("foo", results[0]); + } catch (IllegalArgumentException iae) { + String expectedMessage = "To expand [CLOSE] wildcard, please set forbid_closed_indices to `false`"; + assertEquals(expectedMessage, iae.getMessage()); + } + // Only open options = IndicesOptions.fromOptions(false, true, true, false, false); context = new IndexNameExpressionResolver.Context(state, 
options, false); diff --git a/server/src/test/java/org/opensearch/cluster/routing/IndexShardRoutingTableTests.java b/server/src/test/java/org/opensearch/cluster/routing/IndexShardRoutingTableTests.java index ebb7529d3f733..e881016fb9305 100644 --- a/server/src/test/java/org/opensearch/cluster/routing/IndexShardRoutingTableTests.java +++ b/server/src/test/java/org/opensearch/cluster/routing/IndexShardRoutingTableTests.java @@ -39,6 +39,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.UUID; public class IndexShardRoutingTableTests extends OpenSearchTestCase { public void testEqualsAttributesKey() { @@ -69,4 +70,48 @@ public void testEquals() { assertNotEquals(table1, s); assertNotEquals(table1, table3); } + + public void testShardsMatchingPredicate() { + ShardId shardId = new ShardId(new Index("a", UUID.randomUUID().toString()), 0); + ShardRouting primary = TestShardRouting.newShardRouting(shardId, "node-1", true, ShardRoutingState.STARTED); + ShardRouting replica = TestShardRouting.newShardRouting(shardId, "node-2", false, ShardRoutingState.STARTED); + ShardRouting unassignedReplica = ShardRouting.newUnassigned( + shardId, + false, + RecoverySource.PeerRecoverySource.INSTANCE, + new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, null) + ); + ShardRouting relocatingReplica1 = TestShardRouting.newShardRouting( + shardId, + "node-3", + "node-4", + false, + ShardRoutingState.RELOCATING + ); + ShardRouting relocatingReplica2 = TestShardRouting.newShardRouting( + shardId, + "node-4", + "node-5", + false, + ShardRoutingState.RELOCATING + ); + + IndexShardRoutingTable table = new IndexShardRoutingTable( + shardId, + Arrays.asList(primary, replica, unassignedReplica, relocatingReplica1, relocatingReplica2) + ); + assertEquals(List.of(primary), table.shardsMatchingPredicate(ShardRouting::primary)); + assertEquals( + List.of(replica, unassignedReplica, relocatingReplica1, relocatingReplica2), + 
table.shardsMatchingPredicate(shardRouting -> !shardRouting.primary()) + ); + assertEquals( + List.of(unassignedReplica), + table.shardsMatchingPredicate(shardRouting -> !shardRouting.primary() && shardRouting.unassigned()) + ); + assertEquals( + Arrays.asList(relocatingReplica1, relocatingReplica2), + table.shardsMatchingPredicate(shardRouting -> !shardRouting.primary() && shardRouting.relocating()) + ); + } } diff --git a/server/src/test/java/org/opensearch/cluster/routing/allocation/FailedShardsRoutingTests.java b/server/src/test/java/org/opensearch/cluster/routing/allocation/FailedShardsRoutingTests.java index db4cedbbbe7b5..5e3b74ee138ab 100644 --- a/server/src/test/java/org/opensearch/cluster/routing/allocation/FailedShardsRoutingTests.java +++ b/server/src/test/java/org/opensearch/cluster/routing/allocation/FailedShardsRoutingTests.java @@ -40,6 +40,7 @@ import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodeRole; import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.cluster.routing.RoutingNodes; import org.opensearch.cluster.routing.RoutingTable; @@ -47,13 +48,18 @@ import org.opensearch.cluster.routing.allocation.command.AllocationCommands; import org.opensearch.cluster.routing.allocation.command.MoveAllocationCommand; import org.opensearch.cluster.routing.allocation.decider.ClusterRebalanceAllocationDecider; +import org.opensearch.common.UUIDs; import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.core.index.shard.ShardId; import org.opensearch.indices.replication.common.ReplicationType; +import org.opensearch.node.remotestore.RemoteStoreNodeService; import org.opensearch.test.VersionUtils; import java.util.ArrayList; import java.util.HashSet; +import java.util.List; +import java.util.Map; import java.util.Set; import static 
org.opensearch.cluster.ClusterName.CLUSTER_NAME_SETTING; @@ -61,6 +67,11 @@ import static org.opensearch.cluster.routing.ShardRoutingState.RELOCATING; import static org.opensearch.cluster.routing.ShardRoutingState.STARTED; import static org.opensearch.cluster.routing.ShardRoutingState.UNASSIGNED; +import static org.opensearch.common.util.FeatureFlags.REMOTE_STORE_MIGRATION_EXPERIMENTAL; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY; +import static org.opensearch.node.remotestore.RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING; +import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING; import static org.hamcrest.Matchers.anyOf; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.lessThan; @@ -812,4 +823,136 @@ private void testReplicaIsPromoted(boolean isSegmentReplicationEnabled) { } } } + + public void testPreferReplicaOnRemoteNodeForPrimaryPromotion() { + FeatureFlags.initializeFeatureFlags(Settings.builder().put(REMOTE_STORE_MIGRATION_EXPERIMENTAL, "true").build()); + AllocationService allocation = createAllocationService(Settings.builder().build()); + + // segment replication enabled + Settings.Builder settingsBuilder = settings(Version.CURRENT).put(IndexMetadata.SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT); + + // remote store migration metadata settings + Metadata metadata = Metadata.builder() + .put(IndexMetadata.builder("test").settings(settingsBuilder).numberOfShards(1).numberOfReplicas(4)) + .persistentSettings( + Settings.builder() + .put(REMOTE_STORE_COMPATIBILITY_MODE_SETTING.getKey(), RemoteStoreNodeService.CompatibilityMode.MIXED.mode) + .put(MIGRATION_DIRECTION_SETTING.getKey(), RemoteStoreNodeService.Direction.REMOTE_STORE.direction) + .build() + ) + .build(); + 
+ RoutingTable initialRoutingTable = RoutingTable.builder().addAsNew(metadata.index("test")).build(); + + ClusterState clusterState = ClusterState.builder(CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) + .metadata(metadata) + .routingTable(initialRoutingTable) + .build(); + + ShardId shardId = new ShardId(metadata.index("test").getIndex(), 0); + + // add a remote node and start primary shard + Map remoteStoreNodeAttributes = Map.of( + REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, + "REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_VALUE", + REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY, + "REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_VALUE" + ); + DiscoveryNode remoteNode1 = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + remoteStoreNodeAttributes, + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().add(remoteNode1)).build(); + clusterState = ClusterState.builder(clusterState).routingTable(allocation.reroute(clusterState, "reroute").routingTable()).build(); + assertThat(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(1)); + assertThat(clusterState.getRoutingNodes().shardsWithState(UNASSIGNED).size(), equalTo(4)); + + clusterState = startInitializingShardsAndReroute(allocation, clusterState); + assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(1)); + assertThat(clusterState.getRoutingNodes().shardsWithState(UNASSIGNED).size(), equalTo(4)); + + // add remote and non-remote nodes and start replica shards + DiscoveryNode remoteNode2 = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + remoteStoreNodeAttributes, + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + DiscoveryNode remoteNode3 = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + remoteStoreNodeAttributes, + DiscoveryNodeRole.BUILT_IN_ROLES, + 
Version.CURRENT + ); + DiscoveryNode nonRemoteNode1 = new DiscoveryNode(UUIDs.base64UUID(), buildNewFakeTransportAddress(), Version.CURRENT); + DiscoveryNode nonRemoteNode2 = new DiscoveryNode(UUIDs.base64UUID(), buildNewFakeTransportAddress(), Version.CURRENT); + List replicaShardNodes = List.of(remoteNode2, remoteNode3, nonRemoteNode1, nonRemoteNode2); + + for (int i = 0; i < 4; i++) { + clusterState = ClusterState.builder(clusterState) + .nodes(DiscoveryNodes.builder(clusterState.nodes()).add(replicaShardNodes.get(i))) + .build(); + + clusterState = allocation.reroute(clusterState, "reroute"); + assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(1 + i)); + assertThat(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(1)); + assertThat(clusterState.getRoutingNodes().shardsWithState(UNASSIGNED).size(), equalTo(4 - (i + 1))); + + clusterState = startInitializingShardsAndReroute(allocation, clusterState); + assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(1 + (i + 1))); + assertThat(clusterState.getRoutingNodes().shardsWithState(UNASSIGNED).size(), equalTo(4 - (i + 1))); + } + + // fail primary shard + ShardRouting primaryShard0 = clusterState.routingTable().index("test").shard(0).primaryShard(); + ClusterState newState = allocation.applyFailedShard(clusterState, primaryShard0, randomBoolean()); + assertNotEquals(clusterState, newState); + clusterState = newState; + + // verify that promoted replica exists on a remote node + assertEquals(4, clusterState.getRoutingNodes().shardsWithState(STARTED).size()); + ShardRouting primaryShardRouting1 = clusterState.routingTable().index("test").shard(0).primaryShard(); + assertNotEquals(primaryShard0, primaryShardRouting1); + assertTrue( + primaryShardRouting1.currentNodeId().equals(remoteNode2.getId()) + || primaryShardRouting1.currentNodeId().equals(remoteNode3.getId()) + ); + + // fail primary shard again + newState = 
allocation.applyFailedShard(clusterState, primaryShardRouting1, randomBoolean()); + assertNotEquals(clusterState, newState); + clusterState = newState; + + // verify that promoted replica again exists on a remote node + assertEquals(3, clusterState.getRoutingNodes().shardsWithState(STARTED).size()); + ShardRouting primaryShardRouting2 = clusterState.routingTable().index("test").shard(0).primaryShard(); + assertNotEquals(primaryShardRouting1, primaryShardRouting2); + assertTrue( + primaryShardRouting2.currentNodeId().equals(remoteNode2.getId()) + || primaryShardRouting2.currentNodeId().equals(remoteNode3.getId()) + ); + assertNotEquals(primaryShardRouting1.currentNodeId(), primaryShardRouting2.currentNodeId()); + + ShardRouting expectedCandidateForSegRep = clusterState.getRoutingNodes().activeReplicaWithOldestVersion(shardId); + + // fail primary shard again + newState = allocation.applyFailedShard(clusterState, primaryShardRouting2, randomBoolean()); + assertNotEquals(clusterState, newState); + clusterState = newState; + + // verify that promoted replica exists on a non-remote node + assertEquals(2, clusterState.getRoutingNodes().shardsWithState(STARTED).size()); + ShardRouting primaryShardRouting3 = clusterState.routingTable().index("test").shard(0).primaryShard(); + assertNotEquals(primaryShardRouting2, primaryShardRouting3); + assertTrue( + primaryShardRouting3.currentNodeId().equals(nonRemoteNode1.getId()) + || primaryShardRouting3.currentNodeId().equals(nonRemoteNode2.getId()) + ); + assertEquals(expectedCandidateForSegRep.allocationId(), primaryShardRouting3.allocationId()); + } } diff --git a/server/src/test/java/org/opensearch/cluster/routing/allocation/RemoteStoreMigrationAllocationDeciderTests.java b/server/src/test/java/org/opensearch/cluster/routing/allocation/RemoteStoreMigrationAllocationDeciderTests.java index 3e130a42952e4..ee4dbe9738e04 100644 --- 
a/server/src/test/java/org/opensearch/cluster/routing/allocation/RemoteStoreMigrationAllocationDeciderTests.java +++ b/server/src/test/java/org/opensearch/cluster/routing/allocation/RemoteStoreMigrationAllocationDeciderTests.java @@ -70,6 +70,8 @@ import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REPLICATION_TYPE; import static org.opensearch.common.util.FeatureFlags.REMOTE_STORE_MIGRATION_EXPERIMENTAL; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; +import static org.opensearch.node.remotestore.RemoteStoreNodeService.Direction.NONE; +import static org.opensearch.node.remotestore.RemoteStoreNodeService.Direction.REMOTE_STORE; import static org.opensearch.node.remotestore.RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING; import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING; import static org.hamcrest.core.Is.is; @@ -89,7 +91,7 @@ public class RemoteStoreMigrationAllocationDeciderTests extends OpenSearchAlloca .build(); private final Settings remoteStoreDirectionSettings = Settings.builder() - .put(MIGRATION_DIRECTION_SETTING.getKey(), RemoteStoreNodeService.Direction.REMOTE_STORE) + .put(MIGRATION_DIRECTION_SETTING.getKey(), REMOTE_STORE) .build(); private final Settings docrepDirectionSettings = Settings.builder() .put(MIGRATION_DIRECTION_SETTING.getKey(), RemoteStoreNodeService.Direction.DOCREP) @@ -106,7 +108,9 @@ public class RemoteStoreMigrationAllocationDeciderTests extends OpenSearchAlloca private Metadata metadata; private RoutingTable routingTable = null; - private void beforeAllocation() { + private ShardId shardId = new ShardId(TEST_INDEX, "_na_", 0); + + private void beforeAllocation(String direction) { FeatureFlags.initializeFeatureFlags(directionEnabledNodeSettings); if (isRemoteStoreBackedIndex == null) { isRemoteStoreBackedIndex = randomBoolean(); @@ -116,11 +120,7 @@ private void 
beforeAllocation() { String compatibilityMode = isMixedMode ? RemoteStoreNodeService.CompatibilityMode.MIXED.mode : RemoteStoreNodeService.CompatibilityMode.STRICT.mode; - customSettings = getCustomSettings( - RemoteStoreNodeService.Direction.REMOTE_STORE.direction, - compatibilityMode, - indexMetadataBuilder - ); + customSettings = getCustomSettings(direction, compatibilityMode, indexMetadataBuilder); if (routingTable != null) { metadata = Metadata.builder().put(indexMetadataBuilder).build(); @@ -149,6 +149,35 @@ private void beforeAllocation() { routingAllocation.debugDecision(true); } + private void prepareRoutingTable(boolean isReplicaAllocation, String primaryShardNodeId) { + routingTable = RoutingTable.builder() + .add( + IndexRoutingTable.builder(shardId.getIndex()) + .addIndexShard( + new IndexShardRoutingTable.Builder(shardId).addShard( + TestShardRouting.newShardRouting( + shardId.getIndexName(), + shardId.getId(), + (isReplicaAllocation ? primaryShardNodeId : null), + true, + (isReplicaAllocation ? 
ShardRoutingState.STARTED : ShardRoutingState.UNASSIGNED) + ) + ) + .addShard( + TestShardRouting.newShardRouting( + shardId.getIndexName(), + shardId.getId(), + null, + false, + ShardRoutingState.UNASSIGNED + ) + ) + .build() + ) + ) + .build(); + } + // tests for primary shard copy allocation with MIXED mode and REMOTE_STORE direction public void testDontAllocateNewPrimaryShardOnNonRemoteNodeForMixedModeAndRemoteStoreDirection() { @@ -166,7 +195,7 @@ public void testDontAllocateNewPrimaryShardOnNonRemoteNodeForMixedModeAndRemoteS .localNodeId(remoteNode.getId()) .build(); - beforeAllocation(); + beforeAllocation(REMOTE_STORE.direction); ShardRouting primaryShardRouting = clusterState.getRoutingTable().shardRoutingTable(TEST_INDEX, 0).primaryShard(); RoutingNode nonRemoteRoutingNode = clusterState.getRoutingNodes().node(nonRemoteNode.getId()); @@ -196,7 +225,7 @@ public void testAllocateNewPrimaryShardOnRemoteNodeForMixedModeAndRemoteStoreDir .localNodeId(remoteNode.getId()) .build(); - beforeAllocation(); + beforeAllocation(REMOTE_STORE.direction); ShardRouting primaryShardRouting = clusterState.getRoutingTable().shardRoutingTable(TEST_INDEX, 0).primaryShard(); RoutingNode remoteRoutingNode = clusterState.getRoutingNodes().node(remoteNode.getId()); @@ -216,39 +245,11 @@ public void testDontAllocateNewReplicaShardOnRemoteNodeIfPrimaryShardOnNonRemote replicaCount = 1; isMixedMode = true; - ShardId shardId = new ShardId(TEST_INDEX, "_na_", 0); - DiscoveryNode nonRemoteNode = getNonRemoteNode(); DiscoveryNode remoteNode = getRemoteNode(); - routingTable = RoutingTable.builder() - .add( - IndexRoutingTable.builder(shardId.getIndex()) - .addIndexShard( - new IndexShardRoutingTable.Builder(shardId).addShard( - // primary on non-remote node - TestShardRouting.newShardRouting( - shardId.getIndexName(), - shardId.getId(), - nonRemoteNode.getId(), - true, - ShardRoutingState.STARTED - ) - ) - .addShard( - // new replica's allocation - TestShardRouting.newShardRouting( - 
shardId.getIndexName(), - shardId.getId(), - null, - false, - ShardRoutingState.UNASSIGNED - ) - ) - .build() - ) - ) - .build(); + // primary on non-remote node, new replica's allocation + prepareRoutingTable(true, nonRemoteNode.getId()); discoveryNodes = DiscoveryNodes.builder() .add(nonRemoteNode) @@ -257,7 +258,7 @@ public void testDontAllocateNewReplicaShardOnRemoteNodeIfPrimaryShardOnNonRemote .localNodeId(remoteNode.getId()) .build(); - beforeAllocation(); + beforeAllocation(REMOTE_STORE.direction); assertEquals(2, clusterState.getRoutingTable().allShards().size()); ShardRouting replicaShardRouting = clusterState.getRoutingTable().shardRoutingTable(TEST_INDEX, 0).replicaShards().get(0); @@ -278,40 +279,12 @@ public void testAllocateNewReplicaShardOnRemoteNodeIfPrimaryShardOnRemoteNodeFor replicaCount = 1; isMixedMode = true; - ShardId shardId = new ShardId(TEST_INDEX, "_na_", 0); - DiscoveryNode remoteNode1 = getRemoteNode(); DiscoveryNode remoteNode2 = getRemoteNode(); DiscoveryNode nonRemoteNode = getNonRemoteNode(); - routingTable = RoutingTable.builder() - .add( - IndexRoutingTable.builder(shardId.getIndex()) - .addIndexShard( - new IndexShardRoutingTable.Builder(shardId).addShard( - // primary on remote node - TestShardRouting.newShardRouting( - shardId.getIndexName(), - shardId.getId(), - remoteNode1.getId(), - true, - ShardRoutingState.STARTED - ) - ) - .addShard( - // new replica's allocation - TestShardRouting.newShardRouting( - shardId.getIndexName(), - shardId.getId(), - null, - false, - ShardRoutingState.UNASSIGNED - ) - ) - .build() - ) - ) - .build(); + // primary on remote node, new replica's allocation + prepareRoutingTable(true, remoteNode1.getId()); discoveryNodes = DiscoveryNodes.builder() .add(remoteNode1) @@ -322,7 +295,7 @@ public void testAllocateNewReplicaShardOnRemoteNodeIfPrimaryShardOnRemoteNodeFor .localNodeId(nonRemoteNode.getId()) .build(); - beforeAllocation(); + beforeAllocation(REMOTE_STORE.direction); assertEquals(2, 
clusterState.getRoutingTable().allShards().size()); ShardRouting replicaShardRouting = clusterState.getRoutingTable().shardRoutingTable(TEST_INDEX, 0).replicaShards().get(0); @@ -343,40 +316,12 @@ public void testAllocateNewReplicaShardOnNonRemoteNodeIfPrimaryShardOnNonRemoteN replicaCount = 1; isMixedMode = true; - ShardId shardId = new ShardId(TEST_INDEX, "_na_", 0); - DiscoveryNode remoteNode = getRemoteNode(); DiscoveryNode nonRemoteNode1 = getNonRemoteNode(); DiscoveryNode nonRemoteNode2 = getNonRemoteNode(); - routingTable = RoutingTable.builder() - .add( - IndexRoutingTable.builder(shardId.getIndex()) - .addIndexShard( - new IndexShardRoutingTable.Builder(shardId).addShard( - // primary shard on non-remote node - TestShardRouting.newShardRouting( - shardId.getIndexName(), - shardId.getId(), - nonRemoteNode1.getId(), - true, - ShardRoutingState.STARTED - ) - ) - .addShard( - // new replica's allocation - TestShardRouting.newShardRouting( - shardId.getIndexName(), - shardId.getId(), - null, - false, - ShardRoutingState.UNASSIGNED - ) - ) - .build() - ) - ) - .build(); + // primary shard on non-remote node, new replica's allocation + prepareRoutingTable(true, nonRemoteNode1.getId()); discoveryNodes = DiscoveryNodes.builder() .add(remoteNode) @@ -387,7 +332,7 @@ public void testAllocateNewReplicaShardOnNonRemoteNodeIfPrimaryShardOnNonRemoteN .localNodeId(nonRemoteNode2.getId()) .build(); - beforeAllocation(); + beforeAllocation(REMOTE_STORE.direction); assertEquals(2, clusterState.getRoutingTable().allShards().size()); @@ -411,39 +356,11 @@ public void testAllocateNewReplicaShardOnNonRemoteNodeIfPrimaryShardOnRemoteNode replicaCount = 1; isMixedMode = true; - ShardId shardId = new ShardId(TEST_INDEX, "_na_", 0); - DiscoveryNode nonRemoteNode = getNonRemoteNode(); DiscoveryNode remoteNode = getRemoteNode(); - routingTable = RoutingTable.builder() - .add( - IndexRoutingTable.builder(shardId.getIndex()) - .addIndexShard( - new 
IndexShardRoutingTable.Builder(shardId).addShard( - // primary shard on non-remote node - TestShardRouting.newShardRouting( - shardId.getIndexName(), - shardId.getId(), - remoteNode.getId(), - true, - ShardRoutingState.STARTED - ) - ) - .addShard( - // new replica's allocation - TestShardRouting.newShardRouting( - shardId.getIndexName(), - shardId.getId(), - null, - false, - ShardRoutingState.UNASSIGNED - ) - ) - .build() - ) - ) - .build(); + // primary shard on remote node, new replica's allocation + prepareRoutingTable(true, remoteNode.getId()); discoveryNodes = DiscoveryNodes.builder() .add(nonRemoteNode) @@ -452,7 +369,7 @@ public void testAllocateNewReplicaShardOnNonRemoteNodeIfPrimaryShardOnRemoteNode .localNodeId(remoteNode.getId()) .build(); - beforeAllocation(); + beforeAllocation(REMOTE_STORE.direction); assertEquals(2, clusterState.getRoutingTable().allShards().size()); ShardRouting replicaShardRouting = clusterState.getRoutingTable().shardRoutingTable(TEST_INDEX, 0).replicaShards().get(0); @@ -478,39 +395,12 @@ public void testAlwaysAllocateNewShardForStrictMode() { isMixedMode = false; isRemoteStoreBackedIndex = false; - ShardId shardId = new ShardId(TEST_INDEX, "_na_", 0); - DiscoveryNode nonRemoteNode1 = getNonRemoteNode(); DiscoveryNode nonRemoteNode2 = getNonRemoteNode(); boolean isReplicaAllocation = randomBoolean(); - routingTable = RoutingTable.builder() - .add( - IndexRoutingTable.builder(shardId.getIndex()) - .addIndexShard( - new IndexShardRoutingTable.Builder(shardId).addShard( - TestShardRouting.newShardRouting( - shardId.getIndexName(), - shardId.getId(), - (isReplicaAllocation ? nonRemoteNode1.getId() : null), - true, - (isReplicaAllocation ? 
ShardRoutingState.STARTED : ShardRoutingState.UNASSIGNED) - ) - ) - .addShard( - TestShardRouting.newShardRouting( - shardId.getIndexName(), - shardId.getId(), - null, - false, - ShardRoutingState.UNASSIGNED - ) - ) - .build() - ) - ) - .build(); + prepareRoutingTable(isReplicaAllocation, nonRemoteNode1.getId()); discoveryNodes = DiscoveryNodes.builder() .add(nonRemoteNode1) @@ -519,7 +409,7 @@ public void testAlwaysAllocateNewShardForStrictMode() { .localNodeId(nonRemoteNode2.getId()) .build(); - beforeAllocation(); + beforeAllocation(REMOTE_STORE.direction); assertEquals(2, clusterState.getRoutingTable().allShards().size()); @@ -543,33 +433,7 @@ public void testAlwaysAllocateNewShardForStrictMode() { DiscoveryNode remoteNode1 = getRemoteNode(); DiscoveryNode remoteNode2 = getRemoteNode(); - routingTable = RoutingTable.builder() - .add( - IndexRoutingTable.builder(shardId.getIndex()) - .addIndexShard( - new IndexShardRoutingTable.Builder(shardId).addShard( - TestShardRouting.newShardRouting( - shardId.getIndexName(), - shardId.getId(), - (isReplicaAllocation ? remoteNode1.getId() : null), - true, - (isReplicaAllocation ? 
ShardRoutingState.STARTED : ShardRoutingState.UNASSIGNED) - ) - ) - .addShard( - // new replica's allocation - TestShardRouting.newShardRouting( - shardId.getIndexName(), - shardId.getId(), - null, - false, - ShardRoutingState.UNASSIGNED - ) - ) - .build() - ) - ) - .build(); + prepareRoutingTable(isReplicaAllocation, remoteNode1.getId()); discoveryNodes = DiscoveryNodes.builder() .add(remoteNode1) @@ -578,7 +442,7 @@ public void testAlwaysAllocateNewShardForStrictMode() { .localNodeId(remoteNode2.getId()) .build(); - beforeAllocation(); + beforeAllocation(REMOTE_STORE.direction); assertEquals(2, clusterState.getRoutingTable().allShards().size()); @@ -598,6 +462,97 @@ public void testAlwaysAllocateNewShardForStrictMode() { assertThat(decision.getExplanation().toLowerCase(Locale.ROOT), is(reason)); } + // test for NONE direction + public void testAllocationForNoneDirection() { + shardCount = 1; + replicaCount = 1; + isMixedMode = true; + isRemoteStoreBackedIndex = false; // non-remote store backed index + + DiscoveryNode remoteNode1 = getRemoteNode(); + DiscoveryNode remoteNode2 = getRemoteNode(); + DiscoveryNode nonRemoteNode1 = getNonRemoteNode(); + DiscoveryNode nonRemoteNode2 = getNonRemoteNode(); + + boolean isReplicaAllocation = randomBoolean(); + + prepareRoutingTable(isReplicaAllocation, nonRemoteNode1.getId()); + + discoveryNodes = DiscoveryNodes.builder() + .add(remoteNode1) + .localNodeId(remoteNode1.getId()) + .add(remoteNode2) + .localNodeId(remoteNode2.getId()) + .add(nonRemoteNode1) + .localNodeId(nonRemoteNode1.getId()) + .add(nonRemoteNode2) + .localNodeId(nonRemoteNode2.getId()) + .build(); + + beforeAllocation(NONE.direction); + assertEquals(2, clusterState.getRoutingTable().allShards().size()); + + ShardRouting shardRouting = clusterState.getRoutingTable().shardRoutingTable(TEST_INDEX, 0).primaryShard(); + if (isReplicaAllocation) { + shardRouting = clusterState.getRoutingTable().shardRoutingTable(TEST_INDEX, 0).replicaShards().get(0); + } + 
RoutingNode nonRemoteRoutingNode = clusterState.getRoutingNodes().node(nonRemoteNode2.getId()); + RoutingNode remoteRoutingNode = clusterState.getRoutingNodes().node(remoteNode2.getId()); + + // allocation decision for non-remote node for non-remote store backed index + Decision decision = remoteStoreMigrationAllocationDecider.canAllocate(shardRouting, nonRemoteRoutingNode, routingAllocation); + assertThat(decision.type(), is(Decision.Type.YES)); + String reason = String.format( + Locale.ROOT, + "[none migration_direction]: %s shard copy can be allocated to a non-remote node for non remote store backed index", + (isReplicaAllocation ? "replica" : "primary") + ); + assertThat(decision.getExplanation().toLowerCase(Locale.ROOT), is(reason)); + + // allocation decision for remote node for non-remote store backed index + decision = remoteStoreMigrationAllocationDecider.canAllocate(shardRouting, remoteRoutingNode, routingAllocation); + assertThat(decision.type(), is(Decision.Type.NO)); + reason = String.format( + Locale.ROOT, + "[none migration_direction]: %s shard copy can not be allocated to a remote node for non remote store backed index", + (isReplicaAllocation ? 
"replica" : "primary") + ); + assertThat(decision.getExplanation().toLowerCase(Locale.ROOT), is(reason)); + + isRemoteStoreBackedIndex = true; // remote store backed index + prepareRoutingTable(isReplicaAllocation, remoteNode1.getId()); + + beforeAllocation(NONE.direction); + assertEquals(2, clusterState.getRoutingTable().allShards().size()); + + shardRouting = clusterState.getRoutingTable().shardRoutingTable(TEST_INDEX, 0).primaryShard(); + if (isReplicaAllocation) { + shardRouting = clusterState.getRoutingTable().shardRoutingTable(TEST_INDEX, 0).replicaShards().get(0); + } + nonRemoteRoutingNode = clusterState.getRoutingNodes().node(nonRemoteNode2.getId()); + remoteRoutingNode = clusterState.getRoutingNodes().node(remoteNode2.getId()); + + // allocation decision for non-remote node for remote store backed index + decision = remoteStoreMigrationAllocationDecider.canAllocate(shardRouting, nonRemoteRoutingNode, routingAllocation); + assertThat(decision.type(), is(Decision.Type.NO)); + reason = String.format( + Locale.ROOT, + "[none migration_direction]: %s shard copy can not be allocated to a non-remote node for remote store backed index", + (isReplicaAllocation ? "replica" : "primary") + ); + assertThat(decision.getExplanation().toLowerCase(Locale.ROOT), is(reason)); + + // allocation decision for remote node for remote store backed index + decision = remoteStoreMigrationAllocationDecider.canAllocate(shardRouting, remoteRoutingNode, routingAllocation); + assertThat(decision.type(), is(Decision.Type.YES)); + reason = String.format( + Locale.ROOT, + "[none migration_direction]: %s shard copy can be allocated to a remote node for remote store backed index", + (isReplicaAllocation ? 
"replica" : "primary") + ); + assertThat(decision.getExplanation().toLowerCase(Locale.ROOT), is(reason)); + } + // prepare index metadata for test-index private IndexMetadata.Builder getIndexMetadataBuilder(boolean isRemoteStoreBackedIndex, int shardCount, int replicaCount) { Settings.Builder builder = settings(Version.CURRENT); @@ -614,7 +569,7 @@ private IndexMetadata.Builder getIndexMetadataBuilder(boolean isRemoteStoreBacke private Settings getCustomSettings(String direction, String compatibilityMode, IndexMetadata.Builder indexMetadataBuilder) { Settings.Builder builder = Settings.builder(); // direction settings - if (direction.toLowerCase(Locale.ROOT).equals(RemoteStoreNodeService.Direction.REMOTE_STORE.direction)) { + if (direction.toLowerCase(Locale.ROOT).equals(REMOTE_STORE.direction)) { builder.put(remoteStoreDirectionSettings); } else if (direction.toLowerCase(Locale.ROOT).equals(RemoteStoreNodeService.Direction.DOCREP.direction)) { builder.put(docrepDirectionSettings); diff --git a/server/src/test/java/org/opensearch/common/blobstore/transfer/RemoteTransferContainerTests.java b/server/src/test/java/org/opensearch/common/blobstore/transfer/RemoteTransferContainerTests.java index 074f659850c7b..44f6a0e11251c 100644 --- a/server/src/test/java/org/opensearch/common/blobstore/transfer/RemoteTransferContainerTests.java +++ b/server/src/test/java/org/opensearch/common/blobstore/transfer/RemoteTransferContainerTests.java @@ -67,7 +67,7 @@ public OffsetRangeInputStream get(long size, long position) throws IOException { return new OffsetRangeFileInputStream(testFile, size, position); } }, - 0, + 0L, false ) ) { @@ -89,7 +89,7 @@ public OffsetRangeInputStream get(long size, long position) throws IOException { return new OffsetRangeFileInputStream(testFile, size, position); } }, - 0, + 0L, false ) ) { @@ -155,7 +155,7 @@ public OffsetRangeInputStream get(long size, long position) throws IOException { return new OffsetRangeFileInputStream(testFile, size, position); 
} }, - 0, + 0L, false ) ) { @@ -223,7 +223,7 @@ public OffsetRangeInputStream get(long size, long position) throws IOException { return new OffsetRangeFileInputStream(testFile, size, position); } }, - 0, + 0L, isRemoteDataIntegritySupported ) ) { @@ -286,7 +286,7 @@ public OffsetRangeInputStream get(long size, long position) throws IOException { return new RateLimitingOffsetRangeInputStream(offsetRangeIndexInputStream, rateLimiterSupplier, null); } }, - 0, + 0L, true ) ) { @@ -347,7 +347,7 @@ public OffsetRangeInputStream get(long size, long position) throws IOException { return new RateLimitingOffsetRangeInputStream(offsetRangeIndexInputStream, rateLimiterSupplier, null); } }, - 0, + 0L, true ) ) { diff --git a/server/src/test/java/org/opensearch/common/cache/stats/CacheStatsHolderTests.java b/server/src/test/java/org/opensearch/common/cache/stats/DefaultCacheStatsHolderTests.java similarity index 72% rename from server/src/test/java/org/opensearch/common/cache/stats/CacheStatsHolderTests.java rename to server/src/test/java/org/opensearch/common/cache/stats/DefaultCacheStatsHolderTests.java index 390cd4d601a4b..c6e8252ddf806 100644 --- a/server/src/test/java/org/opensearch/common/cache/stats/CacheStatsHolderTests.java +++ b/server/src/test/java/org/opensearch/common/cache/stats/DefaultCacheStatsHolderTests.java @@ -21,18 +21,25 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.CountDownLatch; -public class CacheStatsHolderTests extends OpenSearchTestCase { +public class DefaultCacheStatsHolderTests extends OpenSearchTestCase { + private final String storeName = "dummy_store"; + public void testAddAndGet() throws Exception { List dimensionNames = List.of("dim1", "dim2", "dim3", "dim4"); - CacheStatsHolder cacheStatsHolder = new CacheStatsHolder(dimensionNames); - Map> usedDimensionValues = CacheStatsHolderTests.getUsedDimensionValues(cacheStatsHolder, 10); - Map, CacheStats> expected = 
CacheStatsHolderTests.populateStats(cacheStatsHolder, usedDimensionValues, 1000, 10); + DefaultCacheStatsHolder cacheStatsHolder = new DefaultCacheStatsHolder(dimensionNames, storeName); + Map> usedDimensionValues = DefaultCacheStatsHolderTests.getUsedDimensionValues(cacheStatsHolder, 10); + Map, CacheStats> expected = DefaultCacheStatsHolderTests.populateStats( + cacheStatsHolder, + usedDimensionValues, + 1000, + 10 + ); // test the value in the map is as expected for each distinct combination of values for (List dimensionValues : expected.keySet()) { CacheStats expectedCounter = expected.get(dimensionValues); - ImmutableCacheStats actualStatsHolder = CacheStatsHolderTests.getNode(dimensionValues, cacheStatsHolder.getStatsRoot()) + ImmutableCacheStats actualStatsHolder = DefaultCacheStatsHolderTests.getNode(dimensionValues, cacheStatsHolder.getStatsRoot()) .getImmutableStats(); ImmutableCacheStats actualCacheStats = getNode(dimensionValues, cacheStatsHolder.getStatsRoot()).getImmutableStats(); @@ -53,7 +60,7 @@ public void testAddAndGet() throws Exception { public void testReset() throws Exception { List dimensionNames = List.of("dim1", "dim2"); - CacheStatsHolder cacheStatsHolder = new CacheStatsHolder(dimensionNames); + DefaultCacheStatsHolder cacheStatsHolder = new DefaultCacheStatsHolder(dimensionNames, storeName); Map> usedDimensionValues = getUsedDimensionValues(cacheStatsHolder, 10); Map, CacheStats> expected = populateStats(cacheStatsHolder, usedDimensionValues, 100, 10); @@ -62,9 +69,9 @@ public void testReset() throws Exception { for (List dimensionValues : expected.keySet()) { CacheStats originalCounter = expected.get(dimensionValues); originalCounter.sizeInBytes = new CounterMetric(); - originalCounter.entries = new CounterMetric(); + originalCounter.items = new CounterMetric(); - CacheStatsHolder.Node node = getNode(dimensionValues, cacheStatsHolder.getStatsRoot()); + DefaultCacheStatsHolder.Node node = getNode(dimensionValues, 
cacheStatsHolder.getStatsRoot()); ImmutableCacheStats actual = node.getImmutableStats(); assertEquals(originalCounter.immutableSnapshot(), actual); } @@ -72,7 +79,7 @@ public void testReset() throws Exception { public void testDropStatsForDimensions() throws Exception { List dimensionNames = List.of("dim1", "dim2"); - CacheStatsHolder cacheStatsHolder = new CacheStatsHolder(dimensionNames); + DefaultCacheStatsHolder cacheStatsHolder = new DefaultCacheStatsHolder(dimensionNames, storeName); // Create stats for the following dimension sets List> populatedStats = List.of(List.of("A1", "B1"), List.of("A2", "B2"), List.of("A2", "B3")); @@ -108,20 +115,20 @@ public void testDropStatsForDimensions() throws Exception { public void testCount() throws Exception { List dimensionNames = List.of("dim1", "dim2"); - CacheStatsHolder cacheStatsHolder = new CacheStatsHolder(dimensionNames); + DefaultCacheStatsHolder cacheStatsHolder = new DefaultCacheStatsHolder(dimensionNames, storeName); Map> usedDimensionValues = getUsedDimensionValues(cacheStatsHolder, 10); Map, CacheStats> expected = populateStats(cacheStatsHolder, usedDimensionValues, 100, 10); long expectedCount = 0L; for (CacheStats counter : expected.values()) { - expectedCount += counter.getEntries(); + expectedCount += counter.getItems(); } assertEquals(expectedCount, cacheStatsHolder.count()); } public void testConcurrentRemoval() throws Exception { List dimensionNames = List.of("dim1", "dim2"); - CacheStatsHolder cacheStatsHolder = new CacheStatsHolder(dimensionNames); + DefaultCacheStatsHolder cacheStatsHolder = new DefaultCacheStatsHolder(dimensionNames, storeName); // Create stats for the following dimension sets List> populatedStats = List.of(List.of("A1", "B1"), List.of("A2", "B2"), List.of("A2", "B3")); @@ -169,8 +176,8 @@ public void testConcurrentRemoval() throws Exception { * Returns the node found by following these dimension values down from the root node. * Returns null if no such node exists. 
*/ - static CacheStatsHolder.Node getNode(List dimensionValues, CacheStatsHolder.Node root) { - CacheStatsHolder.Node current = root; + static DefaultCacheStatsHolder.Node getNode(List dimensionValues, DefaultCacheStatsHolder.Node root) { + DefaultCacheStatsHolder.Node current = root; for (String dimensionValue : dimensionValues) { current = current.getChildren().get(dimensionValue); if (current == null) { @@ -181,37 +188,51 @@ static CacheStatsHolder.Node getNode(List dimensionValues, CacheStatsHol } static Map, CacheStats> populateStats( - CacheStatsHolder cacheStatsHolder, + DefaultCacheStatsHolder cacheStatsHolder, Map> usedDimensionValues, int numDistinctValuePairs, int numRepetitionsPerValue ) throws InterruptedException { + return populateStats(List.of(cacheStatsHolder), usedDimensionValues, numDistinctValuePairs, numRepetitionsPerValue); + } + + static Map, CacheStats> populateStats( + List cacheStatsHolders, + Map> usedDimensionValues, + int numDistinctValuePairs, + int numRepetitionsPerValue + ) throws InterruptedException { + for (DefaultCacheStatsHolder statsHolder : cacheStatsHolders) { + assertEquals(cacheStatsHolders.get(0).getDimensionNames(), statsHolder.getDimensionNames()); + } Map, CacheStats> expected = new ConcurrentHashMap<>(); Thread[] threads = new Thread[numDistinctValuePairs]; CountDownLatch countDownLatch = new CountDownLatch(numDistinctValuePairs); Random rand = Randomness.get(); List> dimensionsForThreads = new ArrayList<>(); for (int i = 0; i < numDistinctValuePairs; i++) { - dimensionsForThreads.add(getRandomDimList(cacheStatsHolder.getDimensionNames(), usedDimensionValues, true, rand)); + dimensionsForThreads.add(getRandomDimList(cacheStatsHolders.get(0).getDimensionNames(), usedDimensionValues, true, rand)); int finalI = i; threads[i] = new Thread(() -> { Random threadRand = Randomness.get(); List dimensions = dimensionsForThreads.get(finalI); expected.computeIfAbsent(dimensions, (key) -> new CacheStats()); - for (int j = 0; j < 
numRepetitionsPerValue; j++) { - CacheStats statsToInc = new CacheStats( - threadRand.nextInt(10), - threadRand.nextInt(10), - threadRand.nextInt(10), - threadRand.nextInt(5000), - threadRand.nextInt(10) - ); - expected.get(dimensions).hits.inc(statsToInc.getHits()); - expected.get(dimensions).misses.inc(statsToInc.getMisses()); - expected.get(dimensions).evictions.inc(statsToInc.getEvictions()); - expected.get(dimensions).sizeInBytes.inc(statsToInc.getSizeInBytes()); - expected.get(dimensions).entries.inc(statsToInc.getEntries()); - CacheStatsHolderTests.populateStatsHolderFromStatsValueMap(cacheStatsHolder, Map.of(dimensions, statsToInc)); + for (DefaultCacheStatsHolder cacheStatsHolder : cacheStatsHolders) { + for (int j = 0; j < numRepetitionsPerValue; j++) { + CacheStats statsToInc = new CacheStats( + threadRand.nextInt(10), + threadRand.nextInt(10), + threadRand.nextInt(10), + threadRand.nextInt(5000), + threadRand.nextInt(10) + ); + expected.get(dimensions).hits.inc(statsToInc.getHits()); + expected.get(dimensions).misses.inc(statsToInc.getMisses()); + expected.get(dimensions).evictions.inc(statsToInc.getEvictions()); + expected.get(dimensions).sizeInBytes.inc(statsToInc.getSizeInBytes()); + expected.get(dimensions).items.inc(statsToInc.getItems()); + DefaultCacheStatsHolderTests.populateStatsHolderFromStatsValueMap(cacheStatsHolder, Map.of(dimensions, statsToInc)); + } } countDownLatch.countDown(); }); @@ -240,7 +261,7 @@ private static List getRandomDimList( return result; } - static Map> getUsedDimensionValues(CacheStatsHolder cacheStatsHolder, int numValuesPerDim) { + static Map> getUsedDimensionValues(DefaultCacheStatsHolder cacheStatsHolder, int numValuesPerDim) { Map> usedDimensionValues = new HashMap<>(); for (int i = 0; i < cacheStatsHolder.getDimensionNames().size(); i++) { List values = new ArrayList<>(); @@ -252,20 +273,23 @@ static Map> getUsedDimensionValues(CacheStatsHolder cacheSt return usedDimensionValues; } - private void 
assertSumOfChildrenStats(CacheStatsHolder.Node current) { + private void assertSumOfChildrenStats(DefaultCacheStatsHolder.Node current) { if (!current.children.isEmpty()) { CacheStats expectedTotal = new CacheStats(); - for (CacheStatsHolder.Node child : current.children.values()) { + for (DefaultCacheStatsHolder.Node child : current.children.values()) { expectedTotal.add(child.getImmutableStats()); } assertEquals(expectedTotal.immutableSnapshot(), current.getImmutableStats()); - for (CacheStatsHolder.Node child : current.children.values()) { + for (DefaultCacheStatsHolder.Node child : current.children.values()) { assertSumOfChildrenStats(child); } } } - static void populateStatsHolderFromStatsValueMap(CacheStatsHolder cacheStatsHolder, Map, CacheStats> statsMap) { + public static void populateStatsHolderFromStatsValueMap( + DefaultCacheStatsHolder cacheStatsHolder, + Map, CacheStats> statsMap + ) { for (Map.Entry, CacheStats> entry : statsMap.entrySet()) { CacheStats stats = entry.getValue(); List dims = entry.getKey(); @@ -279,8 +303,8 @@ static void populateStatsHolderFromStatsValueMap(CacheStatsHolder cacheStatsHold cacheStatsHolder.incrementEvictions(dims); } cacheStatsHolder.incrementSizeInBytes(dims, stats.getSizeInBytes()); - for (int i = 0; i < stats.getEntries(); i++) { - cacheStatsHolder.incrementEntries(dims); + for (int i = 0; i < stats.getItems(); i++) { + cacheStatsHolder.incrementItems(dims); } } } diff --git a/server/src/test/java/org/opensearch/common/cache/stats/ImmutableCacheStatsHolderTests.java b/server/src/test/java/org/opensearch/common/cache/stats/ImmutableCacheStatsHolderTests.java index 933b8abd6e392..285840a3451c6 100644 --- a/server/src/test/java/org/opensearch/common/cache/stats/ImmutableCacheStatsHolderTests.java +++ b/server/src/test/java/org/opensearch/common/cache/stats/ImmutableCacheStatsHolderTests.java @@ -8,26 +8,93 @@ package org.opensearch.common.cache.stats; +import org.opensearch.common.io.stream.BytesStreamOutput; +import 
org.opensearch.common.xcontent.XContentFactory; +import org.opensearch.common.xcontent.XContentHelper; +import org.opensearch.core.common.bytes.BytesReference; +import org.opensearch.core.common.io.stream.BytesStreamInput; +import org.opensearch.core.xcontent.MediaTypeRegistry; +import org.opensearch.core.xcontent.ToXContent; +import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.test.OpenSearchTestCase; +import java.util.ArrayList; +import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.function.BiConsumer; public class ImmutableCacheStatsHolderTests extends OpenSearchTestCase { + private final String storeName = "dummy_store"; + + public void testSerialization() throws Exception { + List dimensionNames = List.of("dim1", "dim2", "dim3"); + String[] levels = dimensionNames.toArray(new String[0]); + DefaultCacheStatsHolder statsHolder = new DefaultCacheStatsHolder(dimensionNames, storeName); + Map> usedDimensionValues = DefaultCacheStatsHolderTests.getUsedDimensionValues(statsHolder, 10); + DefaultCacheStatsHolderTests.populateStats(statsHolder, usedDimensionValues, 100, 10); + ImmutableCacheStatsHolder stats = statsHolder.getImmutableCacheStatsHolder(levels); + assertNotEquals(0, stats.getStatsRoot().children.size()); + + BytesStreamOutput os = new BytesStreamOutput(); + stats.writeTo(os); + BytesStreamInput is = new BytesStreamInput(BytesReference.toBytes(os.bytes())); + ImmutableCacheStatsHolder deserialized = new ImmutableCacheStatsHolder(is); + + assertEquals(stats, deserialized); + + // also test empty dimension stats + ImmutableCacheStatsHolder emptyDims = statsHolder.getImmutableCacheStatsHolder(new String[] {}); + assertEquals(0, emptyDims.getStatsRoot().children.size()); + assertEquals(stats.getTotalStats(), emptyDims.getTotalStats()); + + os = new BytesStreamOutput(); + emptyDims.writeTo(os); + is = new BytesStreamInput(BytesReference.toBytes(os.bytes())); + deserialized = new 
ImmutableCacheStatsHolder(is); + + assertEquals(emptyDims, deserialized); + } + + public void testEquals() throws Exception { + List dimensionNames = List.of("dim1", "dim2", "dim3"); + String[] levels = dimensionNames.toArray(new String[0]); + DefaultCacheStatsHolder statsHolder = new DefaultCacheStatsHolder(dimensionNames, storeName); + DefaultCacheStatsHolder differentStoreNameStatsHolder = new DefaultCacheStatsHolder(dimensionNames, "nonMatchingStoreName"); + DefaultCacheStatsHolder nonMatchingStatsHolder = new DefaultCacheStatsHolder(dimensionNames, storeName); + Map> usedDimensionValues = DefaultCacheStatsHolderTests.getUsedDimensionValues(statsHolder, 10); + DefaultCacheStatsHolderTests.populateStats(List.of(statsHolder, differentStoreNameStatsHolder), usedDimensionValues, 100, 10); + DefaultCacheStatsHolderTests.populateStats(nonMatchingStatsHolder, usedDimensionValues, 100, 10); + ImmutableCacheStatsHolder stats = statsHolder.getImmutableCacheStatsHolder(levels); + + ImmutableCacheStatsHolder secondStats = statsHolder.getImmutableCacheStatsHolder(levels); + assertEquals(stats, secondStats); + ImmutableCacheStatsHolder nonMatchingStats = nonMatchingStatsHolder.getImmutableCacheStatsHolder(levels); + assertNotEquals(stats, nonMatchingStats); + ImmutableCacheStatsHolder differentStoreNameStats = differentStoreNameStatsHolder.getImmutableCacheStatsHolder(levels); + assertNotEquals(stats, differentStoreNameStats); + } public void testGet() throws Exception { List dimensionNames = List.of("dim1", "dim2", "dim3", "dim4"); - CacheStatsHolder cacheStatsHolder = new CacheStatsHolder(dimensionNames); - Map> usedDimensionValues = CacheStatsHolderTests.getUsedDimensionValues(cacheStatsHolder, 10); - Map, CacheStats> expected = CacheStatsHolderTests.populateStats(cacheStatsHolder, usedDimensionValues, 1000, 10); - ImmutableCacheStatsHolder stats = cacheStatsHolder.getImmutableCacheStatsHolder(); + DefaultCacheStatsHolder cacheStatsHolder = new 
DefaultCacheStatsHolder(dimensionNames, storeName); + Map> usedDimensionValues = DefaultCacheStatsHolderTests.getUsedDimensionValues(cacheStatsHolder, 10); + Map, CacheStats> expected = DefaultCacheStatsHolderTests.populateStats( + cacheStatsHolder, + usedDimensionValues, + 1000, + 10 + ); + ImmutableCacheStatsHolder stats = cacheStatsHolder.getImmutableCacheStatsHolder(dimensionNames.toArray(new String[0])); // test the value in the map is as expected for each distinct combination of values for (List dimensionValues : expected.keySet()) { CacheStats expectedCounter = expected.get(dimensionValues); - ImmutableCacheStats actualCacheStatsHolder = CacheStatsHolderTests.getNode(dimensionValues, cacheStatsHolder.getStatsRoot()) - .getImmutableStats(); + ImmutableCacheStats actualCacheStatsHolder = DefaultCacheStatsHolderTests.getNode( + dimensionValues, + cacheStatsHolder.getStatsRoot() + ).getImmutableStats(); ImmutableCacheStats actualImmutableCacheStatsHolder = getNode(dimensionValues, stats.getStatsRoot()).getStats(); assertEquals(expectedCounter.immutableSnapshot(), actualCacheStatsHolder); @@ -45,23 +112,238 @@ public void testGet() throws Exception { assertEquals(expectedTotal.getMisses(), stats.getTotalMisses()); assertEquals(expectedTotal.getEvictions(), stats.getTotalEvictions()); assertEquals(expectedTotal.getSizeInBytes(), stats.getTotalSizeInBytes()); - assertEquals(expectedTotal.getEntries(), stats.getTotalEntries()); + assertEquals(expectedTotal.getItems(), stats.getTotalItems()); assertSumOfChildrenStats(stats.getStatsRoot()); } public void testEmptyDimsList() throws Exception { // If the dimension list is empty, the tree should have only the root node containing the total stats. 
- CacheStatsHolder cacheStatsHolder = new CacheStatsHolder(List.of()); - Map> usedDimensionValues = CacheStatsHolderTests.getUsedDimensionValues(cacheStatsHolder, 100); - CacheStatsHolderTests.populateStats(cacheStatsHolder, usedDimensionValues, 10, 100); - ImmutableCacheStatsHolder stats = cacheStatsHolder.getImmutableCacheStatsHolder(); + DefaultCacheStatsHolder cacheStatsHolder = new DefaultCacheStatsHolder(List.of(), storeName); + Map> usedDimensionValues = DefaultCacheStatsHolderTests.getUsedDimensionValues(cacheStatsHolder, 100); + DefaultCacheStatsHolderTests.populateStats(cacheStatsHolder, usedDimensionValues, 10, 100); + ImmutableCacheStatsHolder stats = cacheStatsHolder.getImmutableCacheStatsHolder(null); ImmutableCacheStatsHolder.Node statsRoot = stats.getStatsRoot(); assertEquals(0, statsRoot.children.size()); assertEquals(stats.getTotalStats(), statsRoot.getStats()); } + public void testAggregateByAllDimensions() throws Exception { + // Aggregating with all dimensions as levels should just give us the same values that were in the original map + List dimensionNames = List.of("dim1", "dim2", "dim3", "dim4"); + DefaultCacheStatsHolder statsHolder = new DefaultCacheStatsHolder(dimensionNames, storeName); + Map> usedDimensionValues = DefaultCacheStatsHolderTests.getUsedDimensionValues(statsHolder, 10); + Map, CacheStats> expected = DefaultCacheStatsHolderTests.populateStats(statsHolder, usedDimensionValues, 1000, 10); + ImmutableCacheStatsHolder stats = statsHolder.getImmutableCacheStatsHolder(dimensionNames.toArray(new String[0])); + + for (Map.Entry, CacheStats> expectedEntry : expected.entrySet()) { + List dimensionValues = new ArrayList<>(); + for (String dimValue : expectedEntry.getKey()) { + dimensionValues.add(dimValue); + } + assertEquals(expectedEntry.getValue().immutableSnapshot(), getNode(dimensionValues, stats.statsRoot).getStats()); + } + assertSumOfChildrenStats(stats.statsRoot); + } + + public void testAggregateBySomeDimensions() throws 
Exception { + List dimensionNames = List.of("dim1", "dim2", "dim3", "dim4"); + DefaultCacheStatsHolder statsHolder = new DefaultCacheStatsHolder(dimensionNames, storeName); + Map> usedDimensionValues = DefaultCacheStatsHolderTests.getUsedDimensionValues(statsHolder, 10); + Map, CacheStats> expected = DefaultCacheStatsHolderTests.populateStats(statsHolder, usedDimensionValues, 1000, 10); + + for (int i = 0; i < (1 << dimensionNames.size()); i++) { + // Test each combination of possible levels + List levels = new ArrayList<>(); + for (int nameIndex = 0; nameIndex < dimensionNames.size(); nameIndex++) { + if ((i & (1 << nameIndex)) != 0) { + levels.add(dimensionNames.get(nameIndex)); + } + } + + if (levels.size() == 0) { + // If we pass empty levels to CacheStatsHolder to aggregate by, we should only get a root node with the total stats in it + ImmutableCacheStatsHolder stats = statsHolder.getImmutableCacheStatsHolder(levels.toArray(new String[0])); + assertEquals(statsHolder.getStatsRoot().getImmutableStats(), stats.getStatsRoot().getStats()); + assertEquals(0, stats.getStatsRoot().children.size()); + } else { + ImmutableCacheStatsHolder stats = statsHolder.getImmutableCacheStatsHolder(levels.toArray(new String[0])); + Map, ImmutableCacheStatsHolder.Node> aggregatedLeafNodes = getAllLeafNodes(stats.statsRoot); + + for (Map.Entry, ImmutableCacheStatsHolder.Node> aggEntry : aggregatedLeafNodes.entrySet()) { + CacheStats expectedCounter = new CacheStats(); + for (List expectedDims : expected.keySet()) { + if (expectedDims.containsAll(aggEntry.getKey())) { + expectedCounter.add(expected.get(expectedDims)); + } + } + assertEquals(expectedCounter.immutableSnapshot(), aggEntry.getValue().getStats()); + } + assertSumOfChildrenStats(stats.statsRoot); + } + } + } + + public void testXContentForLevels() throws Exception { + List dimensionNames = List.of("A", "B", "C"); + + DefaultCacheStatsHolder statsHolder = new DefaultCacheStatsHolder(dimensionNames, storeName); + 
DefaultCacheStatsHolderTests.populateStatsHolderFromStatsValueMap( + statsHolder, + Map.of( + List.of("A1", "B1", "C1"), + new CacheStats(1, 1, 1, 1, 1), + List.of("A1", "B1", "C2"), + new CacheStats(2, 2, 2, 2, 2), + List.of("A1", "B2", "C1"), + new CacheStats(3, 3, 3, 3, 3), + List.of("A2", "B1", "C3"), + new CacheStats(4, 4, 4, 4, 4) + ) + ); + ImmutableCacheStatsHolder stats = statsHolder.getImmutableCacheStatsHolder(dimensionNames.toArray(new String[0])); + + XContentBuilder builder = XContentFactory.jsonBuilder(); + ToXContent.Params params = ToXContent.EMPTY_PARAMS; + + builder.startObject(); + stats.toXContent(builder, params); + builder.endObject(); + String resultString = builder.toString(); + Map result = XContentHelper.convertToMap(MediaTypeRegistry.JSON.xContent(), resultString, true); + + Map> fieldNamesMap = Map.of( + ImmutableCacheStats.Fields.SIZE_IN_BYTES, + (counter, value) -> counter.sizeInBytes.inc(value), + ImmutableCacheStats.Fields.EVICTIONS, + (counter, value) -> counter.evictions.inc(value), + ImmutableCacheStats.Fields.HIT_COUNT, + (counter, value) -> counter.hits.inc(value), + ImmutableCacheStats.Fields.MISS_COUNT, + (counter, value) -> counter.misses.inc(value), + ImmutableCacheStats.Fields.ITEM_COUNT, + (counter, value) -> counter.items.inc(value) + ); + + Map, ImmutableCacheStatsHolder.Node> leafNodes = getAllLeafNodes(stats.getStatsRoot()); + for (Map.Entry, ImmutableCacheStatsHolder.Node> entry : leafNodes.entrySet()) { + List xContentKeys = new ArrayList<>(); + for (int i = 0; i < dimensionNames.size(); i++) { + xContentKeys.add(dimensionNames.get(i)); + xContentKeys.add(entry.getKey().get(i)); + } + CacheStats counterFromXContent = new CacheStats(); + + for (Map.Entry> fieldNamesEntry : fieldNamesMap.entrySet()) { + List fullXContentKeys = new ArrayList<>(xContentKeys); + fullXContentKeys.add(fieldNamesEntry.getKey()); + int valueInXContent = (int) getValueFromNestedXContentMap(result, fullXContentKeys); + BiConsumer incrementer = 
fieldNamesEntry.getValue(); + incrementer.accept(counterFromXContent, valueInXContent); + } + + ImmutableCacheStats expected = entry.getValue().getStats(); + assertEquals(counterFromXContent.immutableSnapshot(), expected); + } + } + + public void testXContent() throws Exception { + // Tests logic of filtering levels out, logic for aggregating by those levels is already covered + List dimensionNames = List.of("A", "B", "C"); + DefaultCacheStatsHolder statsHolder = new DefaultCacheStatsHolder(dimensionNames, storeName); + Map> usedDimensionValues = DefaultCacheStatsHolderTests.getUsedDimensionValues(statsHolder, 10); + DefaultCacheStatsHolderTests.populateStats(statsHolder, usedDimensionValues, 100, 10); + + // If the levels in the params are empty or contains only unrecognized levels, we should only see the total stats and no level + // aggregation + List> levelsList = List.of(List.of(), List.of("D")); + for (List levels : levelsList) { + ImmutableCacheStatsHolder stats = statsHolder.getImmutableCacheStatsHolder(levels.toArray(new String[0])); + ToXContent.Params params = getLevelParams(levels); + XContentBuilder builder = XContentFactory.jsonBuilder(); + builder.startObject(); + stats.toXContent(builder, params); + builder.endObject(); + + String resultString = builder.toString(); + Map result = XContentHelper.convertToMap(MediaTypeRegistry.JSON.xContent(), resultString, true); + + assertTotalStatsPresentInXContentResponse(result); + // assert there are no other entries in the map besides these 6 + assertEquals(6, result.size()); + } + + // if we pass recognized levels in any order, alongside ignored unrecognized levels, we should see the above plus level aggregation + List levels = List.of("C", "A", "E"); + ImmutableCacheStatsHolder stats = statsHolder.getImmutableCacheStatsHolder(levels.toArray(new String[0])); + ToXContent.Params params = getLevelParams(levels); + XContentBuilder builder = XContentFactory.jsonBuilder(); + builder.startObject(); + 
stats.toXContent(builder, params); + builder.endObject(); + + String resultString = builder.toString(); + Map result = XContentHelper.convertToMap(MediaTypeRegistry.JSON.xContent(), resultString, true); + assertTotalStatsPresentInXContentResponse(result); + assertNotNull(result.get("A")); + assertEquals(7, result.size()); + } + + private void assertTotalStatsPresentInXContentResponse(Map result) { + // assert the total stats are present + assertNotEquals(0, (int) result.get(ImmutableCacheStats.Fields.SIZE_IN_BYTES)); + assertNotEquals(0, (int) result.get(ImmutableCacheStats.Fields.EVICTIONS)); + assertNotEquals(0, (int) result.get(ImmutableCacheStats.Fields.HIT_COUNT)); + assertNotEquals(0, (int) result.get(ImmutableCacheStats.Fields.MISS_COUNT)); + assertNotEquals(0, (int) result.get(ImmutableCacheStats.Fields.ITEM_COUNT)); + // assert the store name is present + assertEquals(storeName, (String) result.get(ImmutableCacheStatsHolder.STORE_NAME_FIELD)); + } + + private ToXContent.Params getLevelParams(List levels) { + Map paramMap = new HashMap<>(); + if (!levels.isEmpty()) { + paramMap.put("level", String.join(",", levels)); + } + return new ToXContent.MapParams(paramMap); + } + + public static Object getValueFromNestedXContentMap(Map xContentMap, List keys) { + Map current = xContentMap; + for (int i = 0; i < keys.size() - 1; i++) { + Object next = current.get(keys.get(i)); + if (next == null) { + return null; + } + current = (Map) next; + } + return current.get(keys.get(keys.size() - 1)); + } + + // Get a map from the list of dimension values to the corresponding leaf node. 
+ private Map, ImmutableCacheStatsHolder.Node> getAllLeafNodes(ImmutableCacheStatsHolder.Node root) { + Map, ImmutableCacheStatsHolder.Node> result = new HashMap<>(); + getAllLeafNodesHelper(result, root, new ArrayList<>()); + return result; + } + + private void getAllLeafNodesHelper( + Map, ImmutableCacheStatsHolder.Node> result, + ImmutableCacheStatsHolder.Node current, + List pathToCurrent + ) { + if (current.children.isEmpty()) { + result.put(pathToCurrent, current); + } else { + for (Map.Entry entry : current.children.entrySet()) { + List newPath = new ArrayList<>(pathToCurrent); + newPath.add(entry.getKey()); + getAllLeafNodesHelper(result, entry.getValue(), newPath); + } + } + } + private ImmutableCacheStatsHolder.Node getNode(List dimensionValues, ImmutableCacheStatsHolder.Node root) { ImmutableCacheStatsHolder.Node current = root; for (String dimensionValue : dimensionValues) { diff --git a/server/src/test/java/org/opensearch/common/cache/store/OpenSearchOnHeapCacheTests.java b/server/src/test/java/org/opensearch/common/cache/store/OpenSearchOnHeapCacheTests.java index 008dc7c2e0902..f227db6fee2d1 100644 --- a/server/src/test/java/org/opensearch/common/cache/store/OpenSearchOnHeapCacheTests.java +++ b/server/src/test/java/org/opensearch/common/cache/store/OpenSearchOnHeapCacheTests.java @@ -16,10 +16,12 @@ import org.opensearch.common.cache.RemovalListener; import org.opensearch.common.cache.RemovalNotification; import org.opensearch.common.cache.stats.ImmutableCacheStats; +import org.opensearch.common.cache.stats.ImmutableCacheStatsHolder; import org.opensearch.common.cache.store.config.CacheConfig; import org.opensearch.common.cache.store.settings.OpenSearchOnHeapCacheSettings; import org.opensearch.common.metrics.CounterMetric; import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.test.OpenSearchTestCase; import java.util.ArrayList; @@ -37,7 +39,9 @@ public void testStats() throws 
Exception { MockRemovalListener listener = new MockRemovalListener<>(); int maxKeys = between(10, 50); int numEvicted = between(10, 20); - OpenSearchOnHeapCache cache = getCache(maxKeys, listener); + OpenSearchOnHeapCache cache = getCache(maxKeys, listener, true, true); + + // When the pluggable caches setting is on, we should get stats as expected from cache.stats(). List> keysAdded = new ArrayList<>(); int numAdded = maxKeys + numEvicted; @@ -48,7 +52,7 @@ public void testStats() throws Exception { assertEquals(i + 1, cache.stats().getTotalMisses()); assertEquals(0, cache.stats().getTotalHits()); - assertEquals(Math.min(maxKeys, i + 1), cache.stats().getTotalEntries()); + assertEquals(Math.min(maxKeys, i + 1), cache.stats().getTotalItems()); assertEquals(Math.min(maxKeys, i + 1) * keyValueSize, cache.stats().getTotalSizeInBytes()); assertEquals(Math.max(0, i + 1 - maxKeys), cache.stats().getTotalEvictions()); } @@ -59,7 +63,7 @@ public void testStats() throws Exception { assertEquals(numAdded, cache.stats().getTotalMisses()); assertEquals(numHits, cache.stats().getTotalHits()); - assertEquals(maxKeys, cache.stats().getTotalEntries()); + assertEquals(maxKeys, cache.stats().getTotalItems()); assertEquals(maxKeys * keyValueSize, cache.stats().getTotalSizeInBytes()); assertEquals(numEvicted, cache.stats().getTotalEvictions()); } @@ -71,13 +75,46 @@ public void testStats() throws Exception { assertEquals(numAdded, cache.stats().getTotalMisses()); assertEquals(maxKeys, cache.stats().getTotalHits()); - assertEquals(maxKeys - numInvalidated, cache.stats().getTotalEntries()); + assertEquals(maxKeys - numInvalidated, cache.stats().getTotalItems()); assertEquals((maxKeys - numInvalidated) * keyValueSize, cache.stats().getTotalSizeInBytes()); assertEquals(numEvicted, cache.stats().getTotalEvictions()); } } - private OpenSearchOnHeapCache getCache(int maxSizeKeys, MockRemovalListener listener) { + public void testStatsWithoutPluggableCaches() throws Exception { + // When the 
pluggable caches setting is off, or when we manually set statsTrackingEnabled = false in the config, + // we should get all-zero stats from cache.stats(), but count() should still work. + MockRemovalListener listener = new MockRemovalListener<>(); + int maxKeys = between(10, 50); + int numEvicted = between(10, 20); + + OpenSearchOnHeapCache pluggableCachesOffCache = getCache(maxKeys, listener, false, true); + OpenSearchOnHeapCache manuallySetNoopStatsCache = getCache(maxKeys, listener, true, false); + List> caches = List.of(pluggableCachesOffCache, manuallySetNoopStatsCache); + + for (OpenSearchOnHeapCache cache : caches) { + int numAdded = maxKeys + numEvicted; + for (int i = 0; i < numAdded; i++) { + ICacheKey key = getICacheKey(UUID.randomUUID().toString()); + cache.computeIfAbsent(key, getLoadAwareCacheLoader()); + + assertEquals(Math.min(maxKeys, i + 1), cache.count()); + ImmutableCacheStatsHolder stats = cache.stats(); + assertZeroStats(cache.stats()); + } + } + } + + private void assertZeroStats(ImmutableCacheStatsHolder stats) { + assertEquals(new ImmutableCacheStats(0, 0, 0, 0, 0), stats.getTotalStats()); + } + + private OpenSearchOnHeapCache getCache( + int maxSizeKeys, + MockRemovalListener listener, + boolean pluggableCachesSetting, + boolean statsTrackingEnabled + ) { ICache.Factory onHeapCacheFactory = new OpenSearchOnHeapCache.OpenSearchOnHeapCacheFactory(); Settings settings = Settings.builder() .put( @@ -86,6 +123,7 @@ private OpenSearchOnHeapCache getCache(int maxSizeKeys, MockRemo .getKey(), maxSizeKeys * keyValueSize + "b" ) + .put(FeatureFlags.PLUGGABLE_CACHE, pluggableCachesSetting) .build(); CacheConfig cacheConfig = new CacheConfig.Builder().setKeyType(String.class) @@ -95,6 +133,7 @@ private OpenSearchOnHeapCache getCache(int maxSizeKeys, MockRemo .setSettings(settings) .setDimensionNames(dimensionNames) .setMaxSizeInBytes(maxSizeKeys * keyValueSize) + .setStatsTrackingEnabled(statsTrackingEnabled) .build(); return (OpenSearchOnHeapCache) 
onHeapCacheFactory.create(cacheConfig, CacheType.INDICES_REQUEST_CACHE, null); } @@ -102,7 +141,7 @@ private OpenSearchOnHeapCache getCache(int maxSizeKeys, MockRemo public void testInvalidateWithDropDimensions() throws Exception { MockRemovalListener listener = new MockRemovalListener<>(); int maxKeys = 50; - OpenSearchOnHeapCache cache = getCache(maxKeys, listener); + OpenSearchOnHeapCache cache = getCache(maxKeys, listener, true, true); List> keysAdded = new ArrayList<>(); @@ -113,8 +152,8 @@ public void testInvalidateWithDropDimensions() throws Exception { } ICacheKey keyToDrop = keysAdded.get(0); - - ImmutableCacheStats snapshot = cache.stats().getStatsForDimensionValues(keyToDrop.dimensions); + String[] levels = dimensionNames.toArray(new String[0]); + ImmutableCacheStats snapshot = cache.stats(levels).getStatsForDimensionValues(keyToDrop.dimensions); assertNotNull(snapshot); keyToDrop.setDropStatsForDimensions(true); @@ -122,7 +161,7 @@ public void testInvalidateWithDropDimensions() throws Exception { // Now assert the stats are gone for any key that has this combination of dimensions, but still there otherwise for (ICacheKey keyAdded : keysAdded) { - snapshot = cache.stats().getStatsForDimensionValues(keyAdded.dimensions); + snapshot = cache.stats(levels).getStatsForDimensionValues(keyAdded.dimensions); if (keyAdded.dimensions.equals(keyToDrop.dimensions)) { assertNull(snapshot); } else { diff --git a/server/src/test/java/org/opensearch/common/util/FeatureFlagTests.java b/server/src/test/java/org/opensearch/common/util/FeatureFlagTests.java index 88cb3782252b7..6b6c7b96266d3 100644 --- a/server/src/test/java/org/opensearch/common/util/FeatureFlagTests.java +++ b/server/src/test/java/org/opensearch/common/util/FeatureFlagTests.java @@ -39,12 +39,6 @@ public void testNonBooleanFeatureFlag() { assertFalse(FeatureFlags.isEnabled(javaVersionProperty)); } - public void testBooleanFeatureFlagWithDefaultSetToTrue() { - final String testFlag = 
DATETIME_FORMATTER_CACHING; - assertNotNull(testFlag); - assertTrue(FeatureFlags.isEnabled(testFlag)); - } - public void testBooleanFeatureFlagWithDefaultSetToFalse() { final String testFlag = IDENTITY; FeatureFlags.initializeFeatureFlags(Settings.EMPTY); @@ -52,17 +46,17 @@ public void testBooleanFeatureFlagWithDefaultSetToFalse() { assertFalse(FeatureFlags.isEnabled(testFlag)); } - public void testBooleanFeatureFlagInitializedWithEmptySettingsAndDefaultSetToTrue() { + public void testBooleanFeatureFlagInitializedWithEmptySettingsAndDefaultSetToFalse() { final String testFlag = DATETIME_FORMATTER_CACHING; FeatureFlags.initializeFeatureFlags(Settings.EMPTY); assertNotNull(testFlag); - assertTrue(FeatureFlags.isEnabled(testFlag)); + assertFalse(FeatureFlags.isEnabled(testFlag)); } public void testInitializeFeatureFlagsWithExperimentalSettings() { FeatureFlags.initializeFeatureFlags(Settings.builder().put(IDENTITY, true).build()); assertTrue(FeatureFlags.isEnabled(IDENTITY)); - assertTrue(FeatureFlags.isEnabled(DATETIME_FORMATTER_CACHING)); + assertFalse(FeatureFlags.isEnabled(DATETIME_FORMATTER_CACHING)); assertFalse(FeatureFlags.isEnabled(EXTENSIONS)); // reset FeatureFlags to defaults FeatureFlags.initializeFeatureFlags(Settings.EMPTY); diff --git a/server/src/test/java/org/opensearch/gateway/GatewayAllocatorTests.java b/server/src/test/java/org/opensearch/gateway/GatewayAllocatorTests.java new file mode 100644 index 0000000000000..bb59a5792ec8c --- /dev/null +++ b/server/src/test/java/org/opensearch/gateway/GatewayAllocatorTests.java @@ -0,0 +1,360 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.gateway; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.Version; +import org.opensearch.action.support.nodes.BaseNodeResponse; +import org.opensearch.cluster.ClusterInfo; +import org.opensearch.cluster.ClusterName; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.OpenSearchAllocationTestCase; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.routing.IndexShardRoutingTable; +import org.opensearch.cluster.routing.RoutingNodes; +import org.opensearch.cluster.routing.RoutingTable; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.cluster.routing.allocation.RoutingAllocation; +import org.opensearch.cluster.routing.allocation.decider.AllocationDeciders; +import org.opensearch.common.collect.Tuple; +import org.opensearch.common.settings.Settings; +import org.opensearch.core.index.shard.ShardId; +import org.opensearch.snapshots.SnapshotShardSizeInfo; +import org.opensearch.test.gateway.TestShardBatchGatewayAllocator; +import org.junit.Before; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +public class GatewayAllocatorTests extends OpenSearchAllocationTestCase { + + private final Logger logger = LogManager.getLogger(GatewayAllocatorTests.class); + TestShardBatchGatewayAllocator testShardsBatchGatewayAllocator = null; + ClusterState clusterState = null; + RoutingAllocation testAllocation = null; + String indexPrefix = "TEST"; + + @Override + @Before + public void setUp() throws Exception { + super.setUp(); + testShardsBatchGatewayAllocator = new TestShardBatchGatewayAllocator(); + } + + public void testSingleBatchCreation() { + createIndexAndUpdateClusterState(1, 3, 1); + 
createBatchesAndAssert(1); + } + + public void testTwoBatchCreation() { + createIndexAndUpdateClusterState(2, 1020, 1); + createBatchesAndAssert(2); + + List listOfBatches = new ArrayList<>( + testShardsBatchGatewayAllocator.getBatchIdToStartedShardBatch().values() + ); + assertNotEquals(listOfBatches.get(0), listOfBatches.get(1)); + + // test for replicas + listOfBatches = new ArrayList<>(testShardsBatchGatewayAllocator.getBatchIdToStoreShardBatch().values()); + assertNotEquals(listOfBatches.get(0), listOfBatches.get(1)); + } + + public void testNonDuplicationOfBatch() { + createIndexAndUpdateClusterState(1, 3, 1); + Tuple, Set> batches = createBatchesAndAssert(1); + assertEquals(1, batches.v1().size()); + assertEquals(1, batches.v2().size()); + + // again try to create batch and verify no new batch is created since shard is already batched and no new unassigned shard + assertEquals(batches.v1(), testShardsBatchGatewayAllocator.createAndUpdateBatches(testAllocation, true)); + assertEquals(batches.v2(), testShardsBatchGatewayAllocator.createAndUpdateBatches(testAllocation, false)); + } + + public void testCorrectnessOfBatch() { + createIndexAndUpdateClusterState(2, 1020, 1); + createBatchesAndAssert(2); + Set shardsSet1 = clusterState.routingTable() + .index(indexPrefix + 0) + .getShards() + .values() + .stream() + .map(IndexShardRoutingTable::getShardId) + .collect(Collectors.toSet()); + Set shardsSet2 = clusterState.routingTable() + .index(indexPrefix + 1) + .getShards() + .values() + .stream() + .map(IndexShardRoutingTable::getShardId) + .collect(Collectors.toSet()); + shardsSet1.addAll(shardsSet2); + + Set shardsInAllbatches = testShardsBatchGatewayAllocator.getBatchIdToStartedShardBatch() + .values() + .stream() + .map(ShardsBatchGatewayAllocator.ShardsBatch::getBatchedShards) + .flatMap(Set::stream) + .collect(Collectors.toSet()); + assertEquals(shardsInAllbatches, shardsSet1); + shardsInAllbatches = 
testShardsBatchGatewayAllocator.getBatchIdToStoreShardBatch() + .values() + .stream() + .map(ShardsBatchGatewayAllocator.ShardsBatch::getBatchedShards) + .flatMap(Set::stream) + .collect(Collectors.toSet()); + assertEquals(shardsInAllbatches, shardsSet1); + + Set primariesInAllBatches = testShardsBatchGatewayAllocator.getBatchIdToStartedShardBatch() + .values() + .stream() + .map(ShardsBatchGatewayAllocator.ShardsBatch::getBatchedShardRoutings) + .flatMap(List::stream) + .collect(Collectors.toSet()); + primariesInAllBatches.forEach(shardRouting -> assertTrue(shardRouting.unassigned() && shardRouting.primary() == true)); + + Set replicasInAllBatches = testShardsBatchGatewayAllocator.getBatchIdToStoreShardBatch() + .values() + .stream() + .map(ShardsBatchGatewayAllocator.ShardsBatch::getBatchedShardRoutings) + .flatMap(List::stream) + .collect(Collectors.toSet()); + + replicasInAllBatches.forEach(shardRouting -> assertTrue(shardRouting.unassigned() && shardRouting.primary() == false)); + } + + public void testAsyncFetcherCreationInBatch() { + createIndexAndUpdateClusterState(1, 3, 1); + Tuple, Set> batchesTuple = createBatchesAndAssert(1); + Set primaryBatches = batchesTuple.v1(); + Set replicaBatches = batchesTuple.v2(); + + ShardsBatchGatewayAllocator.ShardsBatch shardsBatch = testShardsBatchGatewayAllocator.getBatchIdToStartedShardBatch() + .get(primaryBatches.iterator().next()); + AsyncShardFetch asyncFetcher = shardsBatch.getAsyncFetcher(); + // assert asyncFetcher is not null + assertNotNull(asyncFetcher); + shardsBatch = testShardsBatchGatewayAllocator.getBatchIdToStoreShardBatch().get(replicaBatches.iterator().next()); + asyncFetcher = shardsBatch.getAsyncFetcher(); + assertNotNull(asyncFetcher); + } + + public void testSafelyRemoveShardFromBatch() { + createIndexAndUpdateClusterState(2, 1023, 1); + + Tuple, Set> batchesTuple = createBatchesAndAssert(2); + Set primaryBatches = batchesTuple.v1(); + Set replicaBatches = batchesTuple.v2(); + + 
ShardsBatchGatewayAllocator.ShardsBatch primaryShardsBatch = testShardsBatchGatewayAllocator.getBatchIdToStartedShardBatch() + .get(primaryBatches.iterator().next()); + ShardRouting primaryShardRouting = primaryShardsBatch.getBatchedShardRoutings().iterator().next(); + assertEquals(2, replicaBatches.size()); + ShardsBatchGatewayAllocator.ShardsBatch replicaShardsBatch = testShardsBatchGatewayAllocator.getBatchIdToStoreShardBatch() + .get(replicaBatches.iterator().next()); + ShardRouting replicaShardRouting = replicaShardsBatch.getBatchedShardRoutings().iterator().next(); + + // delete 1 shard routing from each batch + testShardsBatchGatewayAllocator.safelyRemoveShardFromBatch(primaryShardRouting); + + testShardsBatchGatewayAllocator.safelyRemoveShardFromBatch(replicaShardRouting); + // verify that shard routing is removed from both batches + assertFalse(primaryShardsBatch.getBatchedShards().contains(primaryShardRouting.shardId())); + assertFalse(replicaShardsBatch.getBatchedShards().contains(replicaShardRouting.shardId())); + + // try to remove that shard again to see if its no op and doent result in exception + testShardsBatchGatewayAllocator.safelyRemoveShardFromBatch(primaryShardRouting); + testShardsBatchGatewayAllocator.safelyRemoveShardFromBatch(replicaShardRouting); + + // now remove all shard routings to verify that batch only gets deleted + primaryShardsBatch.getBatchedShardRoutings().forEach(testShardsBatchGatewayAllocator::safelyRemoveShardFromBatch); + replicaShardsBatch.getBatchedShardRoutings().forEach(testShardsBatchGatewayAllocator::safelyRemoveShardFromBatch); + + assertFalse(testShardsBatchGatewayAllocator.getBatchIdToStartedShardBatch().containsKey(primaryShardsBatch.getBatchId())); + assertFalse(testShardsBatchGatewayAllocator.getBatchIdToStoreShardBatch().containsKey(replicaShardsBatch.getBatchId())); + assertEquals(1, testShardsBatchGatewayAllocator.getBatchIdToStartedShardBatch().size()); + assertEquals(1, 
testShardsBatchGatewayAllocator.getBatchIdToStoreShardBatch().size()); + } + + public void testSafelyRemoveShardFromBothBatch() { + createIndexAndUpdateClusterState(1, 3, 1); + createBatchesAndAssert(1); + ShardsBatchGatewayAllocator.ShardsBatch primaryShardsBatch = testShardsBatchGatewayAllocator.getBatchIdToStartedShardBatch() + .values() + .iterator() + .next(); + ShardsBatchGatewayAllocator.ShardsBatch replicaShardsBatch = testShardsBatchGatewayAllocator.getBatchIdToStoreShardBatch() + .values() + .iterator() + .next(); + + ShardRouting anyPrimary = primaryShardsBatch.getBatchedShardRoutings().iterator().next(); + // remove first shard routing from both batches + testShardsBatchGatewayAllocator.safelyRemoveShardFromBothBatch(anyPrimary); + + // verify that shard routing is removed from both batches + assertFalse(primaryShardsBatch.getBatchedShards().contains(anyPrimary.shardId())); + assertFalse(replicaShardsBatch.getBatchedShards().contains(anyPrimary.shardId())); + + // try to remove that shard again to see if its no op and doesnt result in exception + testShardsBatchGatewayAllocator.safelyRemoveShardFromBothBatch(anyPrimary); + + // now remove all shard routings to verify that batch gets deleted + primaryShardsBatch.getBatchedShardRoutings().forEach(testShardsBatchGatewayAllocator::safelyRemoveShardFromBothBatch); + replicaShardsBatch.getBatchedShardRoutings().forEach(testShardsBatchGatewayAllocator::safelyRemoveShardFromBothBatch); + + assertFalse(testShardsBatchGatewayAllocator.getBatchIdToStartedShardBatch().containsKey(primaryShardsBatch.getBatchId())); + assertFalse(testShardsBatchGatewayAllocator.getBatchIdToStoreShardBatch().containsKey(replicaShardsBatch.getBatchId())); + assertEquals(0, testShardsBatchGatewayAllocator.getBatchIdToStartedShardBatch().size()); + assertEquals(0, testShardsBatchGatewayAllocator.getBatchIdToStoreShardBatch().size()); + } + + public void testGetBatchIdExisting() { + createIndexAndUpdateClusterState(2, 1020, 1); + // get 
all shardsRoutings for test index + List allShardRoutings1 = clusterState.routingTable() + .index(indexPrefix + 0) + .getShards() + .values() + .stream() + .map(IndexShardRoutingTable::getShards) + .flatMap(List::stream) + .collect(Collectors.toList()); + List allShardRouting2 = clusterState.routingTable() + .index(indexPrefix + 1) + .getShards() + .values() + .stream() + .map(IndexShardRoutingTable::getShards) + .flatMap(List::stream) + .collect(Collectors.toList()); + + Tuple, Set> batchesTuple = createBatchesAndAssert(2); + Set primaryBatches = batchesTuple.v1(); + Set replicaBatches = batchesTuple.v2(); + + // create a map of shards to batch id for primaries + + Map shardIdToBatchIdForStartedShards = new HashMap<>(); + allShardRoutings1.addAll(allShardRouting2); + assertEquals(4080, allShardRoutings1.size()); + for (ShardRouting shardRouting : allShardRoutings1) { + for (String batchId : primaryBatches) { + if (shardRouting.primary() == true + && testShardsBatchGatewayAllocator.getBatchIdToStartedShardBatch() + .get(batchId) + .getBatchedShards() + .contains(shardRouting.shardId())) { + if (shardIdToBatchIdForStartedShards.containsKey(shardRouting.shardId())) { + fail("found duplicate shard routing for shard. One shard cant be in multiple batches " + shardRouting.shardId()); + } + assertTrue(shardRouting.primary()); + shardIdToBatchIdForStartedShards.put(shardRouting.shardId(), batchId); + } + } + } + Map shardIdToBatchIdForStoreShards = new HashMap<>(); + + for (ShardRouting shardRouting : allShardRoutings1) { + for (String batchId : replicaBatches) { + if (shardRouting.primary() == false + && testShardsBatchGatewayAllocator.getBatchIdToStoreShardBatch() + .get(batchId) + .getBatchedShards() + .contains(shardRouting.shardId())) { + if (shardIdToBatchIdForStoreShards.containsKey(shardRouting.shardId())) { + fail("found duplicate shard routing for shard. 
One shard cant be in multiple batches " + shardRouting.shardId()); + } + assertFalse(shardRouting.primary()); + shardIdToBatchIdForStoreShards.put(shardRouting.shardId(), batchId); + } + } + } + + assertEquals(4080, shardIdToBatchIdForStartedShards.size() + shardIdToBatchIdForStoreShards.size()); + // now compare the maps with getBatchId() call + for (ShardRouting shardRouting : allShardRoutings1) { + if (shardRouting.primary()) { + assertEquals( + shardIdToBatchIdForStartedShards.get(shardRouting.shardId()), + testShardsBatchGatewayAllocator.getBatchId(shardRouting, true) + ); + } else { + assertEquals( + shardIdToBatchIdForStoreShards.get(shardRouting.shardId()), + testShardsBatchGatewayAllocator.getBatchId(shardRouting, false) + ); + } + } + } + + public void testGetBatchIdNonExisting() { + createIndexAndUpdateClusterState(1, 1, 1); + List allShardRoutings = clusterState.routingTable() + .index(indexPrefix + 0) + .getShards() + .values() + .stream() + .map(IndexShardRoutingTable::getShards) + .flatMap(List::stream) + .collect(Collectors.toList()); + allShardRoutings.forEach(shard -> assertNull(testShardsBatchGatewayAllocator.getBatchId(shard, shard.primary()))); + } + + private void createIndexAndUpdateClusterState(int count, int numberOfShards, int numberOfReplicas) { + if (count == 0) return; + Metadata.Builder metadata = Metadata.builder(); + RoutingTable.Builder routingTableBuilder = RoutingTable.builder(); + for (int i = 0; i < count; i++) { + String indexName = indexPrefix + i; + metadata.put( + IndexMetadata.builder(indexName) + .settings(settings(Version.CURRENT)) + .numberOfShards(numberOfShards) + .numberOfReplicas(numberOfReplicas) + ); + } + for (int i = 0; i < count; i++) { + String indexName = indexPrefix + i; + routingTableBuilder = routingTableBuilder.addAsNew(metadata.build().index(indexName)); + } + clusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) + .metadata(metadata.build()) + 
.routingTable(routingTableBuilder.build()) + .build(); + testAllocation = new RoutingAllocation( + new AllocationDeciders(Collections.emptyList()), + new RoutingNodes(clusterState, false), + clusterState, + ClusterInfo.EMPTY, + SnapshotShardSizeInfo.EMPTY, + System.nanoTime() + ); + } + + // call this after index creation and update cluster state + private Tuple, Set> createBatchesAndAssert(int expectedBatchSize) { + Set primaryBatches = testShardsBatchGatewayAllocator.createAndUpdateBatches(testAllocation, true); + Set replicaBatches = testShardsBatchGatewayAllocator.createAndUpdateBatches(testAllocation, false); + assertEquals(expectedBatchSize, primaryBatches.size()); + assertEquals(expectedBatchSize, replicaBatches.size()); + assertEquals(expectedBatchSize, testShardsBatchGatewayAllocator.getBatchIdToStartedShardBatch().size()); + assertEquals(expectedBatchSize, testShardsBatchGatewayAllocator.getBatchIdToStoreShardBatch().size()); + assertEquals(testShardsBatchGatewayAllocator.getBatchIdToStartedShardBatch().keySet(), primaryBatches); + assertEquals(testShardsBatchGatewayAllocator.getBatchIdToStoreShardBatch().keySet(), replicaBatches); + return new Tuple<>(primaryBatches, replicaBatches); + } +} diff --git a/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java b/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java index 74bae7b5eb7cf..3ba98c44f8d3e 100644 --- a/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java +++ b/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java @@ -71,6 +71,7 @@ import org.opensearch.gateway.remote.RemotePersistenceStats; import org.opensearch.index.recovery.RemoteStoreRestoreService; import org.opensearch.index.recovery.RemoteStoreRestoreService.RemoteRestoreResult; +import org.opensearch.index.remote.RemoteIndexPathUploader; import org.opensearch.node.Node; import 
org.opensearch.repositories.RepositoriesService; import org.opensearch.repositories.fs.FsRepository; @@ -473,20 +474,23 @@ public void testDataOnlyNodePersistence() throws Exception { ); Supplier remoteClusterStateServiceSupplier = () -> { if (isRemoteStoreClusterStateEnabled(settings)) { + Supplier repositoriesServiceSupplier = () -> new RepositoriesService( + settings, + clusterService, + transportService, + Collections.emptyMap(), + Collections.emptyMap(), + transportService.getThreadPool() + ); + ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); return new RemoteClusterStateService( nodeEnvironment.nodeId(), - () -> new RepositoriesService( - settings, - clusterService, - transportService, - Collections.emptyMap(), - Collections.emptyMap(), - transportService.getThreadPool() - ), + repositoriesServiceSupplier, settings, - new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), + clusterSettings, () -> 0L, - threadPool + threadPool, + List.of(new RemoteIndexPathUploader(threadPool, settings, repositoriesServiceSupplier, clusterSettings)) ); } else { return null; diff --git a/server/src/test/java/org/opensearch/gateway/ShardBatchCacheTests.java b/server/src/test/java/org/opensearch/gateway/ShardBatchCacheTests.java index 1b42a31a4fd84..12030ad41d508 100644 --- a/server/src/test/java/org/opensearch/gateway/ShardBatchCacheTests.java +++ b/server/src/test/java/org/opensearch/gateway/ShardBatchCacheTests.java @@ -29,8 +29,7 @@ public class ShardBatchCacheTests extends OpenSearchAllocationTestCase { private static final String BATCH_ID = "b1"; private final DiscoveryNode node1 = newNode("node1"); private final DiscoveryNode node2 = newNode("node2"); - // Needs to be enabled once ShardsBatchGatewayAllocator is pushed - // private final Map batchInfo = new HashMap<>(); + private final Map batchInfo = new HashMap<>(); private AsyncShardBatchFetch.ShardBatchCache shardCache; private List 
shardsInBatch = new ArrayList<>(); private static final int NUMBER_OF_SHARDS_DEFAULT = 10; @@ -162,7 +161,7 @@ public void testShardsDataWithException() { null ); - // assertEquals(5, batchInfo.size()); + assertEquals(10, batchInfo.size()); assertEquals(2, fetchData.size()); assertEquals(10, fetchData.get(node1).getNodeGatewayStartedShardsBatch().size()); assertTrue(fetchData.get(node2).getNodeGatewayStartedShardsBatch().isEmpty()); @@ -210,10 +209,10 @@ private void fillShards(Map shardAttributesMap, int nu for (ShardId shardId : shardsInBatch) { ShardAttributes attr = new ShardAttributes(""); shardAttributesMap.put(shardId, attr); - // batchInfo.put( - // shardId, - // new ShardsBatchGatewayAllocator.ShardEntry(attr, randomShardRouting(shardId.getIndexName(), shardId.id())) - // ); + batchInfo.put( + shardId, + new ShardsBatchGatewayAllocator.ShardEntry(attr, randomShardRouting(shardId.getIndexName(), shardId.id())) + ); } } diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java index 65477051cdb30..9f321cd62847c 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java @@ -38,6 +38,7 @@ import org.opensearch.core.index.Index; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedIndexMetadata; +import org.opensearch.index.remote.RemoteIndexPathUploader; import org.opensearch.index.remote.RemoteStoreUtils; import org.opensearch.indices.IndicesModule; import org.opensearch.repositories.FilterRepository; @@ -154,7 +155,8 @@ public void setup() { settings, clusterSettings, () -> 0L, - threadPool + threadPool, + List.of(new RemoteIndexPathUploader(threadPool, settings, repositoriesServiceSupplier, clusterSettings)) ); } @@ -173,15 
+175,17 @@ public void testFailWriteFullMetadataNonClusterManagerNode() throws IOException public void testFailInitializationWhenRemoteStateDisabled() { final Settings settings = Settings.builder().build(); + ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); assertThrows( AssertionError.class, () -> new RemoteClusterStateService( "test-node-id", repositoriesServiceSupplier, settings, - new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), + clusterSettings, () -> 0L, - threadPool + threadPool, + List.of(new RemoteIndexPathUploader(threadPool, settings, repositoriesServiceSupplier, clusterSettings)) ) ); } diff --git a/server/src/test/java/org/opensearch/index/engine/EngineConfigTests.java b/server/src/test/java/org/opensearch/index/engine/EngineConfigTests.java index 9197b959a54f1..f42188d55bca3 100644 --- a/server/src/test/java/org/opensearch/index/engine/EngineConfigTests.java +++ b/server/src/test/java/org/opensearch/index/engine/EngineConfigTests.java @@ -32,6 +32,13 @@ public void setUp() throws Exception { defaultIndexSettings = IndexSettingsModule.newIndexSettings("test", defaultIndexMetadata.getSettings()); } + public void testEngineConfig_DefaultValueFoUseCompoundFile() { + EngineConfig config = new EngineConfig.Builder().indexSettings(defaultIndexSettings) + .retentionLeasesSupplier(() -> RetentionLeases.EMPTY) + .build(); + assertTrue(config.useCompoundFile()); + } + public void testEngineConfig_DefaultValueForReadOnlyEngine() { EngineConfig config = new EngineConfig.Builder().indexSettings(defaultIndexSettings) .retentionLeasesSupplier(() -> RetentionLeases.EMPTY) diff --git a/server/src/test/java/org/opensearch/index/engine/InternalEngineTests.java b/server/src/test/java/org/opensearch/index/engine/InternalEngineTests.java index cc927a19fd01a..54a562642d4ab 100644 --- a/server/src/test/java/org/opensearch/index/engine/InternalEngineTests.java +++ 
b/server/src/test/java/org/opensearch/index/engine/InternalEngineTests.java @@ -342,6 +342,58 @@ public void testVerboseSegments() throws Exception { } } + public void testSegmentsWithUseCompoundFileFlag_true() throws IOException { + try (Store store = createStore(); Engine engine = createEngine(defaultSettings, store, createTempDir(), new TieredMergePolicy())) { + ParsedDocument doc = testParsedDocument("1", null, testDocument(), B_1, null); + Engine.Index index = indexForDoc(doc); + engine.index(index); + engine.flush(); + final List segments = engine.segments(false); + assertThat(segments, hasSize(1)); + assertTrue(segments.get(0).compound); + boolean cfeCompoundFileFound = false; + boolean cfsCompoundFileFound = false; + for (final String fileName : store.readLastCommittedSegmentsInfo().files(true)) { + if (fileName.endsWith(".cfe")) { + cfeCompoundFileFound = true; + } + if (fileName.endsWith(".cfs")) { + cfsCompoundFileFound = true; + } + } + Assert.assertTrue(cfeCompoundFileFound); + Assert.assertTrue(cfsCompoundFileFound); + } + } + + public void testSegmentsWithUseCompoundFileFlag_false() throws IOException { + final IndexSettings indexSettings = IndexSettingsModule.newIndexSettings( + "test", + Settings.builder().put(defaultSettings.getSettings()).put(EngineConfig.INDEX_USE_COMPOUND_FILE.getKey(), false).build() + ); + try (Store store = createStore(); Engine engine = createEngine(indexSettings, store, createTempDir(), new TieredMergePolicy())) { + ParsedDocument doc = testParsedDocument("1", null, testDocument(), B_1, null); + Engine.Index index = indexForDoc(doc); + engine.index(index); + engine.flush(); + final List segments = engine.segments(false); + assertThat(segments, hasSize(1)); + assertFalse(segments.get(0).compound); + boolean cfeCompoundFileFound = false; + boolean cfsCompoundFileFound = false; + for (final String fileName : store.readLastCommittedSegmentsInfo().files(true)) { + if (fileName.endsWith(".cfe")) { + cfeCompoundFileFound = true; 
+ } + if (fileName.endsWith(".cfs")) { + cfsCompoundFileFound = true; + } + } + Assert.assertFalse(cfeCompoundFileFound); + Assert.assertFalse(cfsCompoundFileFound); + } + } + public void testSegmentsWithMergeFlag() throws Exception { try (Store store = createStore(); Engine engine = createEngine(defaultSettings, store, createTempDir(), new TieredMergePolicy())) { ParsedDocument doc = testParsedDocument("1", null, testDocument(), B_1, null); @@ -4002,7 +4054,7 @@ public void testRecoverFromForeignTranslog() throws IOException { final Path badTranslogLog = createTempDir(); final String badUUID = Translog.createEmptyTranslog(badTranslogLog, SequenceNumbers.NO_OPS_PERFORMED, shardId, primaryTerm.get()); Translog translog = new LocalTranslog( - new TranslogConfig(shardId, badTranslogLog, INDEX_SETTINGS, BigArrays.NON_RECYCLING_INSTANCE, ""), + new TranslogConfig(shardId, badTranslogLog, INDEX_SETTINGS, BigArrays.NON_RECYCLING_INSTANCE, "", false), badUUID, createTranslogDeletionPolicy(INDEX_SETTINGS), () -> SequenceNumbers.NO_OPS_PERFORMED, @@ -4020,7 +4072,8 @@ public void testRecoverFromForeignTranslog() throws IOException { translog.location(), config.getIndexSettings(), BigArrays.NON_RECYCLING_INSTANCE, - "" + "", + false ); EngineConfig brokenConfig = new EngineConfig.Builder().shardId(shardId) @@ -7714,7 +7767,8 @@ public void testNotWarmUpSearcherInEngineCtor() throws Exception { createTempDir(), config.getTranslogConfig().getIndexSettings(), config.getTranslogConfig().getBigArrays(), - "" + "", + false ); EngineConfig configWithWarmer = new EngineConfig.Builder().shardId(config.getShardId()) .threadPool(config.getThreadPool()) diff --git a/server/src/test/java/org/opensearch/index/mapper/BooleanFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/BooleanFieldMapperTests.java index 8dec03a353d16..5392bd6c358d3 100644 --- a/server/src/test/java/org/opensearch/index/mapper/BooleanFieldMapperTests.java +++ 
b/server/src/test/java/org/opensearch/index/mapper/BooleanFieldMapperTests.java @@ -209,4 +209,29 @@ public void testBoosts() throws Exception { assertEquals(new BoostQuery(new TermQuery(new Term("field", "T")), 2.0f), ft.termQuery("true", null)); assertParseMaximalWarnings(); } + + public void testIndexedValueForSearch() throws Exception { + assertEquals(new BooleanFieldMapper.BooleanFieldType("bool").indexedValueForSearch(null), BooleanFieldMapper.Values.FALSE); + + assertEquals(new BooleanFieldMapper.BooleanFieldType("bool").indexedValueForSearch(false), BooleanFieldMapper.Values.FALSE); + + assertEquals(new BooleanFieldMapper.BooleanFieldType("bool").indexedValueForSearch(true), BooleanFieldMapper.Values.TRUE); + + assertEquals( + new BooleanFieldMapper.BooleanFieldType("bool").indexedValueForSearch(new BytesRef("true")), + BooleanFieldMapper.Values.TRUE + ); + + assertEquals( + new BooleanFieldMapper.BooleanFieldType("bool").indexedValueForSearch(new BytesRef("false")), + BooleanFieldMapper.Values.FALSE + ); + + IllegalArgumentException e = expectThrows( + IllegalArgumentException.class, + () -> new BooleanFieldMapper.BooleanFieldType("bool").indexedValueForSearch(new BytesRef("random")) + ); + + assertEquals("Can't parse boolean value [random], expected [true] or [false]", e.getMessage()); + } } diff --git a/server/src/test/java/org/opensearch/index/mapper/BooleanFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/BooleanFieldTypeTests.java index 14092706411cb..aab63fc30efd7 100644 --- a/server/src/test/java/org/opensearch/index/mapper/BooleanFieldTypeTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/BooleanFieldTypeTests.java @@ -31,11 +31,18 @@ package org.opensearch.index.mapper; +import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.index.Term; +import org.apache.lucene.search.BoostQuery; +import org.apache.lucene.search.DocValuesFieldExistsQuery; +import 
org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.util.BytesRef; import java.io.IOException; +import java.util.ArrayList; import java.util.Collections; +import java.util.List; public class BooleanFieldTypeTests extends FieldTypeTestCase { @@ -59,9 +66,67 @@ public void testTermQuery() { assertEquals(new TermQuery(new Term("field", "T")), ft.termQuery("true", null)); assertEquals(new TermQuery(new Term("field", "F")), ft.termQuery("false", null)); - MappedFieldType unsearchable = new BooleanFieldMapper.BooleanFieldType("field", false); + MappedFieldType doc_ft = new BooleanFieldMapper.BooleanFieldType("field", false, true); + assertEquals(SortedNumericDocValuesField.newSlowExactQuery("field", 1), doc_ft.termQuery("true", null)); + assertEquals(SortedNumericDocValuesField.newSlowExactQuery("field", 0), doc_ft.termQuery("false", null)); + + MappedFieldType boost_ft = new BooleanFieldMapper.BooleanFieldType("field"); + boost_ft.setBoost(2f); + assertEquals(new BoostQuery(new TermQuery(new Term("field", "T")), 2f), boost_ft.termQuery("true", null)); + assertEquals(new BoostQuery(new TermQuery(new Term("field", "F")), 2f), boost_ft.termQuery("false", null)); + + MappedFieldType unsearchable = new BooleanFieldMapper.BooleanFieldType("field", false, false); IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> unsearchable.termQuery("true", null)); - assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage()); + assertEquals("Cannot search on field [field] since it is both not indexed, and does not have doc_values enabled.", e.getMessage()); + } + + public void testTermsQuery() { + MappedFieldType ft = new BooleanFieldMapper.BooleanFieldType("field"); + List terms = new ArrayList<>(); + terms.add(new BytesRef("true")); + terms.add(new BytesRef("false")); + assertEquals(new DocValuesFieldExistsQuery("field"), ft.termsQuery(terms, null)); + + List newTerms = 
new ArrayList<>(); + newTerms.add(new BytesRef("true")); + assertEquals(new TermQuery(new Term("field", "T")), ft.termsQuery(newTerms, null)); + + List incorrectTerms = new ArrayList<>(); + incorrectTerms.add(new BytesRef("true")); + incorrectTerms.add(new BytesRef("random")); + IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> ft.termsQuery(incorrectTerms, null)); + assertEquals("Can't parse boolean value [random], expected [true] or [false]", ex.getMessage()); + + MappedFieldType doc_only_ft = new BooleanFieldMapper.BooleanFieldType("field", false, true); + + assertEquals(SortedNumericDocValuesField.newSlowExactQuery("field", 1), doc_only_ft.termsQuery(newTerms, null)); + + MappedFieldType unsearchable = new BooleanFieldMapper.BooleanFieldType("field", false, false); + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> unsearchable.termsQuery(terms, null)); + assertEquals("Cannot search on field [field] since it is both not indexed, and does not have doc_values enabled.", e.getMessage()); + } + + public void testRangeQuery() { + BooleanFieldMapper.BooleanFieldType ft = new BooleanFieldMapper.BooleanFieldType("field"); + assertEquals(new DocValuesFieldExistsQuery("field"), ft.rangeQuery(false, true, true, true, null)); + + assertEquals(new TermQuery(new Term("field", "T")), ft.rangeQuery(false, true, false, true, null)); + + assertEquals(new TermQuery(new Term("field", "F")), ft.rangeQuery(false, true, true, false, null)); + + assertEquals(new MatchNoDocsQuery(), ft.rangeQuery(false, true, false, false, null)); + + assertEquals(new MatchNoDocsQuery(), ft.rangeQuery(false, true, false, false, null)); + + assertEquals(new TermQuery(new Term("field", "F")), ft.rangeQuery(false, false, true, true, null)); + + assertEquals(new TermQuery(new Term("field", "F")), ft.rangeQuery(null, false, true, true, null)); + + assertEquals(new DocValuesFieldExistsQuery("field"), ft.rangeQuery(false, null, true, true, null)); 
+ + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> ft.rangeQuery("random", null, true, true, null)); + + assertEquals("Can't parse boolean value [random], expected [true] or [false]", e.getMessage()); } public void testFetchSourceValue() throws IOException { diff --git a/server/src/test/java/org/opensearch/index/mapper/DateFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/DateFieldMapperTests.java index 054d3956596af..2aa310ae959d9 100644 --- a/server/src/test/java/org/opensearch/index/mapper/DateFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/DateFieldMapperTests.java @@ -35,6 +35,7 @@ import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexableField; import org.opensearch.common.time.DateFormatter; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.index.termvectors.TermVectorsService; import org.opensearch.search.DocValueFormat; @@ -45,8 +46,10 @@ import java.time.ZonedDateTime; import java.util.List; +import static org.hamcrest.CoreMatchers.is; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.notNullValue; +import static org.junit.Assume.assumeThat; public class DateFieldMapperTests extends MapperTestCase { @@ -146,7 +149,22 @@ public void testStore() throws Exception { assertEquals(1457654400000L, storedField.numericValue().longValue()); } + public void testIgnoreMalformedLegacy() throws IOException { + assumeThat("Using legacy datetime format as default", FeatureFlags.isEnabled(FeatureFlags.DATETIME_FORMATTER_CACHING), is(false)); + testIgnoreMalformedForValue( + "2016-03-99", + "failed to parse date field [2016-03-99] with format [strict_date_optional_time||epoch_millis]" + ); + testIgnoreMalformedForValue("-2147483648", "Invalid value for Year (valid values -999999999 - 999999999): -2147483648"); + 
testIgnoreMalformedForValue("-522000000", "long overflow"); + } + public void testIgnoreMalformed() throws IOException { + assumeThat( + "Using experimental datetime format as default", + FeatureFlags.isEnabled(FeatureFlags.DATETIME_FORMATTER_CACHING), + is(true) + ); testIgnoreMalformedForValue( "2016-03-99", "failed to parse date field [2016-03-99] with format [strict_date_time_no_millis||strict_date_optional_time||epoch_millis]" diff --git a/server/src/test/java/org/opensearch/index/mapper/DateFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/DateFieldTypeTests.java index ab53ae81ab0ce..db5e1e419de93 100644 --- a/server/src/test/java/org/opensearch/index/mapper/DateFieldTypeTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/DateFieldTypeTests.java @@ -216,14 +216,14 @@ public void testTermQuery() { "field", false, false, - true, + false, DateFieldMapper.getDefaultDateTimeFormatter(), Resolution.MILLISECONDS, null, Collections.emptyMap() ); IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> unsearchable.termQuery(date, context)); - assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage()); + assertEquals("Cannot search on field [field] since it is both not indexed, and does not have doc_values enabled.", e.getMessage()); } public void testRangeQuery() throws IOException { @@ -279,7 +279,7 @@ public void testRangeQuery() throws IOException { "field", false, false, - true, + false, DateFieldMapper.getDefaultDateTimeFormatter(), Resolution.MILLISECONDS, null, @@ -289,7 +289,7 @@ public void testRangeQuery() throws IOException { IllegalArgumentException.class, () -> unsearchable.rangeQuery(date1, date2, true, true, null, null, null, context) ); - assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage()); + assertEquals("Cannot search on field [field] since it is both not indexed, and does not have doc_values enabled.", e.getMessage()); } public 
void testRangeQueryWithIndexSort() { diff --git a/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldDataTests.java b/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldDataTests.java index e318ca5e953a3..03ce1eb7252de 100644 --- a/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldDataTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldDataTests.java @@ -54,6 +54,49 @@ public void testDocValue() throws Exception { assertEquals(1, valueReaders.size()); } + public void testLongFieldNameWithHashArray() throws Exception { + String mapping = XContentFactory.jsonBuilder() + .startObject() + .startObject("test") + .startObject("properties") + .startObject("field") + .field("type", FIELD_TYPE) + .endObject() + .endObject() + .endObject() + .endObject() + .toString(); + final DocumentMapper mapper = mapperService.documentMapperParser().parse("test", new CompressedXContent(mapping)); + + XContentBuilder json = XContentFactory.jsonBuilder() + .startObject() + .startObject("field") + .startObject("detail") + .startArray("fooooooooooo") + .startObject() + .field("name", "baz") + .endObject() + .startObject() + .field("name", "baz") + .endObject() + .endArray() + .endObject() + .endObject() + .endObject(); + + ParsedDocument d = mapper.parse(new SourceToParse("test", "1", BytesReference.bytes(json), MediaTypeRegistry.JSON)); + writer.addDocument(d.rootDoc()); + writer.commit(); + + IndexFieldData fieldData = getForField("field"); + List readers = refreshReader(); + assertEquals(1, readers.size()); + + IndexFieldData valueFieldData = getForField("field._value"); + List valueReaders = refreshReader(); + assertEquals(1, valueReaders.size()); + } + @Override protected String getFieldDataType() { return FIELD_TYPE; diff --git a/server/src/test/java/org/opensearch/index/mapper/RangeFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/RangeFieldMapperTests.java index 331bfb7b2ddf4..91eab942c499a 
100644 --- a/server/src/test/java/org/opensearch/index/mapper/RangeFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/RangeFieldMapperTests.java @@ -37,6 +37,7 @@ import org.apache.lucene.index.IndexableField; import org.opensearch.common.CheckedConsumer; import org.opensearch.common.network.InetAddresses; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.xcontent.XContentFactory; import org.opensearch.core.xcontent.ToXContent; import org.opensearch.core.xcontent.XContentBuilder; @@ -51,8 +52,10 @@ import static org.opensearch.index.query.RangeQueryBuilder.GT_FIELD; import static org.opensearch.index.query.RangeQueryBuilder.LTE_FIELD; import static org.opensearch.index.query.RangeQueryBuilder.LT_FIELD; +import static org.hamcrest.CoreMatchers.is; import static org.hamcrest.Matchers.anyOf; import static org.hamcrest.Matchers.containsString; +import static org.junit.Assume.assumeThat; public class RangeFieldMapperTests extends AbstractNumericFieldMapperTestCase { private static final String FROM_DATE = "2016-10-31"; @@ -351,7 +354,30 @@ public void testIllegalArguments() throws Exception { assertThat(e.getMessage(), containsString("should not define a dateTimeFormatter")); } + public void testSerializeDefaultsLegacy() throws Exception { + assumeThat("Using legacy datetime format as default", FeatureFlags.isEnabled(FeatureFlags.DATETIME_FORMATTER_CACHING), is(false)); + + for (String type : types()) { + DocumentMapper docMapper = createDocumentMapper(fieldMapping(b -> b.field("type", type))); + RangeFieldMapper mapper = (RangeFieldMapper) docMapper.root().getMapper("field"); + XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); + mapper.doXContentBody(builder, true, ToXContent.EMPTY_PARAMS); + String got = builder.endObject().toString(); + + // if type is date_range we check that the mapper contains the default format and locale + // otherwise it should not contain a locale or format + 
assertTrue(got, got.contains("\"format\":\"strict_date_optional_time||epoch_millis\"") == type.equals("date_range")); + assertTrue(got, got.contains("\"locale\":" + "\"" + Locale.ROOT + "\"") == type.equals("date_range")); + } + } + public void testSerializeDefaults() throws Exception { + assumeThat( + "Using experimental datetime format as default", + FeatureFlags.isEnabled(FeatureFlags.DATETIME_FORMATTER_CACHING), + is(true) + ); + for (String type : types()) { DocumentMapper docMapper = createDocumentMapper(fieldMapping(b -> b.field("type", type))); RangeFieldMapper mapper = (RangeFieldMapper) docMapper.root().getMapper("field"); diff --git a/server/src/test/java/org/opensearch/index/mapper/RangeFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/RangeFieldTypeTests.java index 00b48240d0567..49bf227e5073c 100644 --- a/server/src/test/java/org/opensearch/index/mapper/RangeFieldTypeTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/RangeFieldTypeTests.java @@ -51,6 +51,7 @@ import org.opensearch.common.settings.Settings; import org.opensearch.common.time.DateFormatter; import org.opensearch.common.util.BigArrays; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.index.IndexSettings; import org.opensearch.index.mapper.DateFieldMapper.DateFieldType; import org.opensearch.index.mapper.RangeFieldMapper.RangeFieldType; @@ -65,8 +66,10 @@ import java.util.Collections; import java.util.Map; +import static org.hamcrest.CoreMatchers.is; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.instanceOf; +import static org.junit.Assume.assumeThat; public class RangeFieldTypeTests extends FieldTypeTestCase { RangeType type; @@ -249,7 +252,49 @@ private QueryShardContext createContext() { ); } + public void testDateRangeQueryUsingMappingFormatLegacy() { + assumeThat("Using legacy datetime format as default", FeatureFlags.isEnabled(FeatureFlags.DATETIME_FORMATTER_CACHING), is(false)); + + 
QueryShardContext context = createContext(); + RangeFieldType strict = new RangeFieldType("field", RangeFieldMapper.Defaults.DATE_FORMATTER); + // don't use DISJOINT here because it doesn't work on date fields which we want to compare bounds with + ShapeRelation relation = randomValueOtherThan(ShapeRelation.DISJOINT, () -> randomFrom(ShapeRelation.values())); + + // dates will break the default format, month/day of month is turned around in the format + final String from = "2016-15-06T15:29:50+08:00"; + final String to = "2016-16-06T15:29:50+08:00"; + + OpenSearchParseException ex = expectThrows( + OpenSearchParseException.class, + () -> strict.rangeQuery(from, to, true, true, relation, null, null, context) + ); + assertThat( + ex.getMessage(), + containsString("failed to parse date field [2016-15-06T15:29:50+08:00] with format [strict_date_optional_time||epoch_millis]") + ); + + // setting mapping format which is compatible with those dates + final DateFormatter formatter = DateFormatter.forPattern("yyyy-dd-MM'T'HH:mm:ssZZZZZ"); + assertEquals(1465975790000L, formatter.parseMillis(from)); + assertEquals(1466062190000L, formatter.parseMillis(to)); + + RangeFieldType fieldType = new RangeFieldType("field", formatter); + final Query query = fieldType.rangeQuery(from, to, true, true, relation, null, fieldType.dateMathParser(), context); + assertEquals("field:", ((IndexOrDocValuesQuery) query).getIndexQuery().toString()); + + // compare lower and upper bounds with what we would get on a `date` field + DateFieldType dateFieldType = new DateFieldType("field", DateFieldMapper.Resolution.MILLISECONDS, formatter); + final Query queryOnDateField = dateFieldType.rangeQuery(from, to, true, true, relation, null, fieldType.dateMathParser(), context); + assertEquals("field:[1465975790000 TO 1466062190999]", ((IndexOrDocValuesQuery) queryOnDateField).getIndexQuery().toString()); + } + public void testDateRangeQueryUsingMappingFormat() { + assumeThat( + "Using experimental datetime 
format as default", + FeatureFlags.isEnabled(FeatureFlags.DATETIME_FORMATTER_CACHING), + is(true) + ); + QueryShardContext context = createContext(); RangeFieldType strict = new RangeFieldType("field", RangeFieldMapper.Defaults.DATE_FORMATTER); // don't use DISJOINT here because it doesn't work on date fields which we want to compare bounds with diff --git a/server/src/test/java/org/opensearch/index/query/TermsQueryBuilderTests.java b/server/src/test/java/org/opensearch/index/query/TermsQueryBuilderTests.java index 32bf290627b63..97f372dc04a1b 100644 --- a/server/src/test/java/org/opensearch/index/query/TermsQueryBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/query/TermsQueryBuilderTests.java @@ -34,11 +34,14 @@ import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.FieldExistsQuery; import org.apache.lucene.search.IndexOrDocValuesQuery; +import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.PointInSetQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermInSetQuery; +import org.apache.lucene.search.TermQuery; import org.apache.lucene.util.BytesRef; import org.opensearch.OpenSearchException; import org.opensearch.action.get.GetRequest; @@ -137,6 +140,9 @@ protected void doAssertLuceneQuery(TermsQueryBuilder queryBuilder, Query query, .or(instanceOf(ConstantScoreQuery.class)) .or(instanceOf(MatchNoDocsQuery.class)) .or(instanceOf(IndexOrDocValuesQuery.class)) + .or(instanceOf(MatchAllDocsQuery.class)) + .or(instanceOf(FieldExistsQuery.class)) + .or(instanceOf(TermQuery.class)) ); if (query instanceof ConstantScoreQuery) { assertThat(((ConstantScoreQuery) query).getQuery(), instanceOf(BooleanQuery.class)); diff --git a/server/src/test/java/org/opensearch/index/remote/RemoteIndexPathTests.java b/server/src/test/java/org/opensearch/index/remote/RemoteIndexPathTests.java 
new file mode 100644 index 0000000000000..8ddbd383756e7 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/remote/RemoteIndexPathTests.java @@ -0,0 +1,140 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.remote; + +import org.opensearch.common.blobstore.BlobPath; +import org.opensearch.core.xcontent.MediaTypeRegistry; +import org.opensearch.core.xcontent.ToXContent; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.index.remote.RemoteStoreEnums.PathHashAlgorithm; +import org.opensearch.index.remote.RemoteStoreEnums.PathType; +import org.opensearch.test.OpenSearchTestCase; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; + +import org.mockito.Mockito; + +import static org.opensearch.index.remote.RemoteStoreEnums.DataCategory.TRANSLOG; +import static org.opensearch.index.remote.RemoteStoreEnums.DataType.LOCK_FILES; + +public class RemoteIndexPathTests extends OpenSearchTestCase { + + /** + * This checks that the remote path contains paths only for segment and data/metadata/lock_files combination. 
+ */ + public void testToXContentWithSegmentRepo() throws IOException { + RemoteIndexPath indexPath = new RemoteIndexPath( + "djjsid73he8yd7usduh", + 2, + new BlobPath().add("djsd878ndjh").add("hcs87cj8"), + PathType.HASHED_PREFIX, + PathHashAlgorithm.FNV_1A_BASE64, + RemoteIndexPath.SEGMENT_PATH + ); + XContentBuilder xContentBuilder = MediaTypeRegistry.contentBuilder(MediaTypeRegistry.JSON); + xContentBuilder.startObject(); + xContentBuilder = indexPath.toXContent(xContentBuilder, ToXContent.EMPTY_PARAMS); + xContentBuilder.endObject(); + String expected = + "{\"version\":\"1\",\"index_uuid\":\"djjsid73he8yd7usduh\",\"shard_count\":2,\"path_type\":\"HASHED_PREFIX\",\"path_hash_algorithm\":\"FNV_1A_BASE64\",\"path_creation_map\":{\"segments\":[\"data\",\"metadata\",\"lock_files\"]},\"paths\":[\"9BmBinD5HYs/djsd878ndjh/hcs87cj8/djjsid73he8yd7usduh/0/segments/data/\",\"ExCNOD8_5ew/djsd878ndjh/hcs87cj8/djjsid73he8yd7usduh/1/segments/data/\",\"z8wtf0yr2l4/djsd878ndjh/hcs87cj8/djjsid73he8yd7usduh/0/segments/metadata/\",\"VheHVwFlExE/djsd878ndjh/hcs87cj8/djjsid73he8yd7usduh/1/segments/metadata/\",\"IgFKbsDeUpQ/djsd878ndjh/hcs87cj8/djjsid73he8yd7usduh/0/segments/lock_files/\",\"pA3gy_GZtns/djsd878ndjh/hcs87cj8/djjsid73he8yd7usduh/1/segments/lock_files/\"]}"; + assertEquals(expected, xContentBuilder.toString()); + } + + /** + * This checks that the remote path contains paths only for translog and data/metadata combination. 
+ */ + public void testToXContentForTranslogRepoOnly() throws IOException { + RemoteIndexPath indexPath = new RemoteIndexPath( + "djjsid73he8yd7usduh", + 2, + new BlobPath().add("djsd878ndjh").add("hcs87cj8"), + PathType.HASHED_PREFIX, + PathHashAlgorithm.FNV_1A_BASE64, + RemoteIndexPath.TRANSLOG_PATH + ); + XContentBuilder xContentBuilder = MediaTypeRegistry.contentBuilder(MediaTypeRegistry.JSON); + xContentBuilder.startObject(); + xContentBuilder = indexPath.toXContent(xContentBuilder, ToXContent.EMPTY_PARAMS); + xContentBuilder.endObject(); + String expected = + "{\"version\":\"1\",\"index_uuid\":\"djjsid73he8yd7usduh\",\"shard_count\":2,\"path_type\":\"HASHED_PREFIX\",\"path_hash_algorithm\":\"FNV_1A_BASE64\",\"path_creation_map\":{\"translog\":[\"data\",\"metadata\"]},\"paths\":[\"2EaVODaKBck/djsd878ndjh/hcs87cj8/djjsid73he8yd7usduh/0/translog/data/\",\"dTS2VqEOUNo/djsd878ndjh/hcs87cj8/djjsid73he8yd7usduh/1/translog/data/\",\"PVNKNGonmZw/djsd878ndjh/hcs87cj8/djjsid73he8yd7usduh/0/translog/metadata/\",\"NXmt0Y6NjA8/djsd878ndjh/hcs87cj8/djjsid73he8yd7usduh/1/translog/metadata/\"]}"; + assertEquals(expected, xContentBuilder.toString()); + } + + /** + * This checks that the remote path contains paths for both repos: segments with data/metadata/lock_files and translog with data/metadata. 
+ */ + public void testToXContentForBothRepos() throws IOException { + Map> pathCreationMap = new TreeMap<>(); + pathCreationMap.putAll(RemoteIndexPath.TRANSLOG_PATH); + pathCreationMap.putAll(RemoteIndexPath.SEGMENT_PATH); + RemoteIndexPath indexPath = new RemoteIndexPath( + "csbdqiu8a7sdnjdks", + 3, + new BlobPath().add("nxf9yv0").add("c3ejoi"), + PathType.HASHED_PREFIX, + PathHashAlgorithm.FNV_1A_BASE64, + pathCreationMap + ); + XContentBuilder xContentBuilder = MediaTypeRegistry.contentBuilder(MediaTypeRegistry.JSON); + xContentBuilder.startObject(); + xContentBuilder = indexPath.toXContent(xContentBuilder, ToXContent.EMPTY_PARAMS); + xContentBuilder.endObject(); + String expected = + "{\"version\":\"1\",\"index_uuid\":\"csbdqiu8a7sdnjdks\",\"shard_count\":3,\"path_type\":\"HASHED_PREFIX\",\"path_hash_algorithm\":\"FNV_1A_BASE64\",\"path_creation_map\":{\"translog\":[\"data\",\"metadata\"],\"segments\":[\"data\",\"metadata\",\"lock_files\"]},\"paths\":[\"Cjo0F6kNjYk/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/0/segments/data/\",\"kpayyhxct1I/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/1/segments/data/\",\"p2RlgnHeIgc/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/2/segments/data/\",\"gkPIurBtB1w/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/0/segments/metadata/\",\"Y4YhlbxAB1c/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/1/segments/metadata/\",\"HYc8fyVPouI/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/2/segments/metadata/\",\"igzyZCz1ysI/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/0/segments/lock_files/\",\"uEluEiYmptk/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/1/segments/lock_files/\",\"TfAD8f06_7A/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/2/segments/lock_files/\",\"QqKEpasbEGs/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/0/translog/data/\",\"sNyoimoe1Bw/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/1/translog/data/\",\"d4YQtONfq50/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/2/translog/data/\",\"zLr4UXjK8T4/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/0/translog/metadata/\",\"_s8i7ZmlXGE/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/1/translog/metadata/\",\"tvtD3-k5ISg/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/2/translog/metadata/
\"]}"; + assertEquals(expected, xContentBuilder.toString()); + } + + public void testRemoteIndexPathWithInvalidPathCreationMap() throws IOException { + IllegalArgumentException ex = assertThrows( + IllegalArgumentException.class, + () -> new RemoteIndexPath( + "djjsid73he8yd7usduh", + 2, + new BlobPath().add("djsd878ndjh").add("hcs87cj8"), + PathType.HASHED_PREFIX, + PathHashAlgorithm.FNV_1A_BASE64, + new HashMap<>() + ) + ); + assertEquals( + "Invalid input in RemoteIndexPath constructor indexUUID=djjsid73he8yd7usduh shardCount=2 " + + "basePath=[djsd878ndjh][hcs87cj8] pathType=HASHED_PREFIX pathHashAlgorithm=FNV_1A_BASE64 pathCreationMap={}", + ex.getMessage() + ); + } + + public void testFromXContent() { + UnsupportedOperationException ex = assertThrows( + UnsupportedOperationException.class, + () -> RemoteIndexPath.fromXContent(Mockito.mock(XContentParser.class)) + ); + assertEquals("RemoteIndexPath.fromXContent() is not supported", ex.getMessage()); + } + + public void testInvalidPathCreationMap() { + IllegalArgumentException ex = assertThrows( + IllegalArgumentException.class, + () -> new RemoteIndexPath( + "djjsid73he8yd7usduh", + 2, + new BlobPath().add("djsd878ndjh").add("hcs87cj8"), + PathType.HASHED_PREFIX, + PathHashAlgorithm.FNV_1A_BASE64, + Map.of(TRANSLOG, List.of(LOCK_FILES)) + ) + ); + assertEquals("pathCreationMap={TRANSLOG=[LOCK_FILES]} is having illegal combination of category and type", ex.getMessage()); + } +} diff --git a/server/src/test/java/org/opensearch/index/remote/RemoteIndexPathUploaderTests.java b/server/src/test/java/org/opensearch/index/remote/RemoteIndexPathUploaderTests.java new file mode 100644 index 0000000000000..e539b382a5f3b --- /dev/null +++ b/server/src/test/java/org/opensearch/index/remote/RemoteIndexPathUploaderTests.java @@ -0,0 +1,335 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible 
open source license. + */ + +package org.opensearch.index.remote; + +import org.opensearch.Version; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.SetOnce; +import org.opensearch.common.UUIDs; +import org.opensearch.common.blobstore.AsyncMultiStreamBlobContainer; +import org.opensearch.common.blobstore.BlobContainer; +import org.opensearch.common.blobstore.BlobPath; +import org.opensearch.common.blobstore.BlobStore; +import org.opensearch.common.compress.DeflateCompressor; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.core.action.ActionListener; +import org.opensearch.gateway.remote.RemoteClusterStateService; +import org.opensearch.gateway.remote.RemoteClusterStateService.RemoteStateTransferException; +import org.opensearch.index.remote.RemoteStoreEnums.PathHashAlgorithm; +import org.opensearch.index.remote.RemoteStoreEnums.PathType; +import org.opensearch.node.Node; +import org.opensearch.node.remotestore.RemoteStoreNodeAttribute; +import org.opensearch.repositories.RepositoriesService; +import org.opensearch.repositories.blobstore.BlobStoreRepository; +import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.threadpool.TestThreadPool; +import org.opensearch.threadpool.ThreadPool; +import org.junit.After; +import org.junit.Before; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.concurrent.atomic.AtomicLong; + +import org.mockito.Mockito; + +import static org.opensearch.index.remote.RemoteStoreEnums.PathType.FIXED; +import static org.opensearch.index.remote.RemoteStoreEnums.PathType.HASHED_INFIX; +import static org.opensearch.index.remote.RemoteStoreEnums.PathType.HASHED_PREFIX; +import static org.mockito.ArgumentMatchers.any; 
+import static org.mockito.ArgumentMatchers.anyBoolean; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +public class RemoteIndexPathUploaderTests extends OpenSearchTestCase { + + private static final String CLUSTER_STATE_REPO_KEY = Node.NODE_ATTRIBUTES.getKey() + + RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; + + private static final String TRANSLOG_REPO_NAME = "translog-repo"; + private static final String SEGMENT_REPO_NAME = "segment-repo"; + + private final ThreadPool threadPool = new TestThreadPool(getTestName()); + private Settings settings; + private ClusterSettings clusterSettings; + private RepositoriesService repositoriesService; + private BlobStoreRepository repository; + private BlobStore blobStore; + private BlobContainer blobContainer; + private BlobPath basePath; + private List indexMetadataList; + private final AtomicLong successCount = new AtomicLong(); + private final AtomicLong failureCount = new AtomicLong(); + + @Before + public void setup() { + settings = Settings.builder() + .put(RemoteIndexPathUploader.TRANSLOG_REPO_NAME_KEY, TRANSLOG_REPO_NAME) + .put(RemoteIndexPathUploader.SEGMENT_REPO_NAME_KEY, TRANSLOG_REPO_NAME) + .put(CLUSTER_STATE_REPO_KEY, TRANSLOG_REPO_NAME) + .put(RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), true) + .build(); + clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + basePath = BlobPath.cleanPath().add("test"); + repositoriesService = mock(RepositoriesService.class); + repository = mock(BlobStoreRepository.class); + when(repositoriesService.repository(anyString())).thenReturn(repository); + blobStore = mock(BlobStore.class); + when(repository.blobStore()).thenReturn(blobStore); + 
when(repositoriesService.repository(TRANSLOG_REPO_NAME)).thenReturn(repository); + when(repository.basePath()).thenReturn(basePath); + when(repository.getCompressor()).thenReturn(new DeflateCompressor()); + blobContainer = mock(BlobContainer.class); + when(blobStore.blobContainer(any(BlobPath.class))).thenReturn(blobContainer); + + Map remoteCustomData = Map.of( + PathType.NAME, + HASHED_PREFIX.name(), + PathHashAlgorithm.NAME, + PathHashAlgorithm.FNV_1A_BASE64.name() + ); + Settings idxSettings = Settings.builder() + .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT) + .put(IndexMetadata.SETTING_INDEX_UUID, UUIDs.randomBase64UUID()) + .build(); + IndexMetadata indexMetadata = new IndexMetadata.Builder("test").settings(idxSettings) + .numberOfShards(1) + .numberOfReplicas(0) + .putCustom(IndexMetadata.REMOTE_STORE_CUSTOM_KEY, remoteCustomData) + .build(); + indexMetadataList = List.of(indexMetadata); + } + + @After + public void tearDown() throws Exception { + super.tearDown(); + terminate(threadPool); + } + + public void testInterceptWithNoRemoteDataAttributes() { + Settings settings = Settings.Builder.EMPTY_SETTINGS; + clusterSettings.applySettings(settings); + RemoteIndexPathUploader remoteIndexPathUploader = new RemoteIndexPathUploader( + threadPool, + settings, + () -> repositoriesService, + clusterSettings + ); + List indexMetadataList = Mockito.mock(List.class); + ActionListener actionListener = ActionListener.wrap( + res -> successCount.incrementAndGet(), + ex -> failureCount.incrementAndGet() + ); + remoteIndexPathUploader.doOnUpload(indexMetadataList, Collections.emptyMap(), actionListener); + assertEquals(1, successCount.get()); + assertEquals(0, failureCount.get()); + verify(indexMetadataList, times(0)).stream(); + } + + public void testInterceptWithEmptyIndexMetadataList() { + RemoteIndexPathUploader remoteIndexPathUploader = new RemoteIndexPathUploader( + threadPool, + settings, + () -> repositoriesService, + clusterSettings + ); + 
remoteIndexPathUploader.start(); + ActionListener actionListener = ActionListener.wrap( + res -> successCount.incrementAndGet(), + ex -> failureCount.incrementAndGet() + ); + remoteIndexPathUploader.doOnUpload(Collections.emptyList(), Collections.emptyMap(), actionListener); + assertEquals(1, successCount.get()); + assertEquals(0, failureCount.get()); + } + + public void testInterceptWithEmptyEligibleIndexMetadataList() { + RemoteIndexPathUploader remoteIndexPathUploader = new RemoteIndexPathUploader( + threadPool, + settings, + () -> repositoriesService, + clusterSettings + ); + remoteIndexPathUploader.start(); + ActionListener actionListener = ActionListener.wrap( + res -> successCount.incrementAndGet(), + ex -> failureCount.incrementAndGet() + ); + + // Case 1 - Null remoteCustomData + List indexMetadataList = List.of(createIndexMetadata(null)); + remoteIndexPathUploader.doOnUpload(indexMetadataList, Collections.emptyMap(), actionListener); + assertEquals(1, successCount.get()); + assertEquals(0, failureCount.get()); + + // Case 2 - Empty remoteCustomData + assertThrows( + AssertionError.class, + () -> remoteIndexPathUploader.doOnUpload(List.of(createIndexMetadata(new HashMap<>())), Collections.emptyMap(), actionListener) + ); + assertEquals(1, successCount.get()); + assertEquals(0, failureCount.get()); + + // Case 3 - RemoteStoreEnums.PathType.NAME not in remoteCustomData map + assertThrows( + AssertionError.class, + () -> remoteIndexPathUploader.doOnUpload( + List.of(createIndexMetadata(Map.of("test", "test"))), + Collections.emptyMap(), + actionListener + ) + ); + assertEquals(1, successCount.get()); + assertEquals(0, failureCount.get()); + + // Case 4 - RemoteStoreEnums.PathType.NAME is not HASHED_PREFIX + String pathType = randomFrom(FIXED, HASHED_INFIX).name(); + String pathHashAlgorithm = FIXED.name().equals(pathType) ? 
null : randomFrom(PathHashAlgorithm.values()).name(); + Map remoteCustomData = new HashMap<>(); + remoteCustomData.put(PathType.NAME, pathType); + remoteCustomData.put(PathHashAlgorithm.NAME, pathHashAlgorithm); + indexMetadataList = List.of(createIndexMetadata(remoteCustomData)); + remoteIndexPathUploader.doOnUpload(indexMetadataList, Collections.emptyMap(), actionListener); + assertEquals(2, successCount.get()); + assertEquals(0, failureCount.get()); + } + + private IndexMetadata createIndexMetadata(Map remoteCustomData) { + IndexMetadata.Builder builder = IndexMetadata.builder("test") + .settings(Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)) + .numberOfShards(1) + .numberOfReplicas(0); + if (Objects.nonNull(remoteCustomData)) { + builder.putCustom(IndexMetadata.REMOTE_STORE_CUSTOM_KEY, remoteCustomData); + } + return builder.build(); + } + + public void testInterceptWithSameRepo() throws IOException { + RemoteIndexPathUploader remoteIndexPathUploader = new RemoteIndexPathUploader( + threadPool, + settings, + () -> repositoriesService, + clusterSettings + ); + remoteIndexPathUploader.start(); + ActionListener actionListener = ActionListener.wrap( + res -> successCount.incrementAndGet(), + ex -> failureCount.incrementAndGet() + ); + remoteIndexPathUploader.doOnUpload(indexMetadataList, Collections.emptyMap(), actionListener); + assertEquals(1, successCount.get()); + assertEquals(0, failureCount.get()); + verify(blobContainer, times(1)).writeBlob(anyString(), any(InputStream.class), anyLong(), anyBoolean()); + } + + public void testInterceptWithDifferentRepo() throws IOException { + Settings settings = Settings.builder() + .put(this.settings) + .put(RemoteIndexPathUploader.SEGMENT_REPO_NAME_KEY, SEGMENT_REPO_NAME) + .build(); + when(repositoriesService.repository(SEGMENT_REPO_NAME)).thenReturn(repository); + RemoteIndexPathUploader remoteIndexPathUploader = new RemoteIndexPathUploader( + threadPool, + settings, + () -> 
repositoriesService, + clusterSettings + ); + remoteIndexPathUploader.start(); + ActionListener actionListener = ActionListener.wrap( + res -> successCount.incrementAndGet(), + ex -> failureCount.incrementAndGet() + ); + remoteIndexPathUploader.doOnUpload(indexMetadataList, Collections.emptyMap(), actionListener); + assertEquals(1, successCount.get()); + assertEquals(0, failureCount.get()); + verify(blobContainer, times(2)).writeBlob(anyString(), any(InputStream.class), anyLong(), anyBoolean()); + } + + public void testInterceptWithLatchAwaitTimeout() throws IOException { + blobContainer = mock(AsyncMultiStreamBlobContainer.class); + when(blobStore.blobContainer(any(BlobPath.class))).thenReturn(blobContainer); + RemoteIndexPathUploader remoteIndexPathUploader = new RemoteIndexPathUploader( + threadPool, + settings, + () -> repositoriesService, + clusterSettings + ); + remoteIndexPathUploader.start(); + + Settings settings = Settings.builder() + .put(this.settings) + .put(RemoteClusterStateService.INDEX_METADATA_UPLOAD_TIMEOUT_SETTING.getKey(), TimeValue.ZERO) + .build(); + clusterSettings.applySettings(settings); + SetOnce exceptionSetOnce = new SetOnce<>(); + ActionListener actionListener = ActionListener.wrap(res -> successCount.incrementAndGet(), ex -> { + failureCount.incrementAndGet(); + exceptionSetOnce.set(ex); + }); + remoteIndexPathUploader.doOnUpload(indexMetadataList, Collections.emptyMap(), actionListener); + assertEquals(0, successCount.get()); + assertEquals(1, failureCount.get()); + assertTrue(exceptionSetOnce.get() instanceof RemoteStateTransferException); + assertTrue( + exceptionSetOnce.get().getMessage().contains("Timed out waiting while uploading remote index path file for indexes=[test/") + ); + verify(blobContainer, times(0)).writeBlob(anyString(), any(InputStream.class), anyLong(), anyBoolean()); + } + + public void testInterceptWithInterruptedExceptionDuringLatchAwait() throws Exception { + AsyncMultiStreamBlobContainer 
asyncMultiStreamBlobContainer = mock(AsyncMultiStreamBlobContainer.class); + when(blobStore.blobContainer(any(BlobPath.class))).thenReturn(asyncMultiStreamBlobContainer); + RemoteIndexPathUploader remoteIndexPathUploader = new RemoteIndexPathUploader( + threadPool, + settings, + () -> repositoriesService, + clusterSettings + ); + remoteIndexPathUploader.start(); + Settings settings = Settings.builder() + .put(this.settings) + .put(RemoteClusterStateService.INDEX_METADATA_UPLOAD_TIMEOUT_SETTING.getKey(), TimeValue.timeValueSeconds(1)) + .build(); + clusterSettings.applySettings(settings); + SetOnce exceptionSetOnce = new SetOnce<>(); + ActionListener actionListener = ActionListener.wrap(res -> successCount.incrementAndGet(), ex -> { + failureCount.incrementAndGet(); + exceptionSetOnce.set(ex); + }); + Thread thread = new Thread(() -> { + try { + remoteIndexPathUploader.onUpload(indexMetadataList, Collections.emptyMap(), actionListener); + } catch (Exception e) { + assertTrue(e instanceof InterruptedException); + assertEquals("sleep interrupted", e.getMessage()); + } + }); + thread.start(); + Thread.sleep(10); + thread.interrupt(); + + assertBusy(() -> { + assertEquals(0, successCount.get()); + assertEquals(1, failureCount.get()); + }); + } + +} diff --git a/server/src/test/java/org/opensearch/index/remote/RemoteMigrationIndexMetadataUpdaterTests.java b/server/src/test/java/org/opensearch/index/remote/RemoteMigrationIndexMetadataUpdaterTests.java new file mode 100644 index 0000000000000..d8220c93e4eeb --- /dev/null +++ b/server/src/test/java/org/opensearch/index/remote/RemoteMigrationIndexMetadataUpdaterTests.java @@ -0,0 +1,339 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.remote; + +import org.opensearch.Version; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.routing.IndexRoutingTable; +import org.opensearch.cluster.routing.IndexShardRoutingTable; +import org.opensearch.cluster.routing.RecoverySource; +import org.opensearch.cluster.routing.RoutingTable; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.cluster.routing.ShardRoutingState; +import org.opensearch.cluster.routing.TestShardRouting; +import org.opensearch.cluster.routing.UnassignedInfo; +import org.opensearch.common.settings.Settings; +import org.opensearch.core.index.Index; +import org.opensearch.core.index.shard.ShardId; +import org.opensearch.index.shard.IndexShardTestUtils; +import org.opensearch.indices.replication.common.ReplicationType; +import org.opensearch.test.OpenSearchTestCase; + +import java.io.IOException; +import java.util.Map; +import java.util.UUID; + +import static org.opensearch.cluster.metadata.IndexMetadata.REMOTE_STORE_CUSTOM_KEY; +import static org.opensearch.indices.RemoteStoreSettings.CLUSTER_REMOTE_STORE_PATH_HASH_ALGORITHM_SETTING; +import static org.opensearch.indices.RemoteStoreSettings.CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING; +import static org.mockito.Mockito.mock; + +public class RemoteMigrationIndexMetadataUpdaterTests extends OpenSearchTestCase { + private final String indexName = "test-index"; + + public void testMaybeAddRemoteIndexSettingsAllPrimariesAndReplicasOnRemote() throws IOException { + Metadata metadata = createIndexMetadataWithDocrepSettings(indexName); + IndexMetadata existingIndexMetadata = metadata.index(indexName); + IndexMetadata.Builder indexMetadataBuilder = IndexMetadata.builder(existingIndexMetadata); + long currentSettingsVersion = 
indexMetadataBuilder.settingsVersion(); + DiscoveryNode primaryNode = IndexShardTestUtils.getFakeRemoteEnabledNode("1"); + DiscoveryNode replicaNode = IndexShardTestUtils.getFakeRemoteEnabledNode("2"); + DiscoveryNodes allNodes = DiscoveryNodes.builder().add(primaryNode).add(replicaNode).build(); + RoutingTable routingTable = createRoutingTableAllShardsStarted(indexName, 1, 1, primaryNode, replicaNode); + RemoteMigrationIndexMetadataUpdater migrationIndexMetadataUpdater = new RemoteMigrationIndexMetadataUpdater( + allNodes, + routingTable, + existingIndexMetadata, + metadata.settings(), + logger + ); + migrationIndexMetadataUpdater.maybeAddRemoteIndexSettings(indexMetadataBuilder, indexName); + assertTrue(currentSettingsVersion < indexMetadataBuilder.settingsVersion()); + assertRemoteSettingsApplied(indexMetadataBuilder.build()); + } + + public void testMaybeAddRemoteIndexSettingsDoesNotRunWhenSettingsAlreadyPresent() throws IOException { + Metadata metadata = createIndexMetadataWithRemoteStoreSettings(indexName); + IndexMetadata existingIndexMetadata = metadata.index(indexName); + IndexMetadata.Builder indexMetadataBuilder = IndexMetadata.builder(existingIndexMetadata); + long currentSettingsVersion = indexMetadataBuilder.settingsVersion(); + DiscoveryNode primaryNode = IndexShardTestUtils.getFakeRemoteEnabledNode("1"); + DiscoveryNode replicaNode = IndexShardTestUtils.getFakeRemoteEnabledNode("2"); + DiscoveryNodes allNodes = DiscoveryNodes.builder().add(primaryNode).add(replicaNode).build(); + RoutingTable routingTable = createRoutingTableAllShardsStarted(indexName, 1, 1, primaryNode, replicaNode); + RemoteMigrationIndexMetadataUpdater migrationIndexMetadataUpdater = new RemoteMigrationIndexMetadataUpdater( + allNodes, + routingTable, + existingIndexMetadata, + metadata.settings(), + logger + ); + migrationIndexMetadataUpdater.maybeAddRemoteIndexSettings(indexMetadataBuilder, indexName); + assertEquals(currentSettingsVersion, 
indexMetadataBuilder.settingsVersion()); + } + + public void testMaybeAddRemoteIndexSettingsDoesNotUpdateSettingsWhenAllShardsInDocrep() throws IOException { + Metadata metadata = createIndexMetadataWithDocrepSettings(indexName); + IndexMetadata existingIndexMetadata = metadata.index(indexName); + IndexMetadata.Builder indexMetadataBuilder = IndexMetadata.builder(existingIndexMetadata); + long currentSettingsVersion = indexMetadataBuilder.settingsVersion(); + DiscoveryNode primaryNode = IndexShardTestUtils.getFakeDiscoNode("1"); + DiscoveryNode replicaNode = IndexShardTestUtils.getFakeDiscoNode("2"); + DiscoveryNodes allNodes = DiscoveryNodes.builder().add(primaryNode).add(replicaNode).build(); + RoutingTable routingTable = createRoutingTableAllShardsStarted(indexName, 1, 1, primaryNode, replicaNode); + RemoteMigrationIndexMetadataUpdater migrationIndexMetadataUpdater = new RemoteMigrationIndexMetadataUpdater( + allNodes, + routingTable, + existingIndexMetadata, + metadata.settings(), + logger + ); + migrationIndexMetadataUpdater.maybeAddRemoteIndexSettings(indexMetadataBuilder, indexName); + assertEquals(currentSettingsVersion, indexMetadataBuilder.settingsVersion()); + assertDocrepSettingsApplied(indexMetadataBuilder.build()); + } + + public void testMaybeAddRemoteIndexSettingsUpdatesIndexSettingsWithUnassignedReplicas() throws IOException { + Metadata metadata = createIndexMetadataWithDocrepSettings(indexName); + IndexMetadata existingIndexMetadata = metadata.index(indexName); + IndexMetadata.Builder indexMetadataBuilder = IndexMetadata.builder(existingIndexMetadata); + long currentSettingsVersion = indexMetadataBuilder.settingsVersion(); + DiscoveryNode primaryNode = IndexShardTestUtils.getFakeRemoteEnabledNode("1"); + DiscoveryNode replicaNode = IndexShardTestUtils.getFakeDiscoNode("2"); + DiscoveryNodes allNodes = DiscoveryNodes.builder().add(primaryNode).add(replicaNode).build(); + RoutingTable routingTable = createRoutingTableReplicasUnassigned(indexName, 
1, 1, primaryNode); + RemoteMigrationIndexMetadataUpdater migrationIndexMetadataUpdater = new RemoteMigrationIndexMetadataUpdater( + allNodes, + routingTable, + existingIndexMetadata, + metadata.settings(), + logger + ); + migrationIndexMetadataUpdater.maybeAddRemoteIndexSettings(indexMetadataBuilder, indexName); + assertTrue(currentSettingsVersion < indexMetadataBuilder.settingsVersion()); + assertRemoteSettingsApplied(indexMetadataBuilder.build()); + } + + public void testMaybeAddRemoteIndexSettingsDoesNotUpdateIndexSettingsWithRelocatingReplicas() throws IOException { + Metadata metadata = createIndexMetadataWithDocrepSettings(indexName); + IndexMetadata existingIndexMetadata = metadata.index(indexName); + IndexMetadata.Builder indexMetadataBuilder = IndexMetadata.builder(existingIndexMetadata); + long currentSettingsVersion = indexMetadataBuilder.settingsVersion(); + DiscoveryNode primaryNode = IndexShardTestUtils.getFakeRemoteEnabledNode("1"); + DiscoveryNode replicaNode = IndexShardTestUtils.getFakeDiscoNode("2"); + DiscoveryNode replicaRelocatingNode = IndexShardTestUtils.getFakeDiscoNode("3"); + DiscoveryNodes allNodes = DiscoveryNodes.builder().add(primaryNode).add(replicaNode).build(); + RoutingTable routingTable = createRoutingTableReplicasRelocating(indexName, 1, 1, primaryNode, replicaNode, replicaRelocatingNode); + RemoteMigrationIndexMetadataUpdater migrationIndexMetadataUpdater = new RemoteMigrationIndexMetadataUpdater( + allNodes, + routingTable, + existingIndexMetadata, + metadata.settings(), + logger + ); + migrationIndexMetadataUpdater.maybeAddRemoteIndexSettings(indexMetadataBuilder, indexName); + assertEquals(currentSettingsVersion, indexMetadataBuilder.settingsVersion()); + assertDocrepSettingsApplied(indexMetadataBuilder.build()); + } + + public void testMaybeUpdateRemoteStorePathStrategyExecutes() { + Metadata currentMetadata = createIndexMetadataWithDocrepSettings(indexName); + IndexMetadata existingIndexMetadata = 
currentMetadata.index(indexName); + IndexMetadata.Builder builder = IndexMetadata.builder(existingIndexMetadata); + DiscoveryNodes discoveryNodes = DiscoveryNodes.builder().add(IndexShardTestUtils.getFakeRemoteEnabledNode("1")).build(); + RemoteMigrationIndexMetadataUpdater migrationIndexMetadataUpdater = new RemoteMigrationIndexMetadataUpdater( + discoveryNodes, + mock(RoutingTable.class), + existingIndexMetadata, + Settings.builder() + .put( + CLUSTER_REMOTE_STORE_PATH_HASH_ALGORITHM_SETTING.getKey(), + RemoteStoreEnums.PathHashAlgorithm.FNV_1A_COMPOSITE_1.name() + ) + .put(CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING.getKey(), RemoteStoreEnums.PathType.HASHED_PREFIX.name()) + .build(), + logger + ); + migrationIndexMetadataUpdater.maybeUpdateRemoteStorePathStrategy(builder, indexName); + assertCustomPathMetadataIsPresent(builder.build()); + } + + public void testMaybeUpdateRemoteStorePathStrategyDoesNotExecute() { + Metadata currentMetadata = createIndexMetadataWithRemoteStoreSettings(indexName); + IndexMetadata existingIndexMetadata = currentMetadata.index(indexName); + IndexMetadata.Builder builder = IndexMetadata.builder(currentMetadata.index(indexName)); + DiscoveryNodes discoveryNodes = DiscoveryNodes.builder().add(IndexShardTestUtils.getFakeRemoteEnabledNode("1")).build(); + RemoteMigrationIndexMetadataUpdater migrationIndexMetadataUpdater = new RemoteMigrationIndexMetadataUpdater( + discoveryNodes, + mock(RoutingTable.class), + existingIndexMetadata, + Settings.builder() + .put( + CLUSTER_REMOTE_STORE_PATH_HASH_ALGORITHM_SETTING.getKey(), + RemoteStoreEnums.PathHashAlgorithm.FNV_1A_COMPOSITE_1.name() + ) + .put(CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING.getKey(), RemoteStoreEnums.PathType.HASHED_PREFIX.name()) + .build(), + logger + ); + + migrationIndexMetadataUpdater.maybeUpdateRemoteStorePathStrategy(builder, indexName); + + assertCustomPathMetadataIsPresent(builder.build()); + } + + private RoutingTable createRoutingTableAllShardsStarted( + String indexName, + 
int numberOfShards, + int numberOfReplicas, + DiscoveryNode primaryHostingNode, + DiscoveryNode replicaHostingNode + ) { + RoutingTable.Builder builder = RoutingTable.builder(); + Index index = new Index(indexName, UUID.randomUUID().toString()); + + IndexRoutingTable.Builder indexRoutingTableBuilder = IndexRoutingTable.builder(index); + for (int i = 0; i < numberOfShards; i++) { + ShardId shardId = new ShardId(index, i); + IndexShardRoutingTable.Builder indexShardRoutingTable = new IndexShardRoutingTable.Builder(shardId); + indexShardRoutingTable.addShard( + TestShardRouting.newShardRouting(shardId, primaryHostingNode.getId(), true, ShardRoutingState.STARTED) + ); + for (int j = 0; j < numberOfReplicas; j++) { + indexShardRoutingTable.addShard( + TestShardRouting.newShardRouting(shardId, replicaHostingNode.getId(), false, ShardRoutingState.STARTED) + ); + } + indexRoutingTableBuilder.addIndexShard(indexShardRoutingTable.build()); + } + return builder.add(indexRoutingTableBuilder.build()).build(); + } + + private RoutingTable createRoutingTableReplicasUnassigned( + String indexName, + int numberOfShards, + int numberOfReplicas, + DiscoveryNode primaryHostingNode + ) { + RoutingTable.Builder builder = RoutingTable.builder(); + Index index = new Index(indexName, UUID.randomUUID().toString()); + + IndexRoutingTable.Builder indexRoutingTableBuilder = IndexRoutingTable.builder(index); + for (int i = 0; i < numberOfShards; i++) { + ShardId shardId = new ShardId(index, i); + IndexShardRoutingTable.Builder indexShardRoutingTable = new IndexShardRoutingTable.Builder(shardId); + indexShardRoutingTable.addShard( + TestShardRouting.newShardRouting(shardId, primaryHostingNode.getId(), true, ShardRoutingState.STARTED) + ); + for (int j = 0; j < numberOfReplicas; j++) { + indexShardRoutingTable.addShard( + ShardRouting.newUnassigned( + shardId, + false, + RecoverySource.PeerRecoverySource.INSTANCE, + new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, null) + ) + ); + } + 
indexRoutingTableBuilder.addIndexShard(indexShardRoutingTable.build()); + } + return builder.add(indexRoutingTableBuilder.build()).build(); + } + + private RoutingTable createRoutingTableReplicasRelocating( + String indexName, + int numberOfShards, + int numberOfReplicas, + DiscoveryNode primaryHostingNodes, + DiscoveryNode replicaHostingNode, + DiscoveryNode replicaRelocatingNode + ) { + RoutingTable.Builder builder = RoutingTable.builder(); + Index index = new Index(indexName, UUID.randomUUID().toString()); + + IndexRoutingTable.Builder indexRoutingTableBuilder = IndexRoutingTable.builder(index); + for (int i = 0; i < numberOfShards; i++) { + ShardId shardId = new ShardId(index, i); + IndexShardRoutingTable.Builder indexShardRoutingTable = new IndexShardRoutingTable.Builder(shardId); + indexShardRoutingTable.addShard( + TestShardRouting.newShardRouting(shardId, primaryHostingNodes.getId(), true, ShardRoutingState.STARTED) + ); + for (int j = 0; j < numberOfReplicas; j++) { + indexShardRoutingTable.addShard( + TestShardRouting.newShardRouting( + shardId, + replicaHostingNode.getId(), + replicaRelocatingNode.getId(), + false, + ShardRoutingState.RELOCATING + ) + ); + } + indexRoutingTableBuilder.addIndexShard(indexShardRoutingTable.build()); + } + return builder.add(indexRoutingTableBuilder.build()).build(); + } + + public static Metadata createIndexMetadataWithRemoteStoreSettings(String indexName) { + IndexMetadata.Builder indexMetadata = IndexMetadata.builder(indexName); + indexMetadata.settings( + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) + .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT) + .put(IndexMetadata.INDEX_REMOTE_STORE_ENABLED_SETTING.getKey(), true) + .put(IndexMetadata.INDEX_REMOTE_TRANSLOG_REPOSITORY_SETTING.getKey(), "dummy-tlog-repo") + .put(IndexMetadata.INDEX_REMOTE_SEGMENT_STORE_REPOSITORY_SETTING.getKey(), "dummy-segment-repo") + 
.put(IndexMetadata.INDEX_REPLICATION_TYPE_SETTING.getKey(), "SEGMENT") + .build() + ) + .putCustom( + REMOTE_STORE_CUSTOM_KEY, + Map.of(RemoteStoreEnums.PathType.NAME, "dummy", RemoteStoreEnums.PathHashAlgorithm.NAME, "dummy") + ) + .build(); + return Metadata.builder().put(indexMetadata).build(); + } + + public static Metadata createIndexMetadataWithDocrepSettings(String indexName) { + IndexMetadata.Builder indexMetadata = IndexMetadata.builder(indexName); + indexMetadata.settings( + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) + .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT) + .put(IndexMetadata.INDEX_REPLICATION_TYPE_SETTING.getKey(), "DOCUMENT") + .build() + ).build(); + return Metadata.builder().put(indexMetadata).build(); + } + + private void assertRemoteSettingsApplied(IndexMetadata indexMetadata) { + assertTrue(IndexMetadata.INDEX_REMOTE_STORE_ENABLED_SETTING.get(indexMetadata.getSettings())); + assertTrue(IndexMetadata.INDEX_REMOTE_TRANSLOG_REPOSITORY_SETTING.exists(indexMetadata.getSettings())); + assertTrue(IndexMetadata.INDEX_REMOTE_SEGMENT_STORE_REPOSITORY_SETTING.exists(indexMetadata.getSettings())); + assertEquals(ReplicationType.SEGMENT, IndexMetadata.INDEX_REPLICATION_TYPE_SETTING.get(indexMetadata.getSettings())); + } + + private void assertDocrepSettingsApplied(IndexMetadata indexMetadata) { + assertFalse(IndexMetadata.INDEX_REMOTE_STORE_ENABLED_SETTING.get(indexMetadata.getSettings())); + assertFalse(IndexMetadata.INDEX_REMOTE_TRANSLOG_REPOSITORY_SETTING.exists(indexMetadata.getSettings())); + assertFalse(IndexMetadata.INDEX_REMOTE_SEGMENT_STORE_REPOSITORY_SETTING.exists(indexMetadata.getSettings())); + assertEquals(ReplicationType.DOCUMENT, IndexMetadata.INDEX_REPLICATION_TYPE_SETTING.get(indexMetadata.getSettings())); + } + + private void assertCustomPathMetadataIsPresent(IndexMetadata indexMetadata) { + 
assertNotNull(indexMetadata.getCustomData(REMOTE_STORE_CUSTOM_KEY)); + assertNotNull(indexMetadata.getCustomData(REMOTE_STORE_CUSTOM_KEY).get(RemoteStoreEnums.PathType.NAME)); + assertNotNull(indexMetadata.getCustomData(REMOTE_STORE_CUSTOM_KEY).get(RemoteStoreEnums.PathHashAlgorithm.NAME)); + } +} diff --git a/server/src/test/java/org/opensearch/index/remote/RemoteStorePathStrategyResolverTests.java b/server/src/test/java/org/opensearch/index/remote/RemoteStorePathStrategyResolverTests.java index d28ebc8c2e5da..de61c902bf13e 100644 --- a/server/src/test/java/org/opensearch/index/remote/RemoteStorePathStrategyResolverTests.java +++ b/server/src/test/java/org/opensearch/index/remote/RemoteStorePathStrategyResolverTests.java @@ -35,7 +35,7 @@ public void testGetMinVersionNewer() { Settings settings = Settings.builder().put(CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING.getKey(), pathType).build(); ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); RemoteStoreSettings remoteStoreSettings = new RemoteStoreSettings(settings, clusterSettings); - RemoteStorePathStrategyResolver resolver = new RemoteStorePathStrategyResolver(remoteStoreSettings, () -> Version.CURRENT); + RemoteStorePathStrategyResolver resolver = new RemoteStorePathStrategyResolver(remoteStoreSettings, () -> Version.V_2_14_0); assertEquals(pathType, resolver.get().getType()); if (pathType.requiresHashAlgorithm()) { assertNotNull(resolver.get().getHashAlgorithm()); @@ -49,7 +49,7 @@ public void testGetStrategy() { Settings settings = Settings.builder().put(CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING.getKey(), PathType.FIXED).build(); ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); RemoteStoreSettings remoteStoreSettings = new RemoteStoreSettings(settings, clusterSettings); - RemoteStorePathStrategyResolver resolver = new RemoteStorePathStrategyResolver(remoteStoreSettings, () -> Version.CURRENT); + 
RemoteStorePathStrategyResolver resolver = new RemoteStorePathStrategyResolver(remoteStoreSettings, () -> Version.V_2_14_0); assertEquals(PathType.FIXED, resolver.get().getType()); // FIXED type with hash algorithm @@ -59,14 +59,14 @@ public void testGetStrategy() { .build(); clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); remoteStoreSettings = new RemoteStoreSettings(settings, clusterSettings); - resolver = new RemoteStorePathStrategyResolver(remoteStoreSettings, () -> Version.CURRENT); + resolver = new RemoteStorePathStrategyResolver(remoteStoreSettings, () -> Version.V_2_14_0); assertEquals(PathType.FIXED, resolver.get().getType()); // HASHED_PREFIX type with FNV_1A_COMPOSITE settings = Settings.builder().put(CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING.getKey(), PathType.HASHED_PREFIX).build(); clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); remoteStoreSettings = new RemoteStoreSettings(settings, clusterSettings); - resolver = new RemoteStorePathStrategyResolver(remoteStoreSettings, () -> Version.CURRENT); + resolver = new RemoteStorePathStrategyResolver(remoteStoreSettings, () -> Version.V_2_14_0); assertEquals(PathType.HASHED_PREFIX, resolver.get().getType()); assertEquals(PathHashAlgorithm.FNV_1A_COMPOSITE_1, resolver.get().getHashAlgorithm()); @@ -74,7 +74,7 @@ public void testGetStrategy() { settings = Settings.builder().put(CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING.getKey(), PathType.HASHED_PREFIX).build(); clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); remoteStoreSettings = new RemoteStoreSettings(settings, clusterSettings); - resolver = new RemoteStorePathStrategyResolver(remoteStoreSettings, () -> Version.CURRENT); + resolver = new RemoteStorePathStrategyResolver(remoteStoreSettings, () -> Version.V_2_14_0); assertEquals(PathType.HASHED_PREFIX, resolver.get().getType()); assertEquals(PathHashAlgorithm.FNV_1A_COMPOSITE_1, 
resolver.get().getHashAlgorithm()); @@ -85,7 +85,7 @@ public void testGetStrategy() { .build(); clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); remoteStoreSettings = new RemoteStoreSettings(settings, clusterSettings); - resolver = new RemoteStorePathStrategyResolver(remoteStoreSettings, () -> Version.CURRENT); + resolver = new RemoteStorePathStrategyResolver(remoteStoreSettings, () -> Version.V_2_14_0); assertEquals(PathType.HASHED_PREFIX, resolver.get().getType()); assertEquals(PathHashAlgorithm.FNV_1A_BASE64, resolver.get().getHashAlgorithm()); @@ -96,7 +96,7 @@ public void testGetStrategy() { .build(); clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); remoteStoreSettings = new RemoteStoreSettings(settings, clusterSettings); - resolver = new RemoteStorePathStrategyResolver(remoteStoreSettings, () -> Version.CURRENT); + resolver = new RemoteStorePathStrategyResolver(remoteStoreSettings, () -> Version.V_2_14_0); assertEquals(PathType.HASHED_PREFIX, resolver.get().getType()); assertEquals(PathHashAlgorithm.FNV_1A_BASE64, resolver.get().getHashAlgorithm()); } @@ -107,7 +107,7 @@ public void testGetStrategyWithDynamicUpdate() { Settings settings = Settings.builder().build(); ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); RemoteStoreSettings remoteStoreSettings = new RemoteStoreSettings(settings, clusterSettings); - RemoteStorePathStrategyResolver resolver = new RemoteStorePathStrategyResolver(remoteStoreSettings, () -> Version.CURRENT); + RemoteStorePathStrategyResolver resolver = new RemoteStorePathStrategyResolver(remoteStoreSettings, () -> Version.V_2_14_0); assertEquals(PathType.FIXED, resolver.get().getType()); assertNull(resolver.get().getHashAlgorithm()); diff --git a/server/src/test/java/org/opensearch/index/remote/RemoteStoreUtilsTests.java 
b/server/src/test/java/org/opensearch/index/remote/RemoteStoreUtilsTests.java index 4d3e633848975..c1fc0cdaa0d3b 100644 --- a/server/src/test/java/org/opensearch/index/remote/RemoteStoreUtilsTests.java +++ b/server/src/test/java/org/opensearch/index/remote/RemoteStoreUtilsTests.java @@ -8,10 +8,13 @@ package org.opensearch.index.remote; +import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.common.blobstore.BlobMetadata; import org.opensearch.common.blobstore.support.PlainBlobMetadata; +import org.opensearch.index.shard.IndexShardTestUtils; import org.opensearch.index.store.RemoteSegmentStoreDirectory; import org.opensearch.index.translog.transfer.TranslogTransferMetadata; +import org.opensearch.node.remotestore.RemoteStoreNodeAttribute; import org.opensearch.test.OpenSearchTestCase; import java.math.BigInteger; @@ -28,6 +31,8 @@ import static org.opensearch.index.remote.RemoteStoreUtils.longToUrlBase64; import static org.opensearch.index.remote.RemoteStoreUtils.urlBase64ToLong; import static org.opensearch.index.remote.RemoteStoreUtils.verifyNoMultipleWriters; +import static org.opensearch.index.shard.IndexShardTestUtils.MOCK_SEGMENT_REPO_NAME; +import static org.opensearch.index.shard.IndexShardTestUtils.MOCK_TLOG_REPO_NAME; import static org.opensearch.index.store.RemoteSegmentStoreDirectory.MetadataFilenameUtils.METADATA_PREFIX; import static org.opensearch.index.store.RemoteSegmentStoreDirectory.MetadataFilenameUtils.SEPARATOR; import static org.opensearch.index.translog.transfer.TranslogTransferMetadata.METADATA_SEPARATOR; @@ -316,6 +321,19 @@ public void testLongToCompositeUrlBase64AndBinaryEncoding() { } } + public void testGetRemoteStoreRepoNameWithRemoteNodes() { + DiscoveryNodes discoveryNodes = DiscoveryNodes.builder().add(IndexShardTestUtils.getFakeRemoteEnabledNode("1")).build(); + Map expected = new HashMap<>(); + expected.put(RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, MOCK_SEGMENT_REPO_NAME); + 
expected.put(RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY, MOCK_TLOG_REPO_NAME); + assertEquals(expected, RemoteStoreUtils.getRemoteStoreRepoName(discoveryNodes)); + } + + public void testGetRemoteStoreRepoNameWithDocrepNdoes() { + DiscoveryNodes discoveryNodes = DiscoveryNodes.builder().add(IndexShardTestUtils.getFakeDiscoNode("1")).build(); + assertTrue(RemoteStoreUtils.getRemoteStoreRepoName(discoveryNodes).isEmpty()); + } + static long compositeUrlBase64BinaryEncodingToLong(String encodedValue) { char ch = encodedValue.charAt(0); int base64BitsIntValue = BASE64_CHARSET_IDX_MAP.get(ch); diff --git a/server/src/test/java/org/opensearch/index/shard/RefreshListenersTests.java b/server/src/test/java/org/opensearch/index/shard/RefreshListenersTests.java index a45b25f04060b..8a77fbca2915d 100644 --- a/server/src/test/java/org/opensearch/index/shard/RefreshListenersTests.java +++ b/server/src/test/java/org/opensearch/index/shard/RefreshListenersTests.java @@ -134,7 +134,8 @@ public void setupListeners() throws Exception { createTempDir("translog"), indexSettings, BigArrays.NON_RECYCLING_INSTANCE, - "" + "", + false ); Engine.EventListener eventListener = new Engine.EventListener() { @Override diff --git a/server/src/test/java/org/opensearch/index/shard/ReleasableRetryableRefreshListenerTests.java b/server/src/test/java/org/opensearch/index/shard/ReleasableRetryableRefreshListenerTests.java index a0641c365a2a1..e0ad09efac367 100644 --- a/server/src/test/java/org/opensearch/index/shard/ReleasableRetryableRefreshListenerTests.java +++ b/server/src/test/java/org/opensearch/index/shard/ReleasableRetryableRefreshListenerTests.java @@ -316,7 +316,32 @@ protected Logger getLogger() { public void testScheduleRetryAfterClose() throws Exception { // This tests that once the listener has been closed, even the retries would not be scheduled. 
final AtomicLong runCount = new AtomicLong(); - ReleasableRetryableRefreshListener testRefreshListener = new ReleasableRetryableRefreshListener(threadPool) { + ReleasableRetryableRefreshListener testRefreshListener = getRetryableRefreshListener(runCount); + Thread thread1 = new Thread(() -> { + try { + testRefreshListener.afterRefresh(true); + } catch (IOException e) { + throw new AssertionError(e); + } + }); + Thread thread2 = new Thread(() -> { + try { + Thread.sleep(500); + testRefreshListener.drainRefreshes(); + } catch (InterruptedException e) { + throw new AssertionError(e); + } + }); + thread1.start(); + thread2.start(); + thread1.join(); + thread2.join(); + assertBusy(() -> assertEquals(1, runCount.get())); + assertRefreshListenerClosed(testRefreshListener); + } + + private ReleasableRetryableRefreshListener getRetryableRefreshListener(AtomicLong runCount) { + return new ReleasableRetryableRefreshListener(threadPool) { @Override protected boolean performAfterRefreshWithPermit(boolean didRefresh) { try { @@ -341,6 +366,11 @@ protected String getRetryThreadPoolName() { return ThreadPool.Names.REMOTE_REFRESH_RETRY; } + @Override + protected boolean isRetryEnabled() { + return true; + } + @Override protected TimeValue getNextRetryInterval() { try { @@ -351,6 +381,12 @@ protected TimeValue getNextRetryInterval() { return TimeValue.timeValueMillis(100); } }; + } + + public void testScheduleRetryAfterThreadpoolShutdown() throws Exception { + // This tests that once the thread-pool is shut down, the exception is handled. 
+ final AtomicLong runCount = new AtomicLong(); + ReleasableRetryableRefreshListener testRefreshListener = getRetryableRefreshListener(runCount); Thread thread1 = new Thread(() -> { try { testRefreshListener.afterRefresh(true); @@ -361,7 +397,7 @@ protected TimeValue getNextRetryInterval() { Thread thread2 = new Thread(() -> { try { Thread.sleep(500); - testRefreshListener.drainRefreshes(); + threadPool.shutdown(); } catch (InterruptedException e) { throw new AssertionError(e); } @@ -371,7 +407,7 @@ protected TimeValue getNextRetryInterval() { thread1.join(); thread2.join(); assertBusy(() -> assertEquals(1, runCount.get())); - assertRefreshListenerClosed(testRefreshListener); + assertFalse(testRefreshListener.getRetryScheduledStatus()); } public void testConcurrentScheduleRetry() throws Exception { diff --git a/server/src/test/java/org/opensearch/index/store/RemoteDirectoryTests.java b/server/src/test/java/org/opensearch/index/store/RemoteDirectoryTests.java index 9e38e1749d434..ee81369725e6f 100644 --- a/server/src/test/java/org/opensearch/index/store/RemoteDirectoryTests.java +++ b/server/src/test/java/org/opensearch/index/store/RemoteDirectoryTests.java @@ -104,7 +104,8 @@ public void onResponse(Void t) { public void onFailure(Exception e) { fail("Listener responded with exception" + e); } - } + }, + false ); assertTrue(countDownLatch.await(10, TimeUnit.SECONDS)); assertTrue(postUploadInvoked.get()); @@ -141,7 +142,8 @@ public void onResponse(Void t) { public void onFailure(Exception e) { countDownLatch.countDown(); } - } + }, + false ); assertTrue(countDownLatch.await(10, TimeUnit.SECONDS)); assertFalse(postUploadInvoked.get()); diff --git a/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryTests.java b/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryTests.java index b1e2028d761f0..567199cf64cd8 100644 --- a/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryTests.java +++ 
b/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryTests.java @@ -639,7 +639,7 @@ public void onResponse(Void unused) { @Override public void onFailure(Exception e) {} }; - remoteSegmentStoreDirectory.copyFrom(storeDirectory, filename, IOContext.DEFAULT, completionListener); + remoteSegmentStoreDirectory.copyFrom(storeDirectory, filename, IOContext.DEFAULT, completionListener, false); assertTrue(latch.await(5000, TimeUnit.SECONDS)); assertTrue(remoteSegmentStoreDirectory.getSegmentsUploadedToRemoteStore().containsKey(filename)); storeDirectory.close(); @@ -683,7 +683,7 @@ public void onFailure(Exception e) { latch.countDown(); } }; - remoteSegmentStoreDirectory.copyFrom(storeDirectory, filename, IOContext.DEFAULT, completionListener); + remoteSegmentStoreDirectory.copyFrom(storeDirectory, filename, IOContext.DEFAULT, completionListener, false); assertTrue(latch.await(5000, TimeUnit.SECONDS)); assertFalse(remoteSegmentStoreDirectory.getSegmentsUploadedToRemoteStore().containsKey(filename)); diff --git a/server/src/test/java/org/opensearch/index/translog/InternalTranslogManagerTests.java b/server/src/test/java/org/opensearch/index/translog/InternalTranslogManagerTests.java index c098d11a3487f..c27d0367abf68 100644 --- a/server/src/test/java/org/opensearch/index/translog/InternalTranslogManagerTests.java +++ b/server/src/test/java/org/opensearch/index/translog/InternalTranslogManagerTests.java @@ -38,7 +38,7 @@ public void testRecoveryFromTranslog() throws IOException { LocalCheckpointTracker tracker = new LocalCheckpointTracker(NO_OPS_PERFORMED, NO_OPS_PERFORMED); try { translogManager = new InternalTranslogManager( - new TranslogConfig(shardId, primaryTranslogDir, INDEX_SETTINGS, BigArrays.NON_RECYCLING_INSTANCE, ""), + new TranslogConfig(shardId, primaryTranslogDir, INDEX_SETTINGS, BigArrays.NON_RECYCLING_INSTANCE, "", false), primaryTerm, globalCheckpoint::get, createTranslogDeletionPolicy(INDEX_SETTINGS), @@ -68,7 +68,7 @@ public void 
testRecoveryFromTranslog() throws IOException { translogManager.syncTranslog(); translogManager.close(); translogManager = new InternalTranslogManager( - new TranslogConfig(shardId, primaryTranslogDir, INDEX_SETTINGS, BigArrays.NON_RECYCLING_INSTANCE, ""), + new TranslogConfig(shardId, primaryTranslogDir, INDEX_SETTINGS, BigArrays.NON_RECYCLING_INSTANCE, "", false), primaryTerm, globalCheckpoint::get, createTranslogDeletionPolicy(INDEX_SETTINGS), @@ -117,7 +117,7 @@ public void testTranslogRollsGeneration() throws IOException { LocalCheckpointTracker tracker = new LocalCheckpointTracker(NO_OPS_PERFORMED, NO_OPS_PERFORMED); try { translogManager = new InternalTranslogManager( - new TranslogConfig(shardId, primaryTranslogDir, INDEX_SETTINGS, BigArrays.NON_RECYCLING_INSTANCE, ""), + new TranslogConfig(shardId, primaryTranslogDir, INDEX_SETTINGS, BigArrays.NON_RECYCLING_INSTANCE, "", false), primaryTerm, globalCheckpoint::get, createTranslogDeletionPolicy(INDEX_SETTINGS), @@ -147,7 +147,7 @@ public void testTranslogRollsGeneration() throws IOException { translogManager.syncTranslog(); translogManager.close(); translogManager = new InternalTranslogManager( - new TranslogConfig(shardId, primaryTranslogDir, INDEX_SETTINGS, BigArrays.NON_RECYCLING_INSTANCE, ""), + new TranslogConfig(shardId, primaryTranslogDir, INDEX_SETTINGS, BigArrays.NON_RECYCLING_INSTANCE, "", false), primaryTerm, globalCheckpoint::get, createTranslogDeletionPolicy(INDEX_SETTINGS), @@ -182,7 +182,7 @@ public void testTrimOperationsFromTranslog() throws IOException { LocalCheckpointTracker tracker = new LocalCheckpointTracker(NO_OPS_PERFORMED, NO_OPS_PERFORMED); try { translogManager = new InternalTranslogManager( - new TranslogConfig(shardId, primaryTranslogDir, INDEX_SETTINGS, BigArrays.NON_RECYCLING_INSTANCE, ""), + new TranslogConfig(shardId, primaryTranslogDir, INDEX_SETTINGS, BigArrays.NON_RECYCLING_INSTANCE, "", false), primaryTerm, globalCheckpoint::get, 
createTranslogDeletionPolicy(INDEX_SETTINGS), @@ -214,7 +214,7 @@ public void testTrimOperationsFromTranslog() throws IOException { translogManager.close(); translogManager = new InternalTranslogManager( - new TranslogConfig(shardId, primaryTranslogDir, INDEX_SETTINGS, BigArrays.NON_RECYCLING_INSTANCE, ""), + new TranslogConfig(shardId, primaryTranslogDir, INDEX_SETTINGS, BigArrays.NON_RECYCLING_INSTANCE, "", false), primaryTerm, globalCheckpoint::get, createTranslogDeletionPolicy(INDEX_SETTINGS), @@ -253,7 +253,7 @@ public void testTranslogSync() throws IOException { ParsedDocument doc = testParsedDocument("1", null, testDocumentWithTextField(), B_1, null); AtomicReference translogManagerAtomicReference = new AtomicReference<>(); translogManager = new InternalTranslogManager( - new TranslogConfig(shardId, primaryTranslogDir, INDEX_SETTINGS, BigArrays.NON_RECYCLING_INSTANCE, ""), + new TranslogConfig(shardId, primaryTranslogDir, INDEX_SETTINGS, BigArrays.NON_RECYCLING_INSTANCE, "", false), primaryTerm, globalCheckpoint::get, createTranslogDeletionPolicy(INDEX_SETTINGS), diff --git a/server/src/test/java/org/opensearch/index/translog/LocalTranslogTests.java b/server/src/test/java/org/opensearch/index/translog/LocalTranslogTests.java index 4997067b75198..cae27d5b259c4 100644 --- a/server/src/test/java/org/opensearch/index/translog/LocalTranslogTests.java +++ b/server/src/test/java/org/opensearch/index/translog/LocalTranslogTests.java @@ -291,7 +291,7 @@ private TranslogConfig getTranslogConfig(final Path path, final Settings setting ); final IndexSettings indexSettings = IndexSettingsModule.newIndexSettings(shardId.getIndex(), settings); - return new TranslogConfig(shardId, path, indexSettings, NON_RECYCLING_INSTANCE, bufferSize, ""); + return new TranslogConfig(shardId, path, indexSettings, NON_RECYCLING_INSTANCE, bufferSize, "", false); } private Location addToTranslogAndList(Translog translog, List list, Translog.Operation op) throws IOException { @@ -1453,7 
+1453,8 @@ public void testTranslogWriterCanFlushInAddOrReadCall() throws IOException { temp.getIndexSettings(), temp.getBigArrays(), new ByteSizeValue(1, ByteSizeUnit.KB), - "" + "", + false ); final Set persistedSeqNos = new HashSet<>(); @@ -1552,7 +1553,8 @@ public void testTranslogWriterFsyncedWithLocalTranslog() throws IOException { temp.getIndexSettings(), temp.getBigArrays(), new ByteSizeValue(1, ByteSizeUnit.KB), - "" + "", + false ); final Set persistedSeqNos = new HashSet<>(); diff --git a/server/src/test/java/org/opensearch/index/translog/RemoteFsTranslogTests.java b/server/src/test/java/org/opensearch/index/translog/RemoteFsTranslogTests.java index 28979a3dc4f28..6bf35cc1eac9b 100644 --- a/server/src/test/java/org/opensearch/index/translog/RemoteFsTranslogTests.java +++ b/server/src/test/java/org/opensearch/index/translog/RemoteFsTranslogTests.java @@ -224,7 +224,7 @@ private TranslogConfig getTranslogConfig(final Path path, final Settings setting // To simulate that the node is remote backed Settings nodeSettings = Settings.builder().put("node.attr.remote_store.translog.repository", "my-repo-1").build(); final IndexSettings indexSettings = IndexSettingsModule.newIndexSettings(shardId.getIndex(), settings, nodeSettings); - return new TranslogConfig(shardId, path, indexSettings, NON_RECYCLING_INSTANCE, bufferSize, ""); + return new TranslogConfig(shardId, path, indexSettings, NON_RECYCLING_INSTANCE, bufferSize, "", false); } private BlobStoreRepository createRepository() { @@ -399,7 +399,8 @@ private TranslogConfig getConfig(int gensToKeep) { temp.getIndexSettings(), temp.getBigArrays(), new ByteSizeValue(1, ByteSizeUnit.KB), - "" + "", + false ); return config; } @@ -1561,7 +1562,8 @@ public void testTranslogWriterFsyncDisabledInRemoteFsTranslog() throws IOExcepti temp.getIndexSettings(), temp.getBigArrays(), new ByteSizeValue(1, ByteSizeUnit.KB), - "" + "", + false ); final Set persistedSeqNos = new HashSet<>(); @@ -1692,7 +1694,7 @@ public void 
testDownloadWithRetries() throws IOException { // Always File not found when(mockTransfer.downloadTranslog(any(), any(), any())).thenThrow(new NoSuchFileException("File not found")); TranslogTransferManager finalMockTransfer = mockTransfer; - assertThrows(NoSuchFileException.class, () -> RemoteFsTranslog.download(finalMockTransfer, location, logger)); + assertThrows(NoSuchFileException.class, () -> RemoteFsTranslog.download(finalMockTransfer, location, logger, false)); // File not found in first attempt . File found in second attempt. mockTransfer = mock(TranslogTransferManager.class); @@ -1713,7 +1715,7 @@ public void testDownloadWithRetries() throws IOException { }).when(mockTransfer).downloadTranslog(any(), any(), any()); // no exception thrown - RemoteFsTranslog.download(mockTransfer, location, logger); + RemoteFsTranslog.download(mockTransfer, location, logger, false); } // No translog data in local as well as remote, we skip creating empty translog @@ -1726,7 +1728,7 @@ public void testDownloadWithNoTranslogInLocalAndRemote() throws IOException { when(mockTransfer.getRemoteTranslogTransferTracker()).thenReturn(remoteTranslogTransferTracker); Path[] filesBeforeDownload = FileSystemUtils.files(location); - RemoteFsTranslog.download(mockTransfer, location, logger); + RemoteFsTranslog.download(mockTransfer, location, logger, false); assertEquals(filesBeforeDownload, FileSystemUtils.files(location)); } @@ -1746,7 +1748,7 @@ public void testDownloadWithTranslogOnlyInLocal() throws IOException { Checkpoint existingCheckpoint = Translog.readCheckpoint(location); TranslogTransferManager finalMockTransfer = mockTransfer; - RemoteFsTranslog.download(finalMockTransfer, location, logger); + RemoteFsTranslog.download(finalMockTransfer, location, logger, false); Path[] filesPostDownload = FileSystemUtils.files(location); assertEquals(2, filesPostDownload.length); @@ -1782,11 +1784,11 @@ public void testDownloadWithEmptyTranslogOnlyInLocal() throws IOException { 
TranslogTransferManager finalMockTransfer = mockTransfer; // download first time will ensure creating empty translog - RemoteFsTranslog.download(finalMockTransfer, location, logger); + RemoteFsTranslog.download(finalMockTransfer, location, logger, false); Path[] filesPostFirstDownload = FileSystemUtils.files(location); // download on empty translog should be a no-op - RemoteFsTranslog.download(finalMockTransfer, location, logger); + RemoteFsTranslog.download(finalMockTransfer, location, logger, false); Path[] filesPostSecondDownload = FileSystemUtils.files(location); assertArrayEquals(filesPostFirstDownload, filesPostSecondDownload); diff --git a/server/src/test/java/org/opensearch/index/translog/TranslogManagerTestCase.java b/server/src/test/java/org/opensearch/index/translog/TranslogManagerTestCase.java index e17d2770f014a..a16e607bbf37a 100644 --- a/server/src/test/java/org/opensearch/index/translog/TranslogManagerTestCase.java +++ b/server/src/test/java/org/opensearch/index/translog/TranslogManagerTestCase.java @@ -74,7 +74,14 @@ protected Translog createTranslog(LongSupplier primaryTermSupplier) throws IOExc } protected Translog createTranslog(Path translogPath, LongSupplier primaryTermSupplier) throws IOException { - TranslogConfig translogConfig = new TranslogConfig(shardId, translogPath, INDEX_SETTINGS, BigArrays.NON_RECYCLING_INSTANCE, ""); + TranslogConfig translogConfig = new TranslogConfig( + shardId, + translogPath, + INDEX_SETTINGS, + BigArrays.NON_RECYCLING_INSTANCE, + "", + false + ); String translogUUID = Translog.createEmptyTranslog( translogPath, SequenceNumbers.NO_OPS_PERFORMED, diff --git a/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java b/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java index 81ae479d018b0..8b3fc6651a505 100644 --- a/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java +++ 
b/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java @@ -582,9 +582,7 @@ private void assertNoDownloadStats(boolean nonZeroUploadTime) { assertEquals(0, remoteTranslogTransferTracker.getDownloadBytesSucceeded()); assertEquals(0, remoteTranslogTransferTracker.getTotalDownloadsSucceeded()); assertEquals(0, remoteTranslogTransferTracker.getLastSuccessfulDownloadTimestamp()); - if (nonZeroUploadTime) { - assertNotEquals(0, remoteTranslogTransferTracker.getTotalDownloadTimeInMillis()); - } else { + if (nonZeroUploadTime == false) { assertEquals(0, remoteTranslogTransferTracker.getTotalDownloadTimeInMillis()); } } diff --git a/server/src/test/java/org/opensearch/indices/IRCKeyWriteableSerializerTests.java b/server/src/test/java/org/opensearch/indices/IRCKeyWriteableSerializerTests.java index af657dadd7a1a..fb5c0a3f9c8f7 100644 --- a/server/src/test/java/org/opensearch/indices/IRCKeyWriteableSerializerTests.java +++ b/server/src/test/java/org/opensearch/indices/IRCKeyWriteableSerializerTests.java @@ -30,7 +30,7 @@ public void testSerializer() throws Exception { Random rand = Randomness.get(); for (int valueLength : valueLengths) { for (int i = 0; i < NUM_KEYS; i++) { - IndicesRequestCache.Key key = getRandomIRCKey(valueLength, rand, indexShard.shardId()); + IndicesRequestCache.Key key = getRandomIRCKey(valueLength, rand, indexShard.shardId(), System.identityHashCode(indexShard)); byte[] serialized = ser.serialize(key); assertTrue(ser.equals(key, serialized)); IndicesRequestCache.Key deserialized = ser.deserialize(serialized); @@ -39,12 +39,13 @@ public void testSerializer() throws Exception { } } - private IndicesRequestCache.Key getRandomIRCKey(int valueLength, Random random, ShardId shard) { + private IndicesRequestCache.Key getRandomIRCKey(int valueLength, Random random, ShardId shard, int indexShardHashCode) { byte[] value = new byte[valueLength]; for (int i = 0; i < valueLength; i++) { value[i] = (byte) (random.nextInt(126 - 
32) + 32); } BytesReference keyValue = new BytesArray(value); - return new IndicesRequestCache.Key(shard, keyValue, UUID.randomUUID().toString()); // same UUID source as used in real key + return new IndicesRequestCache.Key(shard, keyValue, UUID.randomUUID().toString(), indexShardHashCode); // same UUID + // source as used in real key } } diff --git a/server/src/test/java/org/opensearch/indices/IndicesRequestCacheTests.java b/server/src/test/java/org/opensearch/indices/IndicesRequestCacheTests.java index e3dca1b7bfda2..dcddd9f3d1318 100644 --- a/server/src/test/java/org/opensearch/indices/IndicesRequestCacheTests.java +++ b/server/src/test/java/org/opensearch/indices/IndicesRequestCacheTests.java @@ -38,30 +38,38 @@ import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Term; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; +import org.opensearch.Version; import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.routing.RecoverySource; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.cluster.routing.ShardRoutingHelper; +import org.opensearch.cluster.routing.UnassignedInfo; import org.opensearch.common.CheckedSupplier; import org.opensearch.common.cache.ICacheKey; import org.opensearch.common.cache.RemovalNotification; import org.opensearch.common.cache.RemovalReason; import org.opensearch.common.cache.module.CacheModule; -import org.opensearch.common.cache.service.CacheService; import org.opensearch.common.cache.stats.ImmutableCacheStats; +import 
org.opensearch.common.cache.stats.ImmutableCacheStatsHolder; import org.opensearch.common.io.stream.BytesStreamOutput; import org.opensearch.common.lucene.index.OpenSearchDirectoryReader; import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.util.io.IOUtils; import org.opensearch.core.common.bytes.AbstractBytesReference; import org.opensearch.core.common.bytes.BytesReference; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.unit.ByteSizeValue; +import org.opensearch.core.index.Index; import org.opensearch.core.index.shard.ShardId; import org.opensearch.core.xcontent.MediaTypeRegistry; import org.opensearch.core.xcontent.XContentHelper; @@ -70,53 +78,79 @@ import org.opensearch.index.cache.request.RequestCacheStats; import org.opensearch.index.cache.request.ShardRequestCache; import org.opensearch.index.query.TermQueryBuilder; +import org.opensearch.index.seqno.RetentionLeaseSyncer; import org.opensearch.index.shard.IndexShard; import org.opensearch.index.shard.IndexShardState; +import org.opensearch.index.shard.IndexShardTestCase; import org.opensearch.index.shard.ShardNotFoundException; +import org.opensearch.indices.replication.checkpoint.SegmentReplicationCheckpointPublisher; import org.opensearch.node.Node; import org.opensearch.test.ClusterServiceUtils; import org.opensearch.test.OpenSearchSingleNodeTestCase; import org.opensearch.threadpool.ThreadPool; +import org.junit.After; +import org.junit.Before; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.UUID; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.CountDownLatch; +import 
java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; import java.util.concurrent.atomic.AtomicInteger; +import static java.util.Collections.emptyMap; +import static java.util.Collections.emptySet; +import static org.opensearch.indices.IndicesRequestCache.INDEX_DIMENSION_NAME; +import static org.opensearch.indices.IndicesRequestCache.INDICES_CACHE_QUERY_SIZE; import static org.opensearch.indices.IndicesRequestCache.INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING; +import static org.opensearch.indices.IndicesRequestCache.SHARD_ID_DIMENSION_NAME; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; public class IndicesRequestCacheTests extends OpenSearchSingleNodeTestCase { + private ThreadPool threadPool; + private IndexWriter writer; + private Directory dir; + private IndicesRequestCache cache; + private IndexShard indexShard; + private ThreadPool getThreadPool() { return new ThreadPool(Settings.builder().put(Node.NODE_NAME_SETTING.getKey(), "default tracer tests").build()); } - public void testBasicOperationsCache() throws Exception { - IndexShard indexShard = createIndex("test").getShard(0); - ThreadPool threadPool = getThreadPool(); - IndicesRequestCache cache = new IndicesRequestCache( - Settings.EMPTY, - (shardId -> Optional.of(new IndicesService.IndexShardCacheEntity(indexShard))), - new CacheModule(new ArrayList<>(), Settings.EMPTY).getCacheService(), - threadPool, - ClusterServiceUtils.createClusterService(threadPool) - ); - Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); + @Before + public void setup() throws IOException { + dir = newDirectory(); + writer = new IndexWriter(dir, newIndexWriterConfig()); + indexShard = createIndex("test").getShard(0); + } + @After + public void cleanup() throws IOException { + IOUtils.close(writer, dir, cache); + terminate(threadPool); + } + + 
public void testBasicOperationsCache() throws Exception { + threadPool = getThreadPool(); + cache = getIndicesRequestCache(Settings.EMPTY); writer.addDocument(newDoc(0, "foo")); - DirectoryReader reader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); - TermQueryBuilder termQuery = new TermQueryBuilder("id", "0"); - BytesReference termBytes = XContentHelper.toXContent(termQuery, MediaTypeRegistry.JSON, false); + DirectoryReader reader = getReader(writer, indexShard.shardId()); // initial cache IndicesService.IndexShardCacheEntity entity = new IndicesService.IndexShardCacheEntity(indexShard); Loader loader = new Loader(reader, 0); - BytesReference value = cache.getOrCompute(entity, loader, reader, termBytes); + BytesReference value = cache.getOrCompute(entity, loader, reader, getTermBytes()); assertEquals("foo", value.streamInput().readString()); ShardRequestCache requestCacheStats = indexShard.requestCache(); assertEquals(0, requestCacheStats.stats().getHitCount()); @@ -128,7 +162,7 @@ public void testBasicOperationsCache() throws Exception { // cache hit entity = new IndicesService.IndexShardCacheEntity(indexShard); loader = new Loader(reader, 0); - value = cache.getOrCompute(entity, loader, reader, termBytes); + value = cache.getOrCompute(entity, loader, reader, getTermBytes()); assertEquals("foo", value.streamInput().readString()); requestCacheStats = indexShard.requestCache(); assertEquals(1, requestCacheStats.stats().getHitCount()); @@ -154,34 +188,21 @@ public void testBasicOperationsCache() throws Exception { assertEquals(0, cache.count()); assertEquals(0, requestCacheStats.stats().getMemorySize().bytesAsInt()); - IOUtils.close(reader, writer, dir, cache); - terminate(threadPool); + IOUtils.close(reader); assertEquals(0, cache.numRegisteredCloseListeners()); } public void testBasicOperationsCacheWithFeatureFlag() throws Exception { - IndexShard indexShard = createIndex("test").getShard(0); - CacheService 
cacheService = new CacheModule(new ArrayList<>(), Settings.EMPTY).getCacheService(); - ThreadPool threadPool = getThreadPool(); - IndicesRequestCache cache = new IndicesRequestCache( - Settings.builder().put(super.featureFlagSettings()).put(FeatureFlags.PLUGGABLE_CACHE, "true").build(), - (shardId -> Optional.of(new IndicesService.IndexShardCacheEntity(indexShard))), - cacheService, - threadPool, - ClusterServiceUtils.createClusterService(threadPool) - ); - Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); - + threadPool = getThreadPool(); + Settings settings = Settings.builder().put(super.featureFlagSettings()).put(FeatureFlags.PLUGGABLE_CACHE, "true").build(); + cache = getIndicesRequestCache(settings); writer.addDocument(newDoc(0, "foo")); - DirectoryReader reader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); - TermQueryBuilder termQuery = new TermQueryBuilder("id", "0"); - BytesReference termBytes = XContentHelper.toXContent(termQuery, MediaTypeRegistry.JSON, false); + DirectoryReader reader = getReader(writer, indexShard.shardId()); // initial cache IndicesService.IndexShardCacheEntity entity = new IndicesService.IndexShardCacheEntity(indexShard); Loader loader = new Loader(reader, 0); - BytesReference value = cache.getOrCompute(entity, loader, reader, termBytes); + BytesReference value = cache.getOrCompute(entity, loader, reader, getTermBytes()); assertEquals("foo", value.streamInput().readString()); ShardRequestCache requestCacheStats = indexShard.requestCache(); assertEquals(0, requestCacheStats.stats().getHitCount()); @@ -193,7 +214,7 @@ public void testBasicOperationsCacheWithFeatureFlag() throws Exception { // cache hit entity = new IndicesService.IndexShardCacheEntity(indexShard); loader = new Loader(reader, 0); - value = cache.getOrCompute(entity, loader, reader, termBytes); + value = cache.getOrCompute(entity, loader, reader, getTermBytes()); 
assertEquals("foo", value.streamInput().readString()); requestCacheStats = indexShard.requestCache(); assertEquals(1, requestCacheStats.stats().getHitCount()); @@ -219,47 +240,28 @@ public void testBasicOperationsCacheWithFeatureFlag() throws Exception { assertEquals(0, cache.count()); assertEquals(0, requestCacheStats.stats().getMemorySize().bytesAsInt()); - IOUtils.close(reader, writer, dir, cache); - terminate(threadPool); + IOUtils.close(reader); assertEquals(0, cache.numRegisteredCloseListeners()); } public void testCacheDifferentReaders() throws Exception { - IndicesService indicesService = getInstanceFromNode(IndicesService.class); - IndexShard indexShard = createIndex("test").getShard(0); - ThreadPool threadPool = getThreadPool(); - IndicesRequestCache cache = new IndicesRequestCache(Settings.EMPTY, (shardId -> { - IndexService indexService = null; - try { - indexService = indicesService.indexServiceSafe(shardId.getIndex()); - } catch (IndexNotFoundException ex) { - return Optional.empty(); - } - return Optional.of(new IndicesService.IndexShardCacheEntity(indexService.getShard(shardId.id()))); - }), - new CacheModule(new ArrayList<>(), Settings.EMPTY).getCacheService(), - threadPool, - ClusterServiceUtils.createClusterService(threadPool) - ); - Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); - + threadPool = getThreadPool(); + cache = getIndicesRequestCache(Settings.EMPTY); writer.addDocument(newDoc(0, "foo")); - DirectoryReader reader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); - TermQueryBuilder termQuery = new TermQueryBuilder("id", "0"); - BytesReference termBytes = XContentHelper.toXContent(termQuery, MediaTypeRegistry.JSON, false); + DirectoryReader reader = getReader(writer, indexShard.shardId()); + if (randomBoolean()) { writer.flush(); IOUtils.close(writer); writer = new IndexWriter(dir, newIndexWriterConfig()); } writer.updateDocument(new 
Term("id", "0"), newDoc(0, "bar")); - DirectoryReader secondReader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); + DirectoryReader secondReader = getReader(writer, indexShard.shardId()); // initial cache IndicesService.IndexShardCacheEntity entity = new IndicesService.IndexShardCacheEntity(indexShard); Loader loader = new Loader(reader, 0); - BytesReference value = cache.getOrCompute(entity, loader, reader, termBytes); + BytesReference value = cache.getOrCompute(entity, loader, reader, getTermBytes()); ShardRequestCache requestCacheStats = entity.stats(); assertEquals("foo", value.streamInput().readString()); assertEquals(0, requestCacheStats.stats().getHitCount()); @@ -274,7 +276,7 @@ public void testCacheDifferentReaders() throws Exception { // cache the second IndicesService.IndexShardCacheEntity secondEntity = new IndicesService.IndexShardCacheEntity(indexShard); loader = new Loader(secondReader, 0); - value = cache.getOrCompute(entity, loader, secondReader, termBytes); + value = cache.getOrCompute(entity, loader, secondReader, getTermBytes()); requestCacheStats = entity.stats(); assertEquals("bar", value.streamInput().readString()); assertEquals(0, requestCacheStats.stats().getHitCount()); @@ -287,7 +289,7 @@ public void testCacheDifferentReaders() throws Exception { secondEntity = new IndicesService.IndexShardCacheEntity(indexShard); loader = new Loader(secondReader, 0); - value = cache.getOrCompute(secondEntity, loader, secondReader, termBytes); + value = cache.getOrCompute(secondEntity, loader, secondReader, getTermBytes()); requestCacheStats = entity.stats(); assertEquals("bar", value.streamInput().readString()); assertEquals(1, requestCacheStats.stats().getHitCount()); @@ -298,7 +300,7 @@ public void testCacheDifferentReaders() throws Exception { entity = new IndicesService.IndexShardCacheEntity(indexShard); loader = new Loader(reader, 0); - value = cache.getOrCompute(entity, loader, reader, termBytes); + 
value = cache.getOrCompute(entity, loader, reader, getTermBytes()); assertEquals("foo", value.streamInput().readString()); requestCacheStats = entity.stats(); assertEquals(2, requestCacheStats.stats().getHitCount()); @@ -331,8 +333,7 @@ public void testCacheDifferentReaders() throws Exception { assertEquals(0, cache.count()); assertEquals(0, requestCacheStats.stats().getMemorySize().bytesAsInt()); - IOUtils.close(secondReader, writer, dir, cache); - terminate(threadPool); + IOUtils.close(secondReader); assertEquals(0, cache.numRegisteredCloseListeners()); } @@ -359,55 +360,20 @@ public void testCacheCleanupThresholdSettingValidator_Invalid_Percentage() { assertThrows(IllegalArgumentException.class, () -> { IndicesRequestCache.validateStalenessSetting("500%"); }); } + // when staleness threshold is zero, stale keys should be cleaned up every time cache cleaner is invoked. public void testCacheCleanupBasedOnZeroThreshold() throws Exception { - IndicesService indicesService = getInstanceFromNode(IndicesService.class); - IndexShard indexShard = createIndex("test").getShard(0); - ThreadPool threadPool = getThreadPool(); + threadPool = getThreadPool(); Settings settings = Settings.builder().put(INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING.getKey(), "0%").build(); - IndicesRequestCache cache = new IndicesRequestCache(settings, (shardId -> { - IndexService indexService = null; - try { - indexService = indicesService.indexServiceSafe(shardId.getIndex()); - } catch (IndexNotFoundException ex) { - return Optional.empty(); - } - return Optional.of(new IndicesService.IndexShardCacheEntity(indexService.getShard(shardId.id()))); - }), - new CacheModule(new ArrayList<>(), settings).getCacheService(), - threadPool, - ClusterServiceUtils.createClusterService(threadPool) - ); - Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); - + cache = getIndicesRequestCache(settings); writer.addDocument(newDoc(0, "foo")); - DirectoryReader 
reader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); - TermQueryBuilder termQuery = new TermQueryBuilder("id", "0"); - BytesReference termBytes = XContentHelper.toXContent(termQuery, MediaTypeRegistry.JSON, false); - if (randomBoolean()) { - writer.flush(); - IOUtils.close(writer); - writer = new IndexWriter(dir, newIndexWriterConfig()); - } - writer.updateDocument(new Term("id", "0"), newDoc(0, "bar")); - DirectoryReader secondReader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); + DirectoryReader reader = getReader(writer, indexShard.shardId()); + DirectoryReader secondReader = getReader(writer, indexShard.shardId()); // Get 2 entries into the cache - IndicesService.IndexShardCacheEntity entity = new IndicesService.IndexShardCacheEntity(indexShard); - Loader loader = new Loader(reader, 0); - cache.getOrCompute(entity, loader, reader, termBytes); - - entity = new IndicesService.IndexShardCacheEntity(indexShard); - loader = new Loader(reader, 0); - cache.getOrCompute(entity, loader, reader, termBytes); - - IndicesService.IndexShardCacheEntity secondEntity = new IndicesService.IndexShardCacheEntity(indexShard); - loader = new Loader(secondReader, 0); - cache.getOrCompute(entity, loader, secondReader, termBytes); + cache.getOrCompute(getEntity(indexShard), getLoader(reader), reader, getTermBytes()); + assertEquals(1, cache.count()); - secondEntity = new IndicesService.IndexShardCacheEntity(indexShard); - loader = new Loader(secondReader, 0); - cache.getOrCompute(secondEntity, loader, secondReader, termBytes); + cache.getOrCompute(getEntity(indexShard), getLoader(secondReader), secondReader, getTermBytes()); assertEquals(2, cache.count()); // Close the reader, to be enqueued for cleanup @@ -419,209 +385,153 @@ public void testCacheCleanupBasedOnZeroThreshold() throws Exception { cache.cacheCleanupManager.cleanCache(); // cleanup should remove the stale-key assertEquals(1, 
cache.count()); - - IOUtils.close(secondReader, writer, dir, cache); - terminate(threadPool); + IOUtils.close(secondReader); } - public void testCacheCleanupBasedOnStaleThreshold_StalenessEqualToThreshold() throws Exception { - IndicesService indicesService = getInstanceFromNode(IndicesService.class); - IndexShard indexShard = createIndex("test").getShard(0); - ThreadPool threadPool = getThreadPool(); - Settings settings = Settings.builder().put(INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING.getKey(), "0.5").build(); - IndicesRequestCache cache = new IndicesRequestCache(settings, (shardId -> { - IndexService indexService = null; - try { - indexService = indicesService.indexServiceSafe(shardId.getIndex()); - } catch (IndexNotFoundException ex) { - return Optional.empty(); - } - return Optional.of(new IndicesService.IndexShardCacheEntity(indexService.getShard(shardId.id()))); - }), - new CacheModule(new ArrayList<>(), settings).getCacheService(), - threadPool, - ClusterServiceUtils.createClusterService(threadPool) - ); - Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); + // when staleness count is higher than stale threshold, stale keys should be cleaned up. 
+ public void testCacheCleanupBasedOnStaleThreshold_StalenessHigherThanThreshold() throws Exception { + threadPool = getThreadPool(); + Settings settings = Settings.builder().put(INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING.getKey(), "0.49").build(); + cache = getIndicesRequestCache(settings); writer.addDocument(newDoc(0, "foo")); - DirectoryReader reader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); - TermQueryBuilder termQuery = new TermQueryBuilder("id", "0"); - BytesReference termBytes = XContentHelper.toXContent(termQuery, MediaTypeRegistry.JSON, false); - if (randomBoolean()) { - writer.flush(); - IOUtils.close(writer); - writer = new IndexWriter(dir, newIndexWriterConfig()); - } - writer.updateDocument(new Term("id", "0"), newDoc(0, "bar")); - DirectoryReader secondReader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); + DirectoryReader reader = getReader(writer, indexShard.shardId()); + DirectoryReader secondReader = getReader(writer, indexShard.shardId()); // Get 2 entries into the cache - IndicesService.IndexShardCacheEntity entity = new IndicesService.IndexShardCacheEntity(indexShard); - Loader loader = new Loader(reader, 0); - cache.getOrCompute(entity, loader, reader, termBytes); + cache.getOrCompute(getEntity(indexShard), getLoader(reader), reader, getTermBytes()); + assertEquals(1, cache.count()); - entity = new IndicesService.IndexShardCacheEntity(indexShard); - loader = new Loader(reader, 0); - cache.getOrCompute(entity, loader, reader, termBytes); + cache.getOrCompute(getEntity(indexShard), getLoader(secondReader), secondReader, getTermBytes()); + assertEquals(2, cache.count()); - IndicesService.IndexShardCacheEntity secondEntity = new IndicesService.IndexShardCacheEntity(indexShard); - loader = new Loader(secondReader, 0); - cache.getOrCompute(entity, loader, secondReader, termBytes); + // no stale keys so far + assertEquals(0, 
cache.cacheCleanupManager.getStaleKeysCount().get()); + // Close the reader, to be enqueued for cleanup + reader.close(); + // 1 out of 2 keys ie 50% are now stale. + assertEquals(1, cache.cacheCleanupManager.getStaleKeysCount().get()); + // cache count should not be affected + assertEquals(2, cache.count()); - secondEntity = new IndicesService.IndexShardCacheEntity(indexShard); - loader = new Loader(secondReader, 0); - cache.getOrCompute(secondEntity, loader, secondReader, termBytes); + // clean cache with 49% staleness threshold + cache.cacheCleanupManager.cleanCache(); + // cleanup should have taken effect with 49% threshold + assertEquals(1, cache.count()); + assertEquals(0, cache.cacheCleanupManager.getStaleKeysCount().get()); + + IOUtils.close(secondReader); + } + + // when staleness count equal to stale threshold, stale keys should be cleaned up. + public void testCacheCleanupBasedOnStaleThreshold_StalenessEqualToThreshold() throws Exception { + threadPool = getThreadPool(); + Settings settings = Settings.builder().put(INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING.getKey(), "0.5").build(); + cache = getIndicesRequestCache(settings); + writer.addDocument(newDoc(0, "foo")); + DirectoryReader reader = getReader(writer, indexShard.shardId()); + DirectoryReader secondReader = getReader(writer, indexShard.shardId()); + + // Get 2 entries into the cache + cache.getOrCompute(getEntity(indexShard), getLoader(reader), reader, getTermBytes()); + assertEquals(1, cache.count()); + + cache.getOrCompute(getEntity(indexShard), getLoader(secondReader), secondReader, getTermBytes()); assertEquals(2, cache.count()); // Close the reader, to be enqueued for cleanup - // 1 out of 2 keys ie 50% are now stale. reader.close(); + // 1 out of 2 keys ie 50% are now stale. 
+ assertEquals(1, cache.cacheCleanupManager.getStaleKeysCount().get()); // cache count should not be affected assertEquals(2, cache.count()); // clean cache with 50% staleness threshold cache.cacheCleanupManager.cleanCache(); // cleanup should have taken effect + assertEquals(0, cache.cacheCleanupManager.getStaleKeysCount().get()); assertEquals(1, cache.count()); - IOUtils.close(secondReader, writer, dir, cache); - terminate(threadPool); + IOUtils.close(secondReader); } + // when a cache entry that is Stale is evicted for any reason, we have to deduct the count from our staleness count public void testStaleCount_OnRemovalNotificationOfStaleKey_DecrementsStaleCount() throws Exception { - IndicesService indicesService = getInstanceFromNode(IndicesService.class); - IndexShard indexShard = createIndex("test").getShard(0); - ThreadPool threadPool = getThreadPool(); + threadPool = getThreadPool(); Settings settings = Settings.builder().put(INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING.getKey(), "0.51").build(); - IndicesRequestCache cache = new IndicesRequestCache(settings, (shardId -> { - IndexService indexService = null; - try { - indexService = indicesService.indexServiceSafe(shardId.getIndex()); - } catch (IndexNotFoundException ex) { - return Optional.empty(); - } - return Optional.of(new IndicesService.IndexShardCacheEntity(indexService.getShard(shardId.id()))); - }), - new CacheModule(new ArrayList<>(), settings).getCacheService(), - threadPool, - ClusterServiceUtils.createClusterService(threadPool) - ); - Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); - + cache = getIndicesRequestCache(settings); writer.addDocument(newDoc(0, "foo")); - DirectoryReader reader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); - TermQueryBuilder termQuery = new TermQueryBuilder("id", "0"); - BytesReference termBytes = XContentHelper.toXContent(termQuery, MediaTypeRegistry.JSON, 
false); - if (randomBoolean()) { - writer.flush(); - IOUtils.close(writer); - writer = new IndexWriter(dir, newIndexWriterConfig()); - } - writer.updateDocument(new Term("id", "0"), newDoc(0, "bar")); - DirectoryReader secondReader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); + ShardId shardId = indexShard.shardId(); + DirectoryReader reader = getReader(writer, indexShard.shardId()); + DirectoryReader secondReader = getReader(writer, indexShard.shardId()); - // Get 2 entries into the cache - IndicesService.IndexShardCacheEntity entity = new IndicesService.IndexShardCacheEntity(indexShard); - Loader loader = new Loader(reader, 0); - cache.getOrCompute(entity, loader, reader, termBytes); - - entity = new IndicesService.IndexShardCacheEntity(indexShard); - loader = new Loader(reader, 0); - cache.getOrCompute(entity, loader, reader, termBytes); - - IndicesService.IndexShardCacheEntity secondEntity = new IndicesService.IndexShardCacheEntity(indexShard); - loader = new Loader(secondReader, 0); - cache.getOrCompute(entity, loader, secondReader, termBytes); + // Get 2 entries into the cache from 2 different readers + cache.getOrCompute(getEntity(indexShard), getLoader(reader), reader, getTermBytes()); + assertEquals(1, cache.count()); - secondEntity = new IndicesService.IndexShardCacheEntity(indexShard); - loader = new Loader(secondReader, 0); - cache.getOrCompute(secondEntity, loader, secondReader, termBytes); + cache.getOrCompute(getEntity(indexShard), getLoader(secondReader), secondReader, getTermBytes()); assertEquals(2, cache.count()); - // Close the reader, to be enqueued for cleanup + // assert no stale keys are accounted so far + assertEquals(0, cache.cacheCleanupManager.getStaleKeysCount().get()); + // Close the reader, this should create a stale key reader.close(); - AtomicInteger staleKeysCount = cache.cacheCleanupManager.getStaleKeysCount(); // 1 out of 2 keys ie 50% are now stale. 
- assertEquals(1, staleKeysCount.get()); + assertEquals(1, cache.cacheCleanupManager.getStaleKeysCount().get()); // cache count should not be affected assertEquals(2, cache.count()); - OpenSearchDirectoryReader.DelegatingCacheHelper delegatingCacheHelper = - (OpenSearchDirectoryReader.DelegatingCacheHelper) secondReader.getReaderCacheHelper(); - String readerCacheKeyId = delegatingCacheHelper.getDelegatingCacheKey().getId(); IndicesRequestCache.Key key = new IndicesRequestCache.Key( - ((IndexShard) secondEntity.getCacheIdentity()).shardId(), - termBytes, - readerCacheKeyId + indexShard.shardId(), + getTermBytes(), + getReaderCacheKeyId(reader), + indexShard.hashCode() ); + // test the mapping + ConcurrentMap> cleanupKeyToCountMap = cache.cacheCleanupManager.getCleanupKeyToCountMap(); + // shard id should exist + assertTrue(cleanupKeyToCountMap.containsKey(shardId)); + // reader CacheKeyId should NOT exist + assertFalse(cleanupKeyToCountMap.get(shardId).containsKey(getReaderCacheKeyId(reader))); + // secondReader CacheKeyId should exist + assertTrue(cleanupKeyToCountMap.get(shardId).containsKey(getReaderCacheKeyId(secondReader))); cache.onRemoval( new RemovalNotification, BytesReference>( new ICacheKey<>(key), - termBytes, + getTermBytes(), RemovalReason.EVICTED ) ); - staleKeysCount = cache.cacheCleanupManager.getStaleKeysCount(); + + // test the mapping, it should stay the same + // shard id should exist + assertTrue(cleanupKeyToCountMap.containsKey(shardId)); + // reader CacheKeyId should NOT exist + assertFalse(cleanupKeyToCountMap.get(shardId).containsKey(getReaderCacheKeyId(reader))); + // secondReader CacheKeyId should exist + assertTrue(cleanupKeyToCountMap.get(shardId).containsKey(getReaderCacheKeyId(secondReader))); // eviction of previous stale key from the cache should decrement staleKeysCount in iRC - assertEquals(0, staleKeysCount.get()); + assertEquals(0, cache.cacheCleanupManager.getStaleKeysCount().get()); - IOUtils.close(secondReader, writer, dir, 
cache); - terminate(threadPool); + IOUtils.close(secondReader); } - public void testStaleCount_OnRemovalNotificationOfStaleKey_DoesNotDecrementsStaleCount() throws Exception { - IndicesService indicesService = getInstanceFromNode(IndicesService.class); - IndexShard indexShard = createIndex("test").getShard(0); - ThreadPool threadPool = getThreadPool(); + // when a cache entry that is NOT Stale is evicted for any reason, staleness count should NOT be deducted + public void testStaleCount_OnRemovalNotificationOfNonStaleKey_DoesNotDecrementsStaleCount() throws Exception { + threadPool = getThreadPool(); Settings settings = Settings.builder().put(INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING.getKey(), "0.51").build(); - IndicesRequestCache cache = new IndicesRequestCache(settings, (shardId -> { - IndexService indexService = null; - try { - indexService = indicesService.indexServiceSafe(shardId.getIndex()); - } catch (IndexNotFoundException ex) { - return Optional.empty(); - } - return Optional.of(new IndicesService.IndexShardCacheEntity(indexService.getShard(shardId.id()))); - }), - new CacheModule(new ArrayList<>(), settings).getCacheService(), - threadPool, - ClusterServiceUtils.createClusterService(threadPool) - ); - Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); - + cache = getIndicesRequestCache(settings); writer.addDocument(newDoc(0, "foo")); - DirectoryReader reader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); - TermQueryBuilder termQuery = new TermQueryBuilder("id", "0"); - BytesReference termBytes = XContentHelper.toXContent(termQuery, MediaTypeRegistry.JSON, false); - if (randomBoolean()) { - writer.flush(); - IOUtils.close(writer); - writer = new IndexWriter(dir, newIndexWriterConfig()); - } - writer.updateDocument(new Term("id", "0"), newDoc(0, "bar")); - DirectoryReader secondReader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new 
ShardId("foo", "bar", 1)); + ShardId shardId = indexShard.shardId(); + DirectoryReader reader = getReader(writer, indexShard.shardId()); + DirectoryReader secondReader = getReader(writer, indexShard.shardId()); // Get 2 entries into the cache - IndicesService.IndexShardCacheEntity entity = new IndicesService.IndexShardCacheEntity(indexShard); - Loader loader = new Loader(reader, 0); - cache.getOrCompute(entity, loader, reader, termBytes); - - entity = new IndicesService.IndexShardCacheEntity(indexShard); - loader = new Loader(reader, 0); - cache.getOrCompute(entity, loader, reader, termBytes); - - IndicesService.IndexShardCacheEntity secondEntity = new IndicesService.IndexShardCacheEntity(indexShard); - loader = new Loader(secondReader, 0); - cache.getOrCompute(entity, loader, secondReader, termBytes); + cache.getOrCompute(getEntity(indexShard), getLoader(reader), reader, getTermBytes()); + assertEquals(1, cache.count()); - secondEntity = new IndicesService.IndexShardCacheEntity(indexShard); - loader = new Loader(secondReader, 0); - cache.getOrCompute(secondEntity, loader, secondReader, termBytes); + cache.getOrCompute(getEntity(indexShard), getLoader(secondReader), secondReader, getTermBytes()); assertEquals(2, cache.count()); // Close the reader, to be enqueued for cleanup @@ -632,102 +542,263 @@ public void testStaleCount_OnRemovalNotificationOfStaleKey_DoesNotDecrementsStal // cache count should not be affected assertEquals(2, cache.count()); - OpenSearchDirectoryReader.DelegatingCacheHelper delegatingCacheHelper = (OpenSearchDirectoryReader.DelegatingCacheHelper) reader - .getReaderCacheHelper(); - String readerCacheKeyId = delegatingCacheHelper.getDelegatingCacheKey().getId(); + // evict entry from second reader (this reader is not closed) IndicesRequestCache.Key key = new IndicesRequestCache.Key( - ((IndexShard) secondEntity.getCacheIdentity()).shardId(), - termBytes, - readerCacheKeyId + indexShard.shardId(), + getTermBytes(), + 
getReaderCacheKeyId(secondReader), + indexShard.hashCode() ); + // test the mapping + ConcurrentMap> cleanupKeyToCountMap = cache.cacheCleanupManager.getCleanupKeyToCountMap(); + // shard id should exist + assertTrue(cleanupKeyToCountMap.containsKey(shardId)); + // reader CacheKeyId should NOT exist + assertFalse(cleanupKeyToCountMap.get(shardId).containsKey(getReaderCacheKeyId(reader))); + // secondReader CacheKeyId should exist + assertTrue(cleanupKeyToCountMap.get(shardId).containsKey(getReaderCacheKeyId(secondReader))); + cache.onRemoval( new RemovalNotification, BytesReference>( new ICacheKey<>(key), - termBytes, + getTermBytes(), RemovalReason.EVICTED ) ); + + // test the mapping, shardId entry should be cleaned up + // shard id should NOT exist + assertFalse(cleanupKeyToCountMap.containsKey(shardId)); + staleKeysCount = cache.cacheCleanupManager.getStaleKeysCount(); // eviction of NON-stale key from the cache should NOT decrement staleKeysCount in iRC assertEquals(1, staleKeysCount.get()); - IOUtils.close(secondReader, writer, dir, cache); - terminate(threadPool); + IOUtils.close(secondReader); } - public void testCacheCleanupBasedOnStaleThreshold_StalenessGreaterThanThreshold() throws Exception { - IndicesService indicesService = getInstanceFromNode(IndicesService.class); - IndexShard indexShard = createIndex("test").getShard(0); - ThreadPool threadPool = getThreadPool(); - Settings settings = Settings.builder().put(INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING.getKey(), "0.49").build(); - IndicesRequestCache cache = new IndicesRequestCache(settings, (shardId -> { - IndexService indexService = null; - try { - indexService = indicesService.indexServiceSafe(shardId.getIndex()); - } catch (IndexNotFoundException ex) { - return Optional.empty(); - } - return Optional.of(new IndicesService.IndexShardCacheEntity(indexService.getShard(shardId.id()))); - }), - new CacheModule(new ArrayList<>(), settings).getCacheService(), - threadPool, - 
ClusterServiceUtils.createClusterService(threadPool) + // when a cache entry that is NOT Stale is evicted WITHOUT its reader closing, we should NOT deduct it from staleness count + public void testStaleCount_WithoutReaderClosing_DecrementsStaleCount() throws Exception { + threadPool = getThreadPool(); + Settings settings = Settings.builder().put(INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING.getKey(), "0.51").build(); + cache = getIndicesRequestCache(settings); + + writer.addDocument(newDoc(0, "foo")); + DirectoryReader reader = getReader(writer, indexShard.shardId()); + DirectoryReader secondReader = getReader(writer, indexShard.shardId()); + + // Get 2 entries into the cache from 2 different readers + cache.getOrCompute(getEntity(indexShard), getLoader(reader), reader, getTermBytes()); + assertEquals(1, cache.count()); + + cache.getOrCompute(getEntity(indexShard), getLoader(secondReader), secondReader, getTermBytes()); + assertEquals(2, cache.count()); + + // no keys are stale + assertEquals(0, cache.cacheCleanupManager.getStaleKeysCount().get()); + // create notification for removal of non-stale entry + IndicesRequestCache.Key key = new IndicesRequestCache.Key( + indexShard.shardId(), + getTermBytes(), + getReaderCacheKeyId(reader), + indexShard.hashCode() ); - Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); + cache.onRemoval( + new RemovalNotification, BytesReference>( + new ICacheKey<>(key), + getTermBytes(), + RemovalReason.EVICTED + ) + ); + // stale keys count should stay zero + assertEquals(0, cache.cacheCleanupManager.getStaleKeysCount().get()); + + IOUtils.close(reader, secondReader); + } + + // test staleness count based on removal notifications + public void testStaleCount_OnRemovalNotifications() throws Exception { + threadPool = getThreadPool(); + Settings settings = Settings.builder().put(INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING.getKey(), "0.51").build(); + cache = 
getIndicesRequestCache(settings); writer.addDocument(newDoc(0, "foo")); - DirectoryReader reader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); - TermQueryBuilder termQuery = new TermQueryBuilder("id", "0"); - BytesReference termBytes = XContentHelper.toXContent(termQuery, MediaTypeRegistry.JSON, false); - if (randomBoolean()) { - writer.flush(); - IOUtils.close(writer); - writer = new IndexWriter(dir, newIndexWriterConfig()); + DirectoryReader reader = getReader(writer, indexShard.shardId()); + + // Get 5 entries into the cache + int totalKeys = 5; + IndicesService.IndexShardCacheEntity entity = null; + TermQueryBuilder termQuery = null; + BytesReference termBytes = null; + for (int i = 1; i <= totalKeys; i++) { + termQuery = new TermQueryBuilder("id", "" + i); + termBytes = XContentHelper.toXContent(termQuery, MediaTypeRegistry.JSON, false); + entity = new IndicesService.IndexShardCacheEntity(indexShard); + Loader loader = new Loader(reader, 0); + cache.getOrCompute(entity, loader, reader, termBytes); + assertEquals(i, cache.count()); } - writer.updateDocument(new Term("id", "0"), newDoc(0, "bar")); - DirectoryReader secondReader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); + // no keys are stale yet + assertEquals(0, cache.cacheCleanupManager.getStaleKeysCount().get()); + // closing the reader should make all keys stale + reader.close(); + assertEquals(totalKeys, cache.cacheCleanupManager.getStaleKeysCount().get()); - // Get 2 entries into the cache - IndicesService.IndexShardCacheEntity entity = new IndicesService.IndexShardCacheEntity(indexShard); - Loader loader = new Loader(reader, 0); - cache.getOrCompute(entity, loader, reader, termBytes); + String readerCacheKeyId = getReaderCacheKeyId(reader); + IndexShard indexShard = (IndexShard) entity.getCacheIdentity(); + IndicesRequestCache.Key key = new IndicesRequestCache.Key(indexShard.shardId(), termBytes, 
readerCacheKeyId, indexShard.hashCode()); + + int staleCount = cache.cacheCleanupManager.getStaleKeysCount().get(); + // Notification for Replaced should not deduct the staleCount + cache.onRemoval( + new RemovalNotification, BytesReference>( + new ICacheKey<>(key), + getTermBytes(), + RemovalReason.REPLACED + ) + ); + // stale keys count should stay the same + assertEquals(staleCount, cache.cacheCleanupManager.getStaleKeysCount().get()); + + // Notification for all but Replaced should deduct the staleCount + RemovalReason[] reasons = { RemovalReason.INVALIDATED, RemovalReason.EVICTED, RemovalReason.EXPLICIT, RemovalReason.CAPACITY }; + for (RemovalReason reason : reasons) { + cache.onRemoval( + new RemovalNotification, BytesReference>(new ICacheKey<>(key), getTermBytes(), reason) + ); + assertEquals(--staleCount, cache.cacheCleanupManager.getStaleKeysCount().get()); + } + } - entity = new IndicesService.IndexShardCacheEntity(indexShard); - loader = new Loader(reader, 0); - cache.getOrCompute(entity, loader, reader, termBytes); + // when staleness count less than the stale threshold, stale keys should NOT be cleaned up. 
+ public void testCacheCleanupBasedOnStaleThreshold_StalenessLesserThanThreshold() throws Exception { + threadPool = getThreadPool(); + Settings settings = Settings.builder().put(INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING.getKey(), "51%").build(); + cache = getIndicesRequestCache(settings); - IndicesService.IndexShardCacheEntity secondEntity = new IndicesService.IndexShardCacheEntity(indexShard); - loader = new Loader(secondReader, 0); - cache.getOrCompute(entity, loader, secondReader, termBytes); + writer.addDocument(newDoc(0, "foo")); + DirectoryReader reader = getReader(writer, indexShard.shardId()); + DirectoryReader secondReader = getReader(writer, indexShard.shardId()); - secondEntity = new IndicesService.IndexShardCacheEntity(indexShard); - loader = new Loader(secondReader, 0); - cache.getOrCompute(secondEntity, loader, secondReader, termBytes); + // Get 2 entries into the cache + cache.getOrCompute(getEntity(indexShard), getLoader(reader), reader, getTermBytes()); + assertEquals(1, cache.count()); + + cache.getOrCompute(getEntity(indexShard), getLoader(secondReader), secondReader, getTermBytes()); assertEquals(2, cache.count()); // Close the reader, to be enqueued for cleanup - // 1 out of 2 keys ie 50% are now stale. reader.close(); + // 1 out of 2 keys ie 50% are now stale. 
+ assertEquals(1, cache.cacheCleanupManager.getStaleKeysCount().get()); // cache count should not be affected assertEquals(2, cache.count()); - // clean cache with 49% staleness threshold + // clean cache with 51% staleness threshold cache.cacheCleanupManager.cleanCache(); - // cleanup should have taken effect with 49% threshold + // cleanup should have been ignored + assertEquals(1, cache.cacheCleanupManager.getStaleKeysCount().get()); + assertEquals(2, cache.count()); + + IOUtils.close(secondReader); + } + + // test the cleanupKeyToCountMap are set appropriately when both readers are closed + public void testCleanupKeyToCountMapAreSetAppropriately() throws Exception { + threadPool = getThreadPool(); + Settings settings = Settings.builder().put(INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING.getKey(), "0.51").build(); + cache = getIndicesRequestCache(settings); + + writer.addDocument(newDoc(0, "foo")); + ShardId shardId = indexShard.shardId(); + DirectoryReader reader = getReader(writer, shardId); + DirectoryReader secondReader = getReader(writer, shardId); + + // Get 2 entries into the cache from 2 different readers + cache.getOrCompute(getEntity(indexShard), getLoader(reader), reader, getTermBytes()); assertEquals(1, cache.count()); + // test the mappings + ConcurrentMap> cleanupKeyToCountMap = cache.cacheCleanupManager.getCleanupKeyToCountMap(); + assertEquals(1, (int) cleanupKeyToCountMap.get(shardId).get(getReaderCacheKeyId(reader))); - IOUtils.close(secondReader, writer, dir, cache); - terminate(threadPool); + cache.getOrCompute(getEntity(indexShard), getLoader(secondReader), secondReader, getTermBytes()); + // test the mapping + assertEquals(2, cache.count()); + assertEquals(1, (int) cleanupKeyToCountMap.get(shardId).get(getReaderCacheKeyId(secondReader))); + // create another entry for the second reader + cache.getOrCompute(getEntity(indexShard), getLoader(secondReader), secondReader, getTermBytes("id", "1")); + // test the mapping + assertEquals(3, 
cache.count()); + assertEquals(2, (int) cleanupKeyToCountMap.get(shardId).get(getReaderCacheKeyId(secondReader))); + + // Close the reader, to create stale entries + reader.close(); + // cache count should not be affected + assertEquals(3, cache.count()); + // test the mapping, first reader's entry should be removed from the mapping and accounted for in the staleKeysCount + assertFalse(cleanupKeyToCountMap.get(shardId).containsKey(getReaderCacheKeyId(reader))); + assertEquals(1, cache.cacheCleanupManager.getStaleKeysCount().get()); + // second reader's mapping should not be affected + assertEquals(2, (int) cleanupKeyToCountMap.get(shardId).get(getReaderCacheKeyId(secondReader))); + // send removal notification for first reader + IndicesRequestCache.Key key = new IndicesRequestCache.Key( + indexShard.shardId(), + getTermBytes(), + getReaderCacheKeyId(reader), + indexShard.hashCode() + ); + cache.onRemoval( + new RemovalNotification, BytesReference>( + new ICacheKey<>(key), + getTermBytes(), + RemovalReason.EVICTED + ) + ); + // test the mapping, it should stay the same + assertFalse(cleanupKeyToCountMap.get(shardId).containsKey(getReaderCacheKeyId(reader))); + // staleKeysCount should be decremented + assertEquals(0, cache.cacheCleanupManager.getStaleKeysCount().get()); + // second reader's mapping should not be affected + assertEquals(2, (int) cleanupKeyToCountMap.get(shardId).get(getReaderCacheKeyId(secondReader))); + + // Without closing the secondReader send removal notification of one of its key + key = new IndicesRequestCache.Key(indexShard.shardId(), getTermBytes(), getReaderCacheKeyId(secondReader), indexShard.hashCode()); + cache.onRemoval( + new RemovalNotification, BytesReference>( + new ICacheKey<>(key), + getTermBytes(), + RemovalReason.EVICTED + ) + ); + // staleKeysCount should be the same as before + assertEquals(0, cache.cacheCleanupManager.getStaleKeysCount().get()); + // secondReader's readerCacheKeyId count should be decremented by 1 + 
assertEquals(1, (int) cleanupKeyToCountMap.get(shardId).get(getReaderCacheKeyId(secondReader))); + // Without closing the secondReader send removal notification of its last key + key = new IndicesRequestCache.Key(indexShard.shardId(), getTermBytes(), getReaderCacheKeyId(secondReader), indexShard.hashCode()); + cache.onRemoval( + new RemovalNotification, BytesReference>( + new ICacheKey<>(key), + getTermBytes(), + RemovalReason.EVICTED + ) + ); + // staleKeysCount should be the same as before + assertEquals(0, cache.cacheCleanupManager.getStaleKeysCount().get()); + // since all the readers of this shard is closed, the cleanupKeyToCountMap should have no entries + assertEquals(0, cleanupKeyToCountMap.size()); + + IOUtils.close(secondReader); } - public void testCacheCleanupBasedOnStaleThreshold_StalenessLesserThanThreshold() throws Exception { + private DirectoryReader getReader(IndexWriter writer, ShardId shardId) throws IOException { + return OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), shardId); + } + + private IndicesRequestCache getIndicesRequestCache(Settings settings) { IndicesService indicesService = getInstanceFromNode(IndicesService.class); - IndexShard indexShard = createIndex("test").getShard(0); - ThreadPool threadPool = getThreadPool(); - Settings settings = Settings.builder().put(INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING.getKey(), "51%").build(); - IndicesRequestCache cache = new IndicesRequestCache(settings, (shardId -> { + return new IndicesRequestCache(settings, (shardId -> { IndexService indexService = null; try { indexService = indicesService.indexServiceSafe(shardId.getIndex()); @@ -740,61 +811,42 @@ public void testCacheCleanupBasedOnStaleThreshold_StalenessLesserThanThreshold() threadPool, ClusterServiceUtils.createClusterService(threadPool) ); - Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); - - writer.addDocument(newDoc(0, "foo")); - DirectoryReader reader = 
OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); - TermQueryBuilder termQuery = new TermQueryBuilder("id", "0"); - BytesReference termBytes = XContentHelper.toXContent(termQuery, MediaTypeRegistry.JSON, false); - if (randomBoolean()) { - writer.flush(); - IOUtils.close(writer); - writer = new IndexWriter(dir, newIndexWriterConfig()); - } - writer.updateDocument(new Term("id", "0"), newDoc(0, "bar")); - DirectoryReader secondReader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); - - // Get 2 entries into the cache - IndicesService.IndexShardCacheEntity entity = new IndicesService.IndexShardCacheEntity(indexShard); - Loader loader = new Loader(reader, 0); - cache.getOrCompute(entity, loader, reader, termBytes); - - entity = new IndicesService.IndexShardCacheEntity(indexShard); - loader = new Loader(reader, 0); - cache.getOrCompute(entity, loader, reader, termBytes); + } - IndicesService.IndexShardCacheEntity secondEntity = new IndicesService.IndexShardCacheEntity(indexShard); - loader = new Loader(secondReader, 0); - cache.getOrCompute(entity, loader, secondReader, termBytes); + private Loader getLoader(DirectoryReader reader) { + return new Loader(reader, 0); + } - secondEntity = new IndicesService.IndexShardCacheEntity(indexShard); - loader = new Loader(secondReader, 0); - cache.getOrCompute(secondEntity, loader, secondReader, termBytes); - assertEquals(2, cache.count()); + private IndicesService.IndexShardCacheEntity getEntity(IndexShard indexShard) { + return new IndicesService.IndexShardCacheEntity(indexShard); + } - // Close the reader, to be enqueued for cleanup - // 1 out of 2 keys ie 50% are now stale. 
- reader.close(); - // cache count should not be affected - assertEquals(2, cache.count()); + private BytesReference getTermBytes() throws IOException { + TermQueryBuilder termQuery = new TermQueryBuilder("id", "0"); + return XContentHelper.toXContent(termQuery, MediaTypeRegistry.JSON, false); + } - // clean cache with 51% staleness threshold - cache.cacheCleanupManager.cleanCache(); - // cleanup should have been ignored - assertEquals(2, cache.count()); + private BytesReference getTermBytes(String fieldName, String value) throws IOException { + TermQueryBuilder termQuery = new TermQueryBuilder(fieldName, value); + return XContentHelper.toXContent(termQuery, MediaTypeRegistry.JSON, false); + } - IOUtils.close(secondReader, writer, dir, cache); - terminate(threadPool); + private String getReaderCacheKeyId(DirectoryReader reader) { + OpenSearchDirectoryReader.DelegatingCacheHelper delegatingCacheHelper = (OpenSearchDirectoryReader.DelegatingCacheHelper) reader + .getReaderCacheHelper(); + return delegatingCacheHelper.getDelegatingCacheKey().getId(); } public void testClosingIndexWipesStats() throws Exception { IndicesService indicesService = getInstanceFromNode(IndicesService.class); + String[] levels = { INDEX_DIMENSION_NAME, SHARD_ID_DIMENSION_NAME }; // Create two indices each with multiple shards int numShards = 3; Settings indexSettings = Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numShards).build(); String indexToKeepName = "test"; String indexToCloseName = "test2"; + // delete all indices if already + assertAcked(client().admin().indices().prepareDelete("_all").get()); IndexService indexToKeep = createIndex(indexToKeepName, indexSettings); IndexService indexToClose = createIndex(indexToCloseName, indexSettings); for (int i = 0; i < numShards; i++) { @@ -802,9 +854,13 @@ public void testClosingIndexWipesStats() throws Exception { assertNotNull(indexToKeep.getShard(i)); assertNotNull(indexToClose.getShard(i)); } - ThreadPool threadPool = 
getThreadPool(); - Settings settings = Settings.builder().put(INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING.getKey(), "0.001%").build(); - IndicesRequestCache cache = new IndicesRequestCache(settings, (shardId -> { + + threadPool = getThreadPool(); + Settings settings = Settings.builder() + .put(INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING.getKey(), "0.001%") + .put(FeatureFlags.PLUGGABLE_CACHE, true) + .build(); + cache = new IndicesRequestCache(settings, (shardId -> { IndexService indexService = null; try { indexService = indicesService.indexServiceSafe(shardId.getIndex()); @@ -821,8 +877,6 @@ public void testClosingIndexWipesStats() throws Exception { threadPool, ClusterServiceUtils.createClusterService(threadPool) ); - Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); writer.addDocument(newDoc(0, "foo")); TermQueryBuilder termQuery = new TermQueryBuilder("id", "0"); @@ -860,11 +914,12 @@ public void testClosingIndexWipesStats() throws Exception { ShardId shardId = indexService.getShard(i).shardId(); List dimensionValues = List.of(shardId.getIndexName(), shardId.toString()); initialDimensionValues.add(dimensionValues); - ImmutableCacheStats snapshot = cache.stats().getStatsForDimensionValues(dimensionValues); + ImmutableCacheStatsHolder holder = cache.stats(levels); + ImmutableCacheStats snapshot = cache.stats(levels).getStatsForDimensionValues(dimensionValues); assertNotNull(snapshot); // check the values are not empty by confirming entries != 0, this should always be true since the missed value is loaded // into the cache - assertNotEquals(0, snapshot.getEntries()); + assertNotEquals(0, snapshot.getItems()); } } @@ -881,146 +936,130 @@ public void testClosingIndexWipesStats() throws Exception { // Now stats for the closed index should be gone for (List dimensionValues : initialDimensionValues) { - ImmutableCacheStats snapshot = cache.stats().getStatsForDimensionValues(dimensionValues); + 
ImmutableCacheStats snapshot = cache.stats(levels).getStatsForDimensionValues(dimensionValues); if (dimensionValues.get(0).equals(indexToCloseName)) { assertNull(snapshot); } else { assertNotNull(snapshot); // check the values are not empty by confirming entries != 0, this should always be true since the missed value is loaded // into the cache - assertNotEquals(0, snapshot.getEntries()); + assertNotEquals(0, snapshot.getItems()); } } for (DirectoryReader reader : readersToKeep) { IOUtils.close(reader); } - IOUtils.close(secondReader, writer, dir, cache); - terminate(threadPool); + IOUtils.close(secondReader); + } + + public void testCacheCleanupBasedOnStaleThreshold_thresholdUpdate() throws Exception { + threadPool = getThreadPool(); + Settings settings = Settings.builder().put(INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING.getKey(), "51%").build(); + cache = getIndicesRequestCache(settings); + + writer.addDocument(newDoc(0, "foo")); + DirectoryReader reader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); + DirectoryReader secondReader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); + + // Get 2 entries into the cache + cache.getOrCompute(getEntity(indexShard), getLoader(reader), reader, getTermBytes()); + cache.getOrCompute(getEntity(indexShard), getLoader(secondReader), secondReader, getTermBytes()); + assertEquals(2, cache.count()); + + // Close the reader, to be enqueued for cleanup + // 1 out of 2 keys ie 50% are now stale. 
+ reader.close(); + // cache count should not be affected + assertEquals(2, cache.count()); + + // clean cache with 51% staleness threshold + cache.cacheCleanupManager.cleanCache(); + // cleanup should have been ignored + assertEquals(2, cache.count()); + + cache.setStalenessThreshold("49%"); + // clean cache with 49% staleness threshold + cache.cacheCleanupManager.cleanCache(); + // cleanup should NOT have been ignored + assertEquals(1, cache.count()); + + IOUtils.close(secondReader); } public void testEviction() throws Exception { final ByteSizeValue size; { - IndexShard indexShard = createIndex("test").getShard(0); - ThreadPool threadPool = getThreadPool(); - IndicesRequestCache cache = new IndicesRequestCache( - Settings.EMPTY, - (shardId -> Optional.of(new IndicesService.IndexShardCacheEntity(indexShard))), - new CacheModule(new ArrayList<>(), Settings.EMPTY).getCacheService(), - threadPool, - ClusterServiceUtils.createClusterService(threadPool) - ); - Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); - + threadPool = getThreadPool(); + cache = getIndicesRequestCache(Settings.EMPTY); writer.addDocument(newDoc(0, "foo")); - DirectoryReader reader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); - TermQueryBuilder termQuery = new TermQueryBuilder("id", "0"); - BytesReference termBytes = XContentHelper.toXContent(termQuery, MediaTypeRegistry.JSON, false); - IndicesService.IndexShardCacheEntity entity = new IndicesService.IndexShardCacheEntity(indexShard); - Loader loader = new Loader(reader, 0); - + DirectoryReader reader = getReader(writer, indexShard.shardId()); writer.updateDocument(new Term("id", "0"), newDoc(0, "bar")); DirectoryReader secondReader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); - IndicesService.IndexShardCacheEntity secondEntity = new IndicesService.IndexShardCacheEntity(indexShard); - Loader 
secondLoader = new Loader(secondReader, 0); - BytesReference value1 = cache.getOrCompute(entity, loader, reader, termBytes); + BytesReference value1 = cache.getOrCompute(getEntity(indexShard), getLoader(reader), reader, getTermBytes()); assertEquals("foo", value1.streamInput().readString()); - BytesReference value2 = cache.getOrCompute(secondEntity, secondLoader, secondReader, termBytes); + BytesReference value2 = cache.getOrCompute(getEntity(indexShard), getLoader(secondReader), secondReader, getTermBytes()); assertEquals("bar", value2.streamInput().readString()); - size = new ByteSizeValue(cache.getSizeInBytes()); + size = indexShard.requestCache().stats().getMemorySize(); // Value from old API IOUtils.close(reader, secondReader, writer, dir, cache); - terminate(threadPool); } - IndexShard indexShard = createIndex("test1").getShard(0); - ThreadPool threadPool = getThreadPool(); + indexShard = createIndex("test1").getShard(0); IndicesRequestCache cache = new IndicesRequestCache( - // Add 5 instead of 1; the key size now depends on the length of dimension names and values so there's more variation - Settings.builder().put(IndicesRequestCache.INDICES_CACHE_QUERY_SIZE.getKey(), size.getBytes() + 5 + "b").build(), + // TODO: Add wiggle room to max size to allow for overhead of ICacheKey. This can be removed once API PR goes in, as it updates + // the old API to account for the ICacheKey overhead. 
+ Settings.builder().put(IndicesRequestCache.INDICES_CACHE_QUERY_SIZE.getKey(), (int) (size.getBytes() * 1.2) + "b").build(), (shardId -> Optional.of(new IndicesService.IndexShardCacheEntity(indexShard))), new CacheModule(new ArrayList<>(), Settings.EMPTY).getCacheService(), threadPool, ClusterServiceUtils.createClusterService(threadPool) ); - Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); - + dir = newDirectory(); + writer = new IndexWriter(dir, newIndexWriterConfig()); writer.addDocument(newDoc(0, "foo")); - DirectoryReader reader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); - TermQueryBuilder termQuery = new TermQueryBuilder("id", "0"); - BytesReference termBytes = XContentHelper.toXContent(termQuery, MediaTypeRegistry.JSON, false); - IndicesService.IndexShardCacheEntity entity = new IndicesService.IndexShardCacheEntity(indexShard); - Loader loader = new Loader(reader, 0); - + DirectoryReader reader = getReader(writer, indexShard.shardId()); writer.updateDocument(new Term("id", "0"), newDoc(0, "bar")); - DirectoryReader secondReader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); - IndicesService.IndexShardCacheEntity secondEntity = new IndicesService.IndexShardCacheEntity(indexShard); - Loader secondLoader = new Loader(secondReader, 0); - + DirectoryReader secondReader = getReader(writer, indexShard.shardId()); writer.updateDocument(new Term("id", "0"), newDoc(0, "baz")); DirectoryReader thirdReader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); - IndicesService.IndexShardCacheEntity thirddEntity = new IndicesService.IndexShardCacheEntity(indexShard); - Loader thirdLoader = new Loader(thirdReader, 0); - BytesReference value1 = cache.getOrCompute(entity, loader, reader, termBytes); + BytesReference value1 = cache.getOrCompute(getEntity(indexShard), getLoader(reader), 
reader, getTermBytes()); assertEquals("foo", value1.streamInput().readString()); - BytesReference value2 = cache.getOrCompute(secondEntity, secondLoader, secondReader, termBytes); + BytesReference value2 = cache.getOrCompute(getEntity(indexShard), getLoader(secondReader), secondReader, getTermBytes()); assertEquals("bar", value2.streamInput().readString()); logger.info("Memory size: {}", indexShard.requestCache().stats().getMemorySize()); - BytesReference value3 = cache.getOrCompute(thirddEntity, thirdLoader, thirdReader, termBytes); + BytesReference value3 = cache.getOrCompute(getEntity(indexShard), getLoader(thirdReader), thirdReader, getTermBytes()); assertEquals("baz", value3.streamInput().readString()); assertEquals(2, cache.count()); assertEquals(1, indexShard.requestCache().stats().getEvictions()); - IOUtils.close(reader, secondReader, thirdReader, writer, dir, cache); - terminate(threadPool); + IOUtils.close(reader, secondReader, thirdReader); } public void testClearAllEntityIdentity() throws Exception { - IndicesService indicesService = getInstanceFromNode(IndicesService.class); - IndexShard indexShard = createIndex("test").getShard(0); - ThreadPool threadPool = getThreadPool(); - IndicesRequestCache cache = new IndicesRequestCache(Settings.EMPTY, (shardId -> { - IndexService indexService = null; - try { - indexService = indicesService.indexServiceSafe(shardId.getIndex()); - } catch (IndexNotFoundException ex) { - return Optional.empty(); - } - return Optional.of(new IndicesService.IndexShardCacheEntity(indexService.getShard(shardId.id()))); - }), - new CacheModule(new ArrayList<>(), Settings.EMPTY).getCacheService(), - threadPool, - ClusterServiceUtils.createClusterService(threadPool) - ); - - Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); - + threadPool = getThreadPool(); + cache = getIndicesRequestCache(Settings.EMPTY); writer.addDocument(newDoc(0, "foo")); - DirectoryReader reader = 
OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); - TermQueryBuilder termQuery = new TermQueryBuilder("id", "0"); - BytesReference termBytes = XContentHelper.toXContent(termQuery, MediaTypeRegistry.JSON, false); + DirectoryReader reader = getReader(writer, indexShard.shardId()); IndicesService.IndexShardCacheEntity entity = new IndicesService.IndexShardCacheEntity(indexShard); Loader loader = new Loader(reader, 0); writer.updateDocument(new Term("id", "0"), newDoc(0, "bar")); - DirectoryReader secondReader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); + DirectoryReader secondReader = getReader(writer, indexShard.shardId()); IndicesService.IndexShardCacheEntity secondEntity = new IndicesService.IndexShardCacheEntity(indexShard); Loader secondLoader = new Loader(secondReader, 0); writer.updateDocument(new Term("id", "0"), newDoc(0, "baz")); - DirectoryReader thirdReader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); + DirectoryReader thirdReader = getReader(writer, indexShard.shardId()); + ; IndicesService.IndexShardCacheEntity thirddEntity = new IndicesService.IndexShardCacheEntity(createIndex("test1").getShard(0)); Loader thirdLoader = new Loader(thirdReader, 0); - BytesReference value1 = cache.getOrCompute(entity, loader, reader, termBytes); + BytesReference value1 = cache.getOrCompute(entity, loader, reader, getTermBytes()); assertEquals("foo", value1.streamInput().readString()); - BytesReference value2 = cache.getOrCompute(secondEntity, secondLoader, secondReader, termBytes); + BytesReference value2 = cache.getOrCompute(secondEntity, secondLoader, secondReader, getTermBytes()); assertEquals("bar", value2.streamInput().readString()); logger.info("Memory size: {}", indexShard.requestCache().stats().getMemorySize()); - BytesReference value3 = cache.getOrCompute(thirddEntity, thirdLoader, thirdReader, termBytes); + BytesReference 
value3 = cache.getOrCompute(thirddEntity, thirdLoader, thirdReader, getTermBytes()); assertEquals("baz", value3.streamInput().readString()); assertEquals(3, cache.count()); RequestCacheStats requestCacheStats = entity.stats().stats(); @@ -1031,14 +1070,13 @@ public void testClearAllEntityIdentity() throws Exception { cache.cacheCleanupManager.cleanCache(); assertEquals(1, cache.count()); // third has not been validated since it's a different identity - value3 = cache.getOrCompute(thirddEntity, thirdLoader, thirdReader, termBytes); + value3 = cache.getOrCompute(thirddEntity, thirdLoader, thirdReader, getTermBytes()); requestCacheStats = entity.stats().stats(); requestCacheStats.add(thirddEntity.stats().stats()); assertEquals(hitCount + 1, requestCacheStats.getHitCount()); assertEquals("baz", value3.streamInput().readString()); - IOUtils.close(reader, secondReader, thirdReader, writer, dir, cache); - terminate(threadPool); + IOUtils.close(reader, secondReader, thirdReader); } public Iterable newDoc(int id, String value) { @@ -1050,7 +1088,7 @@ public Iterable newDoc(int id, String value) { private static class Loader implements CheckedSupplier { - private final DirectoryReader reader; + final DirectoryReader reader; private final int id; public boolean loadedFromCache = true; @@ -1074,38 +1112,18 @@ public BytesReference get() { throw new RuntimeException(e); } } - } public void testInvalidate() throws Exception { - IndicesService indicesService = getInstanceFromNode(IndicesService.class); - IndexShard indexShard = createIndex("test").getShard(0); - ThreadPool threadPool = getThreadPool(); - IndicesRequestCache cache = new IndicesRequestCache(Settings.EMPTY, (shardId -> { - IndexService indexService = null; - try { - indexService = indicesService.indexServiceSafe(shardId.getIndex()); - } catch (IndexNotFoundException ex) { - return Optional.empty(); - } - return Optional.of(new IndicesService.IndexShardCacheEntity(indexService.getShard(shardId.id()))); - }), - new 
CacheModule(new ArrayList<>(), Settings.EMPTY).getCacheService(), - threadPool, - ClusterServiceUtils.createClusterService(threadPool) - ); - Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); - + threadPool = getThreadPool(); + IndicesRequestCache cache = getIndicesRequestCache(Settings.EMPTY); writer.addDocument(newDoc(0, "foo")); - DirectoryReader reader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); - TermQueryBuilder termQuery = new TermQueryBuilder("id", "0"); - BytesReference termBytes = XContentHelper.toXContent(termQuery, MediaTypeRegistry.JSON, false); + DirectoryReader reader = getReader(writer, indexShard.shardId()); // initial cache IndicesService.IndexShardCacheEntity entity = new IndicesService.IndexShardCacheEntity(indexShard); Loader loader = new Loader(reader, 0); - BytesReference value = cache.getOrCompute(entity, loader, reader, termBytes); + BytesReference value = cache.getOrCompute(entity, loader, reader, getTermBytes()); assertEquals("foo", value.streamInput().readString()); ShardRequestCache requestCacheStats = entity.stats(); assertEquals(0, requestCacheStats.stats().getHitCount()); @@ -1117,7 +1135,7 @@ public void testInvalidate() throws Exception { // cache hit entity = new IndicesService.IndexShardCacheEntity(indexShard); loader = new Loader(reader, 0); - value = cache.getOrCompute(entity, loader, reader, termBytes); + value = cache.getOrCompute(entity, loader, reader, getTermBytes()); assertEquals("foo", value.streamInput().readString()); requestCacheStats = entity.stats(); assertEquals(1, requestCacheStats.stats().getHitCount()); @@ -1131,8 +1149,8 @@ public void testInvalidate() throws Exception { // load again after invalidate entity = new IndicesService.IndexShardCacheEntity(indexShard); loader = new Loader(reader, 0); - cache.invalidate(entity, reader, termBytes); - value = cache.getOrCompute(entity, loader, reader, termBytes); + 
cache.invalidate(entity, reader, getTermBytes()); + value = cache.getOrCompute(entity, loader, reader, getTermBytes()); assertEquals("foo", value.streamInput().readString()); requestCacheStats = entity.stats(); assertEquals(1, requestCacheStats.stats().getHitCount()); @@ -1157,16 +1175,11 @@ public void testInvalidate() throws Exception { assertEquals(0, cache.count()); assertEquals(0, requestCacheStats.stats().getMemorySize().bytesAsInt()); - IOUtils.close(reader, writer, dir, cache); - terminate(threadPool); + IOUtils.close(reader); assertEquals(0, cache.numRegisteredCloseListeners()); } public void testEqualsKey() throws IOException { - IndicesService indicesService = getInstanceFromNode(IndicesService.class); - Directory dir = newDirectory(); - IndexWriterConfig config = newIndexWriterConfig(); - IndexWriter writer = new IndexWriter(dir, config); ShardId shardId = new ShardId("foo", "bar", 1); ShardId shardId1 = new ShardId("foo1", "bar1", 2); IndexReader reader1 = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), shardId); @@ -1177,11 +1190,11 @@ public void testEqualsKey() throws IOException { IOUtils.close(reader1, reader2, writer, dir); IndexShard indexShard = mock(IndexShard.class); when(indexShard.state()).thenReturn(IndexShardState.STARTED); - IndicesRequestCache.Key key1 = new IndicesRequestCache.Key(shardId, new TestBytesReference(1), rKey1); - IndicesRequestCache.Key key2 = new IndicesRequestCache.Key(shardId, new TestBytesReference(1), rKey1); - IndicesRequestCache.Key key3 = new IndicesRequestCache.Key(shardId1, new TestBytesReference(1), rKey1); - IndicesRequestCache.Key key4 = new IndicesRequestCache.Key(shardId, new TestBytesReference(1), rKey2); - IndicesRequestCache.Key key5 = new IndicesRequestCache.Key(shardId, new TestBytesReference(2), rKey2); + IndicesRequestCache.Key key1 = new IndicesRequestCache.Key(shardId, new TestBytesReference(1), rKey1, shardId.hashCode()); + IndicesRequestCache.Key key2 = new 
IndicesRequestCache.Key(shardId, new TestBytesReference(1), rKey1, shardId.hashCode()); + IndicesRequestCache.Key key3 = new IndicesRequestCache.Key(shardId1, new TestBytesReference(1), rKey1, shardId1.hashCode()); + IndicesRequestCache.Key key4 = new IndicesRequestCache.Key(shardId, new TestBytesReference(1), rKey2, shardId.hashCode()); + IndicesRequestCache.Key key5 = new IndicesRequestCache.Key(shardId, new TestBytesReference(2), rKey2, shardId.hashCode()); String s = "Some other random object"; assertEquals(key1, key1); assertEquals(key1, key2); @@ -1193,13 +1206,14 @@ public void testEqualsKey() throws IOException { } public void testSerializationDeserializationOfCacheKey() throws Exception { - TermQueryBuilder termQuery = new TermQueryBuilder("id", "0"); - BytesReference termBytes = XContentHelper.toXContent(termQuery, MediaTypeRegistry.JSON, false); - IndexService indexService = createIndex("test"); - IndexShard indexShard = indexService.getShard(0); IndicesService.IndexShardCacheEntity shardCacheEntity = new IndicesService.IndexShardCacheEntity(indexShard); String readerCacheKeyId = UUID.randomUUID().toString(); - IndicesRequestCache.Key key1 = new IndicesRequestCache.Key(indexShard.shardId(), termBytes, readerCacheKeyId); + IndicesRequestCache.Key key1 = new IndicesRequestCache.Key( + indexShard.shardId(), + getTermBytes(), + readerCacheKeyId, + indexShard.hashCode() + ); BytesReference bytesReference = null; try (BytesStreamOutput out = new BytesStreamOutput()) { key1.writeTo(out); @@ -1211,8 +1225,208 @@ public void testSerializationDeserializationOfCacheKey() throws Exception { assertEquals(readerCacheKeyId, key2.readerCacheKeyId); assertEquals(((IndexShard) shardCacheEntity.getCacheIdentity()).shardId(), key2.shardId); - assertEquals(termBytes, key2.value); + assertEquals(getTermBytes(), key2.value); + assertEquals(indexShard.hashCode(), key2.indexShardHashCode); + } + + public void testGetOrComputeConcurrentlyWithMultipleIndices() throws Exception { + 
threadPool = getThreadPool(); + int numberOfIndices = randomIntBetween(2, 5); + List indicesList = new ArrayList<>(); + List indexShardList = Collections.synchronizedList(new ArrayList<>()); + for (int i = 0; i < numberOfIndices; i++) { + String indexName = "test" + i; + indicesList.add(indexName); + IndexShard indexShard = createIndex( + indexName, + Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).build() + ).getShard(0); + indexShardList.add(indexShard); + } + // Create a cache with 2kb to cause evictions and test that flow as well. + IndicesRequestCache cache = getIndicesRequestCache(Settings.builder().put(INDICES_CACHE_QUERY_SIZE.getKey(), "2kb").build()); + Map readerMap = new ConcurrentHashMap<>(); + Map entityMap = new ConcurrentHashMap<>(); + Map writerMap = new ConcurrentHashMap<>(); + int numberOfItems = randomIntBetween(200, 400); + for (int i = 0; i < numberOfIndices; i++) { + IndexShard indexShard = indexShardList.get(i); + entityMap.put(indexShard, new IndicesService.IndexShardCacheEntity(indexShard)); + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); + for (int j = 0; j < numberOfItems; j++) { + writer.addDocument(newDoc(j, generateString(randomIntBetween(4, 50)))); + } + writerMap.put(indexShard, writer); + DirectoryReader reader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), indexShard.shardId()); + readerMap.put(indexShard, reader); + } + + CountDownLatch latch = new CountDownLatch(numberOfItems); + ExecutorService executorService = Executors.newFixedThreadPool(5); + for (int i = 0; i < numberOfItems; i++) { + int finalI = i; + executorService.submit(() -> { + int randomIndexPosition = randomIntBetween(0, numberOfIndices - 1); + IndexShard indexShard = indexShardList.get(randomIndexPosition); + TermQueryBuilder termQuery = new TermQueryBuilder("id", generateString(randomIntBetween(4, 50))); + BytesReference 
termBytes = null; + try { + termBytes = XContentHelper.toXContent(termQuery, MediaTypeRegistry.JSON, false); + } catch (IOException e) { + throw new RuntimeException(e); + } + Loader loader = new Loader(readerMap.get(indexShard), finalI); + try { + cache.getOrCompute(entityMap.get(indexShard), loader, readerMap.get(indexShard), termBytes); + } catch (Exception e) { + throw new RuntimeException(e); + } + latch.countDown(); + }); + } + latch.await(); + for (int i = 0; i < numberOfIndices; i++) { + IndexShard indexShard = indexShardList.get(i); + IndicesService.IndexShardCacheEntity entity = entityMap.get(indexShard); + RequestCacheStats stats = entity.stats().stats(); + assertTrue(stats.getMemorySizeInBytes() >= 0); + assertTrue(stats.getMissCount() >= 0); + assertTrue(stats.getEvictions() >= 0); + } + cache.invalidateAll(); + for (int i = 0; i < numberOfIndices; i++) { + IndexShard indexShard = indexShardList.get(i); + IndicesService.IndexShardCacheEntity entity = entityMap.get(indexShard); + RequestCacheStats stats = entity.stats().stats(); + assertEquals(0, stats.getMemorySizeInBytes()); + } + + for (int i = 0; i < numberOfIndices; i++) { + IndexShard indexShard = indexShardList.get(i); + readerMap.get(indexShard).close(); + writerMap.get(indexShard).close(); + writerMap.get(indexShard).getDirectory().close(); + } + IOUtils.close(cache); + executorService.shutdownNow(); + } + + public void testDeleteAndCreateIndexShardOnSameNodeAndVerifyStats() throws Exception { + threadPool = getThreadPool(); + String indexName = "test1"; + IndicesService indicesService = getInstanceFromNode(IndicesService.class); + // Create a shard + IndexService indexService = createIndex( + indexName, + Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).build() + ); + Index idx = resolveIndex(indexName); + ShardRouting shardRouting = indicesService.indexService(idx).getShard(0).routingEntry(); + IndexShard indexShard = 
indexService.getShard(0); + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); + writer.addDocument(newDoc(0, "foo")); + writer.addDocument(newDoc(1, "hack")); + DirectoryReader reader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), indexShard.shardId()); + Loader loader = new Loader(reader, 0); + + // Set clean interval to a high value as we will do it manually here. + IndicesRequestCache cache = getIndicesRequestCache( + Settings.builder() + .put(IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_INTERVAL_SETTING_KEY, TimeValue.timeValueMillis(100000)) + .build() + ); + IndicesService.IndexShardCacheEntity cacheEntity = new IndicesService.IndexShardCacheEntity(indexShard); + TermQueryBuilder termQuery = new TermQueryBuilder("id", "bar"); + BytesReference termBytes = XContentHelper.toXContent(termQuery, MediaTypeRegistry.JSON, false); + + // Cache some values for indexShard + BytesReference value = cache.getOrCompute(cacheEntity, loader, reader, getTermBytes()); + + // Verify response and stats. 
+ assertEquals("foo", value.streamInput().readString()); + RequestCacheStats stats = indexShard.requestCache().stats(); + assertEquals("foo", value.streamInput().readString()); + assertEquals(1, cache.count()); + assertEquals(1, stats.getMissCount()); + assertTrue(stats.getMemorySizeInBytes() > 0); + + // Remove the shard making its cache entries stale + IOUtils.close(reader, writer, dir); + indexService.removeShard(0, "force"); + + // We again try to create a shard with same ShardId + ShardRouting newRouting = shardRouting; + String nodeId = newRouting.currentNodeId(); + UnassignedInfo unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "boom"); + newRouting = newRouting.moveToUnassigned(unassignedInfo) + .updateUnassigned(unassignedInfo, RecoverySource.EmptyStoreRecoverySource.INSTANCE); + newRouting = ShardRoutingHelper.initialize(newRouting, nodeId); + final DiscoveryNode localNode = new DiscoveryNode("foo", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT); + indexShard = indexService.createShard( + newRouting, + s -> {}, + RetentionLeaseSyncer.EMPTY, + SegmentReplicationCheckpointPublisher.EMPTY, + null, + null, + localNode, + null, + DiscoveryNodes.builder().add(localNode).build() + ); + + // Verify that the new shard requestStats entries are empty. + stats = indexShard.requestCache().stats(); + assertEquals("foo", value.streamInput().readString()); + assertEquals(1, cache.count()); // Still contains the old indexShard stale entry + assertEquals(0, stats.getMissCount()); + assertTrue(stats.getMemorySizeInBytes() == 0); + IndexShardTestCase.updateRoutingEntry(indexShard, newRouting); + + // Now we cache again with new IndexShard(same shardId as older one). 
+ dir = newDirectory(); + writer = new IndexWriter(dir, newIndexWriterConfig()); + writer.addDocument(newDoc(0, "foo")); + writer.addDocument(newDoc(1, "hack")); + reader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), indexShard.shardId()); + loader = new Loader(reader, 0); + cacheEntity = new IndicesService.IndexShardCacheEntity(indexShard); + termQuery = new TermQueryBuilder("id", "bar"); + termBytes = XContentHelper.toXContent(termQuery, MediaTypeRegistry.JSON, false); + value = cache.getOrCompute(cacheEntity, loader, reader, getTermBytes()); + + // Assert response and stats. We verify that cache now has 2 entries, one for older/removed shard and other + // for the current shard. + assertEquals("foo", value.streamInput().readString()); + stats = indexShard.requestCache().stats(); + assertEquals("foo", value.streamInput().readString()); + assertEquals(2, cache.count()); // One entry for older shard and other for the current shard. + assertEquals(1, stats.getMissCount()); + assertTrue(stats.getMemorySizeInBytes() > 0); + + // Trigger clean up of cache. + cache.cacheCleanupManager.cleanCache(); + // Verify that cache still has entries for current shard and only removed older shards entries. + assertEquals(1, cache.count()); + // Now make current indexShard entries stale as well. + reader.close(); + // Trigger clean up of cache and verify that cache has no entries now. 
+ cache.cacheCleanupManager.cleanCache(); + assertEquals(0, cache.count()); + + IOUtils.close(reader, writer, dir, cache); + } + + public static String generateString(int length) { + String characters = "abcdefghijklmnopqrstuvwxyz"; + StringBuilder sb = new StringBuilder(length); + for (int i = 0; i < length; i++) { + int index = randomInt(characters.length() - 1); + sb.append(characters.charAt(index)); + } + return sb.toString(); } private class TestBytesReference extends AbstractBytesReference { diff --git a/server/src/test/java/org/opensearch/indices/RemoteStoreSettingsDynamicUpdateTests.java b/server/src/test/java/org/opensearch/indices/RemoteStoreSettingsDynamicUpdateTests.java index 8a77d97f88d67..f89fd3df6e340 100644 --- a/server/src/test/java/org/opensearch/indices/RemoteStoreSettingsDynamicUpdateTests.java +++ b/server/src/test/java/org/opensearch/indices/RemoteStoreSettingsDynamicUpdateTests.java @@ -96,4 +96,24 @@ public void testClusterRemoteTranslogTransferTimeout() { ); assertEquals(TimeValue.timeValueSeconds(40), remoteStoreSettings.getClusterRemoteTranslogTransferTimeout()); } + + public void testMaxRemoteReferencedTranslogFiles() { + // Test default value + assertEquals(1000, remoteStoreSettings.getMaxRemoteTranslogReaders()); + + // Test override with valid value + clusterSettings.applySettings( + Settings.builder().put(RemoteStoreSettings.CLUSTER_REMOTE_MAX_TRANSLOG_READERS.getKey(), "500").build() + ); + assertEquals(500, remoteStoreSettings.getMaxRemoteTranslogReaders()); + + // Test override with value less than minimum + assertThrows( + IllegalArgumentException.class, + () -> clusterSettings.applySettings( + Settings.builder().put(RemoteStoreSettings.CLUSTER_REMOTE_MAX_TRANSLOG_READERS.getKey(), "99").build() + ) + ); + assertEquals(500, remoteStoreSettings.getMaxRemoteTranslogReaders()); + } } diff --git a/server/src/test/java/org/opensearch/ingest/CompoundProcessorTests.java 
b/server/src/test/java/org/opensearch/ingest/CompoundProcessorTests.java index 76301acac0c19..aad6063bd3f4d 100644 --- a/server/src/test/java/org/opensearch/ingest/CompoundProcessorTests.java +++ b/server/src/test/java/org/opensearch/ingest/CompoundProcessorTests.java @@ -37,16 +37,23 @@ import org.opensearch.test.OpenSearchTestCase; import org.junit.Before; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.Comparator; import java.util.HashMap; +import java.util.HashSet; +import java.util.List; import java.util.Map; +import java.util.Set; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; import java.util.function.BiConsumer; import java.util.function.Consumer; import java.util.function.LongSupplier; import static java.util.Collections.singletonList; +import static org.opensearch.ingest.IngestDocumentPreparer.SHOULD_FAIL_KEY; import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.is; @@ -429,6 +436,211 @@ public String getType() { assertThat(ingestProcessorException.getHeader("pipeline_origin"), equalTo(Arrays.asList("2", "1"))); } + public void testBatchExecute_happyCase() { + List wrapperList = Arrays.asList( + IngestDocumentPreparer.createIngestDocumentWrapper(1), + IngestDocumentPreparer.createIngestDocumentWrapper(2), + IngestDocumentPreparer.createIngestDocumentWrapper(3) + ); + TestProcessor firstProcessor = new TestProcessor(doc -> {}); + TestProcessor secondProcessor = new TestProcessor(doc -> {}); + LongSupplier relativeTimeProvider = mock(LongSupplier.class); + CompoundProcessor compoundProcessor = new CompoundProcessor( + false, + Arrays.asList(firstProcessor, secondProcessor), + null, + relativeTimeProvider + ); + + compoundProcessor.batchExecute(wrapperList, results -> { + assertEquals(firstProcessor.getInvokedCounter(), wrapperList.size()); + 
assertEquals(secondProcessor.getInvokedCounter(), wrapperList.size()); + assertEquals(results.size(), wrapperList.size()); + OperationStats stats = compoundProcessor.getProcessorsWithMetrics().get(0).v2().createStats(); + assertEquals(0, stats.getCurrent()); + assertEquals(3, stats.getCount()); + for (int i = 0; i < wrapperList.size(); ++i) { + assertEquals(wrapperList.get(i).getSlot(), results.get(i).getSlot()); + assertEquals(wrapperList.get(i).getIngestDocument(), results.get(i).getIngestDocument()); + assertEquals(wrapperList.get(i).getException(), results.get(i).getException()); + } + }); + } + + public void testBatchExecute_documentToDrop() { + List wrapperList = Arrays.asList( + IngestDocumentPreparer.createIngestDocumentWrapper(1), + IngestDocumentPreparer.createIngestDocumentWrapper(2, true), + IngestDocumentPreparer.createIngestDocumentWrapper(3) + ); + TestProcessor firstProcessor = new TestProcessor("", "", "", doc -> { + if (doc.hasField(SHOULD_FAIL_KEY) && doc.getFieldValue(SHOULD_FAIL_KEY, Boolean.class)) { + return null; + } + return doc; + }); + TestProcessor secondProcessor = new TestProcessor(doc -> {}); + LongSupplier relativeTimeProvider = mock(LongSupplier.class); + CompoundProcessor compoundProcessor = new CompoundProcessor( + false, + Arrays.asList(firstProcessor, secondProcessor), + null, + relativeTimeProvider + ); + + AtomicInteger callCounter = new AtomicInteger(); + List totalResults = Collections.synchronizedList(new ArrayList<>()); + compoundProcessor.batchExecute(wrapperList, results -> { + totalResults.addAll(results); + if (callCounter.addAndGet(results.size()) == 3) { + assertEquals(firstProcessor.getInvokedCounter(), wrapperList.size()); + assertEquals(secondProcessor.getInvokedCounter(), wrapperList.size() - 1); + assertEquals(totalResults.size(), wrapperList.size()); + OperationStats stats = compoundProcessor.getProcessorsWithMetrics().get(0).v2().createStats(); + assertEquals(0, stats.getCurrent()); + assertEquals(3, 
stats.getCount()); + totalResults.sort(Comparator.comparingInt(IngestDocumentWrapper::getSlot)); + for (int i = 0; i < wrapperList.size(); ++i) { + assertEquals(wrapperList.get(i).getSlot(), totalResults.get(i).getSlot()); + if (2 == wrapperList.get(i).getSlot()) { + assertNull(totalResults.get(i).getIngestDocument()); + } else { + assertEquals(wrapperList.get(i).getIngestDocument(), totalResults.get(i).getIngestDocument()); + } + assertEquals(wrapperList.get(i).getException(), totalResults.get(i).getException()); + } + } + }); + } + + public void testBatchExecute_ignoreFailure() { + List wrapperList = Arrays.asList( + IngestDocumentPreparer.createIngestDocumentWrapper(1), + IngestDocumentPreparer.createIngestDocumentWrapper(2, true), + IngestDocumentPreparer.createIngestDocumentWrapper(3, true) + ); + TestProcessor firstProcessor = new TestProcessor(doc -> { + if (doc.hasField(SHOULD_FAIL_KEY) && doc.getFieldValue(SHOULD_FAIL_KEY, Boolean.class)) { + throw new RuntimeException("fail"); + } + }); + TestProcessor secondProcessor = new TestProcessor(doc -> {}); + TestProcessor onFailureProcessor = new TestProcessor("id2", "on_failure", null, doc -> {}); + LongSupplier relativeTimeProvider = mock(LongSupplier.class); + CompoundProcessor compoundProcessor = new CompoundProcessor( + true, + Arrays.asList(firstProcessor, secondProcessor), + singletonList(onFailureProcessor), + relativeTimeProvider + ); + + compoundProcessor.batchExecute(wrapperList, results -> { + assertEquals(firstProcessor.getInvokedCounter(), wrapperList.size()); + assertEquals(secondProcessor.getInvokedCounter(), wrapperList.size()); + assertEquals(0, onFailureProcessor.getInvokedCounter()); + assertEquals(results.size(), wrapperList.size()); + OperationStats stats = compoundProcessor.getProcessorsWithMetrics().get(0).v2().createStats(); + assertEquals(0, stats.getCurrent()); + assertEquals(3, stats.getCount()); + for (int i = 0; i < wrapperList.size(); ++i) { + 
assertEquals(wrapperList.get(i).getSlot(), results.get(i).getSlot()); + assertEquals(wrapperList.get(i).getIngestDocument(), results.get(i).getIngestDocument()); + assertEquals(wrapperList.get(i).getException(), results.get(i).getException()); + } + }); + } + + public void testBatchExecute_exception_no_onFailureProcessor() { + Set failureSlot = new HashSet<>(Arrays.asList(2, 3)); + List wrapperList = Arrays.asList( + IngestDocumentPreparer.createIngestDocumentWrapper(1), + IngestDocumentPreparer.createIngestDocumentWrapper(2, true), + IngestDocumentPreparer.createIngestDocumentWrapper(3, true) + ); + TestProcessor firstProcessor = new TestProcessor(doc -> { + if (doc.hasField(SHOULD_FAIL_KEY) && doc.getFieldValue(SHOULD_FAIL_KEY, Boolean.class)) { + throw new RuntimeException("fail"); + } + }); + TestProcessor secondProcessor = new TestProcessor(doc -> {}); + LongSupplier relativeTimeProvider = mock(LongSupplier.class); + CompoundProcessor compoundProcessor = new CompoundProcessor( + false, + Arrays.asList(firstProcessor, secondProcessor), + Collections.emptyList(), + relativeTimeProvider + ); + + AtomicInteger callCounter = new AtomicInteger(); + List totalResults = Collections.synchronizedList(new ArrayList<>()); + compoundProcessor.batchExecute(wrapperList, results -> { + totalResults.addAll(results); + if (callCounter.incrementAndGet() == 3) { + assertEquals(wrapperList.size(), firstProcessor.getInvokedCounter()); + assertEquals(1, secondProcessor.getInvokedCounter()); + assertEquals(totalResults.size(), wrapperList.size()); + OperationStats stats = compoundProcessor.getProcessorsWithMetrics().get(0).v2().createStats(); + assertEquals(0, stats.getCurrent()); + assertEquals(3, stats.getCount()); + assertEquals(2, stats.getFailedCount()); + totalResults.sort(Comparator.comparingInt(IngestDocumentWrapper::getSlot)); + for (int i = 0; i < wrapperList.size(); ++i) { + assertEquals(wrapperList.get(i).getSlot(), totalResults.get(i).getSlot()); + if 
(failureSlot.contains(wrapperList.get(i).getSlot())) { + assertNotNull(totalResults.get(i).getException()); + } else { + assertEquals(wrapperList.get(i).getIngestDocument(), totalResults.get(i).getIngestDocument()); + assertEquals(wrapperList.get(i).getException(), totalResults.get(i).getException()); + } + } + } + }); + } + + public void testBatchExecute_exception_with_onFailureProcessor() { + List wrapperList = Arrays.asList( + IngestDocumentPreparer.createIngestDocumentWrapper(1), + IngestDocumentPreparer.createIngestDocumentWrapper(2, true), + IngestDocumentPreparer.createIngestDocumentWrapper(3, true) + ); + TestProcessor firstProcessor = new TestProcessor(doc -> { + if (doc.hasField(SHOULD_FAIL_KEY) && doc.getFieldValue(SHOULD_FAIL_KEY, Boolean.class)) { + throw new RuntimeException("fail"); + } + }); + TestProcessor secondProcessor = new TestProcessor(doc -> {}); + TestProcessor onFailureProcessor = new TestProcessor("id2", "on_failure", null, doc -> {}); + LongSupplier relativeTimeProvider = mock(LongSupplier.class); + CompoundProcessor compoundProcessor = new CompoundProcessor( + false, + Arrays.asList(firstProcessor, secondProcessor), + singletonList(onFailureProcessor), + relativeTimeProvider + ); + + AtomicInteger callCounter = new AtomicInteger(); + List totalResults = Collections.synchronizedList(new ArrayList<>()); + compoundProcessor.batchExecute(wrapperList, results -> { + totalResults.addAll(results); + if (callCounter.incrementAndGet() == 3) { + assertEquals(wrapperList.size(), firstProcessor.getInvokedCounter()); + assertEquals(1, secondProcessor.getInvokedCounter()); + assertEquals(2, onFailureProcessor.getInvokedCounter()); + assertEquals(totalResults.size(), wrapperList.size()); + OperationStats stats = compoundProcessor.getProcessorsWithMetrics().get(0).v2().createStats(); + assertEquals(0, stats.getCurrent()); + assertEquals(3, stats.getCount()); + assertEquals(2, stats.getFailedCount()); + 
totalResults.sort(Comparator.comparingInt(IngestDocumentWrapper::getSlot)); + for (int i = 0; i < wrapperList.size(); ++i) { + assertEquals(wrapperList.get(i).getSlot(), totalResults.get(i).getSlot()); + assertEquals(wrapperList.get(i).getIngestDocument(), totalResults.get(i).getIngestDocument()); + assertNull(totalResults.get(i).getException()); + } + } + }); + } + private void assertStats(CompoundProcessor compoundProcessor, long count, long failed, long time) { assertStats(0, compoundProcessor, 0L, count, failed, time); } diff --git a/server/src/test/java/org/opensearch/ingest/IngestDocumentPreparer.java b/server/src/test/java/org/opensearch/ingest/IngestDocumentPreparer.java new file mode 100644 index 0000000000000..a02595df5589d --- /dev/null +++ b/server/src/test/java/org/opensearch/ingest/IngestDocumentPreparer.java @@ -0,0 +1,32 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.ingest; + +import java.util.HashMap; +import java.util.Map; + +public class IngestDocumentPreparer { + public static final String SHOULD_FAIL_KEY = "shouldFail"; + + public static IngestDocument createIngestDocument(boolean shouldFail) { + Map source = new HashMap<>(); + if (shouldFail) { + source.put(SHOULD_FAIL_KEY, true); + } + return new IngestDocument(source, new HashMap<>()); + } + + public static IngestDocumentWrapper createIngestDocumentWrapper(int slot) { + return createIngestDocumentWrapper(slot, false); + } + + public static IngestDocumentWrapper createIngestDocumentWrapper(int slot, boolean shouldFail) { + return new IngestDocumentWrapper(slot, createIngestDocument(shouldFail), null); + } +} diff --git a/server/src/test/java/org/opensearch/ingest/IngestDocumentWrapperTests.java b/server/src/test/java/org/opensearch/ingest/IngestDocumentWrapperTests.java new file mode 100644 index 0000000000000..9d09cd80abd05 --- /dev/null +++ b/server/src/test/java/org/opensearch/ingest/IngestDocumentWrapperTests.java @@ -0,0 +1,46 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.ingest; + +import org.opensearch.index.VersionType; +import org.opensearch.test.OpenSearchTestCase; +import org.junit.Before; + +import java.util.HashMap; +import java.util.Map; + +public class IngestDocumentWrapperTests extends OpenSearchTestCase { + + private IngestDocument ingestDocument; + + private static final String INDEX = "index"; + private static final String ID = "id"; + private static final String ROUTING = "routing"; + private static final Long VERSION = 1L; + private static final VersionType VERSION_TYPE = VersionType.INTERNAL; + private static final String DOCUMENT_KEY = "foo"; + private static final String DOCUMENT_VALUE = "bar"; + private static final int SLOT = 12; + + @Before + public void setup() throws Exception { + super.setUp(); + Map document = new HashMap<>(); + document.put(DOCUMENT_KEY, DOCUMENT_VALUE); + ingestDocument = new IngestDocument(INDEX, ID, ROUTING, VERSION, VERSION_TYPE, document); + } + + public void testIngestDocumentWrapper() { + Exception ex = new RuntimeException("runtime exception"); + IngestDocumentWrapper wrapper = new IngestDocumentWrapper(SLOT, ingestDocument, ex); + assertEquals(wrapper.getSlot(), SLOT); + assertEquals(wrapper.getException(), ex); + assertEquals(wrapper.getIngestDocument(), ingestDocument); + } +} diff --git a/server/src/test/java/org/opensearch/ingest/IngestServiceTests.java b/server/src/test/java/org/opensearch/ingest/IngestServiceTests.java index 2edfe87387c92..6d216370bae9a 100644 --- a/server/src/test/java/org/opensearch/ingest/IngestServiceTests.java +++ b/server/src/test/java/org/opensearch/ingest/IngestServiceTests.java @@ -116,6 +116,7 @@ import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.eq; +import static org.mockito.Mockito.lenient; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; import static org.mockito.Mockito.times; @@ -132,6 +133,7 @@ public Map 
getProcessors(Processor.Parameters paramet }; private ThreadPool threadPool; + private BulkRequest mockBulkRequest; @Before public void setup() { @@ -139,6 +141,8 @@ public void setup() { ExecutorService executorService = OpenSearchExecutors.newDirectExecutorService(); when(threadPool.generic()).thenReturn(executorService); when(threadPool.executor(anyString())).thenReturn(executorService); + mockBulkRequest = mock(BulkRequest.class); + lenient().when(mockBulkRequest.batchSize()).thenReturn(1); } public void testIngestPlugin() { @@ -210,7 +214,8 @@ public void testExecuteIndexPipelineDoesNotExist() { failureHandler, completionHandler, indexReq -> {}, - Names.WRITE + Names.WRITE, + new BulkRequest() ); assertTrue(failure.get()); @@ -761,7 +766,8 @@ public String getType() { failureHandler, completionHandler, indexReq -> {}, - Names.WRITE + Names.WRITE, + bulkRequest ); assertTrue(failure.get()); @@ -807,7 +813,8 @@ public void testExecuteBulkPipelineDoesNotExist() { failureHandler, completionHandler, indexReq -> {}, - Names.WRITE + Names.WRITE, + bulkRequest ); verify(failureHandler, times(1)).accept( argThat((Integer item) -> item == 2), @@ -843,7 +850,8 @@ public void testExecuteSuccess() { failureHandler, completionHandler, indexReq -> {}, - Names.WRITE + Names.WRITE, + mockBulkRequest ); verify(failureHandler, never()).accept(any(), any()); verify(completionHandler, times(1)).accept(Thread.currentThread(), null); @@ -874,7 +882,8 @@ public void testExecuteEmptyPipeline() throws Exception { failureHandler, completionHandler, indexReq -> {}, - Names.WRITE + Names.WRITE, + mockBulkRequest ); verify(failureHandler, never()).accept(any(), any()); verify(completionHandler, times(1)).accept(Thread.currentThread(), null); @@ -933,7 +942,8 @@ public void testExecutePropagateAllMetadataUpdates() throws Exception { failureHandler, completionHandler, indexReq -> {}, - Names.WRITE + Names.WRITE, + mockBulkRequest ); verify(processor).execute(any(), any()); 
verify(failureHandler, never()).accept(any(), any()); @@ -977,7 +987,8 @@ public void testExecuteFailure() throws Exception { failureHandler, completionHandler, indexReq -> {}, - Names.WRITE + Names.WRITE, + mockBulkRequest ); verify(processor).execute(eqIndexTypeId(indexRequest.version(), indexRequest.versionType(), emptyMap()), any()); verify(failureHandler, times(1)).accept(eq(0), any(RuntimeException.class)); @@ -1035,7 +1046,8 @@ public void testExecuteSuccessWithOnFailure() throws Exception { failureHandler, completionHandler, indexReq -> {}, - Names.WRITE + Names.WRITE, + mockBulkRequest ); verify(failureHandler, never()).accept(eq(0), any(IngestProcessorException.class)); verify(completionHandler, times(1)).accept(Thread.currentThread(), null); @@ -1084,7 +1096,8 @@ public void testExecuteFailureWithNestedOnFailure() throws Exception { failureHandler, completionHandler, indexReq -> {}, - Names.WRITE + Names.WRITE, + mockBulkRequest ); verify(processor).execute(eqIndexTypeId(indexRequest.version(), indexRequest.versionType(), emptyMap()), any()); verify(failureHandler, times(1)).accept(eq(0), any(RuntimeException.class)); @@ -1146,7 +1159,8 @@ public void testBulkRequestExecutionWithFailures() throws Exception { requestItemErrorHandler, completionHandler, indexReq -> {}, - Names.WRITE + Names.WRITE, + bulkRequest ); verify(requestItemErrorHandler, times(numIndexRequests)).accept(anyInt(), argThat(o -> o.getCause().equals(error))); @@ -1204,7 +1218,8 @@ public void testBulkRequestExecution() throws Exception { requestItemErrorHandler, completionHandler, indexReq -> {}, - Names.WRITE + Names.WRITE, + bulkRequest ); verify(requestItemErrorHandler, never()).accept(any(), any()); @@ -1272,7 +1287,8 @@ public void testStats() throws Exception { failureHandler, completionHandler, indexReq -> {}, - Names.WRITE + Names.WRITE, + mockBulkRequest ); final IngestStats afterFirstRequestStats = ingestService.stats(); 
assertThat(afterFirstRequestStats.getPipelineStats().size(), equalTo(2)); @@ -1296,7 +1312,8 @@ public void testStats() throws Exception { failureHandler, completionHandler, indexReq -> {}, - Names.WRITE + Names.WRITE, + mockBulkRequest ); final IngestStats afterSecondRequestStats = ingestService.stats(); assertThat(afterSecondRequestStats.getPipelineStats().size(), equalTo(2)); @@ -1325,7 +1342,8 @@ public void testStats() throws Exception { failureHandler, completionHandler, indexReq -> {}, - Names.WRITE + Names.WRITE, + mockBulkRequest ); final IngestStats afterThirdRequestStats = ingestService.stats(); assertThat(afterThirdRequestStats.getPipelineStats().size(), equalTo(2)); @@ -1358,7 +1376,8 @@ public void testStats() throws Exception { failureHandler, completionHandler, indexReq -> {}, - Names.WRITE + Names.WRITE, + mockBulkRequest ); final IngestStats afterForthRequestStats = ingestService.stats(); assertThat(afterForthRequestStats.getPipelineStats().size(), equalTo(2)); @@ -1456,7 +1475,8 @@ public String getDescription() { failureHandler, completionHandler, dropHandler, - Names.WRITE + Names.WRITE, + bulkRequest ); verify(failureHandler, never()).accept(any(), any()); verify(completionHandler, times(1)).accept(Thread.currentThread(), null); @@ -1543,7 +1563,8 @@ public void testCBORParsing() throws Exception { (integer, e) -> {}, (thread, e) -> {}, indexReq -> {}, - Names.WRITE + Names.WRITE, + mockBulkRequest ); } @@ -1672,6 +1693,283 @@ public void testResolveRequestOrDefaultPipelineAndFinalPipeline() { } } + public void testExecuteBulkRequestInBatch() { + CompoundProcessor mockCompoundProcessor = mockCompoundProcessor(); + IngestService ingestService = createWithProcessors( + Collections.singletonMap("mock", (factories, tag, description, config) -> mockCompoundProcessor) + ); + createPipeline("_id", ingestService); + BulkRequest bulkRequest = new BulkRequest(); + IndexRequest indexRequest1 = new 
IndexRequest("_index").id("_id1").source(emptyMap()).setPipeline("_id").setFinalPipeline("_none"); + bulkRequest.add(indexRequest1); + IndexRequest indexRequest2 = new IndexRequest("_index").id("_id2").source(emptyMap()).setPipeline("_id").setFinalPipeline("_none"); + bulkRequest.add(indexRequest2); + IndexRequest indexRequest3 = new IndexRequest("_index").id("_id3").source(emptyMap()).setPipeline("_none").setFinalPipeline("_id"); + bulkRequest.add(indexRequest3); + IndexRequest indexRequest4 = new IndexRequest("_index").id("_id4").source(emptyMap()).setPipeline("_id").setFinalPipeline("_none"); + bulkRequest.add(indexRequest4); + bulkRequest.batchSize(2); + @SuppressWarnings("unchecked") + final BiConsumer failureHandler = mock(BiConsumer.class); + @SuppressWarnings("unchecked") + final BiConsumer completionHandler = mock(BiConsumer.class); + ingestService.executeBulkRequest( + 4, + bulkRequest.requests(), + failureHandler, + completionHandler, + indexReq -> {}, + Names.WRITE, + bulkRequest + ); + verify(failureHandler, never()).accept(any(), any()); + verify(completionHandler, times(1)).accept(Thread.currentThread(), null); + verify(mockCompoundProcessor, times(2)).batchExecute(any(), any()); + verify(mockCompoundProcessor, never()).execute(any(), any()); + } + + public void testExecuteBulkRequestInBatchWithDefaultAndFinalPipeline() { + CompoundProcessor mockCompoundProcessor = mockCompoundProcessor(); + IngestService ingestService = createWithProcessors( + Collections.singletonMap("mock", (factories, tag, description, config) -> mockCompoundProcessor) + ); + ClusterState clusterState = createPipeline("_id", ingestService); + createPipeline("_final", ingestService, clusterState); + BulkRequest bulkRequest = new BulkRequest(); + IndexRequest indexRequest1 = new IndexRequest("_index").id("_id1").source(emptyMap()).setPipeline("_id").setFinalPipeline("_final"); + bulkRequest.add(indexRequest1); + IndexRequest indexRequest2 = new 
IndexRequest("_index").id("_id2").source(emptyMap()).setPipeline("_id").setFinalPipeline("_final"); + bulkRequest.add(indexRequest2); + IndexRequest indexRequest3 = new IndexRequest("_index").id("_id3").source(emptyMap()).setPipeline("_id").setFinalPipeline("_final"); + bulkRequest.add(indexRequest3); + IndexRequest indexRequest4 = new IndexRequest("_index").id("_id4").source(emptyMap()).setPipeline("_id").setFinalPipeline("_final"); + bulkRequest.add(indexRequest4); + bulkRequest.batchSize(2); + @SuppressWarnings("unchecked") + final BiConsumer failureHandler = mock(BiConsumer.class); + @SuppressWarnings("unchecked") + final BiConsumer completionHandler = mock(BiConsumer.class); + ingestService.executeBulkRequest( + 4, + bulkRequest.requests(), + failureHandler, + completionHandler, + indexReq -> {}, + Names.WRITE, + bulkRequest + ); + verify(failureHandler, never()).accept(any(), any()); + verify(completionHandler, times(1)).accept(Thread.currentThread(), null); + verify(mockCompoundProcessor, times(4)).batchExecute(any(), any()); + verify(mockCompoundProcessor, never()).execute(any(), any()); + } + + public void testExecuteBulkRequestInBatchFallbackWithOneDocument() { + CompoundProcessor mockCompoundProcessor = mockCompoundProcessor(); + IngestService ingestService = createWithProcessors( + Collections.singletonMap("mock", (factories, tag, description, config) -> mockCompoundProcessor) + ); + createPipeline("_id", ingestService); + BulkRequest bulkRequest = new BulkRequest(); + IndexRequest indexRequest1 = new IndexRequest("_index").id("_id1").source(emptyMap()).setPipeline("_id").setFinalPipeline("_none"); + bulkRequest.add(indexRequest1); + bulkRequest.batchSize(2); + @SuppressWarnings("unchecked") + final BiConsumer failureHandler = mock(BiConsumer.class); + @SuppressWarnings("unchecked") + final BiConsumer completionHandler = mock(BiConsumer.class); + ingestService.executeBulkRequest( + 1, + bulkRequest.requests(), + failureHandler, + completionHandler, + 
indexReq -> {}, + Names.WRITE, + bulkRequest + ); + verify(failureHandler, never()).accept(any(), any()); + verify(completionHandler, times(1)).accept(Thread.currentThread(), null); + verify(mockCompoundProcessor, never()).batchExecute(any(), any()); + verify(mockCompoundProcessor, times(1)).execute(any(), any()); + } + + public void testExecuteBulkRequestInBatchNoValidPipeline() { + CompoundProcessor mockCompoundProcessor = mockCompoundProcessor(); + IngestService ingestService = createWithProcessors( + Collections.singletonMap("mock", (factories, tag, description, config) -> mockCompoundProcessor) + ); + createPipeline("_id", ingestService); + BulkRequest bulkRequest = new BulkRequest(); + // will not be handled as not valid document type + IndexRequest indexRequest1 = new IndexRequest("_index").id("_id1") + .source(emptyMap()) + .setPipeline("_none") + .setFinalPipeline("_none"); + bulkRequest.add(indexRequest1); + IndexRequest indexRequest2 = new IndexRequest("_index").id("_id2") + .source(emptyMap()) + .setPipeline("_none") + .setFinalPipeline("_none"); + bulkRequest.add(indexRequest2); + bulkRequest.batchSize(2); + @SuppressWarnings("unchecked") + final BiConsumer failureHandler = mock(BiConsumer.class); + @SuppressWarnings("unchecked") + final BiConsumer completionHandler = mock(BiConsumer.class); + ingestService.executeBulkRequest( + 2, + bulkRequest.requests(), + failureHandler, + completionHandler, + indexReq -> {}, + Names.WRITE, + bulkRequest + ); + verify(failureHandler, never()).accept(any(), any()); + verify(completionHandler, times(1)).accept(Thread.currentThread(), null); + verify(mockCompoundProcessor, never()).batchExecute(any(), any()); + verify(mockCompoundProcessor, never()).execute(any(), any()); + } + + public void testExecuteBulkRequestInBatchNoValidDocument() { + CompoundProcessor mockCompoundProcessor = mockCompoundProcessor(); + IngestService ingestService = createWithProcessors( + Collections.singletonMap("mock", (factories, tag, 
description, config) -> mockCompoundProcessor) + ); + createPipeline("_id", ingestService); + BulkRequest bulkRequest = new BulkRequest(); + // will not be handled as not valid document type + bulkRequest.add(new DeleteRequest("_index", "_id")); + bulkRequest.add(new DeleteRequest("_index", "_id")); + bulkRequest.batchSize(2); + @SuppressWarnings("unchecked") + final BiConsumer failureHandler = mock(BiConsumer.class); + @SuppressWarnings("unchecked") + final BiConsumer completionHandler = mock(BiConsumer.class); + ingestService.executeBulkRequest( + 2, + bulkRequest.requests(), + failureHandler, + completionHandler, + indexReq -> {}, + Names.WRITE, + bulkRequest + ); + verify(failureHandler, never()).accept(any(), any()); + verify(completionHandler, times(1)).accept(Thread.currentThread(), null); + verify(mockCompoundProcessor, never()).batchExecute(any(), any()); + verify(mockCompoundProcessor, never()).execute(any(), any()); + } + + public void testExecuteBulkRequestInBatchWithException() { + CompoundProcessor mockCompoundProcessor = mockCompoundProcessor(); + IngestService ingestService = createWithProcessors( + Collections.singletonMap("mock", (factories, tag, description, config) -> mockCompoundProcessor) + ); + doThrow(new RuntimeException()).when(mockCompoundProcessor).batchExecute(any(), any()); + createPipeline("_id", ingestService); + BulkRequest bulkRequest = new BulkRequest(); + // will not be handled as not valid document type + IndexRequest indexRequest1 = new IndexRequest("_index").id("_id1").source(emptyMap()).setPipeline("_id").setFinalPipeline("_none"); + bulkRequest.add(indexRequest1); + IndexRequest indexRequest2 = new IndexRequest("_index").id("_id2").source(emptyMap()).setPipeline("_id").setFinalPipeline("_none"); + bulkRequest.add(indexRequest2); + bulkRequest.batchSize(2); + @SuppressWarnings("unchecked") + final BiConsumer failureHandler = mock(BiConsumer.class); + @SuppressWarnings("unchecked") + final BiConsumer completionHandler = 
mock(BiConsumer.class); + ingestService.executeBulkRequest( + 2, + bulkRequest.requests(), + failureHandler, + completionHandler, + indexReq -> {}, + Names.WRITE, + bulkRequest + ); + verify(failureHandler, times(2)).accept(any(), any()); + verify(completionHandler, times(1)).accept(Thread.currentThread(), null); + verify(mockCompoundProcessor, times(1)).batchExecute(any(), any()); + verify(mockCompoundProcessor, never()).execute(any(), any()); + } + + public void testExecuteBulkRequestInBatchWithExceptionInCallback() { + CompoundProcessor mockCompoundProcessor = mockCompoundProcessor(); + IngestService ingestService = createWithProcessors( + Collections.singletonMap("mock", (factories, tag, description, config) -> mockCompoundProcessor) + ); + createPipeline("_id", ingestService); + BulkRequest bulkRequest = new BulkRequest(); + // will not be handled as not valid document type + IndexRequest indexRequest1 = new IndexRequest("_index").id("_id1").source(emptyMap()).setPipeline("_id").setFinalPipeline("_none"); + bulkRequest.add(indexRequest1); + IndexRequest indexRequest2 = new IndexRequest("_index").id("_id2").source(emptyMap()).setPipeline("_id").setFinalPipeline("_none"); + bulkRequest.add(indexRequest2); + bulkRequest.batchSize(2); + + List results = Arrays.asList( + new IngestDocumentWrapper(0, IngestService.toIngestDocument(indexRequest1), null), + new IngestDocumentWrapper(1, null, new RuntimeException()) + ); + doAnswer(args -> { + @SuppressWarnings("unchecked") + Consumer> handler = (Consumer) args.getArguments()[1]; + handler.accept(results); + return null; + }).when(mockCompoundProcessor).batchExecute(any(), any()); + + @SuppressWarnings("unchecked") + final BiConsumer failureHandler = mock(BiConsumer.class); + @SuppressWarnings("unchecked") + final BiConsumer completionHandler = mock(BiConsumer.class); + ingestService.executeBulkRequest( + 2, + bulkRequest.requests(), + failureHandler, + completionHandler, + indexReq -> {}, + Names.WRITE, + bulkRequest 
+ ); + verify(failureHandler, times(1)).accept(any(), any()); + verify(completionHandler, times(1)).accept(Thread.currentThread(), null); + verify(mockCompoundProcessor, times(1)).batchExecute(any(), any()); + verify(mockCompoundProcessor, never()).execute(any(), any()); + } + + public void testPrepareBatches_same_index_pipeline() { + IngestService.IndexRequestWrapper wrapper1 = createIndexRequestWrapper("index1", Collections.singletonList("p1")); + IngestService.IndexRequestWrapper wrapper2 = createIndexRequestWrapper("index1", Collections.singletonList("p1")); + IngestService.IndexRequestWrapper wrapper3 = createIndexRequestWrapper("index1", Collections.singletonList("p1")); + IngestService.IndexRequestWrapper wrapper4 = createIndexRequestWrapper("index1", Collections.singletonList("p1")); + List> batches = IngestService.prepareBatches( + 2, + Arrays.asList(wrapper1, wrapper2, wrapper3, wrapper4) + ); + assertEquals(2, batches.size()); + for (int i = 0; i < 2; ++i) { + assertEquals(2, batches.get(i).size()); + } + } + + public void testPrepareBatches_different_index_pipeline() { + IngestService.IndexRequestWrapper wrapper1 = createIndexRequestWrapper("index1", Collections.singletonList("p1")); + IngestService.IndexRequestWrapper wrapper2 = createIndexRequestWrapper("index2", Collections.singletonList("p1")); + IngestService.IndexRequestWrapper wrapper3 = createIndexRequestWrapper("index1", Arrays.asList("p1", "p2")); + IngestService.IndexRequestWrapper wrapper4 = createIndexRequestWrapper("index1", Collections.singletonList("p2")); + List> batches = IngestService.prepareBatches( + 2, + Arrays.asList(wrapper1, wrapper2, wrapper3, wrapper4) + ); + assertEquals(4, batches.size()); + } + + private IngestService.IndexRequestWrapper createIndexRequestWrapper(String index, List pipelines) { + IndexRequest indexRequest = new IndexRequest(index); + return new IngestService.IndexRequestWrapper(0, indexRequest, pipelines, true); + } + private IngestDocument 
eqIndexTypeId(final Map source) { return argThat(new IngestDocumentMatcher("_index", "_type", "_id", -3L, VersionType.INTERNAL, source)); } @@ -1718,6 +2016,13 @@ private CompoundProcessor mockCompoundProcessor() { handler.accept((IngestDocument) args.getArguments()[0], null); return null; }).when(processor).execute(any(), any()); + + doAnswer(args -> { + @SuppressWarnings("unchecked") + Consumer> handler = (Consumer) args.getArguments()[1]; + handler.accept((List) args.getArguments()[0]); + return null; + }).when(processor).batchExecute(any(), any()); return processor; } @@ -1757,4 +2062,24 @@ private void assertStats(OperationStats stats, long count, long failed, long tim private OperationStats getPipelineStats(List pipelineStats, String id) { return pipelineStats.stream().filter(p1 -> p1.getPipelineId().equals(id)).findFirst().map(p2 -> p2.getStats()).orElse(null); } + + private ClusterState createPipeline(String pipeline, IngestService ingestService) { + return createPipeline(pipeline, ingestService, null); + } + + private ClusterState createPipeline(String pipeline, IngestService ingestService, ClusterState previousState) { + PutPipelineRequest putRequest = new PutPipelineRequest( + pipeline, + new BytesArray("{\"processors\": [{\"mock\" : {}}]}"), + MediaTypeRegistry.JSON + ); + ClusterState clusterState = ClusterState.builder(new ClusterName("_name")).build(); // Start empty + if (previousState != null) { + clusterState = previousState; + } + ClusterState previousClusterState = clusterState; + clusterState = IngestService.innerPut(putRequest, clusterState); + ingestService.applyClusterState(new ClusterChangedEvent("", clusterState, previousClusterState)); + return clusterState; + } } diff --git a/server/src/test/java/org/opensearch/ingest/ProcessorTests.java b/server/src/test/java/org/opensearch/ingest/ProcessorTests.java new file mode 100644 index 0000000000000..d6ef3be73adb8 --- /dev/null +++ b/server/src/test/java/org/opensearch/ingest/ProcessorTests.java 
@@ -0,0 +1,74 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.ingest; + +import org.opensearch.test.OpenSearchTestCase; +import org.junit.Before; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +import static org.opensearch.ingest.IngestDocumentPreparer.SHOULD_FAIL_KEY; + +public class ProcessorTests extends OpenSearchTestCase { + private Processor processor; + private static final String FIELD_KEY = "result"; + private static final String FIELD_VALUE_PROCESSED = "processed"; + + @Before + public void setup() {} + + public void test_batchExecute_success() { + processor = new FakeProcessor("type", "tag", "description", doc -> { doc.setFieldValue(FIELD_KEY, FIELD_VALUE_PROCESSED); }); + List wrapperList = Arrays.asList( + IngestDocumentPreparer.createIngestDocumentWrapper(1), + IngestDocumentPreparer.createIngestDocumentWrapper(2), + IngestDocumentPreparer.createIngestDocumentWrapper(3) + ); + processor.batchExecute(wrapperList, results -> { + assertEquals(3, results.size()); + for (IngestDocumentWrapper wrapper : results) { + assertNull(wrapper.getException()); + assertEquals(FIELD_VALUE_PROCESSED, wrapper.getIngestDocument().getFieldValue(FIELD_KEY, String.class)); + } + }); + } + + public void test_batchExecute_empty() { + processor = new FakeProcessor("type", "tag", "description", doc -> { doc.setFieldValue(FIELD_KEY, FIELD_VALUE_PROCESSED); }); + processor.batchExecute(Collections.emptyList(), results -> { assertEquals(0, results.size()); }); + } + + public void test_batchExecute_exception() { + processor = new FakeProcessor("type", "tag", "description", doc -> { + if (doc.hasField(SHOULD_FAIL_KEY) && doc.getFieldValue(SHOULD_FAIL_KEY, Boolean.class)) { + throw new RuntimeException("fail"); + } + doc.setFieldValue(FIELD_KEY, 
FIELD_VALUE_PROCESSED); + }); + List wrapperList = Arrays.asList( + IngestDocumentPreparer.createIngestDocumentWrapper(1), + IngestDocumentPreparer.createIngestDocumentWrapper(2, true), + IngestDocumentPreparer.createIngestDocumentWrapper(3) + ); + processor.batchExecute(wrapperList, results -> { + assertEquals(3, results.size()); + for (IngestDocumentWrapper wrapper : results) { + if (wrapper.getSlot() == 2) { + assertNotNull(wrapper.getException()); + assertNull(wrapper.getIngestDocument()); + } else { + assertNull(wrapper.getException()); + assertEquals(FIELD_VALUE_PROCESSED, wrapper.getIngestDocument().getFieldValue(FIELD_KEY, String.class)); + } + } + }); + } +} diff --git a/server/src/test/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTests.java b/server/src/test/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTests.java index 2a4fbca7a8541..cf95999ec5086 100644 --- a/server/src/test/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTests.java +++ b/server/src/test/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTests.java @@ -38,29 +38,42 @@ import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.store.Directory; import org.apache.lucene.tests.index.RandomIndexWriter; +import org.apache.lucene.tests.util.TestUtil; import org.opensearch.common.time.DateFormatters; +import org.opensearch.core.common.breaker.CircuitBreaker; +import org.opensearch.core.indices.breaker.NoneCircuitBreakerService; import 
org.opensearch.index.mapper.DateFieldMapper; import org.opensearch.index.mapper.DocCountFieldMapper; +import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.search.aggregations.AggregationBuilder; import org.opensearch.search.aggregations.BucketOrder; +import org.opensearch.search.aggregations.InternalAggregation; +import org.opensearch.search.aggregations.MultiBucketConsumerService; import org.opensearch.search.aggregations.bucket.terms.StringTerms; import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder; +import org.opensearch.search.aggregations.pipeline.PipelineAggregator; import org.opensearch.search.aggregations.support.AggregationInspectionHelper; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; import java.util.List; +import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Consumer; import static java.util.stream.Collectors.toList; +import static org.opensearch.test.InternalAggregationTestCase.DEFAULT_MAX_BUCKETS; import static org.hamcrest.Matchers.equalTo; public class DateHistogramAggregatorTests extends DateHistogramAggregatorTestCase { @@ -1450,6 +1463,267 @@ private void testSearchCase( } } + public void testMultiRangeTraversal() throws IOException { + Map dataset = new HashMap<>(); + dataset.put("2017-02-01T09:02:00.000Z", randomIntBetween(100, 2000)); + dataset.put("2017-02-01T09:59:59.999Z", randomIntBetween(100, 2000)); + dataset.put("2017-02-01T10:00:00.001Z", randomIntBetween(100, 2000)); + dataset.put("2017-02-01T13:06:00.000Z", randomIntBetween(100, 2000)); + dataset.put("2017-02-01T14:04:00.000Z", randomIntBetween(100, 2000)); + dataset.put("2017-02-01T14:05:00.000Z", randomIntBetween(100, 2000)); + dataset.put("2017-02-01T15:59:00.000Z", randomIntBetween(100, 2000)); + dataset.put("2017-02-01T16:06:00.000Z", randomIntBetween(100, 2000)); + 
dataset.put("2017-02-01T16:48:00.000Z", randomIntBetween(100, 2000)); + dataset.put("2017-02-01T16:59:00.000Z", randomIntBetween(100, 2000)); + + testFilterRewriteCase( + LongPoint.newRangeQuery(AGGREGABLE_DATE, asLong("2017-01-01T09:00:00.000Z"), asLong("2017-02-01T16:00:00.000Z")), + dataset, + aggregation -> aggregation.fixedInterval(new DateHistogramInterval("60m")).field(AGGREGABLE_DATE).minDocCount(1L), + histogram -> { + List buckets = histogram.getBuckets(); + assertEquals(5, buckets.size()); + + Histogram.Bucket bucket = buckets.get(0); + assertEquals("2017-02-01T09:00:00.000Z", bucket.getKeyAsString()); + int expected = dataset.get("2017-02-01T09:02:00.000Z") + dataset.get("2017-02-01T09:59:59.999Z"); + assertEquals(expected, bucket.getDocCount()); + + bucket = buckets.get(1); + assertEquals("2017-02-01T10:00:00.000Z", bucket.getKeyAsString()); + expected = dataset.get("2017-02-01T10:00:00.001Z"); + assertEquals(expected, bucket.getDocCount()); + + bucket = buckets.get(2); + assertEquals("2017-02-01T13:00:00.000Z", bucket.getKeyAsString()); + expected = dataset.get("2017-02-01T13:06:00.000Z"); + assertEquals(expected, bucket.getDocCount()); + + bucket = buckets.get(3); + assertEquals("2017-02-01T14:00:00.000Z", bucket.getKeyAsString()); + expected = dataset.get("2017-02-01T14:04:00.000Z") + dataset.get("2017-02-01T14:05:00.000Z"); + assertEquals(expected, bucket.getDocCount()); + + bucket = buckets.get(4); + assertEquals("2017-02-01T15:00:00.000Z", bucket.getKeyAsString()); + expected = dataset.get("2017-02-01T15:59:00.000Z"); + assertEquals(expected, bucket.getDocCount()); + }, + false, + collectorCount -> assertEquals(0, (int) collectorCount), + true + ); + } + + public void testMultiRangeTraversalFixedData() throws IOException { + Map dataset = new HashMap<>(); + dataset.put("2017-02-01T09:02:00.000Z", 512); + dataset.put("2017-02-01T09:59:59.999Z", 256); + dataset.put("2017-02-01T10:00:00.001Z", 256); + dataset.put("2017-02-01T13:06:00.000Z", 512); + 
dataset.put("2017-02-01T14:04:00.000Z", 256); + dataset.put("2017-02-01T14:05:00.000Z", 256); + dataset.put("2017-02-01T15:59:00.000Z", 768); + + testFilterRewriteCase( + LongPoint.newRangeQuery(AGGREGABLE_DATE, asLong("2017-01-01T09:00:00.000Z"), asLong("2017-02-01T14:04:01.000Z")), + dataset, + aggregation -> aggregation.fixedInterval(new DateHistogramInterval("60m")).field(AGGREGABLE_DATE).minDocCount(1L), + histogram -> { + List buckets = histogram.getBuckets(); + assertEquals(4, buckets.size()); + + Histogram.Bucket bucket = buckets.get(0); + assertEquals("2017-02-01T09:00:00.000Z", bucket.getKeyAsString()); + int expected = dataset.get("2017-02-01T09:02:00.000Z") + dataset.get("2017-02-01T09:59:59.999Z"); + assertEquals(expected, bucket.getDocCount()); + + bucket = buckets.get(1); + assertEquals("2017-02-01T10:00:00.000Z", bucket.getKeyAsString()); + expected = dataset.get("2017-02-01T10:00:00.001Z"); + assertEquals(expected, bucket.getDocCount()); + + bucket = buckets.get(2); + assertEquals("2017-02-01T13:00:00.000Z", bucket.getKeyAsString()); + expected = dataset.get("2017-02-01T13:06:00.000Z"); + assertEquals(expected, bucket.getDocCount()); + + bucket = buckets.get(3); + assertEquals("2017-02-01T14:00:00.000Z", bucket.getKeyAsString()); + expected = dataset.get("2017-02-01T14:04:00.000Z"); + assertEquals(expected, bucket.getDocCount()); + }, + false, + collectorCount -> assertEquals(0, (int) collectorCount), + false + ); + } + + public void testMultiRangeTraversalNotApplicable() throws IOException { + Map dataset = new HashMap<>(); + dataset.put("2017-02-01T09:02:00.000Z", randomIntBetween(100, 2000)); + dataset.put("2017-02-01T09:59:59.999Z", randomIntBetween(100, 2000)); + dataset.put("2017-02-01T10:00:00.001Z", randomIntBetween(100, 2000)); + dataset.put("2017-02-01T13:06:00.000Z", randomIntBetween(100, 2000)); + dataset.put("2017-02-01T14:04:00.000Z", randomIntBetween(100, 2000)); + dataset.put("2017-02-01T14:05:00.000Z", randomIntBetween(100, 2000)); 
+ dataset.put("2017-02-01T15:59:00.000Z", randomIntBetween(100, 2000)); + dataset.put("2017-02-01T16:06:00.000Z", randomIntBetween(100, 2000)); + dataset.put("2017-02-01T16:48:00.000Z", randomIntBetween(100, 2000)); + dataset.put("2017-02-01T16:59:00.000Z", randomIntBetween(100, 2000)); + + testFilterRewriteCase( + new MatchAllDocsQuery(), + dataset, + aggregation -> aggregation.fixedInterval(new DateHistogramInterval("60m")).field(AGGREGABLE_DATE).minDocCount(1L), + histogram -> { + List buckets = histogram.getBuckets(); + assertEquals(6, buckets.size()); + + Histogram.Bucket bucket = buckets.get(0); + assertEquals("2017-02-01T09:00:00.000Z", bucket.getKeyAsString()); + int expected = dataset.get("2017-02-01T09:02:00.000Z") + dataset.get("2017-02-01T09:59:59.999Z") + 4; + assertEquals(expected, bucket.getDocCount()); + + bucket = buckets.get(1); + assertEquals("2017-02-01T10:00:00.000Z", bucket.getKeyAsString()); + expected = dataset.get("2017-02-01T10:00:00.001Z"); + assertEquals(expected, bucket.getDocCount()); + + bucket = buckets.get(2); + assertEquals("2017-02-01T13:00:00.000Z", bucket.getKeyAsString()); + expected = dataset.get("2017-02-01T13:06:00.000Z"); + assertEquals(expected, bucket.getDocCount()); + + bucket = buckets.get(3); + assertEquals("2017-02-01T14:00:00.000Z", bucket.getKeyAsString()); + expected = dataset.get("2017-02-01T14:04:00.000Z") + dataset.get("2017-02-01T14:05:00.000Z"); + assertEquals(expected, bucket.getDocCount()); + + bucket = buckets.get(4); + assertEquals("2017-02-01T15:00:00.000Z", bucket.getKeyAsString()); + expected = dataset.get("2017-02-01T15:59:00.000Z"); + assertEquals(expected, bucket.getDocCount()); + + bucket = buckets.get(5); + assertEquals("2017-02-01T16:00:00.000Z", bucket.getKeyAsString()); + expected = dataset.get("2017-02-01T16:06:00.000Z") + dataset.get("2017-02-01T16:48:00.000Z") + dataset.get( + "2017-02-01T16:59:00.000Z" + ); + assertEquals(expected, bucket.getDocCount()); + }, + true, + collectCount -> 
assertTrue(collectCount > 0), + true + ); + } + + private void testFilterRewriteCase( + Query query, + Map dataset, + Consumer configure, + Consumer verify, + boolean useDocCountField, + Consumer verifyCollectCount, + boolean randomWrite + ) throws IOException { + DateFieldMapper.DateFieldType fieldType = aggregableDateFieldType(false, true); + + try (Directory directory = newDirectory()) { + if (randomWrite) { + try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) { + Document document = new Document(); + if (useDocCountField) { + // add the doc count field to the first document + document.add(new NumericDocValuesField(DocCountFieldMapper.NAME, 5)); + } + for (Map.Entry date : dataset.entrySet()) { + for (int i = 0; i < date.getValue(); i++) { + long instant = asLong(date.getKey(), fieldType); + document.add(new SortedNumericDocValuesField(AGGREGABLE_DATE, instant)); + document.add(new LongPoint(AGGREGABLE_DATE, instant)); + indexWriter.addDocument(document); + document.clear(); + } + } + } + } else { + // use default codec so max points in leaf is fixed to 512, to cover the node level visit and compare logic + try (IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig().setCodec(TestUtil.getDefaultCodec()))) { + List documents = new ArrayList<>(); + for (Map.Entry date : dataset.entrySet()) { + for (int i = 0; i < date.getValue(); i++) { + Document document = new Document(); + if (useDocCountField) { + // add the doc count field once + document.add(new NumericDocValuesField(DocCountFieldMapper.NAME, 5)); + useDocCountField = false; + } + long instant = asLong(date.getKey(), fieldType); + document.add(new SortedNumericDocValuesField(AGGREGABLE_DATE, instant)); + document.add(new LongPoint(AGGREGABLE_DATE, instant)); + documents.add(document); + } + } + indexWriter.addDocuments(documents); + } + } + + try (IndexReader indexReader = DirectoryReader.open(directory)) { + IndexSearcher indexSearcher = 
newSearcher(indexReader, true, true); + + DateHistogramAggregationBuilder aggregationBuilder = new DateHistogramAggregationBuilder("_name"); + if (configure != null) { + configure.accept(aggregationBuilder); + } + + CountingAggregator aggregator = createCountingAggregator(query, aggregationBuilder, indexSearcher, fieldType); + aggregator.preCollection(); + indexSearcher.search(query, aggregator); + aggregator.postCollection(); + + MultiBucketConsumerService.MultiBucketConsumer reduceBucketConsumer = new MultiBucketConsumerService.MultiBucketConsumer( + Integer.MAX_VALUE, + new NoneCircuitBreakerService().getBreaker(CircuitBreaker.REQUEST) + ); + InternalAggregation.ReduceContext context = InternalAggregation.ReduceContext.forFinalReduction( + aggregator.context().bigArrays(), + getMockScriptService(), + reduceBucketConsumer, + PipelineAggregator.PipelineTree.EMPTY + ); + InternalDateHistogram topLevel = (InternalDateHistogram) aggregator.buildTopLevel(); + InternalDateHistogram histogram = (InternalDateHistogram) topLevel.reduce(Collections.singletonList(topLevel), context); + doAssertReducedMultiBucketConsumer(histogram, reduceBucketConsumer); + + verify.accept(histogram); + + verifyCollectCount.accept(aggregator.getCollectCount().get()); + } + } + } + + protected CountingAggregator createCountingAggregator( + Query query, + AggregationBuilder builder, + IndexSearcher searcher, + MappedFieldType... 
fieldTypes + ) throws IOException { + return new CountingAggregator( + new AtomicInteger(), + createAggregator( + query, + builder, + searcher, + new MultiBucketConsumerService.MultiBucketConsumer( + DEFAULT_MAX_BUCKETS, + new NoneCircuitBreakerService().getBreaker(CircuitBreaker.REQUEST) + ), + fieldTypes + ) + ); + } + private static long asLong(String dateTime) { return DateFormatters.from(DateFieldMapper.getDefaultDateTimeFormatter().parse(dateTime)).toInstant().toEpochMilli(); } diff --git a/server/src/test/java/org/opensearch/search/builder/SearchSourceBuilderTests.java b/server/src/test/java/org/opensearch/search/builder/SearchSourceBuilderTests.java index eea7b1829e9b0..5b1035e24185d 100644 --- a/server/src/test/java/org/opensearch/search/builder/SearchSourceBuilderTests.java +++ b/server/src/test/java/org/opensearch/search/builder/SearchSourceBuilderTests.java @@ -571,7 +571,7 @@ public void testParseIndicesBoost() throws IOException { public void testNegativeFromErrors() { IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> new SearchSourceBuilder().from(-2)); - assertEquals("[from] parameter cannot be negative", expected.getMessage()); + assertEquals("[from] parameter cannot be negative, found [-2]", expected.getMessage()); } public void testNegativeSizeErrors() { @@ -582,6 +582,42 @@ public void testNegativeSizeErrors() { assertEquals("[size] parameter cannot be negative, found [-1]", expected.getMessage()); } + public void testParseFromAndSize() throws IOException { + int negativeFrom = randomIntBetween(-100, -1); + String restContent = " { \"from\": \"" + negativeFrom + "\"}"; + try (XContentParser parser = createParser(JsonXContent.jsonXContent, restContent)) { + IllegalArgumentException expected = expectThrows( + IllegalArgumentException.class, + () -> SearchSourceBuilder.fromXContent(parser) + ); + assertEquals("[from] parameter cannot be negative, found [" + negativeFrom + "]", expected.getMessage()); + } + + int 
validFrom = randomIntBetween(0, 100); + restContent = " { \"from\": \"" + validFrom + "\"}"; + try (XContentParser parser = createParser(JsonXContent.jsonXContent, restContent)) { + SearchSourceBuilder searchSourceBuilder = SearchSourceBuilder.fromXContent(parser); + assertEquals(validFrom, searchSourceBuilder.from()); + } + + int negativeSize = randomIntBetween(-100, -1); + restContent = " { \"size\": \"" + negativeSize + "\"}"; + try (XContentParser parser = createParser(JsonXContent.jsonXContent, restContent)) { + IllegalArgumentException expected = expectThrows( + IllegalArgumentException.class, + () -> SearchSourceBuilder.fromXContent(parser) + ); + assertEquals("[size] parameter cannot be negative, found [" + negativeSize + "]", expected.getMessage()); + } + + int validSize = randomIntBetween(0, 100); + restContent = " { \"size\": \"" + validSize + "\"}"; + try (XContentParser parser = createParser(JsonXContent.jsonXContent, restContent)) { + SearchSourceBuilder searchSourceBuilder = SearchSourceBuilder.fromXContent(parser); + assertEquals(validSize, searchSourceBuilder.size()); + } + } + private void assertIndicesBoostParseErrorMessage(String restContent, String expectedErrorMessage) throws IOException { try (XContentParser parser = createParser(JsonXContent.jsonXContent, restContent)) { ParsingException e = expectThrows(ParsingException.class, () -> SearchSourceBuilder.fromXContent(parser)); diff --git a/server/src/test/java/org/opensearch/search/fetch/subphase/InnerHitsPhaseTests.java b/server/src/test/java/org/opensearch/search/fetch/subphase/InnerHitsPhaseTests.java deleted file mode 100644 index 7ca5977a1c276..0000000000000 --- a/server/src/test/java/org/opensearch/search/fetch/subphase/InnerHitsPhaseTests.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. 
- */ - -package org.opensearch.search.fetch.subphase; - -import org.opensearch.index.query.QueryShardContext; -import org.opensearch.search.fetch.FetchContext; -import org.opensearch.search.internal.SearchContext; -import org.opensearch.search.lookup.SearchLookup; -import org.opensearch.test.OpenSearchTestCase; - -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -public class InnerHitsPhaseTests extends OpenSearchTestCase { - - /* - Returns mock search context reused across test methods - */ - private SearchContext getMockSearchContext(final boolean hasInnerHits) { - final QueryShardContext queryShardContext = mock(QueryShardContext.class); - when(queryShardContext.newFetchLookup()).thenReturn(mock(SearchLookup.class)); - - final SearchContext searchContext = mock(SearchContext.class); - when(searchContext.hasInnerHits()).thenReturn(hasInnerHits); - when(searchContext.getQueryShardContext()).thenReturn(queryShardContext); - - return searchContext; - } - - /* - Validates that InnerHitsPhase processor is not initialized when no inner hits - */ - public void testInnerHitsNull() { - assertNull(new InnerHitsPhase(null).getProcessor(new FetchContext(getMockSearchContext(false)))); - } - - /* - Validates that InnerHitsPhase processor is initialized when inner hits are present - */ - public void testInnerHitsNonNull() { - final SearchContext searchContext = getMockSearchContext(true); - when(searchContext.innerHits()).thenReturn(new InnerHitsContext()); - - assertNotNull(new InnerHitsPhase(null).getProcessor(new FetchContext(searchContext))); - } - -} diff --git a/server/src/test/java/org/opensearch/search/fetch/subphase/ScriptFieldsPhaseTests.java b/server/src/test/java/org/opensearch/search/fetch/subphase/ScriptFieldsPhaseTests.java deleted file mode 100644 index eb6338997ab9f..0000000000000 --- a/server/src/test/java/org/opensearch/search/fetch/subphase/ScriptFieldsPhaseTests.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * 
SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.search.fetch.subphase; - -import org.opensearch.index.query.QueryShardContext; -import org.opensearch.search.fetch.FetchContext; -import org.opensearch.search.internal.SearchContext; -import org.opensearch.search.lookup.SearchLookup; -import org.opensearch.test.OpenSearchTestCase; - -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -public class ScriptFieldsPhaseTests extends OpenSearchTestCase { - - /* - Returns mock search context reused across test methods - */ - private SearchContext getMockSearchContext(final boolean hasScriptFields) { - final QueryShardContext queryShardContext = mock(QueryShardContext.class); - when(queryShardContext.newFetchLookup()).thenReturn(mock(SearchLookup.class)); - - final SearchContext searchContext = mock(SearchContext.class); - when(searchContext.hasScriptFields()).thenReturn(hasScriptFields); - when(searchContext.getQueryShardContext()).thenReturn(queryShardContext); - - return searchContext; - } - - /* - Validates that ScriptFieldsPhase processor is not initialized when no script fields - */ - public void testScriptFieldsNull() { - assertNull(new ScriptFieldsPhase().getProcessor(new FetchContext(getMockSearchContext(false)))); - } - - /* - Validates that ScriptFieldsPhase processor is initialized when script fields are present - */ - public void testScriptFieldsNonNull() { - final SearchContext searchContext = getMockSearchContext(true); - when(searchContext.scriptFields()).thenReturn(new ScriptFieldsContext()); - - assertNotNull(new ScriptFieldsPhase().getProcessor(new FetchContext(searchContext))); - } - -} diff --git a/server/src/test/java/org/opensearch/search/fetch/subphase/highlight/DerivedFieldFetchAndHighlightTests.java 
b/server/src/test/java/org/opensearch/search/fetch/subphase/highlight/DerivedFieldFetchAndHighlightTests.java index 28d97c74d9445..92127da9654aa 100644 --- a/server/src/test/java/org/opensearch/search/fetch/subphase/highlight/DerivedFieldFetchAndHighlightTests.java +++ b/server/src/test/java/org/opensearch/search/fetch/subphase/highlight/DerivedFieldFetchAndHighlightTests.java @@ -144,17 +144,17 @@ public void testDerivedFieldFromIndexMapping() throws IOException { // create a fetch context to be used by HighlightPhase processor FetchContext fetchContext = mock(FetchContext.class); - when(fetchContext.mapperService()).thenReturn(mockShardContext.getMapperService()); + when(fetchContext.mapperService()).thenReturn(mapperService); when(fetchContext.getQueryShardContext()).thenReturn(mockShardContext); when(fetchContext.getIndexSettings()).thenReturn(indexService.getIndexSettings()); when(fetchContext.searcher()).thenReturn( new ContextIndexSearcher( - searcher.getIndexReader(), - searcher.getSimilarity(), - searcher.getQueryCache(), - searcher.getQueryCachingPolicy(), + reader, + IndexSearcher.getDefaultSimilarity(), + IndexSearcher.getDefaultQueryCache(), + IndexSearcher.getDefaultQueryCachingPolicy(), true, - searcher.getExecutor(), + null, null ) ); @@ -253,17 +253,17 @@ public void testDerivedFieldFromSearchMapping() throws IOException { // create a fetch context to be used by HighlightPhase processor FetchContext fetchContext = mock(FetchContext.class); - when(fetchContext.mapperService()).thenReturn(mockShardContext.getMapperService()); + when(fetchContext.mapperService()).thenReturn(mapperService); when(fetchContext.getQueryShardContext()).thenReturn(mockShardContext); when(fetchContext.getIndexSettings()).thenReturn(indexService.getIndexSettings()); when(fetchContext.searcher()).thenReturn( new ContextIndexSearcher( - searcher.getIndexReader(), - searcher.getSimilarity(), - searcher.getQueryCache(), - searcher.getQueryCachingPolicy(), + reader, + 
IndexSearcher.getDefaultSimilarity(), + IndexSearcher.getDefaultQueryCache(), + IndexSearcher.getDefaultQueryCachingPolicy(), true, - searcher.getExecutor(), + null, null ) ); diff --git a/server/src/test/java/org/opensearch/search/pipeline/SearchPipelineServiceTests.java b/server/src/test/java/org/opensearch/search/pipeline/SearchPipelineServiceTests.java index f5851e669a2da..f5857922fdff2 100644 --- a/server/src/test/java/org/opensearch/search/pipeline/SearchPipelineServiceTests.java +++ b/server/src/test/java/org/opensearch/search/pipeline/SearchPipelineServiceTests.java @@ -32,7 +32,9 @@ import org.opensearch.cluster.ClusterChangedEvent; import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.metadata.AliasMetadata; import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.metadata.IndexNameExpressionResolver; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.service.ClusterService; @@ -41,6 +43,7 @@ import org.opensearch.common.settings.Settings; import org.opensearch.common.util.concurrent.AtomicArray; import org.opensearch.common.util.concurrent.OpenSearchExecutors; +import org.opensearch.common.util.concurrent.ThreadContext; import org.opensearch.common.xcontent.XContentType; import org.opensearch.core.action.ActionListener; import org.opensearch.core.common.breaker.CircuitBreaker; @@ -96,14 +99,31 @@ public Map> getSearchPhas private ThreadPool threadPool; + private IndexNameExpressionResolver indexNameExpressionResolver; + @Before public void setup() { threadPool = mock(ThreadPool.class); ExecutorService executorService = OpenSearchExecutors.newDirectExecutorService(); + indexNameExpressionResolver = new IndexNameExpressionResolver(new ThreadContext(Settings.EMPTY)); when(threadPool.generic()).thenReturn(executorService); when(threadPool.executor(anyString())).thenReturn(executorService); } 
+ private static IndexMetadata.Builder indexBuilder(String index) { + return indexBuilder(index, Settings.EMPTY); + } + + private static IndexMetadata.Builder indexBuilder(String index, Settings additionalSettings) { + return IndexMetadata.builder(index).settings(addAdditionalSettings(additionalSettings)); + } + + private static Settings.Builder addAdditionalSettings(Settings additionalSettings) { + return settings(Version.CURRENT).put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put(additionalSettings); + } + public void testSearchPipelinePlugin() { Client client = mock(Client.class); SearchPipelineService searchPipelineService = new SearchPipelineService( @@ -162,7 +182,7 @@ public void testResolveSearchPipelineDoesNotExist() { final SearchRequest searchRequest = new SearchRequest("_index").pipeline("bar"); IllegalArgumentException e = expectThrows( IllegalArgumentException.class, - () -> searchPipelineService.resolvePipeline(searchRequest) + () -> searchPipelineService.resolvePipeline(searchRequest, indexNameExpressionResolver) ); assertTrue(e.getMessage(), e.getMessage().contains(" not defined")); } @@ -197,13 +217,13 @@ public void testResolveIndexDefaultPipeline() throws Exception { service.applyClusterState(cce); SearchRequest searchRequest = new SearchRequest("my_index").source(SearchSourceBuilder.searchSource().size(5)); - PipelinedRequest pipelinedRequest = syncTransformRequest(service.resolvePipeline(searchRequest)); + PipelinedRequest pipelinedRequest = syncTransformRequest(service.resolvePipeline(searchRequest, indexNameExpressionResolver)); assertEquals("p1", pipelinedRequest.getPipeline().getId()); assertEquals(10, pipelinedRequest.source().size()); // Bypass the default pipeline searchRequest.pipeline("_none"); - pipelinedRequest = service.resolvePipeline(searchRequest); + pipelinedRequest = service.resolvePipeline(searchRequest, indexNameExpressionResolver); assertEquals("_none", 
pipelinedRequest.getPipeline().getId()); assertEquals(5, pipelinedRequest.source().size()); } @@ -591,23 +611,29 @@ public void testTransformRequest() throws Exception { ); ClusterState clusterState = ClusterState.builder(new ClusterName("_name")).build(); ClusterState previousState = clusterState; - clusterState = ClusterState.builder(clusterState) - .metadata(Metadata.builder().putCustom(SearchPipelineMetadata.TYPE, metadata)) - .build(); + + Metadata.Builder mdBuilder = Metadata.builder() + .putCustom(SearchPipelineMetadata.TYPE, metadata) + .put(indexBuilder("my-index").putAlias(AliasMetadata.builder("barbaz"))); + + clusterState = ClusterState.builder(clusterState).metadata(mdBuilder).build(); + searchPipelineService.applyClusterState(new ClusterChangedEvent("", clusterState, previousState)); int size = 10; SearchSourceBuilder sourceBuilder = new SearchSourceBuilder().query(new TermQueryBuilder("foo", "bar")).size(size); - SearchRequest request = new SearchRequest("_index").source(sourceBuilder).pipeline("p1"); + SearchRequest request = new SearchRequest("my-index").source(sourceBuilder).pipeline("p1"); - PipelinedRequest pipelinedRequest = syncTransformRequest(searchPipelineService.resolvePipeline(request)); + PipelinedRequest pipelinedRequest = syncTransformRequest( + searchPipelineService.resolvePipeline(request, indexNameExpressionResolver) + ); assertEquals(2 * size, pipelinedRequest.source().size()); assertEquals(size, request.source().size()); // This request doesn't specify a pipeline, it doesn't get transformed. 
- request = new SearchRequest("_index").source(sourceBuilder); - pipelinedRequest = searchPipelineService.resolvePipeline(request); + request = new SearchRequest("my-index").source(sourceBuilder); + pipelinedRequest = searchPipelineService.resolvePipeline(request, indexNameExpressionResolver); assertEquals(size, pipelinedRequest.source().size()); } @@ -643,13 +669,13 @@ public void testTransformResponse() throws Exception { // First try without specifying a pipeline, which should be a no-op. SearchRequest searchRequest = new SearchRequest(); - PipelinedRequest pipelinedRequest = searchPipelineService.resolvePipeline(searchRequest); + PipelinedRequest pipelinedRequest = searchPipelineService.resolvePipeline(searchRequest, indexNameExpressionResolver); SearchResponse notTransformedResponse = syncTransformResponse(pipelinedRequest, searchResponse); assertSame(searchResponse, notTransformedResponse); // Now apply a pipeline searchRequest = new SearchRequest().pipeline("p1"); - pipelinedRequest = searchPipelineService.resolvePipeline(searchRequest); + pipelinedRequest = searchPipelineService.resolvePipeline(searchRequest, indexNameExpressionResolver); SearchResponse transformedResponse = syncTransformResponse(pipelinedRequest, searchResponse); assertEquals(size, transformedResponse.getHits().getHits().length); for (int i = 0; i < size; i++) { @@ -736,7 +762,7 @@ public void testTransformSearchPhase() { // First try without specifying a pipeline, which should be a no-op. 
SearchRequest searchRequest = new SearchRequest(); - PipelinedRequest pipelinedRequest = searchPipelineService.resolvePipeline(searchRequest); + PipelinedRequest pipelinedRequest = searchPipelineService.resolvePipeline(searchRequest, indexNameExpressionResolver); AtomicArray notTransformedSearchPhaseResults = searchPhaseResults.getAtomicArray(); pipelinedRequest.transformSearchPhaseResults( searchPhaseResults, @@ -748,7 +774,7 @@ public void testTransformSearchPhase() { // Now set the pipeline as p1 searchRequest = new SearchRequest().pipeline("p1"); - pipelinedRequest = searchPipelineService.resolvePipeline(searchRequest); + pipelinedRequest = searchPipelineService.resolvePipeline(searchRequest, indexNameExpressionResolver); pipelinedRequest.transformSearchPhaseResults( searchPhaseResults, @@ -766,7 +792,7 @@ public void testTransformSearchPhase() { // Check Processor doesn't run for between other phases searchRequest = new SearchRequest().pipeline("p1"); - pipelinedRequest = searchPipelineService.resolvePipeline(searchRequest); + pipelinedRequest = searchPipelineService.resolvePipeline(searchRequest, indexNameExpressionResolver); AtomicArray notTransformedSearchPhaseResult = searchPhaseResults.getAtomicArray(); pipelinedRequest.transformSearchPhaseResults( searchPhaseResults, @@ -916,7 +942,9 @@ public void testInlinePipeline() throws Exception { SearchRequest searchRequest = new SearchRequest().source(sourceBuilder); // Verify pipeline - PipelinedRequest pipelinedRequest = syncTransformRequest(searchPipelineService.resolvePipeline(searchRequest)); + PipelinedRequest pipelinedRequest = syncTransformRequest( + searchPipelineService.resolvePipeline(searchRequest, indexNameExpressionResolver) + ); Pipeline pipeline = pipelinedRequest.getPipeline(); assertEquals(SearchPipelineService.AD_HOC_PIPELINE_ID, pipeline.getId()); assertEquals(1, pipeline.getSearchRequestProcessors().size()); @@ -961,7 +989,10 @@ public void testExceptionOnPipelineCreation() { SearchRequest 
searchRequest = new SearchRequest().source(sourceBuilder); // Exception thrown when creating the pipeline - expectThrows(SearchPipelineProcessingException.class, () -> searchPipelineService.resolvePipeline(searchRequest)); + expectThrows( + SearchPipelineProcessingException.class, + () -> searchPipelineService.resolvePipeline(searchRequest, indexNameExpressionResolver) + ); } @@ -989,7 +1020,7 @@ public void testExceptionOnRequestProcessing() { // Exception thrown when processing the request expectThrows( SearchPipelineProcessingException.class, - () -> syncTransformRequest(searchPipelineService.resolvePipeline(searchRequest)) + () -> syncTransformRequest(searchPipelineService.resolvePipeline(searchRequest, indexNameExpressionResolver)) ); } @@ -1014,7 +1045,7 @@ public void testExceptionOnResponseProcessing() throws Exception { SearchSourceBuilder sourceBuilder = SearchSourceBuilder.searchSource().size(100).searchPipelineSource(pipelineSourceMap); SearchRequest searchRequest = new SearchRequest().source(sourceBuilder); - PipelinedRequest pipelinedRequest = searchPipelineService.resolvePipeline(searchRequest); + PipelinedRequest pipelinedRequest = searchPipelineService.resolvePipeline(searchRequest, indexNameExpressionResolver); SearchResponse response = new SearchResponse(null, null, 0, 0, 0, 0, null, null); // Exception thrown when processing response @@ -1052,7 +1083,7 @@ public void testCatchExceptionOnRequestProcessing() throws Exception { "The exception from request processor [throwing_request] in the search pipeline [_ad_hoc_pipeline] was ignored" ) ); - syncTransformRequest(searchPipelineService.resolvePipeline(searchRequest)); + syncTransformRequest(searchPipelineService.resolvePipeline(searchRequest, indexNameExpressionResolver)); mockAppender.assertAllExpectationsMatched(); } } @@ -1078,7 +1109,7 @@ public void testCatchExceptionOnResponseProcessing() throws Exception { SearchSourceBuilder sourceBuilder = 
SearchSourceBuilder.searchSource().size(100).searchPipelineSource(pipelineSourceMap); SearchRequest searchRequest = new SearchRequest().source(sourceBuilder); - PipelinedRequest pipelinedRequest = searchPipelineService.resolvePipeline(searchRequest); + PipelinedRequest pipelinedRequest = searchPipelineService.resolvePipeline(searchRequest, indexNameExpressionResolver); SearchResponse response = new SearchResponse(null, null, 0, 0, 0, 0, null, null); @@ -1122,15 +1153,27 @@ public void testStats() throws Exception { SearchRequest request = new SearchRequest(); SearchResponse response = new SearchResponse(null, null, 0, 0, 0, 0, null, null); - syncExecutePipeline(searchPipelineService.resolvePipeline(request.pipeline("good_request_pipeline")), response); + syncExecutePipeline( + searchPipelineService.resolvePipeline(request.pipeline("good_request_pipeline"), indexNameExpressionResolver), + response + ); expectThrows( SearchPipelineProcessingException.class, - () -> syncExecutePipeline(searchPipelineService.resolvePipeline(request.pipeline("bad_request_pipeline")), response) + () -> syncExecutePipeline( + searchPipelineService.resolvePipeline(request.pipeline("bad_request_pipeline"), indexNameExpressionResolver), + response + ) + ); + syncExecutePipeline( + searchPipelineService.resolvePipeline(request.pipeline("good_response_pipeline"), indexNameExpressionResolver), + response ); - syncExecutePipeline(searchPipelineService.resolvePipeline(request.pipeline("good_response_pipeline")), response); expectThrows( SearchPipelineProcessingException.class, - () -> syncExecutePipeline(searchPipelineService.resolvePipeline(request.pipeline("bad_response_pipeline")), response) + () -> syncExecutePipeline( + searchPipelineService.resolvePipeline(request.pipeline("bad_response_pipeline"), indexNameExpressionResolver), + response + ) ); SearchPipelineStats stats = searchPipelineService.stats(); @@ -1208,12 +1251,24 @@ public void testStatsEnabledIgnoreFailure() throws Exception { 
SearchRequest request = new SearchRequest(); SearchResponse response = new SearchResponse(null, null, 0, 0, 0, 0, null, null); - syncExecutePipeline(searchPipelineService.resolvePipeline(request.pipeline("good_request_pipeline")), response); + syncExecutePipeline( + searchPipelineService.resolvePipeline(request.pipeline("good_request_pipeline"), indexNameExpressionResolver), + response + ); // Caught Exception here - syncExecutePipeline(searchPipelineService.resolvePipeline(request.pipeline("bad_request_pipeline")), response); - syncExecutePipeline(searchPipelineService.resolvePipeline(request.pipeline("good_response_pipeline")), response); + syncExecutePipeline( + searchPipelineService.resolvePipeline(request.pipeline("bad_request_pipeline"), indexNameExpressionResolver), + response + ); + syncExecutePipeline( + searchPipelineService.resolvePipeline(request.pipeline("good_response_pipeline"), indexNameExpressionResolver), + response + ); // Caught Exception here - syncExecutePipeline(searchPipelineService.resolvePipeline(request.pipeline("bad_response_pipeline")), response); + syncExecutePipeline( + searchPipelineService.resolvePipeline(request.pipeline("bad_response_pipeline"), indexNameExpressionResolver), + response + ); // when ignoreFailure enabled, the search pipelines will all succeed. 
SearchPipelineStats stats = searchPipelineService.stats(); @@ -1355,7 +1410,10 @@ public void testAdHocRejectingProcessor() { SearchSourceBuilder sourceBuilder = SearchSourceBuilder.searchSource().searchPipelineSource(pipelineSourceMap); SearchRequest searchRequest = new SearchRequest().source(sourceBuilder); - expectThrows(SearchPipelineProcessingException.class, () -> searchPipelineService.resolvePipeline(searchRequest)); + expectThrows( + SearchPipelineProcessingException.class, + () -> searchPipelineService.resolvePipeline(searchRequest, indexNameExpressionResolver) + ); } public void testExtraParameterInProcessorConfig() { @@ -1369,7 +1427,7 @@ public void testExtraParameterInProcessorConfig() { SearchSourceBuilder sourceBuilder = SearchSourceBuilder.searchSource().searchPipelineSource(pipelineSourceMap); SearchRequest searchRequest = new SearchRequest().source(sourceBuilder); try { - searchPipelineService.resolvePipeline(searchRequest); + searchPipelineService.resolvePipeline(searchRequest, indexNameExpressionResolver); fail("Exception should have been thrown"); } catch (SearchPipelineProcessingException e) { assertTrue( @@ -1462,10 +1520,183 @@ public void testStatefulProcessors() throws Exception { .build(); searchPipelineService.applyClusterState(new ClusterChangedEvent("", clusterState, previousState)); - PipelinedRequest request = searchPipelineService.resolvePipeline(new SearchRequest().pipeline("p1")); + PipelinedRequest request = searchPipelineService.resolvePipeline(new SearchRequest().pipeline("p1"), indexNameExpressionResolver); assertNull(contextHolder.get()); syncExecutePipeline(request, new SearchResponse(null, null, 0, 0, 0, 0, null, null)); assertNotNull(contextHolder.get()); assertEquals("b", contextHolder.get()); } + + public void testDefaultPipelineForMultipleIndices() throws Exception { + SearchPipelineService service = createWithProcessors(); + + SearchPipelineMetadata metadata = new SearchPipelineMetadata( + Map.of( + "p1", + new 
PipelineConfiguration( + "p1", + new BytesArray("{\"request_processors\" : [ { \"scale_request_size\": { \"scale\" : 2 } } ] }"), + MediaTypeRegistry.JSON + ) + ) + ); + + Settings defaultPipelineSetting = Settings.builder() + .put(IndexMetadata.INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), 1) + .put(IndexMetadata.INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), 0) + .put(IndexMetadata.SETTING_INDEX_VERSION_CREATED.getKey(), Version.CURRENT) + .put(IndexSettings.DEFAULT_SEARCH_PIPELINE.getKey(), "p1") + .build(); + + Metadata.Builder mdBuilder = Metadata.builder() + .put(indexBuilder("foo", defaultPipelineSetting).putAlias(AliasMetadata.builder("bar"))) + .put(indexBuilder("foobar", defaultPipelineSetting).putAlias(AliasMetadata.builder("bar"))) + .put(indexBuilder("foofoo-closed", defaultPipelineSetting).putAlias(AliasMetadata.builder("bar"))) + .put(indexBuilder("foofoo", defaultPipelineSetting).putAlias(AliasMetadata.builder("bar"))); + + ClusterState clusterState = ClusterState.builder(new ClusterName("_name")).build(); + ClusterState previousState = clusterState; + + clusterState = ClusterState.builder(clusterState).metadata(mdBuilder.putCustom(SearchPipelineMetadata.TYPE, metadata)).build(); + + ClusterChangedEvent cce = new ClusterChangedEvent("", clusterState, previousState); + service.applyClusterState(cce); + + SearchRequest searchRequest = new SearchRequest("bar").source(SearchSourceBuilder.searchSource().size(5)); + PipelinedRequest pipelinedRequest = syncTransformRequest(service.resolvePipeline(searchRequest, indexNameExpressionResolver)); + assertEquals("p1", pipelinedRequest.getPipeline().getId()); + assertEquals(10, pipelinedRequest.source().size()); + + // Bypass the default pipeline + searchRequest.pipeline("_none"); + pipelinedRequest = service.resolvePipeline(searchRequest, indexNameExpressionResolver); + assertEquals("_none", pipelinedRequest.getPipeline().getId()); + assertEquals(5, pipelinedRequest.source().size()); + } + + public void 
testDifferentDefaultPipelineForMultipleIndices() throws Exception { + SearchPipelineService service = createWithProcessors(); + + SearchPipelineMetadata metadata = new SearchPipelineMetadata( + Map.of( + "p1", + new PipelineConfiguration( + "p1", + new BytesArray("{\"request_processors\" : [ { \"scale_request_size\": { \"scale\" : 2 } } ] }"), + MediaTypeRegistry.JSON + ), + + "p2", + new PipelineConfiguration( + "p2", + new BytesArray("{\"request_processors\" : [ { \"scale_request_size\": { \"scale\" : 1 } } ] }"), + MediaTypeRegistry.JSON + ) + ) + ); + + Settings defaultPipelineSetting1 = Settings.builder() + .put(IndexMetadata.INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), 1) + .put(IndexMetadata.INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), 0) + .put(IndexMetadata.SETTING_INDEX_VERSION_CREATED.getKey(), Version.CURRENT) + .put(IndexSettings.DEFAULT_SEARCH_PIPELINE.getKey(), "p1") + .build(); + + Settings defaultPipelineSetting2 = Settings.builder() + .put(IndexMetadata.INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), 1) + .put(IndexMetadata.INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), 0) + .put(IndexMetadata.SETTING_INDEX_VERSION_CREATED.getKey(), Version.CURRENT) + .put(IndexSettings.DEFAULT_SEARCH_PIPELINE.getKey(), "p2") + .build(); + + Metadata.Builder mdBuilder = Metadata.builder() + .put(indexBuilder("foo", defaultPipelineSetting1).putAlias(AliasMetadata.builder("bar"))) + .put(indexBuilder("foobar", defaultPipelineSetting1).putAlias(AliasMetadata.builder("bar"))) + .put(indexBuilder("foofoo-closed", defaultPipelineSetting1).putAlias(AliasMetadata.builder("bar"))) + .put(indexBuilder("foofoo", defaultPipelineSetting2).putAlias(AliasMetadata.builder("bar"))); + + ClusterState clusterState = ClusterState.builder(new ClusterName("_name")).build(); + ClusterState previousState = clusterState; + + clusterState = ClusterState.builder(clusterState).metadata(mdBuilder.putCustom(SearchPipelineMetadata.TYPE, metadata)).build(); + + ClusterChangedEvent cce = new 
ClusterChangedEvent("", clusterState, previousState); + service.applyClusterState(cce); + + SearchRequest searchRequest = new SearchRequest("bar").source(SearchSourceBuilder.searchSource().size(5)); + PipelinedRequest pipelinedRequest = syncTransformRequest(service.resolvePipeline(searchRequest, indexNameExpressionResolver)); + assertEquals("_none", pipelinedRequest.getPipeline().getId()); + assertEquals(5, pipelinedRequest.source().size()); + } + + public void testNoIndexResolveIndexDefaultPipeline() throws Exception { + SearchPipelineService service = createWithProcessors(); + + SearchPipelineMetadata metadata = new SearchPipelineMetadata( + Map.of( + "p1", + new PipelineConfiguration( + "p1", + new BytesArray("{\"request_processors\" : [ { \"scale_request_size\": { \"scale\" : 2 } } ] }"), + MediaTypeRegistry.JSON + ) + ) + ); + Settings defaultPipelineSetting = Settings.builder() + .put(IndexMetadata.INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), 1) + .put(IndexMetadata.INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), 0) + .put(IndexMetadata.SETTING_INDEX_VERSION_CREATED.getKey(), Version.CURRENT) + .put(IndexSettings.DEFAULT_SEARCH_PIPELINE.getKey(), "p1") + .build(); + IndexMetadata indexMetadata = new IndexMetadata.Builder("my_index").settings(defaultPipelineSetting).build(); + ClusterState clusterState = ClusterState.builder(new ClusterName("_name")).build(); + ClusterState previousState = clusterState; + clusterState = ClusterState.builder(clusterState) + .metadata(Metadata.builder().put(indexMetadata, false).putCustom(SearchPipelineMetadata.TYPE, metadata)) + .build(); + + ClusterChangedEvent cce = new ClusterChangedEvent("", clusterState, previousState); + service.applyClusterState(cce); + + SearchRequest searchRequest = new SearchRequest().source(SearchSourceBuilder.searchSource().size(5)); + PipelinedRequest pipelinedRequest = syncTransformRequest(service.resolvePipeline(searchRequest, indexNameExpressionResolver)); + assertEquals("_none", 
pipelinedRequest.getPipeline().getId()); + assertEquals(5, pipelinedRequest.source().size()); + } + + public void testInvalidIndexResolveIndexDefaultPipeline() throws Exception { + SearchPipelineService service = createWithProcessors(); + + SearchPipelineMetadata metadata = new SearchPipelineMetadata( + Map.of( + "p1", + new PipelineConfiguration( + "p1", + new BytesArray("{\"request_processors\" : [ { \"scale_request_size\": { \"scale\" : 2 } } ] }"), + MediaTypeRegistry.JSON + ) + ) + ); + Settings defaultPipelineSetting = Settings.builder() + .put(IndexMetadata.INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), 1) + .put(IndexMetadata.INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), 0) + .put(IndexMetadata.SETTING_INDEX_VERSION_CREATED.getKey(), Version.CURRENT) + .put(IndexSettings.DEFAULT_SEARCH_PIPELINE.getKey(), "p1") + .build(); + IndexMetadata indexMetadata = new IndexMetadata.Builder("my_index").settings(defaultPipelineSetting).build(); + ClusterState clusterState = ClusterState.builder(new ClusterName("_name")).build(); + ClusterState previousState = clusterState; + clusterState = ClusterState.builder(clusterState) + .metadata(Metadata.builder().put(indexMetadata, false).putCustom(SearchPipelineMetadata.TYPE, metadata)) + .build(); + + ClusterChangedEvent cce = new ClusterChangedEvent("", clusterState, previousState); + service.applyClusterState(cce); + + SearchRequest searchRequest = new SearchRequest("xyz").source(SearchSourceBuilder.searchSource().size(5)); + PipelinedRequest pipelinedRequest = syncTransformRequest(service.resolvePipeline(searchRequest, indexNameExpressionResolver)); + assertEquals("_none", pipelinedRequest.getPipeline().getId()); + assertEquals(5, pipelinedRequest.source().size()); + } + } diff --git a/server/src/test/java/org/opensearch/snapshots/SnapshotUtilsTests.java b/server/src/test/java/org/opensearch/snapshots/SnapshotUtilsTests.java index 14e711e03a345..871f2cdd88ea1 100644 --- 
a/server/src/test/java/org/opensearch/snapshots/SnapshotUtilsTests.java +++ b/server/src/test/java/org/opensearch/snapshots/SnapshotUtilsTests.java @@ -46,6 +46,7 @@ import java.util.Map; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_VERSION_CREATED; +import static org.opensearch.common.util.IndexUtils.filterIndices; import static org.hamcrest.Matchers.containsInAnyOrder; public class SnapshotUtilsTests extends OpenSearchTestCase { @@ -94,7 +95,7 @@ private void assertIndexNameFiltering(String[] indices, String[] filter, String[ private void assertIndexNameFiltering(String[] indices, String[] filter, IndicesOptions indicesOptions, String[] expected) { List indicesList = Arrays.asList(indices); - List actual = SnapshotUtils.filterIndices(indicesList, filter, indicesOptions); + List actual = filterIndices(indicesList, filter, indicesOptions); assertThat(actual, containsInAnyOrder(expected)); } diff --git a/settings.gradle b/settings.gradle index e7d3dc56b67b9..8e961b9d4179f 100644 --- a/settings.gradle +++ b/settings.gradle @@ -10,7 +10,7 @@ */ plugins { - id "com.gradle.enterprise" version "3.17.1" + id "com.gradle.enterprise" version "3.17.2" } ext.disableBuildCache = hasProperty('DISABLE_BUILD_CACHE') || System.getenv().containsKey('DISABLE_BUILD_CACHE') diff --git a/test/fixtures/hdfs-fixture/build.gradle b/test/fixtures/hdfs-fixture/build.gradle index a6275f200217a..8ac13d897e0b5 100644 --- a/test/fixtures/hdfs-fixture/build.gradle +++ b/test/fixtures/hdfs-fixture/build.gradle @@ -69,7 +69,7 @@ dependencies { api "org.eclipse.jetty:jetty-server:${versions.jetty}" api "org.eclipse.jetty.websocket:javax-websocket-server-impl:${versions.jetty}" api 'org.apache.zookeeper:zookeeper:3.9.2' - api "org.apache.commons:commons-text:1.11.0" + api "org.apache.commons:commons-text:1.12.0" api "commons-net:commons-net:3.10.0" api "ch.qos.logback:logback-core:1.5.3" api "ch.qos.logback:logback-classic:1.2.13" @@ -78,6 +78,6 @@ dependencies { 
runtimeOnly("com.squareup.okhttp3:okhttp:4.12.0") { exclude group: "com.squareup.okio" } - runtimeOnly "com.squareup.okio:okio:3.8.0" + runtimeOnly "com.squareup.okio:okio:3.9.0" runtimeOnly "org.xerial.snappy:snappy-java:1.1.10.5" } diff --git a/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java b/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java index 1ad6083074025..35ca5d80aeb4e 100644 --- a/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java +++ b/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java @@ -124,7 +124,8 @@ List adjustNodesStats(List nodesStats) { nodeStats.getSearchPipelineStats(), nodeStats.getSegmentReplicationRejectionStats(), nodeStats.getRepositoriesStats(), - nodeStats.getAdmissionControlStats() + nodeStats.getAdmissionControlStats(), + nodeStats.getNodeCacheStats() ); }).collect(Collectors.toList()); } diff --git a/test/framework/src/main/java/org/opensearch/index/engine/EngineTestCase.java b/test/framework/src/main/java/org/opensearch/index/engine/EngineTestCase.java index 1cb5501810c5d..3403425d89254 100644 --- a/test/framework/src/main/java/org/opensearch/index/engine/EngineTestCase.java +++ b/test/framework/src/main/java/org/opensearch/index/engine/EngineTestCase.java @@ -527,7 +527,14 @@ protected Translog createTranslog(LongSupplier primaryTermSupplier) throws IOExc } protected Translog createTranslog(Path translogPath, LongSupplier primaryTermSupplier) throws IOException { - TranslogConfig translogConfig = new TranslogConfig(shardId, translogPath, INDEX_SETTINGS, BigArrays.NON_RECYCLING_INSTANCE, ""); + TranslogConfig translogConfig = new TranslogConfig( + shardId, + translogPath, + INDEX_SETTINGS, + BigArrays.NON_RECYCLING_INSTANCE, + "", + false + ); String translogUUID = Translog.createEmptyTranslog( translogPath, SequenceNumbers.NO_OPS_PERFORMED, @@ -877,7 +884,8 @@ public EngineConfig 
config( translogPath, indexSettings, BigArrays.NON_RECYCLING_INSTANCE, - "" + "", + false ); final List extRefreshListenerList = externalRefreshListener == null ? emptyList() @@ -946,7 +954,14 @@ protected EngineConfig config( .put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), true) .build() ); - TranslogConfig translogConfig = new TranslogConfig(shardId, translogPath, indexSettings, BigArrays.NON_RECYCLING_INSTANCE, ""); + TranslogConfig translogConfig = new TranslogConfig( + shardId, + translogPath, + indexSettings, + BigArrays.NON_RECYCLING_INSTANCE, + "", + false + ); return new EngineConfig.Builder().shardId(config.getShardId()) .threadPool(config.getThreadPool()) .indexSettings(indexSettings) diff --git a/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestUtils.java b/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestUtils.java index d3a4a95c3bdef..abf8f2a4da6c1 100644 --- a/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestUtils.java +++ b/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestUtils.java @@ -21,6 +21,9 @@ import java.util.Map; public class IndexShardTestUtils { + public static final String MOCK_SEGMENT_REPO_NAME = "segment-test-repo"; + public static final String MOCK_TLOG_REPO_NAME = "tlog-test-repo"; + public static DiscoveryNode getFakeDiscoNode(String id) { return new DiscoveryNode( id, @@ -34,7 +37,8 @@ public static DiscoveryNode getFakeDiscoNode(String id) { public static DiscoveryNode getFakeRemoteEnabledNode(String id) { Map remoteNodeAttributes = new HashMap(); - remoteNodeAttributes.put(RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, "test-repo"); + remoteNodeAttributes.put(RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, MOCK_SEGMENT_REPO_NAME); + remoteNodeAttributes.put(RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY, MOCK_TLOG_REPO_NAME); return new DiscoveryNode( 
id, id, diff --git a/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java b/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java index e538dede07fc8..02e5d22e147d5 100644 --- a/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java +++ b/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java @@ -126,7 +126,6 @@ import org.opensearch.search.aggregations.AggregatorFactories.Builder; import org.opensearch.search.aggregations.MultiBucketConsumerService.MultiBucketConsumer; import org.opensearch.search.aggregations.bucket.nested.NestedAggregationBuilder; -import org.opensearch.search.aggregations.bucket.terms.TermsAggregator; import org.opensearch.search.aggregations.metrics.MetricsAggregator; import org.opensearch.search.aggregations.pipeline.PipelineAggregator; import org.opensearch.search.aggregations.pipeline.PipelineAggregator.PipelineTree; @@ -410,6 +409,7 @@ public boolean shouldCache(Query query) { ); fieldNameToType.putAll(getFieldAliases(fieldTypes)); + when(searchContext.maxAggRewriteFilters()).thenReturn(10_000); registerFieldTypes(searchContext, mapperService, fieldNameToType); doAnswer(invocation -> { /* Store the release-ables so we can release them at the end of the test case. 
This is important because aggregations don't @@ -1123,7 +1123,7 @@ protected static class CountingAggregator extends Aggregator { private final AtomicInteger collectCounter; public final Aggregator delegate; - public CountingAggregator(AtomicInteger collectCounter, TermsAggregator delegate) { + public CountingAggregator(AtomicInteger collectCounter, Aggregator delegate) { this.collectCounter = collectCounter; this.delegate = delegate; } diff --git a/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java b/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java index c2b964aa96212..ca80c65e58522 100644 --- a/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java +++ b/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java @@ -2736,6 +2736,7 @@ public void ensureEstimatedStats() { false, false, false, + false, false ); assertThat( diff --git a/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java b/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java index 286f0a1d91b4c..a9f6fdc86155d 100644 --- a/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java +++ b/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java @@ -96,6 +96,7 @@ import org.opensearch.common.concurrent.GatedCloseable; import org.opensearch.common.network.NetworkModule; import org.opensearch.common.regex.Regex; +import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.FeatureFlagSettings; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Setting.Property; @@ -147,6 +148,7 @@ import org.opensearch.indices.store.IndicesStore; import org.opensearch.monitor.os.OsInfo; import org.opensearch.node.NodeMocksPlugin; +import org.opensearch.node.remotestore.RemoteStoreNodeService; import org.opensearch.plugins.NetworkPlugin; import org.opensearch.plugins.Plugin; import 
org.opensearch.repositories.fs.ReloadableFsRepository; @@ -218,6 +220,8 @@ import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY; +import static org.opensearch.node.remotestore.RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING; +import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING; import static org.opensearch.test.XContentTestUtils.convertToMap; import static org.opensearch.test.XContentTestUtils.differenceBetweenMapsIgnoringArrayOrder; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; @@ -2405,6 +2409,12 @@ protected String primaryNodeName(String indexName) { return clusterState.getRoutingNodes().node(nodeId).node().getName(); } + protected String primaryNodeName(String indexName, int shardId) { + ClusterState clusterState = client().admin().cluster().prepareState().get().getState(); + String nodeId = clusterState.getRoutingTable().index(indexName).shard(shardId).primaryShard().currentNodeId(); + return clusterState.getRoutingNodes().node(nodeId).node().getName(); + } + protected String replicaNodeName(String indexName) { ClusterState clusterState = client().admin().cluster().prepareState().get().getState(); String nodeId = clusterState.getRoutingTable().index(indexName).shard(0).replicaShards().get(0).currentNodeId(); @@ -2426,6 +2436,15 @@ protected RefreshResponse refreshAndWaitForReplication(String... 
indices) { return refreshResponse; } + public boolean isMigratingToRemoteStore() { + ClusterSettings clusterSettings = clusterService().getClusterSettings(); + boolean isMixedMode = clusterSettings.get(REMOTE_STORE_COMPATIBILITY_MODE_SETTING) + .equals(RemoteStoreNodeService.CompatibilityMode.MIXED); + boolean isRemoteStoreMigrationDirection = clusterSettings.get(MIGRATION_DIRECTION_SETTING) + .equals(RemoteStoreNodeService.Direction.REMOTE_STORE); + return (isMixedMode && isRemoteStoreMigrationDirection); + } + /** * Waits until active/started replica shards are caught up with primary shard only when Segment Replication is enabled. * This doesn't wait for inactive/non-started replica shards to become active/started. @@ -2450,11 +2469,13 @@ protected void waitForReplication(String... indices) { for (ShardRouting replica : replicaRouting) { if (replica.state().toString().equals("STARTED")) { IndexShard replicaShard = getIndexShard(replica, index); - assertEquals( - "replica shards haven't caught up with primary", - getLatestSegmentInfoVersion(primaryShard), - getLatestSegmentInfoVersion(replicaShard) - ); + if (replicaShard.indexSettings().isSegRepEnabledOrRemoteNode()) { + assertEquals( + "replica shards haven't caught up with primary", + getLatestSegmentInfoVersion(primaryShard), + getLatestSegmentInfoVersion(replicaShard) + ); + } } } } @@ -2478,7 +2499,7 @@ protected void waitForReplication(String... indices) { * Checks if Segment Replication is enabled on Index. 
*/ protected boolean isSegmentReplicationEnabledForIndex(String index) { - return clusterService().state().getMetadata().isSegmentReplicationEnabled(index); + return clusterService().state().getMetadata().isSegmentReplicationEnabled(index) || isMigratingToRemoteStore(); } protected IndexShard getIndexShard(ShardRouting routing, String indexName) { diff --git a/test/framework/src/main/java/org/opensearch/test/gateway/TestGatewayAllocator.java b/test/framework/src/main/java/org/opensearch/test/gateway/TestGatewayAllocator.java index b1695ff00e0cc..b9f52a62f823a 100644 --- a/test/framework/src/main/java/org/opensearch/test/gateway/TestGatewayAllocator.java +++ b/test/framework/src/main/java/org/opensearch/test/gateway/TestGatewayAllocator.java @@ -183,4 +183,5 @@ public String getReplicationCheckPointKey(ShardId shardId, String nodeName) { public void addReplicationCheckpoint(ShardId shardId, String nodeName, ReplicationCheckpoint replicationCheckpoint) { shardIdNodeToReplicationCheckPointMap.putIfAbsent(getReplicationCheckPointKey(shardId, nodeName), replicationCheckpoint); } + } diff --git a/test/framework/src/main/java/org/opensearch/test/gateway/TestShardBatchGatewayAllocator.java b/test/framework/src/main/java/org/opensearch/test/gateway/TestShardBatchGatewayAllocator.java new file mode 100644 index 0000000000000..53a4e90adb976 --- /dev/null +++ b/test/framework/src/main/java/org/opensearch/test/gateway/TestShardBatchGatewayAllocator.java @@ -0,0 +1,144 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */
+
+package org.opensearch.test.gateway;
+
+import org.opensearch.cluster.node.DiscoveryNode;
+import org.opensearch.cluster.node.DiscoveryNodes;
+import org.opensearch.cluster.routing.ShardRouting;
+import org.opensearch.cluster.routing.allocation.AllocateUnassignedDecision;
+import org.opensearch.cluster.routing.allocation.RoutingAllocation;
+import org.opensearch.core.index.shard.ShardId;
+import org.opensearch.gateway.AsyncShardFetch;
+import org.opensearch.gateway.PrimaryShardBatchAllocator;
+import org.opensearch.gateway.ReplicaShardBatchAllocator;
+import org.opensearch.gateway.ShardsBatchGatewayAllocator;
+import org.opensearch.gateway.TransportNodesGatewayStartedShardHelper;
+import org.opensearch.gateway.TransportNodesListGatewayStartedShardsBatch;
+import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint;
+import org.opensearch.indices.store.TransportNodesListShardStoreMetadataBatch;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Test variant of {@link ShardsBatchGatewayAllocator} whose primary allocator builds its fetch result from the
+ * in-memory {@link #knownAllocations} map and whose replica allocator always returns an empty fetch result.
+ * <p>
+ * NOTE(review): the generic type arguments in this file were reconstructed from how each value is used below;
+ * confirm them against the upstream sources before relying on them.
+ */
+public class TestShardBatchGatewayAllocator extends ShardsBatchGatewayAllocator {
+
+    /** Node id -> (shard id -> routing of the shard copy on that node). Nothing in this class writes to it. */
+    Map<String, Map<ShardId, ShardRouting>> knownAllocations = new HashMap<>();
+    /** Nodes of the most recent allocation round; refreshed by {@link #allocateAllUnassignedShards}. */
+    DiscoveryNodes currentNodes = DiscoveryNodes.EMPTY_NODES;
+    /** Replication checkpoints keyed by {@link #getReplicationCheckPointKey(ShardId, String)}. */
+    Map<String, ReplicationCheckpoint> shardIdNodeToReplicationCheckPointMap = new HashMap<>();
+
+    /** Primary allocator whose fetch result is assembled from {@link #knownAllocations}. */
+    PrimaryShardBatchAllocator primaryBatchShardAllocator = new PrimaryShardBatchAllocator() {
+        /**
+         * Builds a fetch result for {@code eligibleShards} from {@link #knownAllocations}.
+         * {@code inEligibleShards} is not read.
+         */
+        @Override
+        protected AsyncShardFetch.FetchResult<TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShardsBatch> fetchData(
+            List<ShardRouting> eligibleShards,
+            List<ShardRouting> inEligibleShards,
+            RoutingAllocation allocation
+        ) {
+            Map<DiscoveryNode, TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShardsBatch> foundShards = new HashMap<>();
+            HashMap<ShardId, Set<String>> shardsToIgnoreNodes = new HashMap<>();
+            for (Map.Entry<String, Map<ShardId, ShardRouting>> entry : knownAllocations.entrySet()) {
+                String nodeId = entry.getKey();
+                Map<ShardId, ShardRouting> shardsOnNode = entry.getValue();
+                HashMap<ShardId, TransportNodesGatewayStartedShardHelper.GatewayStartedShard> adaptedResponse = new HashMap<>();
+
+                for (ShardRouting shardRouting : eligibleShards) {
+                    ShardId shardId = shardRouting.shardId();
+                    Set<String> ignoreNodes = allocation.getIgnoreNodes(shardId);
+
+                    // Report a copy only if the node holds the shard, is not ignored for it, and is in the current node set.
+                    if (shardsOnNode.containsKey(shardId) && ignoreNodes.contains(nodeId) == false && currentNodes.nodeExists(nodeId)) {
+                        TransportNodesGatewayStartedShardHelper.GatewayStartedShard nodeShard =
+                            new TransportNodesGatewayStartedShardHelper.GatewayStartedShard(
+                                shardsOnNode.get(shardId).allocationId().getId(),
+                                shardsOnNode.get(shardId).primary(),
+                                getReplicationCheckpoint(shardId, nodeId)
+                            );
+                        adaptedResponse.put(shardId, nodeShard);
+                        shardsToIgnoreNodes.put(shardId, ignoreNodes);
+                    }
+                    // NOTE(review): this put runs once per eligible shard (same key each time), so a node is only
+                    // registered when eligibleShards is non-empty; kept as-is to preserve behaviour.
+                    foundShards.put(
+                        currentNodes.get(nodeId),
+                        new TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShardsBatch(
+                            currentNodes.get(nodeId),
+                            adaptedResponse
+                        )
+                    );
+                }
+            }
+            return new AsyncShardFetch.FetchResult<>(foundShards, shardsToIgnoreNodes);
+        }
+    };
+
+    /** Replica allocator that always returns an empty fetch result and reports fetching as already initiated. */
+    ReplicaShardBatchAllocator replicaBatchShardAllocator = new ReplicaShardBatchAllocator() {
+
+        /** Returns a fetch result backed by two empty maps; none of the arguments are read. */
+        @Override
+        protected AsyncShardFetch.FetchResult<TransportNodesListShardStoreMetadataBatch.NodeStoreFilesMetadataBatch> fetchData(
+            List<ShardRouting> eligibleShards,
+            List<ShardRouting> inEligibleShards,
+            RoutingAllocation allocation
+        ) {
+            return new AsyncShardFetch.FetchResult<>(Collections.emptyMap(), Collections.emptyMap());
+        }
+
+        /** Always {@code true}, regardless of {@code shard}. */
+        @Override
+        protected boolean hasInitiatedFetching(ShardRouting shard) {
+            return true;
+        }
+    };
+
+    /** Records the allocation's nodes in {@link #currentNodes}, then delegates to the two test allocators above. */
+    @Override
+    public void allocateAllUnassignedShards(RoutingAllocation allocation, boolean primary) {
+        currentNodes = allocation.nodes();
+        innerAllocateUnassignedBatch(allocation, primaryBatchShardAllocator, replicaBatchShardAllocator, primary);
+    }
+
+    /** Intentionally empty. */
+    @Override
+    public void beforeAllocation(RoutingAllocation allocation) {}
+
+    /** Intentionally empty. */
+    @Override
+    public void afterPrimariesBeforeReplicas(RoutingAllocation allocation) {}
+
+    /** Public pass-through to the superclass method; presumably exposed so tests can call it directly. */
+    public Set<String> createAndUpdateBatches(RoutingAllocation allocation, boolean primary) {
+        return super.createAndUpdateBatches(allocation, primary);
+    }
+
+    /** Calls the two-argument superclass overload, passing {@code shard.primary()} as the second argument. */
+    public void safelyRemoveShardFromBatch(ShardRouting shard) {
+        super.safelyRemoveShardFromBatch(shard, shard.primary());
+    }
+
+    /** Public pass-through to the superclass method. */
+    public void safelyRemoveShardFromBothBatch(ShardRouting shardRouting) {
+        super.safelyRemoveShardFromBothBatch(shardRouting);
+    }
+
+    /** Public pass-through to the superclass method. */
+    public String getBatchId(ShardRouting shard, boolean primary) {
+        return super.getBatchId(shard, primary);
+    }
+
+    /** Returns the inherited {@code batchIdToStartedShardBatch} map itself, not a copy. */
+    public Map<String, ShardsBatch> getBatchIdToStartedShardBatch() {
+        return batchIdToStartedShardBatch;
+    }
+
+    /** Returns the inherited {@code batchIdToStoreShardBatch} map itself, not a copy. */
+    public Map<String, ShardsBatch> getBatchIdToStoreShardBatch() {
+        return batchIdToStoreShardBatch;
+    }
+
+    /** Delegates unchanged to the superclass implementation. */
+    @Override
+    public AllocateUnassignedDecision explainUnassignedShardAllocation(ShardRouting unassignedShard, RoutingAllocation routingAllocation) {
+        return super.explainUnassignedShardAllocation(unassignedShard, routingAllocation);
+    }
+
+    /** Looks up the checkpoint stored for the shard/node pair; returns {@code null} when none is stored. */
+    protected ReplicationCheckpoint getReplicationCheckpoint(ShardId shardId, String nodeName) {
+        return shardIdNodeToReplicationCheckPointMap.getOrDefault(getReplicationCheckPointKey(shardId, nodeName), null);
+    }
+
+    /** Builds the map key as {@code shardId.toString() + "_" + nodeName}. */
+    public String getReplicationCheckPointKey(ShardId shardId, String nodeName) {
+        return shardId.toString() + "_" + nodeName;
+    }
+}